mirror of
https://github.com/gchq/CyberChef.git
synced 2025-04-20 23:06:16 -04:00
Added parsing of headers.
This commit is contained in:
parent
c3994aa8e3
commit
7f97afd3e0
3 changed files with 172 additions and 78 deletions
|
@ -7,8 +7,9 @@
|
||||||
import Operation from "../Operation";
|
import Operation from "../Operation";
|
||||||
import OperationError from "../errors/OperationError";
|
import OperationError from "../errors/OperationError";
|
||||||
import Utils from "../Utils";
|
import Utils from "../Utils";
|
||||||
import {fromHex} from "../lib/Hex.mjs";
|
import { fromHex } from "../lib/Hex.mjs";
|
||||||
import { fromBase64 } from "../lib/Base64";
|
import { fromBase64 } from "../lib/Base64";
|
||||||
|
import cptable from "../vendor/js-codepage/cptable.js";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* MIME Decoding operation
|
* MIME Decoding operation
|
||||||
|
@ -23,24 +24,11 @@ class MIMEDecoding extends Operation {
|
||||||
|
|
||||||
this.name = "MIME Decoding";
|
this.name = "MIME Decoding";
|
||||||
this.module = "Default";
|
this.module = "Default";
|
||||||
this.description = "";
|
this.description = "Enables the decoding of MIME message header extensions for non-ASCII text";
|
||||||
this.infoURL = "";
|
this.infoURL = "https://tools.ietf.org/html/rfc2047";
|
||||||
this.inputType = "byteArray";
|
this.inputType = "byteArray";
|
||||||
this.outputType = "string";
|
this.outputType = "string";
|
||||||
this.args = [
|
this.args = [];
|
||||||
/* Example arguments. See the project wiki for full details.
|
|
||||||
{
|
|
||||||
name: "First arg",
|
|
||||||
type: "string",
|
|
||||||
value: "Don't Panic"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "Second arg",
|
|
||||||
type: "number",
|
|
||||||
value: 42
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -49,73 +37,135 @@ class MIMEDecoding extends Operation {
|
||||||
* @returns {string}
|
* @returns {string}
|
||||||
*/
|
*/
|
||||||
run(input, args) {
|
run(input, args) {
|
||||||
|
const mimeEncodedText = Utils.byteArrayToUtf8(input);
|
||||||
|
const encodedHeaders = mimeEncodedText.replace(/\r\n/g, "\n");
|
||||||
|
|
||||||
let mimeEncodedText = Utils.byteArrayToUtf8(input)
|
const decodedHeader = this.decodeHeaders(encodedHeaders);
|
||||||
|
|
||||||
let parsedString = "";
|
return decodedHeader;
|
||||||
let currentPos = 0;
|
|
||||||
let pastPosition = 0;
|
|
||||||
while (currentPos >= 0) {
|
|
||||||
|
|
||||||
// Find starting text
|
|
||||||
currentPos = mimeEncodedText.indexOf("=?", pastPosition);
|
|
||||||
console.log('CURRENT POSITION', currentPos);
|
|
||||||
if (currentPos < 0) break;
|
|
||||||
|
|
||||||
// Add existing unparsed string
|
|
||||||
let fillerText = mimeEncodedText.substring(pastPosition, currentPos);
|
|
||||||
console.log("PROCESSING RANGE", pastPosition, ' ' ,currentPos)
|
|
||||||
console.log('FILLER TEXT: ', fillerText);
|
|
||||||
if (fillerText.indexOf('\r') > 0) console.log('CR detected', fillerText.indexOf('\r'));
|
|
||||||
if (fillerText.indexOf('\n') > 0) console.log('LF detected', fillerText.indexOf('\n'));
|
|
||||||
if (fillerText.indexOf('\r\n') > 0) console.log('CRLF detected', fillerText.indexOf('\r\n'));
|
|
||||||
if (fillerText.indexOf('\x20') > 0) console.log('SPACE detected', fillerText.indexOf('\x20'));
|
|
||||||
if (fillerText.indexOf('\n\x20') > 0) console.log('newline SPACE detected', fillerText.indexOf('\x20'));
|
|
||||||
|
|
||||||
if (fillerText !== '\r\n')
|
|
||||||
parsedString += fillerText
|
|
||||||
|
|
||||||
pastPosition = currentPos;
|
|
||||||
|
|
||||||
// find ending text
|
|
||||||
currentPos = mimeEncodedText.indexOf("?=", pastPosition);
|
|
||||||
|
|
||||||
// Process block
|
|
||||||
let encodedTextBlock = mimeEncodedText.substring(pastPosition + 2, currentPos);
|
|
||||||
pastPosition = currentPos + 2;
|
|
||||||
|
|
||||||
parsedString += this.parseEncodedWord(encodedTextBlock);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return parsedString;
|
/**
|
||||||
|
* Decode MIME header strings
|
||||||
|
*
|
||||||
|
* @param headerString
|
||||||
|
*/
|
||||||
|
decodeHeaders(headerString) {
|
||||||
|
// No encoded words detected
|
||||||
|
let i = headerString.indexOf("=?");
|
||||||
|
if (i === -1) return headerString;
|
||||||
|
|
||||||
throw new OperationError("Test");
|
let decodedHeaders = headerString.slice(0, i);
|
||||||
}
|
let header = headerString.slice(i);
|
||||||
|
|
||||||
parseEncodedWord(encodedWord) {
|
let isBetweenWords = false;
|
||||||
let [charset, encoding, encodedBlock] = encodedWord.split('?');
|
let start, cur, charset, encoding, j, end, text;
|
||||||
|
while (header.length > -1) {
|
||||||
|
start = header.indexOf("=?");
|
||||||
|
if (start === -1) break;
|
||||||
|
cur = start + "=?".length;
|
||||||
|
|
||||||
console.log('CURRENT BLOCK TO PROCESS', encodedBlock);
|
i = header.slice(cur).indexOf("?");
|
||||||
console.log('CURRENT CHARSET', charset);
|
if (i === -1) break;
|
||||||
|
|
||||||
let encodedText = '';
|
charset = header.slice(cur, cur + i);
|
||||||
if (encoding.toLowerCase() === 'b') {
|
cur += i + "?".length;
|
||||||
encodedText = fromBase64(encodedBlock);
|
|
||||||
|
if (header.length < cur + "Q??=".length) break;
|
||||||
|
|
||||||
|
encoding = header[cur];
|
||||||
|
cur += 1;
|
||||||
|
|
||||||
|
if (header[cur] !== "?") break;
|
||||||
|
|
||||||
|
cur += 1;
|
||||||
|
|
||||||
|
j = header.slice(cur).indexOf("?=");
|
||||||
|
if (j === -1) break;
|
||||||
|
|
||||||
|
text = header.slice(cur, cur + j);
|
||||||
|
end = cur + j + "?=".length;
|
||||||
|
|
||||||
|
if (encoding.toLowerCase() === "b") {
|
||||||
|
text = fromBase64(text);
|
||||||
|
} else if (encoding.toLowerCase() === "q") {
|
||||||
|
text = this.parseQEncodedWord(text);
|
||||||
} else {
|
} else {
|
||||||
encodedText = encodedBlock;
|
isBetweenWords = false;
|
||||||
let encodedChars = encodedText.indexOf("=");
|
decodedHeaders += header.slice(0, start + 2);
|
||||||
if (encodedChars > 0) {
|
header = header.slice(start + 2);
|
||||||
let extractedHex = encodedText.substring(encodedChars + 1, encodedChars + 3);
|
|
||||||
console.log("EXTRACTED HEX", extractedHex)
|
|
||||||
encodedText = encodedText.replace(`=${extractedHex}`, Utils.byteArrayToChars(fromHex(`=${extractedHex}`)))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
encodedText = encodedText.replace("_", " ");
|
if (start > 0 && (!isBetweenWords || header.slice(0, start).search(/\S/g) > -1)) {
|
||||||
|
decodedHeaders += header.slice(0, start);
|
||||||
}
|
}
|
||||||
|
|
||||||
return encodedText;
|
decodedHeaders += this.convertFromCharset(charset, text);
|
||||||
|
|
||||||
|
header = header.slice(end);
|
||||||
|
isBetweenWords = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (header.length > 0) {
|
||||||
|
decodedHeaders += header;
|
||||||
|
}
|
||||||
|
|
||||||
|
return decodedHeaders;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Converts decoded text for supported charsets.
|
||||||
|
* Supports UTF-8, US-ASCII, ISO-8859-*
|
||||||
|
*
|
||||||
|
* @param encodedWord
|
||||||
|
*/
|
||||||
|
convertFromCharset(charset, encodedText) {
|
||||||
|
charset = charset.toLowerCase();
|
||||||
|
const parsedCharset = charset.split("-");
|
||||||
|
|
||||||
|
if (parsedCharset.length === 2 && parsedCharset[0] === "utf" && charset === "utf-8") {
|
||||||
|
return cptable.utils.decode(65001, encodedText);
|
||||||
|
} else if (parsedCharset.length === 2 && charset === "us-ascii") {
|
||||||
|
return cptable.utils.decode(20127, encodedText);
|
||||||
|
} else if (parsedCharset.length === 3 && parsedCharset[0] === "iso" && parsedCharset[1] === "8859") {
|
||||||
|
const isoCharset = parseInt(parsedCharset[2], 10);
|
||||||
|
if (isoCharset >= 1 && isoCharset <= 16) {
|
||||||
|
return cptable.utils.decode(28590 + isoCharset, encodedText);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
throw new OperationError("Unhandled Charset");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Parses a Q encoded word
|
||||||
|
*
|
||||||
|
* @param encodedWord
|
||||||
|
*/
|
||||||
|
parseQEncodedWord(encodedWord) {
|
||||||
|
let decodedWord = "";
|
||||||
|
for (let i = 0; i < encodedWord.length; i++) {
|
||||||
|
if (encodedWord[i] === "_") {
|
||||||
|
decodedWord += " ";
|
||||||
|
// Parse hex encoding
|
||||||
|
} else if (encodedWord[i] === "=") {
|
||||||
|
if ((i + 2) >= encodedWord.length) throw new OperationError("Incorrectly Encoded Word");
|
||||||
|
const decodedHex = Utils.byteArrayToChars(fromHex(encodedWord.substring(i + 1, i + 3)));
|
||||||
|
decodedWord += decodedHex;
|
||||||
|
i += 2;
|
||||||
|
} else if (
|
||||||
|
(encodedWord[i].charCodeAt(0) >= " ".charCodeAt(0) && encodedWord[i].charCodeAt(0) <= "~".charCodeAt(0)) ||
|
||||||
|
encodedWord[i] === "\n" ||
|
||||||
|
encodedWord[i] === "\r" ||
|
||||||
|
encodedWord[i] === "\t") {
|
||||||
|
decodedWord += encodedWord[i];
|
||||||
|
} else {
|
||||||
|
throw new OperationError("Incorrectly Encoded Word");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return decodedWord;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
export default MIMEDecoding;
|
export default MIMEDecoding;
|
||||||
|
|
|
@ -11,10 +11,7 @@
|
||||||
* @license Apache-2.0
|
* @license Apache-2.0
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import {
|
import { setLongTestFailure, logTestReport } from "../lib/utils.mjs";
|
||||||
setLongTestFailure,
|
|
||||||
logTestReport,
|
|
||||||
} from "../lib/utils.mjs";
|
|
||||||
|
|
||||||
import TestRegister from "../lib/TestRegister.mjs";
|
import TestRegister from "../lib/TestRegister.mjs";
|
||||||
import "./tests/AESKeyWrap.mjs";
|
import "./tests/AESKeyWrap.mjs";
|
||||||
|
@ -104,6 +101,7 @@ import "./tests/LZNT1Decompress.mjs";
|
||||||
import "./tests/LZString.mjs";
|
import "./tests/LZString.mjs";
|
||||||
import "./tests/Magic.mjs";
|
import "./tests/Magic.mjs";
|
||||||
import "./tests/Media.mjs";
|
import "./tests/Media.mjs";
|
||||||
|
import "./tests/MIMEDecoding";
|
||||||
import "./tests/Modhex.mjs";
|
import "./tests/Modhex.mjs";
|
||||||
import "./tests/MorseCode.mjs";
|
import "./tests/MorseCode.mjs";
|
||||||
import "./tests/MS.mjs";
|
import "./tests/MS.mjs";
|
||||||
|
@ -167,14 +165,14 @@ const testStatus = {
|
||||||
allTestsPassing: true,
|
allTestsPassing: true,
|
||||||
counts: {
|
counts: {
|
||||||
total: 0,
|
total: 0,
|
||||||
}
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
setLongTestFailure();
|
setLongTestFailure();
|
||||||
|
|
||||||
const logOpsTestReport = logTestReport.bind(null, testStatus);
|
const logOpsTestReport = logTestReport.bind(null, testStatus);
|
||||||
|
|
||||||
(async function() {
|
(async function () {
|
||||||
const results = await TestRegister.runTests();
|
const results = await TestRegister.runTests();
|
||||||
logOpsTestReport(results);
|
logOpsTestReport(results);
|
||||||
})();
|
})();
|
||||||
|
|
46
tests/operations/tests/MIMEDecoding.mjs
Normal file
46
tests/operations/tests/MIMEDecoding.mjs
Normal file
|
@ -0,0 +1,46 @@
|
||||||
|
/**
|
||||||
|
* MIME Header Decoding tests
|
||||||
|
*
|
||||||
|
* @author mshwed [m@ttshwed.com]
|
||||||
|
* @copyright Crown Copyright 2019
|
||||||
|
* @license Apache-2.0
|
||||||
|
*/
|
||||||
|
|
||||||
|
import TestRegister from "../../lib/TestRegister.mjs";
|
||||||
|
|
||||||
|
TestRegister.addTests([
|
||||||
|
{
|
||||||
|
name: "Encoded =?",
|
||||||
|
input: "=?=?utf-8?q?test?=",
|
||||||
|
expectedOutput: "=?test",
|
||||||
|
recipeConfig: [
|
||||||
|
{
|
||||||
|
"op": "MIME Decoding",
|
||||||
|
"args": []
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "UTF-8 Encodings Multiple Headers",
|
||||||
|
input: "=?utf-8?q?=C3=89ric?= <eric@example.org>, =?utf-8?q?Ana=C3=AFs?= <anais@example.org>",
|
||||||
|
expectedOutput: "Éric <eric@example.org>, Anaïs <anais@example.org>",
|
||||||
|
recipeConfig: [
|
||||||
|
{
|
||||||
|
"op": "MIME Decoding",
|
||||||
|
"args": []
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "UTF-8 Encodings Single Header",
|
||||||
|
input: "=?utf-8?q?=C2=A1Hola,?= =?utf-8?q?_se=C3=B1or!?=",
|
||||||
|
expectedOutput: "¡Hola, señor!",
|
||||||
|
recipeConfig: [
|
||||||
|
{
|
||||||
|
"op": "MIME Decoding",
|
||||||
|
"args": []
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
|
||||||
|
]);
|
Loading…
Add table
Add a link
Reference in a new issue