Added parsing of headers.

This commit is contained in:
mshwed 2019-09-03 20:33:57 -04:00 committed by mshwed
parent c3994aa8e3
commit 7f97afd3e0
3 changed files with 172 additions and 78 deletions

View file

@ -9,6 +9,7 @@ import OperationError from "../errors/OperationError";
import Utils from "../Utils";
import { fromHex } from "../lib/Hex.mjs";
import { fromBase64 } from "../lib/Base64";
import cptable from "../vendor/js-codepage/cptable.js";
/**
* MIME Decoding operation
@ -23,24 +24,11 @@ class MIMEDecoding extends Operation {
this.name = "MIME Decoding";
this.module = "Default";
this.description = "";
this.infoURL = "";
this.description = "Enables the decoding of MIME message header extensions for non-ASCII text";
this.infoURL = "https://tools.ietf.org/html/rfc2047";
this.inputType = "byteArray";
this.outputType = "string";
this.args = [
/* Example arguments. See the project wiki for full details.
{
name: "First arg",
type: "string",
value: "Don't Panic"
},
{
name: "Second arg",
type: "number",
value: 42
}
*/
];
this.args = [];
}
/**
@ -49,73 +37,135 @@ class MIMEDecoding extends Operation {
* @returns {string}
*/
run(input, args) {
const mimeEncodedText = Utils.byteArrayToUtf8(input);
const encodedHeaders = mimeEncodedText.replace(/\r\n/g, "\n");
let mimeEncodedText = Utils.byteArrayToUtf8(input)
const decodedHeader = this.decodeHeaders(encodedHeaders);
let parsedString = "";
let currentPos = 0;
let pastPosition = 0;
while (currentPos >= 0) {
// Find starting text
currentPos = mimeEncodedText.indexOf("=?", pastPosition);
console.log('CURRENT POSITION', currentPos);
if (currentPos < 0) break;
// Add existing unparsed string
let fillerText = mimeEncodedText.substring(pastPosition, currentPos);
console.log("PROCESSING RANGE", pastPosition, ' ' ,currentPos)
console.log('FILLER TEXT: ', fillerText);
if (fillerText.indexOf('\r') > 0) console.log('CR detected', fillerText.indexOf('\r'));
if (fillerText.indexOf('\n') > 0) console.log('LF detected', fillerText.indexOf('\n'));
if (fillerText.indexOf('\r\n') > 0) console.log('CRLF detected', fillerText.indexOf('\r\n'));
if (fillerText.indexOf('\x20') > 0) console.log('SPACE detected', fillerText.indexOf('\x20'));
if (fillerText.indexOf('\n\x20') > 0) console.log('newline SPACE detected', fillerText.indexOf('\x20'));
if (fillerText !== '\r\n')
parsedString += fillerText
pastPosition = currentPos;
// find ending text
currentPos = mimeEncodedText.indexOf("?=", pastPosition);
// Process block
let encodedTextBlock = mimeEncodedText.substring(pastPosition + 2, currentPos);
pastPosition = currentPos + 2;
parsedString += this.parseEncodedWord(encodedTextBlock);
return decodedHeader;
}
return parsedString;
/**
* Decode MIME header strings
*
* @param headerString
*/
decodeHeaders(headerString) {
// No encoded words detected
let i = headerString.indexOf("=?");
if (i === -1) return headerString;
throw new OperationError("Test");
}
let decodedHeaders = headerString.slice(0, i);
let header = headerString.slice(i);
parseEncodedWord(encodedWord) {
let [charset, encoding, encodedBlock] = encodedWord.split('?');
let isBetweenWords = false;
let start, cur, charset, encoding, j, end, text;
while (header.length > -1) {
start = header.indexOf("=?");
if (start === -1) break;
cur = start + "=?".length;
console.log('CURRENT BLOCK TO PROCESS', encodedBlock);
console.log('CURRENT CHARSET', charset);
i = header.slice(cur).indexOf("?");
if (i === -1) break;
let encodedText = '';
if (encoding.toLowerCase() === 'b') {
encodedText = fromBase64(encodedBlock);
charset = header.slice(cur, cur + i);
cur += i + "?".length;
if (header.length < cur + "Q??=".length) break;
encoding = header[cur];
cur += 1;
if (header[cur] !== "?") break;
cur += 1;
j = header.slice(cur).indexOf("?=");
if (j === -1) break;
text = header.slice(cur, cur + j);
end = cur + j + "?=".length;
if (encoding.toLowerCase() === "b") {
text = fromBase64(text);
} else if (encoding.toLowerCase() === "q") {
text = this.parseQEncodedWord(text);
} else {
encodedText = encodedBlock;
let encodedChars = encodedText.indexOf("=");
if (encodedChars > 0) {
let extractedHex = encodedText.substring(encodedChars + 1, encodedChars + 3);
console.log("EXTRACTED HEX", extractedHex)
encodedText = encodedText.replace(`=${extractedHex}`, Utils.byteArrayToChars(fromHex(`=${extractedHex}`)))
isBetweenWords = false;
decodedHeaders += header.slice(0, start + 2);
header = header.slice(start + 2);
}
encodedText = encodedText.replace("_", " ");
if (start > 0 && (!isBetweenWords || header.slice(0, start).search(/\S/g) > -1)) {
decodedHeaders += header.slice(0, start);
}
return encodedText;
decodedHeaders += this.convertFromCharset(charset, text);
header = header.slice(end);
isBetweenWords = true;
}
if (header.length > 0) {
decodedHeaders += header;
}
return decodedHeaders;
}
/**
* Converts decoded text for supported charsets.
* Supports UTF-8, US-ASCII, ISO-8859-*
*
* @param encodedWord
*/
convertFromCharset(charset, encodedText) {
charset = charset.toLowerCase();
const parsedCharset = charset.split("-");
if (parsedCharset.length === 2 && parsedCharset[0] === "utf" && charset === "utf-8") {
return cptable.utils.decode(65001, encodedText);
} else if (parsedCharset.length === 2 && charset === "us-ascii") {
return cptable.utils.decode(20127, encodedText);
} else if (parsedCharset.length === 3 && parsedCharset[0] === "iso" && parsedCharset[1] === "8859") {
const isoCharset = parseInt(parsedCharset[2], 10);
if (isoCharset >= 1 && isoCharset <= 16) {
return cptable.utils.decode(28590 + isoCharset, encodedText);
}
}
throw new OperationError("Unhandled Charset");
}
/**
* Parses a Q encoded word
*
* @param encodedWord
*/
parseQEncodedWord(encodedWord) {
let decodedWord = "";
for (let i = 0; i < encodedWord.length; i++) {
if (encodedWord[i] === "_") {
decodedWord += " ";
// Parse hex encoding
} else if (encodedWord[i] === "=") {
if ((i + 2) >= encodedWord.length) throw new OperationError("Incorrectly Encoded Word");
const decodedHex = Utils.byteArrayToChars(fromHex(encodedWord.substring(i + 1, i + 3)));
decodedWord += decodedHex;
i += 2;
} else if (
(encodedWord[i].charCodeAt(0) >= " ".charCodeAt(0) && encodedWord[i].charCodeAt(0) <= "~".charCodeAt(0)) ||
encodedWord[i] === "\n" ||
encodedWord[i] === "\r" ||
encodedWord[i] === "\t") {
decodedWord += encodedWord[i];
} else {
throw new OperationError("Incorrectly Encoded Word");
}
}
return decodedWord;
}
}
export default MIMEDecoding;

View file

@ -11,10 +11,7 @@
* @license Apache-2.0
*/
import {
setLongTestFailure,
logTestReport,
} from "../lib/utils.mjs";
import { setLongTestFailure, logTestReport } from "../lib/utils.mjs";
import TestRegister from "../lib/TestRegister.mjs";
import "./tests/AESKeyWrap.mjs";
@ -104,6 +101,7 @@ import "./tests/LZNT1Decompress.mjs";
import "./tests/LZString.mjs";
import "./tests/Magic.mjs";
import "./tests/Media.mjs";
import "./tests/MIMEDecoding";
import "./tests/Modhex.mjs";
import "./tests/MorseCode.mjs";
import "./tests/MS.mjs";
@ -167,7 +165,7 @@ const testStatus = {
allTestsPassing: true,
counts: {
total: 0,
}
},
};
setLongTestFailure();

View file

@ -0,0 +1,46 @@
/**
* MIME Header Decoding tests
*
* @author mshwed [m@ttshwed.com]
* @copyright Crown Copyright 2019
* @license Apache-2.0
*/
import TestRegister from "../../lib/TestRegister.mjs";
/**
 * Builds one test case that runs the "MIME Decoding" operation with no
 * arguments. A fresh recipe array is created for every case.
 *
 * @param {string} name - Test name.
 * @param {string} input - Text fed to the recipe.
 * @param {string} expectedOutput - Output the recipe is expected to produce.
 * @returns {Object} Test definition for TestRegister.addTests.
 */
const mimeDecodingCase = (name, input, expectedOutput) => ({
    name: name,
    input: input,
    expectedOutput: expectedOutput,
    recipeConfig: [
        {
            "op": "MIME Decoding",
            "args": []
        }
    ]
});

TestRegister.addTests([
    // A literal "=?" directly in front of a valid encoded word.
    mimeDecodingCase(
        "Encoded =?",
        "=?=?utf-8?q?test?=",
        "=?test"
    ),
    // Two encoded words separated by plain text that must be kept.
    mimeDecodingCase(
        "UTF-8 Encodings Multiple Headers",
        "=?utf-8?q?=C3=89ric?= <eric@example.org>, =?utf-8?q?Ana=C3=AFs?= <anais@example.org>",
        "Éric <eric@example.org>, Anaïs <anais@example.org>"
    ),
    // Two adjacent encoded words: the separating space is dropped.
    mimeDecodingCase(
        "UTF-8 Encodings Single Header",
        "=?utf-8?q?=C2=A1Hola,?= =?utf-8?q?_se=C3=B1or!?=",
        "¡Hola, señor!"
    ),
]);