Added parsing of headers.

2025-06-30 09:52:32 -04:00 · 2019-09-03 20:33:57 -04:00 · 2019-09-03 20:33:57 -04:00 · 7f97afd3e0
commit 7f97afd3e0
parent c3994aa8e3
3 changed files with 172 additions and 78 deletions
--- a/src/core/operations/MIMEDecoding.mjs
+++ b/src/core/operations/MIMEDecoding.mjs
@ -7,8 +7,9 @@
 import Operation from "../Operation";
 import OperationError from "../errors/OperationError";
 import Utils from "../Utils";
-import {fromHex} from "../lib/Hex.mjs";
+import { fromHex } from "../lib/Hex.mjs";
 import { fromBase64 } from "../lib/Base64";
 import cptable from "../vendor/js-codepage/cptable.js";
 /**
 * MIME Decoding operation
@ -23,24 +24,11 @@ class MIMEDecoding extends Operation {
        this.name = "MIME Decoding";
        this.module = "Default";
-        this.description = "";
+        this.description = "Enables the decoding of MIME message header extensions for non-ASCII text";
-        this.infoURL = "";
+        this.infoURL = "https://tools.ietf.org/html/rfc2047";
        this.inputType = "byteArray";
        this.outputType = "string";
-        this.args = [
+        this.args = [];
            /* Example arguments. See the project wiki for full details.
            {
                name: "First arg",
                type: "string",
                value: "Don't Panic"
            },
            {
                name: "Second arg",
                type: "number",
                value: 42
            }
            */
        ];
    }
    /**
@ -49,73 +37,135 @@ class MIMEDecoding extends Operation {
     * @returns {string}
     */
    run(input, args) {
        const mimeEncodedText = Utils.byteArrayToUtf8(input);
        const encodedHeaders = mimeEncodedText.replace(/\r\n/g, "\n");
-        let mimeEncodedText = Utils.byteArrayToUtf8(input)
+        const decodedHeader = this.decodeHeaders(encodedHeaders);
-        let parsedString = "";
+        return decodedHeader;
        let currentPos = 0;
        let pastPosition = 0;
        while (currentPos >= 0) {
            // Find starting text
            currentPos = mimeEncodedText.indexOf("=?", pastPosition);
            console.log('CURRENT POSITION', currentPos);
            if (currentPos < 0) break;
            // Add existing unparsed string
            let fillerText = mimeEncodedText.substring(pastPosition, currentPos);
            console.log("PROCESSING RANGE", pastPosition, ' ' ,currentPos)
            console.log('FILLER TEXT: ', fillerText);
            if (fillerText.indexOf('\r') > 0) console.log('CR detected', fillerText.indexOf('\r'));
            if (fillerText.indexOf('\n') > 0) console.log('LF detected', fillerText.indexOf('\n'));
            if (fillerText.indexOf('\r\n') > 0) console.log('CRLF detected', fillerText.indexOf('\r\n'));
            if (fillerText.indexOf('\x20') > 0) console.log('SPACE detected', fillerText.indexOf('\x20'));
            if (fillerText.indexOf('\n\x20') > 0) console.log('newline SPACE detected', fillerText.indexOf('\x20'));
            if (fillerText !== '\r\n')
                parsedString += fillerText
            pastPosition = currentPos;
            // find ending text
            currentPos = mimeEncodedText.indexOf("?=", pastPosition);
            // Process block
            let encodedTextBlock = mimeEncodedText.substring(pastPosition + 2, currentPos);
            pastPosition = currentPos + 2;
            parsedString += this.parseEncodedWord(encodedTextBlock);
    }
-        return parsedString;
+    /**
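     * Decodes the RFC 2047 encoded words ("=?charset?encoding?text?=") found in a header string.
     * The input is returned unchanged when it contains no "=?" marker.
     *
     * @param {string} headerString
     * @returns {string}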
     */
    decodeHeaders(headerString) {
        // No encoded words detected
        let i = headerString.indexOf("=?");
        if (i === -1) return headerString;
-        throw new OperationError("Test");
+        let decodedHeaders = headerString.slice(0, i);
-    }
+        let header = headerString.slice(i);
-    parseEncodedWord(encodedWord) {
+        let isBetweenWords = false;
-        let [charset, encoding, encodedBlock] = encodedWord.split('?');
+        let start, cur, charset, encoding, j, end, text;
        while (header.length > -1) {
            start = header.indexOf("=?");
            if (start === -1) break;
            cur = start + "=?".length;
-        console.log('CURRENT BLOCK TO PROCESS', encodedBlock);
+            i = header.slice(cur).indexOf("?");
-        console.log('CURRENT CHARSET', charset);
+            if (i === -1) break;
-        let encodedText = '';
+            charset = header.slice(cur, cur + i);
-        if (encoding.toLowerCase() === 'b') {
+            cur += i + "?".length;
-            encodedText = fromBase64(encodedBlock);
+
            if (header.length < cur + "Q??=".length) break;
            encoding = header[cur];
            cur += 1;
            if (header[cur] !== "?") break;
            cur += 1;
            j = header.slice(cur).indexOf("?=");
            if (j === -1) break;
            text = header.slice(cur, cur + j);
            end = cur + j + "?=".length;
            if (encoding.toLowerCase() === "b") {
                text = fromBase64(text);
            } else if (encoding.toLowerCase() === "q") {
                text = this.parseQEncodedWord(text);
            } else {
-            encodedText = encodedBlock;
+                isBetweenWords = false;
-            let encodedChars = encodedText.indexOf("=");
+                decodedHeaders += header.slice(0, start + 2);
-            if (encodedChars > 0) {
+                header = header.slice(start + 2);
                let extractedHex = encodedText.substring(encodedChars + 1, encodedChars + 3);
                console.log("EXTRACTED HEX", extractedHex)
                encodedText = encodedText.replace(`=${extractedHex}`, Utils.byteArrayToChars(fromHex(`=${extractedHex}`)))
            }
-            encodedText = encodedText.replace("_", " ");
+            if (start > 0 && (!isBetweenWords || header.slice(0, start).search(/\S/g) > -1)) {
                decodedHeaders += header.slice(0, start);
            }
-        return encodedText;
+            decodedHeaders += this.convertFromCharset(charset, text);
            header = header.slice(end);
            isBetweenWords = true;
        }
        if (header.length > 0) {
            decodedHeaders += header;
        }
        return decodedHeaders;
    }
    /**
     * Converts decoded text for supported charsets.
     * Supports UTF-8, US-ASCII, ISO-8859-*
     *
     * @param encodedWord
     */
    convertFromCharset(charset, encodedText) {
        charset = charset.toLowerCase();
        const parsedCharset = charset.split("-");
        if (parsedCharset.length === 2 && parsedCharset[0] === "utf" && charset === "utf-8") {
            return cptable.utils.decode(65001, encodedText);
        } else if (parsedCharset.length === 2 && charset === "us-ascii") {
            return cptable.utils.decode(20127, encodedText);
        } else if (parsedCharset.length === 3 && parsedCharset[0] === "iso" && parsedCharset[1] === "8859") {
            const isoCharset = parseInt(parsedCharset[2], 10);
            if (isoCharset >= 1 && isoCharset <= 16) {
                return cptable.utils.decode(28590 + isoCharset, encodedText);
            }
        }
        throw new OperationError("Unhandled Charset");
    }
    /**
     * Parses a Q encoded word
     *
     * @param encodedWord
     */
    parseQEncodedWord(encodedWord) {
        let decodedWord = "";
        for (let i = 0; i < encodedWord.length; i++) {
            if (encodedWord[i] === "_") {
                decodedWord += " ";
            // Parse hex encoding
            } else if (encodedWord[i] === "=") {
                if ((i + 2) >= encodedWord.length) throw new OperationError("Incorrectly Encoded Word");
                const decodedHex = Utils.byteArrayToChars(fromHex(encodedWord.substring(i + 1, i + 3)));
                decodedWord += decodedHex;
                i += 2;
            } else if (
                (encodedWord[i].charCodeAt(0) >= " ".charCodeAt(0) && encodedWord[i].charCodeAt(0) <= "~".charCodeAt(0)) ||
                encodedWord[i] === "\n" ||
                encodedWord[i] === "\r" ||
                encodedWord[i] === "\t") {
                decodedWord += encodedWord[i];
            } else {
                throw new OperationError("Incorrectly Encoded Word");
            }
        }
        return decodedWord;
    }
 }
 export default MIMEDecoding;
--- a/tests/operations/index.mjs
+++ b/tests/operations/index.mjs
@ -11,10 +11,7 @@
 * @license Apache-2.0
 */
-import {
+import { setLongTestFailure, logTestReport } from "../lib/utils.mjs";
    setLongTestFailure,
    logTestReport,
 } from "../lib/utils.mjs";
 import TestRegister from "../lib/TestRegister.mjs";
 import "./tests/AESKeyWrap.mjs";
@ -104,6 +101,7 @@ import "./tests/LZNT1Decompress.mjs";
 import "./tests/LZString.mjs";
 import "./tests/Magic.mjs";
 import "./tests/Media.mjs";
 import "./tests/MIMEDecoding";
 import "./tests/Modhex.mjs";
 import "./tests/MorseCode.mjs";
 import "./tests/MS.mjs";
@ -167,14 +165,14 @@ const testStatus = {
    allTestsPassing: true,
    counts: {
        total: 0,
-    }
+    },
 };
 setLongTestFailure();
 const logOpsTestReport = logTestReport.bind(null, testStatus);
-(async function() {
+(async function () {
    const results = await TestRegister.runTests();
    logOpsTestReport(results);
 })();
--- a/tests/operations/tests/MIMEDecoding.mjs
+++ b/tests/operations/tests/MIMEDecoding.mjs
@ -0,0 +1,46 @@
 /**
 * MIME Header Decoding tests
 * 
 * @author mshwed [m@ttshwed.com]
 * @copyright Crown Copyright 2019
 * @license Apache-2.0
 */
 import TestRegister from "../../lib/TestRegister.mjs";
 /**
  * Builds a test case whose recipe consists of a single "MIME Decoding"
  * operation with no arguments. A fresh recipeConfig is created per case.
  *
  * @param {string} name - Test name
  * @param {string} input - Header text fed to the operation
  * @param {string} expectedOutput - Expected decoded text
  * @returns {Object} Test definition for TestRegister.addTests
  */
 const mimeDecodingCase = (name, input, expectedOutput) => ({
    name,
    input,
    expectedOutput,
    recipeConfig: [
        {
            "op": "MIME Decoding",
            "args": []
        }
    ]
 });
 TestRegister.addTests([
    // A stray "=?" in front of a valid encoded word
    mimeDecodingCase(
        "Encoded =?",
        "=?=?utf-8?q?test?=",
        "=?test"
    ),
    // Two encoded words separated by ordinary address text
    mimeDecodingCase(
        "UTF-8 Encodings Multiple Headers",
        "=?utf-8?q?=C3=89ric?= <eric@example.org>, =?utf-8?q?Ana=C3=AFs?= <anais@example.org>",
        "Éric <eric@example.org>, Anaïs <anais@example.org>"
    ),
    // Two adjacent encoded words separated only by whitespace
    mimeDecodingCase(
        "UTF-8 Encodings Single Header",
        "=?utf-8?q?=C2=A1Hola,?= =?utf-8?q?_se=C3=B1or!?=",
        "¡Hola, señor!"
    ),
 ]);