adding decode mime encoded words and removing duplicate code

2025-07-01 02:12:29 -04:00 · 2018-11-27 22:46:08 -05:00 · 2018-11-27 22:46:08 -05:00 · e2ee627d09
commit e2ee627d09
parent fa5d2b130f
3 changed files with 43 additions and 278 deletions
--- a/src/core/config/Categories.json
+++ b/src/core/config/Categories.json
@ -55,7 +55,8 @@
            "To Braille",
            "From Braille",
            "Parse TLV",
-            "Parse Internet Message Format"
+            "Parse Internet Message Format",
+            "Decode Mime Encoded Words"
        ]
    },
    {
--- a/src/core/operations/DecodeMimeEncodedWords.mjs
+++ b/src/core/operations/DecodeMimeEncodedWords.mjs
@ -0,0 +1,36 @@
+/**
+ * @author bwhitn [brian.m.whitney@outlook.com]
+ * @copyright Crown Copyright 2016
+ * @license Apache-2.0
+ */
+
+import Operation from "../Operation";
+import OperationError from "../errors/OperationError";
+import Mime from "../lib/Mime";
+import Utils from "../Utils";
+
+class DecodeMimeEncodedWords extends Operation {
+    /** DecodeMimeEncodedWords constructor: registers the operation's metadata (no arguments). */
+    constructor() {
+        super();
+        this.name = "Decode Mime Encoded Words";
+        this.module = "Default";
+        this.description = "Decodes MIME encoded-words as defined in RFC 2047, replacing each one in the input with its decoded text.<br><br>Encoded-words have the form <code>=?charset?encoding?encoded-text?=</code> (for example <code>=?utf-8?Q?Hello?=</code>) and are typically found in email headers.";
+        this.infoURL = "https://tools.ietf.org/html/rfc2047";
+        this.inputType = "string";
+        this.outputType = "string";
+        this.args = [];
+    }
+
+    /**
+     * Hands the input to Mime.replaceEncodedWord, which is expected to decode RFC 2047 encoded-words.
+     * @param {string} input - text that may contain encoded-words
+     * @param {Object[]} args - unused; this operation declares no arguments
+     * @returns {string} whatever Mime.replaceEncodedWord returns for the input
+     */
+    run(input, args) {
+        return Mime.replaceEncodedWord(input);
+    }
+}
+
+export default DecodeMimeEncodedWords;
--- a/src/core/operations/ParseIMF.mjs
+++ b/src/core/operations/ParseIMF.mjs
@ -6,25 +6,9 @@

 import Operation from "../Operation";
 import OperationError from "../errors/OperationError";
-import cptable from "../vendor/js-codepage/cptable.js";
-import {fromBase64} from "../lib/Base64";
-import {decodeQuotedPrintable} from "../lib/QuotedPrintable";
-import {MIME_FORMAT} from "../lib/ChrEnc";
 import Mime from "../lib/Mime";
 import Utils from "../Utils";

-/**
- *
- *
- * @constant
- * @default
- *
-const BODY_FILE_TYPE = {
-    "text/plain": "txt",
-    "text/html": "htm",
-    "application/rtf": "rtf",
-} */
-
 class ParseIMF extends Operation {

    /**
@ -34,9 +18,9 @@ class ParseIMF extends Operation {
        super();
        this.name = "Parse Internet Message Format";
        this.module = "Default";
-        this.description = ["Parser an IMF formatted messages following RFC5322.",
+        this.description = ["Parse an IMF formatted messages following RFC5322.",
            "<br><br>",
-            "Parses an IMF formated message. These often have the file extention &quot;.eml&quote; and contain the email headers and body. The output will be a file list of the headers and mime parts.",
+            "Parses an IMF formated message. These often have the file extention &quot;.eml&quote; and contain the email headers and body. The output will be a file list of the root header and decoded mime parts.",
        ].join("\n");
        this.infoURL = "https://tools.ietf.org/html/rfc5322";
        this.inputType = "string";
@ -44,7 +28,7 @@ class ParseIMF extends Operation {
        this.presentType = "html";
        this.args = [
            {
-                "name": "Decode Quoted Words",
+                "name": "Decode Encoded-Words",
                "type": "boolean",
                "value": false
            }
@ -56,55 +40,6 @@ class ParseIMF extends Operation {
        return mimeObj.decodeMime(args[0]);
    }

-    /**
-     * Basic Email Parser that displays the header and mime sections as files.
-     * Args 0 boolean decode quoted words
-     *
-     * @param {string} input
-     * @param {Object[]} args
-     * @returns {File[]}
-     *
-     // NOTE: Liberties taken include:
-     // No checks are made to verify quoted words are valid encodings e.g. underscore vs escape
-     // This attempts to decode mime reguardless if it is \r\n (correct newline) or \n (incorrect)
-     // Both Base64 and QuotedPrintable is used for decode. UUEncode is not available right now
-     // and is a standardized encoding format.
-    run(input, args) {
-        // TODO Later: no uuencode function. See if we can fix this.
-        // TODO: content-type can be omitted and would mean us-ascii charset and text/plain.
-        if (!input) {
-            return [];
-        }
-        let emlObj = ParseIMF.splitParse(input);
-        if (!emlObj.body) { throw new OperationError("No body was found");}
-        if (args[0]) {
-            emlObj.rawHeader = ParseIMF.replaceDecodeWord(emlObj.rawHeader);
-        }
-        let retval = [new File([emlObj.rawHeader], "Header", {type: "text/plain"})];
-        let retfiles = ParseIMF.walkMime(emlObj, input.indexOf("\r") >= 0);
-        retfiles.forEach(function(fileObj){
-            let file = null;
-            if (fileObj.name !== null) {
-                file = new File([fileObj.data], fileObj.name, {type: fileObj.type});
-            } else {
-                let name = null;
-                if ("subject" in emlObj.header) {
-                    name = emlObj.header["subject"][0].concat(".");
-                } else {
-                    name = "Undefined.";
-                }
-                if (fileObj.type in BODY_FILE_TYPE) {
-                    name = name.concat(BODY_FILE_TYPE[fileObj.type]);
-                } else {
-                    name = name.concat("bin");
-                }
-                file = new File([fileObj.data], name, {type: fileObj.type});
-            }
-            retval.push(file);
-        });
-        return retval;
-    } */
-
    /**
     * Displays the files in HTML for web apps.
     *
@ -114,213 +49,6 @@ class ParseIMF extends Operation {
    async present(files) {
        return await Utils.displayFilesAsHTML(files);
    }
-
-    /**
-     * Walks a MIME document and returns an array of Mime data and header objects.
-     *
-     * @param {string} input
-     * @param {object} header
-     * @returns {object[]}
-     *
-    static walkMime(parentObj, rn) {
-        let new_line_length = rn ? 2 : 1;
-        let contType = null, fileName = null, charEnc = null, contDispoObj = null;
-        if (parentObj.header.hasOwnProperty("content-type")) {
-            let contTypeObj = ParseIMF.decodeComplexField(parentObj.header["content-type"][0]);
-            if (parentObj.header.hasOwnProperty("content-disposition")) {
-                contDispoObj = ParseIMF.decodeComplexField(parentObj.header["content-disposition"][0])
-                if (contDispoObj != null && contDispoObj.hasOwnProperty("filename")) {
-                    fileName = contDispoObj.filename;
-                }
-            }
-            if (contTypeObj != null) {
-                if (contTypeObj.hasOwnProperty("value")) {
-                    contType = contTypeObj.value[0];
-                }
-                if (contTypeObj.hasOwnProperty("charset")) {
-                    charEnc = contTypeObj.charset;
-                }
-                if (fileName == null && contTypeObj.hasOwnProperty("name")) {
-                    fileName = contTypeObj.name;
-                }
-            }
-            if (contType.startsWith("multipart/")) {
-                let content_boundary = null;
-                let output_sections = [];
-                if (contTypeObj.hasOwnProperty("boundary")) {
-                    content_boundary = contTypeObj.boundary;
-                }
-                let mime_parts = ParseIMF.splitMultipart(parentObj.body, content_boundary, new_line_length);
-                mime_parts.forEach(function(mime_part){
-                    let mimeObj = ParseIMF.splitParse(mime_part);
-                    if (!mimeObj.body) {
-                        return [];
-                    }
-                    let parts = ParseIMF.walkMime(mimeObj, rn);
-                    parts.forEach(function(part){
-                        output_sections.push(part);
-                    });
-                });
-                return output_sections;
-            }
-            if (parentObj.header.hasOwnProperty("content-transfer-encoding")) {
-                let contEncObj = ParseIMF.decodeComplexField(parentObj.header["content-transfer-encoding"][0]);
-                let contTran = null;
-                if (contEncObj != null && contEncObj.hasOwnProperty("value")) {
-                        contTran = contEncObj.value[0];
-                }
-                if (contTran != null) {
-                    parentObj.body = ParseIMF.decodeMimeData(parentObj.body, charEnc, contTran);
-                }
-            }
-            return [{type: contType, data: parentObj.body, name: fileName}];
-        }
-        throw new OperationError("Invalid Mime section");
 }

-    /**
-     * Takes a string and decodes quoted words inside them
-     * These take the form of =?utf-8?Q?Hello?=
-     *
-     * @param {string} input
-     * @returns {string}
-     *
-    static replaceDecodeWord(input) {
-        return input.replace(/=\?([^?]+)\?(Q|B)\?([^?]+)\?=/g, function (a, charEnc, contEnc, input) {
-            contEnc = (contEnc === "B") ? "base64" : "quoted-printable";
-            if (contEnc === "quoted-printable") {
-                input = input.replace(/_/g, " ");
-            }
-            return ParseIMF.decodeMimeData(input, charEnc, contEnc);
-        });
-    }
-
-
-    /**
-     * Breaks the header from the body and parses the header. The returns an
-     * object or null. The object contains the raw header, decoded body, and
-     * parsed header object.
-     *
-     * @param {string} input
-     * @returns {object}
-     *
-    static splitParse(input) {
-        const emlRegex = /(?:\r?\n){2}/g;
-        let matchobj = emlRegex.exec(input);
-        if (matchobj) {
-            let splitEmail = [input.substring(0,matchobj.index), input.substring(emlRegex.lastIndex)];
-            const sectionRegex = /([A-Za-z-]+):\s+([\x00-\xff]+?)(?=$|\r?\n\S)/g;
-            let headerObj = {}, section;
-            while ((section = sectionRegex.exec(splitEmail[0]))) {
-                let fieldName = section[1].toLowerCase();
-                let fieldValue = ParseIMF.replaceDecodeWord(section[2].replace(/\n|\r/g, " "));
-                if (fieldName in headerObj) {
-                    headerObj[fieldName].push(fieldValue);
-                } else {
-                    headerObj[fieldName] = [fieldValue];
-                }
-            }
-            return {rawHeader:splitEmail[0], body: splitEmail[1],  header: headerObj};
-        }
-        return null;
-    }
-
-    /**
-     * Return decoded MIME data given the character encoding and content encoding.
-     *
-     * @param {string} input
-     * @param {string} charEnc
-     * @param {string} contEnc
-     * @returns {string}
-     *
-    static decodeMimeData(input, charEnc, contEnc) {
-        switch (contEnc) {
-            case "base64":
-                input = fromBase64(input);
-                break;
-            case "quoted-printable":
-                input = Utils.byteArrayToUtf8(decodeQuotedPrintable(input));
-                break;
-        }
-        if (charEnc && MIME_FORMAT.hasOwnProperty(charEnc.toLowerCase())) {
-            input = cptable.utils.decode(MIME_FORMAT[charEnc.toLowerCase()], input);
-        }
-        return input;
-    }
-
-    /**
-     * Parses a complex header field and returns an object that contains
-     * normalized keys with corresponding values along with single values under
-     * a value array.
-     *
-     * @param {string} field
-     * @returns {object}
-     *
-    static decodeComplexField(field) {
-        let fieldSplit = field.split(/;\s+/g);
-        let retVal = {};
-        fieldSplit.forEach(function(item){
-            if (item.indexOf("=") >= 0) {
-                let eq = item.indexOf("=");
-                let kv = null;
-                if (item.length > eq) {
-                    kv = [item.substring(0, eq), item.substring(eq + 1).trim()];
-                } else {
-                    throw OperationError("Not a valid header entry");
-                }
-                if ((kv[1].startsWith("\'") && kv[1].endsWith("\'"))
-                    || (kv[1].startsWith("\"") && kv[1].endsWith("\""))) {
-                    kv[1] = (/(['"])(.+)\1/.exec(kv[1]))[2];
-                }
-                retVal[kv[0].toLowerCase()] = kv[1];
-            } else {
-                item = item.trim().toLowerCase();
-                if (retVal.hasOwnProperty("value")) {
-                    retVal.value.push(item);
-                } else {
-                    retVal.value = [item];
-                }
-            }
-        });
-        return retVal;
-    }
-
-    /**
-     * Splits a Mime document by the current boundaries and attempts to account
-     * for the current new line size which can be either the standard \r\n or \n.
-     *
-     * @param {string} input
-     * @param {string} boundary
-     * @param {string} new_line_length
-     * @return {string[]}
-     *
-    static splitMultipart(input, boundary, new_line_length) {
-        let output = [];
-        let newline = new_line_length === 2 ? "\r\n" : "\n";
-        const boundary_str = "--".concat(boundary, newline);
-        let last = input.indexOf("--".concat(boundary, "--", newline)) - new_line_length;
-        if (last < 0) {
-            last = input.indexOf("--".concat(boundary, "--")) - new_line_length;
-        }
-        let start = 0;
-        while(true) {
-            let start = input.indexOf(boundary_str, start);
-            if (start >= 0) {
-                start = start + boundary_str.length;
-            } else {
-                break;
-            }
-            let end = input.indexOf(boundary_str, start) - new_line_length;
-            if (end > start) {
-                output.push(input.substring(start, end));
-            } else {
-                output.push(input.substring(start, last));
-                break;
-            }
-            start = end;
-        }
-        return output;
-    } */
-}
-
-export default ParseIMF
+export default ParseIMF;