diff --git a/src/core/config/Categories.json b/src/core/config/Categories.json index 32d64b08..4fc4c578 100755 --- a/src/core/config/Categories.json +++ b/src/core/config/Categories.json @@ -55,7 +55,8 @@ "To Braille", "From Braille", "Parse TLV", - "Parse Internet Message Format" + "Parse Internet Message Format", + "Decode Mime Encoded Words" ] }, { diff --git a/src/core/operations/DecodeMimeEncodedWords.mjs b/src/core/operations/DecodeMimeEncodedWords.mjs new file mode 100644 index 00000000..988d3a4f --- /dev/null +++ b/src/core/operations/DecodeMimeEncodedWords.mjs @@ -0,0 +1,36 @@ +/** + * @author bwhitn [brian.m.whitney@outlook.com] + * @copyright Crown Copyright 2016 + * @license Apache-2.0 + */ + +import Operation from "../Operation"; +import OperationError from "../errors/OperationError"; +import Mime from "../lib/Mime"; +import Utils from "../Utils"; + +class DecodeMimeEncodedWords extends Operation { + + /** + * DecodeMimeEncodedWords constructor + */ + constructor() { + super(); + this.name = "Decode Mime Encoded Words"; + this.module = "Default"; + this.description = ["Parser an IMF formatted messages following RFC5322.", + "

", + "Parses an IMF formated message. These often have the file extention ".eml"e; and contain the email headers and body. The output will be a file list of the headers and mime parts.", + ].join("\n"); + this.infoURL = "https://tools.ietf.org/html/rfc2047"; + this.inputType = "string"; + this.outputType = "string"; + this.args = []; + } + + run(input, args) { + return Mime.replaceEncodedWord(input); + } +} + +export default DecodeMimeEncodedWords; diff --git a/src/core/operations/ParseIMF.mjs b/src/core/operations/ParseIMF.mjs index 8bd9a87f..0bfb6c1e 100644 --- a/src/core/operations/ParseIMF.mjs +++ b/src/core/operations/ParseIMF.mjs @@ -6,37 +6,21 @@ import Operation from "../Operation"; import OperationError from "../errors/OperationError"; -import cptable from "../vendor/js-codepage/cptable.js"; -import {fromBase64} from "../lib/Base64"; -import {decodeQuotedPrintable} from "../lib/QuotedPrintable"; -import {MIME_FORMAT} from "../lib/ChrEnc"; import Mime from "../lib/Mime"; import Utils from "../Utils"; -/** - * - * - * @constant - * @default - * -const BODY_FILE_TYPE = { - "text/plain": "txt", - "text/html": "htm", - "application/rtf": "rtf", -} */ - class ParseIMF extends Operation { /** - * Internet MessageFormat constructor + * Internet Message Format constructor */ constructor() { super(); this.name = "Parse Internet Message Format"; this.module = "Default"; - this.description = ["Parser an IMF formatted messages following RFC5322.", + this.description = ["Parse an IMF formatted messages following RFC5322.", "

", - "Parses an IMF formated message. These often have the file extention ".eml"e; and contain the email headers and body. The output will be a file list of the headers and mime parts.", + "Parses an IMF formated message. These often have the file extention ".eml"e; and contain the email headers and body. The output will be a file list of the root header and decoded mime parts.", ].join("\n"); this.infoURL = "https://tools.ietf.org/html/rfc5322"; this.inputType = "string"; @@ -44,7 +28,7 @@ class ParseIMF extends Operation { this.presentType = "html"; this.args = [ { - "name": "Decode Quoted Words", + "name": "Decode Encoded-Words", "type": "boolean", "value": false } @@ -56,55 +40,6 @@ class ParseIMF extends Operation { return mimeObj.decodeMime(args[0]); } - /** - * Basic Email Parser that displays the header and mime sections as files. - * Args 0 boolean decode quoted words - * - * @param {string} input - * @param {Object[]} args - * @returns {File[]} - * - // NOTE: Liberties taken include: - // No checks are made to verify quoted words are valid encodings e.g. underscore vs escape - // This attempts to decode mime reguardless if it is \r\n (correct newline) or \n (incorrect) - // Both Base64 and QuotedPrintable is used for decode. UUEncode is not available right now - // and is a standardized encoding format. - run(input, args) { - // TODO Later: no uuencode function. See if we can fix this. - // TODO: content-type can be omitted and would mean us-ascii charset and text/plain. - if (!input) { - return []; - } - let emlObj = ParseIMF.splitParse(input); - if (!emlObj.body) { throw new OperationError("No body was found");} - if (args[0]) { - emlObj.rawHeader = ParseIMF.replaceDecodeWord(emlObj.rawHeader); - } - let retval = [new File([emlObj.rawHeader], "Header", {type: "text/plain"})]; - let retfiles = ParseIMF.walkMime(emlObj, input.indexOf("\r") >= 0); - retfiles.forEach(function(fileObj){ - let file = null; - if (fileObj.name !== null) { - file = new File([fileObj.data], fileObj.name, {type: fileObj.type}); - } else { - let name = null; - if ("subject" in emlObj.header) { - name = emlObj.header["subject"][0].concat("."); - } else { - name = "Undefined."; - } - if (fileObj.type in BODY_FILE_TYPE) { - name = name.concat(BODY_FILE_TYPE[fileObj.type]); - } else { - name = name.concat("bin"); - } - file = new File([fileObj.data], name, {type: fileObj.type}); - } - retval.push(file); - }); - return retval; - } */ - /** * Displays the files in HTML for web apps. * @@ -114,213 +49,6 @@ class ParseIMF extends Operation { async present(files) { return await Utils.displayFilesAsHTML(files); } - - /** - * Walks a MIME document and returns an array of Mime data and header objects. - * - * @param {string} input - * @param {object} header - * @returns {object[]} - * - static walkMime(parentObj, rn) { - let new_line_length = rn ? 2 : 1; - let contType = null, fileName = null, charEnc = null, contDispoObj = null; - if (parentObj.header.hasOwnProperty("content-type")) { - let contTypeObj = ParseIMF.decodeComplexField(parentObj.header["content-type"][0]); - if (parentObj.header.hasOwnProperty("content-disposition")) { - contDispoObj = ParseIMF.decodeComplexField(parentObj.header["content-disposition"][0]) - if (contDispoObj != null && contDispoObj.hasOwnProperty("filename")) { - fileName = contDispoObj.filename; - } - } - if (contTypeObj != null) { - if (contTypeObj.hasOwnProperty("value")) { - contType = contTypeObj.value[0]; - } - if (contTypeObj.hasOwnProperty("charset")) { - charEnc = contTypeObj.charset; - } - if (fileName == null && contTypeObj.hasOwnProperty("name")) { - fileName = contTypeObj.name; - } - } - if (contType.startsWith("multipart/")) { - let content_boundary = null; - let output_sections = []; - if (contTypeObj.hasOwnProperty("boundary")) { - content_boundary = contTypeObj.boundary; - } - let mime_parts = ParseIMF.splitMultipart(parentObj.body, content_boundary, new_line_length); - mime_parts.forEach(function(mime_part){ - let mimeObj = ParseIMF.splitParse(mime_part); - if (!mimeObj.body) { - return []; - } - let parts = ParseIMF.walkMime(mimeObj, rn); - parts.forEach(function(part){ - output_sections.push(part); - }); - }); - return output_sections; - } - if (parentObj.header.hasOwnProperty("content-transfer-encoding")) { - let contEncObj = ParseIMF.decodeComplexField(parentObj.header["content-transfer-encoding"][0]); - let contTran = null; - if (contEncObj != null && contEncObj.hasOwnProperty("value")) { - contTran = contEncObj.value[0]; - } - if (contTran != null) { - parentObj.body = ParseIMF.decodeMimeData(parentObj.body, charEnc, contTran); - } - } - return [{type: contType, data: parentObj.body, name: fileName}]; - } - throw new OperationError("Invalid Mime section"); - } - - /** - * Takes a string and decodes quoted words inside them - * These take the form of =?utf-8?Q?Hello?= - * - * @param {string} input - * @returns {string} - * - static replaceDecodeWord(input) { - return input.replace(/=\?([^?]+)\?(Q|B)\?([^?]+)\?=/g, function (a, charEnc, contEnc, input) { - contEnc = (contEnc === "B") ? "base64" : "quoted-printable"; - if (contEnc === "quoted-printable") { - input = input.replace(/_/g, " "); - } - return ParseIMF.decodeMimeData(input, charEnc, contEnc); - }); - } - - - /** - * Breaks the header from the body and parses the header. The returns an - * object or null. The object contains the raw header, decoded body, and - * parsed header object. - * - * @param {string} input - * @returns {object} - * - static splitParse(input) { - const emlRegex = /(?:\r?\n){2}/g; - let matchobj = emlRegex.exec(input); - if (matchobj) { - let splitEmail = [input.substring(0,matchobj.index), input.substring(emlRegex.lastIndex)]; - const sectionRegex = /([A-Za-z-]+):\s+([\x00-\xff]+?)(?=$|\r?\n\S)/g; - let headerObj = {}, section; - while ((section = sectionRegex.exec(splitEmail[0]))) { - let fieldName = section[1].toLowerCase(); - let fieldValue = ParseIMF.replaceDecodeWord(section[2].replace(/\n|\r/g, " ")); - if (fieldName in headerObj) { - headerObj[fieldName].push(fieldValue); - } else { - headerObj[fieldName] = [fieldValue]; - } - } - return {rawHeader:splitEmail[0], body: splitEmail[1], header: headerObj}; - } - return null; - } - - /** - * Return decoded MIME data given the character encoding and content encoding. - * - * @param {string} input - * @param {string} charEnc - * @param {string} contEnc - * @returns {string} - * - static decodeMimeData(input, charEnc, contEnc) { - switch (contEnc) { - case "base64": - input = fromBase64(input); - break; - case "quoted-printable": - input = Utils.byteArrayToUtf8(decodeQuotedPrintable(input)); - break; - } - if (charEnc && MIME_FORMAT.hasOwnProperty(charEnc.toLowerCase())) { - input = cptable.utils.decode(MIME_FORMAT[charEnc.toLowerCase()], input); - } - return input; - } - - /** - * Parses a complex header field and returns an object that contains - * normalized keys with corresponding values along with single values under - * a value array. - * - * @param {string} field - * @returns {object} - * - static decodeComplexField(field) { - let fieldSplit = field.split(/;\s+/g); - let retVal = {}; - fieldSplit.forEach(function(item){ - if (item.indexOf("=") >= 0) { - let eq = item.indexOf("="); - let kv = null; - if (item.length > eq) { - kv = [item.substring(0, eq), item.substring(eq + 1).trim()]; - } else { - throw OperationError("Not a valid header entry"); - } - if ((kv[1].startsWith("\'") && kv[1].endsWith("\'")) - || (kv[1].startsWith("\"") && kv[1].endsWith("\""))) { - kv[1] = (/(['"])(.+)\1/.exec(kv[1]))[2]; - } - retVal[kv[0].toLowerCase()] = kv[1]; - } else { - item = item.trim().toLowerCase(); - if (retVal.hasOwnProperty("value")) { - retVal.value.push(item); - } else { - retVal.value = [item]; - } - } - }); - return retVal; - } - - /** - * Splits a Mime document by the current boundaries and attempts to account - * for the current new line size which can be either the standard \r\n or \n. - * - * @param {string} input - * @param {string} boundary - * @param {string} new_line_length - * @return {string[]} - * - static splitMultipart(input, boundary, new_line_length) { - let output = []; - let newline = new_line_length === 2 ? "\r\n" : "\n"; - const boundary_str = "--".concat(boundary, newline); - let last = input.indexOf("--".concat(boundary, "--", newline)) - new_line_length; - if (last < 0) { - last = input.indexOf("--".concat(boundary, "--")) - new_line_length; - } - let start = 0; - while(true) { - let start = input.indexOf(boundary_str, start); - if (start >= 0) { - start = start + boundary_str.length; - } else { - break; - } - let end = input.indexOf(boundary_str, start) - new_line_length; - if (end > start) { - output.push(input.substring(start, end)); - } else { - output.push(input.substring(start, last)); - break; - } - start = end; - } - return output; - } */ } -export default ParseIMF +export default ParseIMF;