diff --git a/src/core/lib/Mime.mjs b/src/core/lib/Mime.mjs
index e69de29b..e813155f 100644
--- a/src/core/lib/Mime.mjs
+++ b/src/core/lib/Mime.mjs
@@ -0,0 +1,284 @@
+/**
+ * @author bwhitn [brian.m.whitney@outlook.com]
+ * @copyright Crown Copyright 2016
+ * @license Apache-2.0
+ */
+
+import OperationError from "../errors/OperationError";
+import cptable from "../vendor/js-codepage/cptable.js";
+import {fromBase64} from "../lib/Base64";
+import {decodeQuotedPrintable} from "../lib/QuotedPrintable";
+import {MIME_FORMAT} from "../lib/ChrEnc";
+import Utils from "../Utils";
+
+/**
+ * File extensions used to name a body section that carries no file name,
+ * keyed by the content type of that section.
+ *
+ * @constant
+ * @default
+ */
+const BODY_FILE_TYPE = {
+    "text/plain": "txt",
+    "text/html": "htm",
+    "application/rtf": "rtf",
+};
+
+/**
+ * Splits an Internet Message Format (email) document into its header and
+ * MIME sections.
+ */
+class Mime {
+    /**
+     * Internet MessageFormat constructor
+     *
+     * @param {string} input
+     */
+    constructor(input) {
+        this.input = input;
+        // Any carriage return means the message uses \r\n newlines rather than \n.
+        this.rn = Boolean(input) && input.indexOf("\r") >= 0;
+    }
+
+    /**
+     * Basic Email Parser that returns the header and mime sections as files.
+     *
+     * NOTE: Liberties taken include:
+     * No checks are made to verify quoted words are valid encodings e.g. underscore vs escape
+     * This attempts to decode mime regardless if it is \r\n (correct newline) or \n (incorrect)
+     * Both Base64 and QuotedPrintable is used for decode. UUEncode is not available right now
+     * and is a standardized encoding format.
+     *
+     * @param {boolean} decodeWords - decode encoded words found in the raw header
+     * @returns {File[]}
+     * @throws {OperationError} if the input has no header/body separator or no body
+     */
+    decodeMime(decodeWords) {
+        // TODO Later: no uuencode function. See if we can fix this.
+        // TODO: content-type can be omitted and would mean us-ascii charset and text/plain.
+        if (!this.input) {
+            return [];
+        }
+        const emlObj = Mime._splitParse(this.input);
+        // _splitParse returns null when no blank line separates header from body.
+        if (!emlObj || !emlObj.body) {
+            throw new OperationError("No body was found");
+        }
+        if (decodeWords) {
+            emlObj.rawHeader = Mime.replaceEncodedWord(emlObj.rawHeader);
+        }
+        const retval = [new File([emlObj.rawHeader], "Header", {type: "text/plain"})];
+        this._walkMime(emlObj).forEach(function(fileObj) {
+            let name = fileObj.name;
+            if (name === null) {
+                // Unnamed sections are named after the subject plus an extension for the type.
+                const base = "subject" in emlObj.header ? emlObj.header.subject[0] : "Undefined";
+                const ext = BODY_FILE_TYPE.hasOwnProperty(fileObj.type) ? BODY_FILE_TYPE[fileObj.type] : "bin";
+                name = base.concat(".", ext);
+            }
+            retval.push(new File([fileObj.data], name, {type: fileObj.type}));
+        });
+        return retval;
+    }
+
+    /**
+     * Walks a MIME document and returns an array of Mime data and header objects.
+     *
+     * @param {object} parentObj - section as returned by _splitParse (header, body)
+     * @returns {object[]} one {type, data, name} object per leaf section
+     * @throws {OperationError} if the section has no content-type header
+     */
+    _walkMime(parentObj) {
+        const header = parentObj.header;
+        if (!header.hasOwnProperty("content-type")) {
+            throw new OperationError("Invalid Mime section");
+        }
+        const contTypeObj = Mime._decodeComplexField(header["content-type"][0]);
+        // RFC 2045 default, used when the field carries no media type token.
+        let contType = "text/plain";
+        let fileName = null;
+        let charEnc = null;
+        if (header.hasOwnProperty("content-disposition")) {
+            const contDispoObj = Mime._decodeComplexField(header["content-disposition"][0]);
+            if (contDispoObj.hasOwnProperty("filename")) {
+                fileName = contDispoObj.filename;
+            }
+        }
+        if (contTypeObj.hasOwnProperty("value")) {
+            contType = contTypeObj.value[0];
+        }
+        if (contTypeObj.hasOwnProperty("charset")) {
+            charEnc = contTypeObj.charset;
+        }
+        if (fileName === null && contTypeObj.hasOwnProperty("name")) {
+            fileName = contTypeObj.name;
+        }
+        if (contType.startsWith("multipart/")) {
+            const boundary = contTypeObj.hasOwnProperty("boundary") ? contTypeObj.boundary : null;
+            const newLineLength = this.rn ? 2 : 1;
+            const sections = [];
+            Mime._splitMultipart(parentObj.body, boundary, newLineLength).forEach((mimePart) => {
+                const mimeObj = Mime._splitParse(mimePart);
+                // Skip parts that have no header/body separator or an empty body.
+                if (mimeObj && mimeObj.body) {
+                    sections.push(...this._walkMime(mimeObj));
+                }
+            });
+            return sections;
+        }
+        if (header.hasOwnProperty("content-transfer-encoding")) {
+            const contEncObj = Mime._decodeComplexField(header["content-transfer-encoding"][0]);
+            if (contEncObj.hasOwnProperty("value")) {
+                parentObj.body = Mime._decodeMimeData(parentObj.body, charEnc, contEncObj.value[0]);
+            }
+        }
+        return [{type: contType, data: parentObj.body, name: fileName}];
+    }
+
+    /**
+     * Takes a string and decodes quoted words inside them
+     * These take the form of =?utf-8?Q?Hello?=
+     *
+     * @param {string} input
+     * @returns {string}
+     */
+    static replaceEncodedWord(input) {
+        // RFC 2047: the encoding letter is case-independent, hence the i flag.
+        return input.replace(/=\?([^?]+)\?(Q|B)\?([^?]+)\?=/gi, function (a, charEnc, contEnc, text) {
+            if (contEnc.toUpperCase() === "B") {
+                return Mime._decodeMimeData(text, charEnc, "base64");
+            }
+            // In Q encoding an underscore stands for a space.
+            return Mime._decodeMimeData(text.replace(/_/g, " "), charEnc, "quoted-printable");
+        });
+    }
+
+    /**
+     * Breaks the header from the body and parses the header. Then returns an
+     * object or null. The object contains the raw header, the body, and the
+     * parsed header object.
+     *
+     * @param {string} input
+     * @returns {object} {rawHeader, body, header}, or null when no blank line is found
+     */
+    static _splitParse(input) {
+        const emlRegex = /(?:\r?\n){2}/g;
+        const matchobj = emlRegex.exec(input);
+        if (!matchobj) {
+            return null;
+        }
+        const rawHeader = input.substring(0, matchobj.index);
+        const body = input.substring(emlRegex.lastIndex);
+        const sectionRegex = /([A-Za-z-]+):\s+([\x00-\xff]+?)(?=$|\r?\n\S)/g;
+        const headerObj = {};
+        let section;
+        while ((section = sectionRegex.exec(rawHeader))) {
+            const fieldName = section[1].toLowerCase();
+            // Folded (multi-line) values are flattened onto one line before decoding.
+            const fieldValue = Mime.replaceEncodedWord(section[2].replace(/\n|\r/g, " "));
+            // Own-property test: a field called "constructor" must not hit Object.prototype.
+            if (headerObj.hasOwnProperty(fieldName)) {
+                headerObj[fieldName].push(fieldValue);
+            } else {
+                headerObj[fieldName] = [fieldValue];
+            }
+        }
+        return {rawHeader: rawHeader, body: body, header: headerObj};
+    }
+
+    /**
+     * Return decoded MIME data given the character encoding and content encoding.
+     *
+     * @param {string} input
+     * @param {string} charEnc
+     * @param {string} contEnc
+     * @returns {string}
+     */
+    static _decodeMimeData(input, charEnc, contEnc) {
+        // Any other transfer encoding (e.g. 7bit, 8bit) leaves the data untouched.
+        switch (contEnc) {
+            case "base64":
+                input = fromBase64(input);
+                break;
+            case "quoted-printable":
+                input = Utils.byteArrayToUtf8(decodeQuotedPrintable(input));
+                break;
+        }
+        if (charEnc && MIME_FORMAT.hasOwnProperty(charEnc.toLowerCase())) {
+            input = cptable.utils.decode(MIME_FORMAT[charEnc.toLowerCase()], input);
+        }
+        return input;
+    }
+
+    /**
+     * Parses a complex header field and returns an object that contains
+     * normalized keys with corresponding values along with single values under
+     * a value array.
+     *
+     * @param {string} field
+     * @returns {object}
+     */
+    static _decodeComplexField(field) {
+        const retVal = {};
+        field.split(/;\s+/g).forEach(function(item) {
+            const eq = item.indexOf("=");
+            if (eq >= 0) {
+                let value = item.substring(eq + 1).trim();
+                const quote = value.charAt(0);
+                // Strip one pair of matching quotes; length check keeps a lone quote intact.
+                if (value.length >= 2 && (quote === "'" || quote === "\"") && value.endsWith(quote)) {
+                    value = value.slice(1, -1);
+                }
+                retVal[item.substring(0, eq).toLowerCase()] = value;
+            } else {
+                const token = item.trim().toLowerCase();
+                if (retVal.hasOwnProperty("value")) {
+                    retVal.value.push(token);
+                } else {
+                    retVal.value = [token];
+                }
+            }
+        });
+        return retVal;
+    }
+
+    /**
+     * Splits a Mime document by the current boundaries and attempts to account
+     * for the current new line size which can be either the standard \r\n or \n.
+     *
+     * @param {string} input
+     * @param {string} boundary
+     * @param {number} new_line_length - 2 for \r\n newlines, anything else for \n
+     * @return {string[]}
+     */
+    static _splitMultipart(input, boundary, new_line_length) {
+        const output = [];
+        if (!input || !boundary) {
+            return output;
+        }
+        const newline = new_line_length === 2 ? "\r\n" : "\n";
+        const boundaryStr = "--".concat(boundary, newline);
+        const closeStr = "--".concat(boundary, "--");
+        // The final part ends just before the newline that precedes the closing
+        // delimiter; without a closing delimiter it runs to the end of the input.
+        let closeIdx = input.indexOf(closeStr.concat(newline));
+        if (closeIdx < new_line_length) {
+            closeIdx = input.indexOf(closeStr);
+        }
+        const last = closeIdx >= new_line_length ? closeIdx - new_line_length : input.length;
+        let cursor = input.indexOf(boundaryStr);
+        while (cursor >= 0) {
+            const start = cursor + boundaryStr.length;
+            const next = input.indexOf(boundaryStr, start);
+            if (next < 0) {
+                output.push(input.substring(start, Math.max(start, last)));
+                break;
+            }
+            output.push(input.substring(start, Math.max(start, next - new_line_length)));
+            cursor = next;
+        }
+        return output;
+    }
+}
+
+export default Mime;
diff --git a/src/core/operations/ParseIMF.mjs b/src/core/operations/ParseIMF.mjs
index d45910b1..8bd9a87f 100644
--- a/src/core/operations/ParseIMF.mjs
+++ b/src/core/operations/ParseIMF.mjs
@@ -10,21 +10,20 @@ import cptable from "../vendor/js-codepage/cptable.js";
 import {fromBase64} from "../lib/Base64";
 import {decodeQuotedPrintable} from "../lib/QuotedPrintable";
 import {MIME_FORMAT} from "../lib/ChrEnc";
+import Mime from "../lib/Mime";
 import Utils from "../Utils";
 
 /**
- * Return the conetent encoding for a mime section from a header object.
- * CONTENT_TYPE returns the content type of a mime header from a header object.
- * Returns the filename from a mime header object.
- * Returns the boundary value for the mime section from a header object.
+ *
+ *
  * @constant
  * @default
- */
+ *
 const BODY_FILE_TYPE = {
     "text/plain": "txt",
     "text/html": "htm",
     "application/rtf": "rtf",
-}
+} */
 
 class ParseIMF extends Operation {
 
@@ -52,6 +51,11 @@ class ParseIMF extends Operation {
         ];
     }
 
+    run(input, args) {
+        let mimeObj = new Mime(input);
+        return mimeObj.decodeMime(args[0]);
+    }
+
     /**
      * Basic Email Parser that displays the header and mime sections as files.
      * Args 0 boolean decode quoted words
@@ -59,7 +63,7 @@ class ParseIMF extends Operation {
      * @param {string} input
      * @param {Object[]} args
      * @returns {File[]}
-     */
+     *
     // NOTE: Liberties taken include:
     // No checks are made to verify quoted words are valid encodings e.g. underscore vs escape
     // This attempts to decode mime reguardless if it is \r\n (correct newline) or \n (incorrect)
@@ -99,7 +103,7 @@ class ParseIMF extends Operation {
             retval.push(file);
         });
         return retval;
-    }
+    } */
 
     /**
      * Displays the files in HTML for web apps.
@@ -117,7 +121,7 @@ class ParseIMF extends Operation {
      * @param {string} input
      * @param {object} header
      * @returns {object[]}
-     */
+     *
     static walkMime(parentObj, rn) {
         let new_line_length = rn ? 2 : 1;
         let contType = null, fileName = null, charEnc = null, contDispoObj = null;
@@ -180,7 +184,7 @@ class ParseIMF extends Operation {
      *
      * @param {string} input
      * @returns {string}
-     */
+     *
     static replaceDecodeWord(input) {
         return input.replace(/=\?([^?]+)\?(Q|B)\?([^?]+)\?=/g, function (a, charEnc, contEnc, input) {
             contEnc = (contEnc === "B") ? "base64" : "quoted-printable";
@@ -199,7 +203,7 @@ class ParseIMF extends Operation {
      *
      * @param {string} input
      * @returns {object}
-     */
+     *
     static splitParse(input) {
         const emlRegex = /(?:\r?\n){2}/g;
         let matchobj = emlRegex.exec(input);
@@ -228,7 +232,7 @@ class ParseIMF extends Operation {
      * @param {string} charEnc
      * @param {string} contEnc
      * @returns {string}
-     */
+     *
     static decodeMimeData(input, charEnc, contEnc) {
         switch (contEnc) {
             case "base64":
@@ -237,10 +241,6 @@ class ParseIMF extends Operation {
             case "quoted-printable":
                 input = Utils.byteArrayToUtf8(decodeQuotedPrintable(input));
                 break;
-            case "7bit":
-            case "8bit":
-            default:
-                break;
         }
         if (charEnc && MIME_FORMAT.hasOwnProperty(charEnc.toLowerCase())) {
             input = cptable.utils.decode(MIME_FORMAT[charEnc.toLowerCase()], input);
@@ -249,12 +249,13 @@ class ParseIMF extends Operation {
     }
 
     /**
-     * Parse a complex header field and return an object that contains normalized
-     * keys with corresponding values and single values under a value array.
+     * Parses a complex header field and returns an object that contains
+     * normalized keys with corresponding values along with single values under
+     * a value array.
      *
      * @param {string} field
      * @returns {object}
-     */
+     *
     static decodeComplexField(field) {
         let fieldSplit = field.split(/;\s+/g);
         let retVal = {};
@@ -285,14 +286,14 @@ class ParseIMF extends Operation {
     }
 
     /**
-     * Splits a Mime document by the current boundaries and try to account for
-     * the current new line size which can be either the standard \r\n or \n.
+     * Splits a Mime document by the current boundaries and attempts to account
+     * for the current new line size which can be either the standard \r\n or \n.
      *
      * @param {string} input
      * @param {string} boundary
      * @param {string} new_line_length
      * @return {string[]}
-     */
+     *
     static splitMultipart(input, boundary, new_line_length) {
         let output = [];
         let newline = new_line_length === 2 ? "\r\n" : "\n";
@@ -319,7 +320,7 @@ class ParseIMF extends Operation {
             start = end;
         }
         return output;
-    }
+    } */
 }
 
 export default ParseIMF