adding decode mime encoded words and removing duplicate code

This commit is contained in:
bwhitn 2018-11-27 22:46:08 -05:00
parent fa5d2b130f
commit e2ee627d09
3 changed files with 43 additions and 278 deletions

View file

@ -55,7 +55,8 @@
"To Braille", "To Braille",
"From Braille", "From Braille",
"Parse TLV", "Parse TLV",
"Parse Internet Message Format" "Parse Internet Message Format",
"Decode Mime Encoded Words"
] ]
}, },
{ {

View file

@ -0,0 +1,36 @@
/**
* @author bwhitn [brian.m.whitney@outlook.com]
* @copyright Crown Copyright 2016
* @license Apache-2.0
*/
import Operation from "../Operation";
import OperationError from "../errors/OperationError";
import Mime from "../lib/Mime";
import Utils from "../Utils";
/**
 * Decode Mime Encoded Words operation.
 *
 * Replaces MIME encoded-words (RFC 2047) found in the input with their
 * decoded text. All decoding is delegated to Mime.replaceEncodedWord.
 */
class DecodeMimeEncodedWords extends Operation {

    /**
     * DecodeMimeEncodedWords constructor
     */
    constructor() {
        super();

        this.name = "Decode Mime Encoded Words";
        this.module = "Default";
        // The description previously duplicated the "Parse Internet Message
        // Format" text (including the invalid "&quote;" entity); it now
        // describes what this operation actually does.
        this.description = [
            "Decodes MIME encoded-words following RFC 2047.",
            "<br><br>",
            "Encoded-words take the form <code>=?charset?encoding?encoded-text?=</code>, for example <code>=?utf-8?Q?Hello?=</code>. Each encoded-word found in the input is replaced with its decoded text.",
        ].join("\n");
        this.infoURL = "https://tools.ietf.org/html/rfc2047";
        this.inputType = "string";
        this.outputType = "string";
        // This operation takes no arguments.
        this.args = [];
    }

    /**
     * Decodes the encoded-words contained in the input.
     *
     * @param {string} input - text that may contain encoded-words
     * @param {Object[]} args - unused; the operation declares no arguments
     * @returns {string} the value returned by Mime.replaceEncodedWord
     */
    run(input, args) {
        return Mime.replaceEncodedWord(input);
    }

}
export default DecodeMimeEncodedWords;

View file

@ -6,37 +6,21 @@
import Operation from "../Operation"; import Operation from "../Operation";
import OperationError from "../errors/OperationError"; import OperationError from "../errors/OperationError";
import cptable from "../vendor/js-codepage/cptable.js";
import {fromBase64} from "../lib/Base64";
import {decodeQuotedPrintable} from "../lib/QuotedPrintable";
import {MIME_FORMAT} from "../lib/ChrEnc";
import Mime from "../lib/Mime"; import Mime from "../lib/Mime";
import Utils from "../Utils"; import Utils from "../Utils";
/**
*
*
* @constant
* @default
*
const BODY_FILE_TYPE = {
"text/plain": "txt",
"text/html": "htm",
"application/rtf": "rtf",
} */
class ParseIMF extends Operation { class ParseIMF extends Operation {
/** /**
* Internet MessageFormat constructor * Internet Message Format constructor
*/ */
constructor() { constructor() {
super(); super();
this.name = "Parse Internet Message Format"; this.name = "Parse Internet Message Format";
this.module = "Default"; this.module = "Default";
this.description = ["Parser an IMF formatted messages following RFC5322.", this.description = ["Parse an IMF formatted messages following RFC5322.",
"<br><br>", "<br><br>",
"Parses an IMF formated message. These often have the file extention &quot;.eml&quote; and contain the email headers and body. The output will be a file list of the headers and mime parts.", "Parses an IMF formated message. These often have the file extention &quot;.eml&quote; and contain the email headers and body. The output will be a file list of the root header and decoded mime parts.",
].join("\n"); ].join("\n");
this.infoURL = "https://tools.ietf.org/html/rfc5322"; this.infoURL = "https://tools.ietf.org/html/rfc5322";
this.inputType = "string"; this.inputType = "string";
@ -44,7 +28,7 @@ class ParseIMF extends Operation {
this.presentType = "html"; this.presentType = "html";
this.args = [ this.args = [
{ {
"name": "Decode Quoted Words", "name": "Decode Encoded-Words",
"type": "boolean", "type": "boolean",
"value": false "value": false
} }
@ -56,55 +40,6 @@ class ParseIMF extends Operation {
return mimeObj.decodeMime(args[0]); return mimeObj.decodeMime(args[0]);
} }
/**
* Basic Email Parser that displays the header and mime sections as files.
* Args 0 boolean decode quoted words
*
* @param {string} input
* @param {Object[]} args
* @returns {File[]}
*
// NOTE: Liberties taken include:
// No checks are made to verify quoted words are valid encodings e.g. underscore vs escape
// This attempts to decode mime reguardless if it is \r\n (correct newline) or \n (incorrect)
// Both Base64 and QuotedPrintable is used for decode. UUEncode is not available right now
// and is a standardized encoding format.
run(input, args) {
// TODO Later: no uuencode function. See if we can fix this.
// TODO: content-type can be omitted and would mean us-ascii charset and text/plain.
if (!input) {
return [];
}
let emlObj = ParseIMF.splitParse(input);
if (!emlObj.body) { throw new OperationError("No body was found");}
if (args[0]) {
emlObj.rawHeader = ParseIMF.replaceDecodeWord(emlObj.rawHeader);
}
let retval = [new File([emlObj.rawHeader], "Header", {type: "text/plain"})];
let retfiles = ParseIMF.walkMime(emlObj, input.indexOf("\r") >= 0);
retfiles.forEach(function(fileObj){
let file = null;
if (fileObj.name !== null) {
file = new File([fileObj.data], fileObj.name, {type: fileObj.type});
} else {
let name = null;
if ("subject" in emlObj.header) {
name = emlObj.header["subject"][0].concat(".");
} else {
name = "Undefined.";
}
if (fileObj.type in BODY_FILE_TYPE) {
name = name.concat(BODY_FILE_TYPE[fileObj.type]);
} else {
name = name.concat("bin");
}
file = new File([fileObj.data], name, {type: fileObj.type});
}
retval.push(file);
});
return retval;
} */
/** /**
* Displays the files in HTML for web apps. * Displays the files in HTML for web apps.
* *
@ -114,213 +49,6 @@ class ParseIMF extends Operation {
async present(files) { async present(files) {
return await Utils.displayFilesAsHTML(files); return await Utils.displayFilesAsHTML(files);
} }
/**
* Walks a MIME document and returns an array of Mime data and header objects.
*
* @param {string} input
* @param {object} header
* @returns {object[]}
*
static walkMime(parentObj, rn) {
let new_line_length = rn ? 2 : 1;
let contType = null, fileName = null, charEnc = null, contDispoObj = null;
if (parentObj.header.hasOwnProperty("content-type")) {
let contTypeObj = ParseIMF.decodeComplexField(parentObj.header["content-type"][0]);
if (parentObj.header.hasOwnProperty("content-disposition")) {
contDispoObj = ParseIMF.decodeComplexField(parentObj.header["content-disposition"][0])
if (contDispoObj != null && contDispoObj.hasOwnProperty("filename")) {
fileName = contDispoObj.filename;
}
}
if (contTypeObj != null) {
if (contTypeObj.hasOwnProperty("value")) {
contType = contTypeObj.value[0];
}
if (contTypeObj.hasOwnProperty("charset")) {
charEnc = contTypeObj.charset;
}
if (fileName == null && contTypeObj.hasOwnProperty("name")) {
fileName = contTypeObj.name;
}
}
if (contType.startsWith("multipart/")) {
let content_boundary = null;
let output_sections = [];
if (contTypeObj.hasOwnProperty("boundary")) {
content_boundary = contTypeObj.boundary;
}
let mime_parts = ParseIMF.splitMultipart(parentObj.body, content_boundary, new_line_length);
mime_parts.forEach(function(mime_part){
let mimeObj = ParseIMF.splitParse(mime_part);
if (!mimeObj.body) {
return [];
}
let parts = ParseIMF.walkMime(mimeObj, rn);
parts.forEach(function(part){
output_sections.push(part);
});
});
return output_sections;
}
if (parentObj.header.hasOwnProperty("content-transfer-encoding")) {
let contEncObj = ParseIMF.decodeComplexField(parentObj.header["content-transfer-encoding"][0]);
let contTran = null;
if (contEncObj != null && contEncObj.hasOwnProperty("value")) {
contTran = contEncObj.value[0];
}
if (contTran != null) {
parentObj.body = ParseIMF.decodeMimeData(parentObj.body, charEnc, contTran);
}
}
return [{type: contType, data: parentObj.body, name: fileName}];
}
throw new OperationError("Invalid Mime section");
}
/**
* Takes a string and decodes quoted words inside them
* These take the form of =?utf-8?Q?Hello?=
*
* @param {string} input
* @returns {string}
*
static replaceDecodeWord(input) {
return input.replace(/=\?([^?]+)\?(Q|B)\?([^?]+)\?=/g, function (a, charEnc, contEnc, input) {
contEnc = (contEnc === "B") ? "base64" : "quoted-printable";
if (contEnc === "quoted-printable") {
input = input.replace(/_/g, " ");
}
return ParseIMF.decodeMimeData(input, charEnc, contEnc);
});
}
/**
* Breaks the header from the body and parses the header. The returns an
* object or null. The object contains the raw header, decoded body, and
* parsed header object.
*
* @param {string} input
* @returns {object}
*
static splitParse(input) {
const emlRegex = /(?:\r?\n){2}/g;
let matchobj = emlRegex.exec(input);
if (matchobj) {
let splitEmail = [input.substring(0,matchobj.index), input.substring(emlRegex.lastIndex)];
const sectionRegex = /([A-Za-z-]+):\s+([\x00-\xff]+?)(?=$|\r?\n\S)/g;
let headerObj = {}, section;
while ((section = sectionRegex.exec(splitEmail[0]))) {
let fieldName = section[1].toLowerCase();
let fieldValue = ParseIMF.replaceDecodeWord(section[2].replace(/\n|\r/g, " "));
if (fieldName in headerObj) {
headerObj[fieldName].push(fieldValue);
} else {
headerObj[fieldName] = [fieldValue];
}
}
return {rawHeader:splitEmail[0], body: splitEmail[1], header: headerObj};
}
return null;
}
/**
* Return decoded MIME data given the character encoding and content encoding.
*
* @param {string} input
* @param {string} charEnc
* @param {string} contEnc
* @returns {string}
*
static decodeMimeData(input, charEnc, contEnc) {
switch (contEnc) {
case "base64":
input = fromBase64(input);
break;
case "quoted-printable":
input = Utils.byteArrayToUtf8(decodeQuotedPrintable(input));
break;
}
if (charEnc && MIME_FORMAT.hasOwnProperty(charEnc.toLowerCase())) {
input = cptable.utils.decode(MIME_FORMAT[charEnc.toLowerCase()], input);
}
return input;
}
/**
* Parses a complex header field and returns an object that contains
* normalized keys with corresponding values along with single values under
* a value array.
*
* @param {string} field
* @returns {object}
*
static decodeComplexField(field) {
let fieldSplit = field.split(/;\s+/g);
let retVal = {};
fieldSplit.forEach(function(item){
if (item.indexOf("=") >= 0) {
let eq = item.indexOf("=");
let kv = null;
if (item.length > eq) {
kv = [item.substring(0, eq), item.substring(eq + 1).trim()];
} else {
throw OperationError("Not a valid header entry");
}
if ((kv[1].startsWith("\'") && kv[1].endsWith("\'"))
|| (kv[1].startsWith("\"") && kv[1].endsWith("\""))) {
kv[1] = (/(['"])(.+)\1/.exec(kv[1]))[2];
}
retVal[kv[0].toLowerCase()] = kv[1];
} else {
item = item.trim().toLowerCase();
if (retVal.hasOwnProperty("value")) {
retVal.value.push(item);
} else {
retVal.value = [item];
}
}
});
return retVal;
}
/**
* Splits a Mime document by the current boundaries and attempts to account
* for the current new line size which can be either the standard \r\n or \n.
*
* @param {string} input
* @param {string} boundary
* @param {string} new_line_length
* @return {string[]}
*
static splitMultipart(input, boundary, new_line_length) {
let output = [];
let newline = new_line_length === 2 ? "\r\n" : "\n";
const boundary_str = "--".concat(boundary, newline);
let last = input.indexOf("--".concat(boundary, "--", newline)) - new_line_length;
if (last < 0) {
last = input.indexOf("--".concat(boundary, "--")) - new_line_length;
}
let start = 0;
while(true) {
let start = input.indexOf(boundary_str, start);
if (start >= 0) {
start = start + boundary_str.length;
} else {
break;
}
let end = input.indexOf(boundary_str, start) - new_line_length;
if (end > start) {
output.push(input.substring(start, end));
} else {
output.push(input.substring(start, last));
break;
}
start = end;
}
return output;
} */
} }
export default ParseIMF export default ParseIMF;