/**
* @author bwhitn [brian.m.whitney@outlook.com]
* @copyright Crown Copyright 2016
* @license Apache-2.0
*/
import Operation from "../Operation";
import OperationError from "../errors/OperationError";
import cptable from "../vendor/js-codepage/cptable.js";
import {fromBase64} from "../lib/Base64";
import {decodeQuotedPrintable} from "../lib/QuotedPrintable";
import {MIME_FORMAT} from "../lib/ChrEnc";
import Utils from "../Utils";
/**
 * Extractors for individual items of a parsed header object.
 *
 * Each entry is a pair `[pattern, fieldName]`: `fieldName` is the lower-cased
 * header field to look in and the first capture group of `pattern` holds the
 * extracted item.
 *
 *  - FILENAME:         file name given in a content-disposition field.
 *  - CONTENT_TYPE:     media type (first token) of a content-type field.
 *  - BOUNDARY:         multipart boundary of a content-type field, quoted or not.
 *  - CHARSET:          charset parameter of a content-type field, quoted or not.
 *  - TRANSER_ENCODING: value of a content-transfer-encoding field. The key is
 *                      misspelled ("TRANSER"); it is kept as-is so existing
 *                      references keep working.
 *
 * @constant
 * @default
 */
const IMF_FIELD_ITEM = {
    FILENAME: [/filename=".*?([^~#%&*\][\\:<>?/|]+)"/, "content-disposition"],
    CONTENT_TYPE: [/\s*([^;\s]+)/, "content-type"],
    // The quotes are optional and parameter names are case-insensitive. A ";" is
    // not a legal boundary character, so it always terminates the value.
    BOUNDARY: [/boundary="?(.+?)(?:"|;|\s*$)/i, "content-type"],
    // Charset names are case-insensitive, may be quoted and may contain
    // characters such as "_" (e.g. Shift_JIS).
    CHARSET: [/charset="?([\w.:+-]+)"?/i, "content-type"],
    TRANSER_ENCODING: [/\s*([A-Za-z0-9-]+)\s*/, "content-transfer-encoding"],
};
/**
 * Lookup table of content-transfer-encoding decoders, keyed by the
 * lower-case encoding name. Every decoder takes the encoded input and
 * returns the decoded data.
 *
 * @constant
 * @default
 */
// TODO: should 8 bit and 7 bit be treated the same?
const IMF_DECODER = {
    "base64": (encoded) => fromBase64(encoded),
    "quoted-printable": (encoded) => {
        const rawBytes = decodeQuotedPrintable(encoded);
        return Utils.byteArrayToUtf8(rawBytes);
    },
    // 7bit and 8bit carry no transformation: the data is handed back untouched.
    "7bit": (plain) => plain,
    "8bit": (plain) => plain,
};
/**
 * Parse Internet Message Format operation.
 *
 * Splits a message into its header and body and provides static helpers for
 * parsing header fields, decoding encoded words and walking multipart bodies.
 */
class ParseIMF extends Operation {

    /**
     * Internet MessageFormat constructor
     */
    constructor() {
        super();
        this.name = "Parse Internet Message Format";
        this.module = "Default";
        // NOTE(review): the second and third entries were unterminated string
        // literals. They are restored here assuming the description is HTML
        // ("<br><br>" and "&quot;") - confirm against how descriptions are rendered.
        this.description = ["Parser an IMF formatted messages following RFC5322.",
            "<br><br>",
            "Parses an IMF formated message. These often have the file extention &quot;.eml&quot; and contain the email headers and body. The output will be a file list of the headers and mime parts.",
        ].join("\n");
        this.infoURL = "https://tools.ietf.org/html/rfc5322";
        this.inputType = "string";
        this.outputType = "List";
        this.presentType = "html";
        this.args = [
            {
                "name": "Decode Quoted Words",
                "type": "boolean",
                "value": false
            }
        ];
    }

    /**
     * Basic Email Parser that displays the header and mime sections as files.
     * Args 0 boolean decode quoted words
     *
     * Returns one file for the header and one for the body, named "test0" and
     * "test1". An empty input yields an empty list.
     *
     * @param {string} input
     * @param {Object[]} args
     * @returns {File[]}
     */
    run(input, args) {
        if (!input) {
            return [];
        }
        const sections = ParseIMF.splitHeaderFromBody(input);
        if (args[0]) {
            // Only the header (index 0) may contain encoded words.
            sections[0] = ParseIMF.replaceDecodeWord(sections[0]);
        }
        return sections.map(function (section, index) {
            return new File([section], "test" + String(index), {type: "text/plain"});
        });
    }

    /**
     * Displays the files in HTML for web apps.
     *
     * @param {File[]} files
     * @returns {html}
     */
    async present(files) {
        return await Utils.displayFilesAsHTML(files);
    }

    /**
     * Walks a MIME document and returns an array of Mime data and header objects.
     *
     * When the header declares a multipart boundary the body is cut at the
     * delimiter lines ("--boundary"), each part is split into header and body
     * and walked recursively. Text before the first delimiter and after the
     * closing delimiter ("--boundary--") is ignored. A section without a
     * boundary, or whose boundary never occurs in the body, is returned as a
     * single entry. The data is returned as found, without transfer decoding.
     *
     * @param {string} input - body of the section
     * @param {object} header - parsed header of the section (see parseHeader)
     * @returns {object[]} list of {header: object, data: string} entries
     */
    static walkMime(input, header) {
        const boundary = ParseIMF.getHeaderItem(header, IMF_FIELD_ITEM.BOUNDARY);
        if (boundary === undefined) {
            return [{header: header, data: input}];
        }
        // The boundary is arbitrary text, so escape it before building a pattern.
        const escapedBoundary = boundary.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
        // A delimiter occupies a whole line. Requiring the end of line stops a
        // boundary from matching a longer one that merely starts with it. The
        // line break in front belongs to the delimiter, not to the part before it.
        const delimiter = new RegExp("(?:^|\\r?\\n)--" + escapedBoundary + "(--)?[ \\t]*(?=\\r?\\n|$)", "g");
        let output = [];
        const addPart = function (part) {
            // Drop the line break that ends the delimiter line.
            const headerBody = ParseIMF.splitHeaderFromBody(part.replace(/^\r?\n/, ""));
            output = output.concat(ParseIMF.walkMime(headerBody[1], ParseIMF.parseHeader(headerBody[0])));
        };
        let partStart = -1;
        let closed = false;
        let match;
        while (!closed && (match = delimiter.exec(input))) {
            if (partStart >= 0) {
                addPart(input.slice(partStart, match.index));
            }
            closed = Boolean(match[1]);
            partStart = delimiter.lastIndex;
        }
        if (partStart < 0) {
            // The declared boundary never occurs: treat the section as a leaf.
            return [{header: header, data: input}];
        }
        if (!closed) {
            // Closing delimiter missing (e.g. truncated message): keep the tail.
            addPart(input.slice(partStart));
        }
        return output;
    }

    /**
     * Breaks the header from the body and returns [header, body]
     *
     * The header ends at the first empty line. An input that starts with a line
     * break has an empty header; an input without an empty line is all header
     * and gets an empty body.
     *
     * @param {string} input
     * @returns {string[]}
     */
    static splitHeaderFromBody(input) {
        const separator = /^\r?\n|\r?\n\r?\n/.exec(input);
        if (!separator) {
            return [input, ""];
        }
        return [
            input.slice(0, separator.index),
            input.slice(separator.index + separator[0].length)
        ];
    }

    /**
     * Takes a string and decodes quoted words inside them
     * These take the form of =?utf-8?Q?Hello?=
     *
     * The encoding letter (Q or B) is matched case-insensitively. A word whose
     * character set or encoding is not supported is left as it is.
     *
     * @param {string} input
     * @returns {string}
     */
    static replaceDecodeWord(input) {
        return input.replace(/=\?([^?]+)\?(Q|B)\?([^?]+)\?=/gi, function (encodedWord, charEnc, contEnc, text) {
            const isBase64 = contEnc.toUpperCase() === "B";
            // In the Q encoding an underscore stands for a space.
            const payload = isBase64 ? text : text.replace(/_/g, " ");
            try {
                return ParseIMF.decodeMimeData(payload, charEnc, isBase64 ? "base64" : "quoted-printable");
            } catch (err) {
                if (err instanceof OperationError) {
                    // Best effort: an undecodable word must not abort the whole header.
                    return encodedWord;
                }
                throw err;
            }
        });
    }

    /**
     * Breaks a header into a object to be used by other functions.
     * It removes any line feeds or carriage returns from the values and
     * replaces it with a space.
     *
     * Field names are lower-cased and each name maps to the list of its values
     * in order of appearance.
     *
     * @param {string} input
     * @returns {object}
     */
    static parseHeader(input) {
        // A field name must start a line and may contain digits. The value runs
        // lazily up to the end of input or to a line break followed by a
        // non-space character, so folded continuation lines stay in the value.
        const sectionRegex = /(?:^|\n)([A-Z0-9-]+):\s+([\x20-\x7e\r\n\t]+?)(?=$|\r?\n\S)/gi;
        const header = {};
        let section;
        while ((section = sectionRegex.exec(input))) {
            const fieldName = section[1].toLowerCase();
            const fieldValue = section[2].replace(/\n|\r/g, " ");
            // Own-property check: a field called "constructor" must not hit the
            // inherited Object.prototype member.
            if (Object.prototype.hasOwnProperty.call(header, fieldName)) {
                header[fieldName].push(fieldValue);
            } else {
                header[fieldName] = [fieldValue];
            }
        }
        return header;
    }

    /**
     * Return decoded MIME data given the character encoding and content encoding.
     *
     * The content encoding is looked up case-insensitively and defaults to
     * "7bit" when it is not given. The character set is only applied when
     * charEnc is given.
     *
     * @param {string} input
     * @param {string} charEnc
     * @param {string} contEnc
     * @returns {string}
     * @throws {OperationError} if the content encoding or character set is not supported
     */
    static decodeMimeData(input, charEnc, contEnc) {
        const decoderName = String(contEnc || "7bit").toLowerCase();
        if (!Object.prototype.hasOwnProperty.call(IMF_DECODER, decoderName)) {
            throw new OperationError("Unsupported content transfer encoding: " + contEnc);
        }
        let output = IMF_DECODER[decoderName](input);
        if (charEnc) {
            const codepage = MIME_FORMAT[charEnc.toLowerCase()];
            if (codepage === undefined) {
                throw new OperationError("Unsupported character encoding: " + charEnc);
            }
            output = cptable.utils.decode(codepage, output);
        }
        return output;
    }

    /**
     * Returns a header item given a header object, itemName, and index number.
     *
     * @param {object} header - parsed header (see parseHeader)
     * @param {Array} fieldItem - [pattern, fieldName] pair, e.g. an IMF_FIELD_ITEM entry
     * @param {integer} fieldNum - which occurrence of the field to read
     * @returns {string} first capture group of the pattern, or undefined when
     *     the field is missing or the pattern does not match
     */
    static getHeaderItem(header, fieldItem, fieldNum = 0) {
        const pattern = fieldItem[0];
        const fieldName = fieldItem[1];
        if (!Object.prototype.hasOwnProperty.call(header, fieldName) || header[fieldName].length <= fieldNum) {
            return undefined;
        }
        const item = pattern.exec(header[fieldName][fieldNum]);
        return item ? item[1] : undefined;
    }
}
export default ParseIMF