mirror of
https://github.com/gchq/CyberChef.git
synced 2025-05-09 15:55:01 -04:00
working on moving parsing to lib
This commit is contained in:
parent
a23c94cd76
commit
fa5d2b130f
2 changed files with 316 additions and 23 deletions
|
@ -0,0 +1,292 @@
|
||||||
|
/**
|
||||||
|
* @author bwhitn [brian.m.whitney@outlook.com]
|
||||||
|
* @copyright Crown Copyright 2016
|
||||||
|
* @license Apache-2.0
|
||||||
|
*/
|
||||||
|
|
||||||
|
import OperationError from "../errors/OperationError";
|
||||||
|
import cptable from "../vendor/js-codepage/cptable.js";
|
||||||
|
import {fromBase64} from "../lib/Base64";
|
||||||
|
import {decodeQuotedPrintable} from "../lib/QuotedPrintable";
|
||||||
|
import {MIME_FORMAT} from "../lib/ChrEnc";
|
||||||
|
import Utils from "../Utils";
|
||||||
|
|
||||||
|
/**
 * Maps the MIME content type of a message section to the file extension
 * that is appended to the generated file name when the section does not
 * carry a file name of its own. Content types missing from this table are
 * given the "bin" extension by Mime.decodeMime.
 *
 * @constant
 * @default
 */
const BODY_FILE_TYPE = {
    "text/plain": "txt",
    "text/html": "htm",
    "application/rtf": "rtf",
};
|
||||||
|
|
||||||
|
/**
 * Parser for Internet Message Format (email) text. It separates the message
 * header from the body, walks (possibly nested) multipart sections and
 * exposes the header and every leaf section as File objects.
 */
class Mime {
    /**
     * Internet MessageFormat constructor
     *
     * @param {string} input - raw message text
     */
    constructor(input) {
        this.input = input;
        // Remember whether the message uses "\r\n" or bare "\n" line endings;
        // multipart boundaries are located with the matching newline.
        // The type guard keeps empty/null input from throwing here so that
        // decodeMime can return its empty result instead.
        this.rn = typeof input === "string" && input.indexOf("\r") >= 0;
    }

    /**
     * Basic Email Parser that displays the header and mime sections as files.
     *
     * NOTE: Liberties taken include:
     * No checks are made to verify quoted words are valid encodings
     * e.g. underscore vs escape.
     * This attempts to decode mime regardless of whether the newline is
     * \r\n (correct) or \n (incorrect).
     * Both Base64 and QuotedPrintable are used for decoding. UUEncode is
     * not available right now even though it is a standardized encoding
     * format.
     *
     * @param {boolean} decodeWords - decode encoded words in the raw header file
     * @returns {File[]} the raw header first, followed by one file per section
     * @throws {OperationError} if the header cannot be separated from a body
     */
    decodeMime(decodeWords) {
        // TODO Later: no uuencode function. See if we can fix this.
        // TODO: content-type can be omitted and would mean us-ascii charset and text/plain.
        if (!this.input) {
            return [];
        }
        const emlObj = Mime._splitParse(this.input);
        // _splitParse returns null when no blank line separates header and body.
        if (!emlObj || !emlObj.body) {
            throw new OperationError("No body was found");
        }
        const rawHeader = decodeWords ?
            Mime.replaceEncodedWord(emlObj.rawHeader) :
            emlObj.rawHeader;
        const retval = [new File([rawHeader], "Header", {type: "text/plain"})];

        // Sections without a file name are named after the subject.
        const baseName = Mime._has(emlObj.header, "subject") ?
            emlObj.header.subject[0] :
            "Undefined";
        for (const fileObj of this._walkMime(emlObj)) {
            let name = fileObj.name;
            if (name === null) {
                const extension = Mime._has(BODY_FILE_TYPE, fileObj.type) ?
                    BODY_FILE_TYPE[fileObj.type] :
                    "bin";
                name = baseName.concat(".", extension);
            }
            retval.push(new File([fileObj.data], name, {type: fileObj.type}));
        }
        return retval;
    }

    /**
     * Walks a MIME document and returns an array of Mime data and header objects.
     *
     * @param {object} parentObj - a section as returned by _splitParse
     *     ({rawHeader, body, header})
     * @returns {object[]} one {type, data, name} object per leaf section;
     *     name is null when the section declares no file name
     * @throws {OperationError} if the section has no usable content-type
     */
    _walkMime(parentObj) {
        const header = parentObj.header;
        if (!Mime._has(header, "content-type")) {
            throw new OperationError("Invalid Mime section");
        }
        const contTypeObj = Mime._decodeComplexField(header["content-type"][0]);
        const contType = Array.isArray(contTypeObj.value) ? contTypeObj.value[0] : null;
        // A content-type header that only carries parameters has no media type.
        if (typeof contType !== "string") {
            throw new OperationError("Invalid Mime section");
        }
        const charEnc = Mime._has(contTypeObj, "charset") ? contTypeObj.charset : null;

        // content-disposition's filename wins over content-type's name.
        let fileName = null;
        if (Mime._has(header, "content-disposition")) {
            const contDispoObj = Mime._decodeComplexField(header["content-disposition"][0]);
            if (Mime._has(contDispoObj, "filename")) {
                fileName = contDispoObj.filename;
            }
        }
        if (fileName === null && Mime._has(contTypeObj, "name")) {
            fileName = contTypeObj.name;
        }

        if (contType.startsWith("multipart/")) {
            const boundary = Mime._has(contTypeObj, "boundary") ? contTypeObj.boundary : null;
            const newLineLength = this.rn ? 2 : 1;
            const outputSections = [];
            // Without a boundary _splitMultipart yields no parts, so the
            // section contributes no files.
            for (const mimePart of Mime._splitMultipart(parentObj.body, boundary, newLineLength)) {
                const mimeObj = Mime._splitParse(mimePart);
                // Parts that have no header/body separation or an empty body are skipped.
                if (!mimeObj || !mimeObj.body) {
                    continue;
                }
                outputSections.push(...this._walkMime(mimeObj));
            }
            return outputSections;
        }

        // Decode into a local so the caller's object is left untouched.
        let data = parentObj.body;
        if (Mime._has(header, "content-transfer-encoding")) {
            const contEncObj = Mime._decodeComplexField(header["content-transfer-encoding"][0]);
            const contTran = Array.isArray(contEncObj.value) ? contEncObj.value[0] : null;
            if (contTran != null) {
                data = Mime._decodeMimeData(data, charEnc, contTran);
            }
        }
        return [{type: contType, data: data, name: fileName}];
    }

    /**
     * Takes a string and decodes quoted words inside them
     * These take the form of =?utf-8?Q?Hello?=
     * The encoding letter is matched case-insensitively (Q/q or B/b).
     *
     * @param {string} input
     * @returns {string}
     */
    static replaceEncodedWord(input) {
        return input.replace(/=\?([^?]+)\?([QqBb])\?([^?]+)\?=/g, function (match, charEnc, contEnc, encodedText) {
            if (contEnc.toUpperCase() === "B") {
                return Mime._decodeMimeData(encodedText, charEnc, "base64");
            }
            // In "Q" encoded words an underscore stands for a space.
            return Mime._decodeMimeData(encodedText.replace(/_/g, " "), charEnc, "quoted-printable");
        });
    }

    /**
     * Breaks the header from the body at the first blank line and parses the
     * header fields. Field names are lower-cased, folded lines are joined
     * with spaces, encoded words in values are decoded, and repeated fields
     * are collected in order.
     *
     * @param {string} input
     * @returns {object} {rawHeader: string, body: string, header: object of
     *     string arrays keyed by field name}, or null when input contains no
     *     blank line
     */
    static _splitParse(input) {
        const separator = /(?:\r?\n){2}/.exec(input);
        if (!separator) {
            return null;
        }
        const rawHeader = input.substring(0, separator.index);
        const body = input.substring(separator.index + separator[0].length);

        // A field starts at the beginning of a line and runs until the next
        // line that does not start with whitespace (i.e. is not folded).
        const fieldRegex = /(?:^|\r?\n)([A-Za-z0-9-]+):[ \t]*([\s\S]*?)(?=$|\r?\n\S)/g;
        const headerObj = {};
        let field;
        while ((field = fieldRegex.exec(rawHeader)) !== null) {
            const fieldName = field[1].toLowerCase();
            const fieldValue = Mime.replaceEncodedWord(field[2].replace(/\n|\r/g, " "));
            if (Mime._has(headerObj, fieldName)) {
                headerObj[fieldName].push(fieldValue);
            } else {
                headerObj[fieldName] = [fieldValue];
            }
        }
        return {rawHeader: rawHeader, body: body, header: headerObj};
    }

    /**
     * Return decoded MIME data given the character encoding and content encoding.
     * Content encodings other than "base64" and "quoted-printable" leave the
     * data as it is. The character set is only applied when it is a key of
     * MIME_FORMAT.
     *
     * @param {string} input
     * @param {string} charEnc
     * @param {string} contEnc
     * @returns {string}
     */
    static _decodeMimeData(input, charEnc, contEnc) {
        let decoded = input;
        switch (contEnc) {
            case "base64":
                decoded = fromBase64(input);
                break;
            case "quoted-printable":
                decoded = Utils.byteArrayToUtf8(decodeQuotedPrintable(input));
                break;
            default:
                break;
        }
        const charset = charEnc ? String(charEnc).toLowerCase() : null;
        if (charset && Mime._has(MIME_FORMAT, charset)) {
            decoded = cptable.utils.decode(MIME_FORMAT[charset], decoded);
        }
        return decoded;
    }

    /**
     * Parses a complex header field and returns an object that contains
     * normalized keys with corresponding values along with single values under
     * a value array.
     *
     * Example: 'text/plain; charset="utf-8"' gives
     * {value: ["text/plain"], charset: "utf-8"}. Keys and single values are
     * lower-cased, keyed values keep their case and lose one pair of
     * surrounding quotes. Empty entries are ignored.
     *
     * @param {string} field
     * @returns {object}
     */
    static _decodeComplexField(field) {
        const retVal = {};
        for (const rawItem of Mime._splitParameters(field)) {
            const item = rawItem.trim();
            if (item === "") {
                continue;
            }
            const eq = item.indexOf("=");
            if (eq < 0) {
                const single = item.toLowerCase();
                if (Array.isArray(retVal.value)) {
                    retVal.value.push(single);
                } else {
                    retVal.value = [single];
                }
                continue;
            }
            const key = item.substring(0, eq).trim().toLowerCase();
            let value = item.substring(eq + 1).trim();
            const quote = value.charAt(0);
            // Strip one matching pair of quotes; the length check keeps a
            // lone quote character from being treated as a pair.
            if (value.length >= 2 && (quote === "'" || quote === "\"") && value.endsWith(quote)) {
                value = value.slice(1, -1);
            }
            retVal[key] = value;
        }
        return retVal;
    }

    /**
     * Splits a header field value on semicolons that are not inside a
     * double-quoted string. If the quotes are unbalanced every semicolon is
     * treated as a separator.
     *
     * @param {string} field
     * @returns {string[]}
     */
    static _splitParameters(field) {
        const items = [];
        let current = "";
        let inQuotes = false;
        for (let i = 0; i < field.length; i++) {
            const ch = field.charAt(i);
            if (inQuotes && ch === "\\" && i + 1 < field.length) {
                // Keep a backslash-escaped character, e.g. \" inside quotes.
                current += ch + field.charAt(i + 1);
                i++;
                continue;
            }
            if (ch === "\"") {
                inQuotes = !inQuotes;
            }
            if (ch === ";" && !inQuotes) {
                items.push(current);
                current = "";
                continue;
            }
            current += ch;
        }
        items.push(current);
        return inQuotes ? field.split(";") : items;
    }

    /**
     * Splits a Mime document by the current boundaries and attempts to account
     * for the current new line size which can be either the standard \r\n or \n.
     * Text before the first boundary line is dropped. When the closing
     * boundary is missing the last part runs to the end of the input.
     *
     * @param {string} input
     * @param {string} boundary
     * @param {number} new_line_length - 2 for \r\n, anything else for \n
     * @return {string[]} the text of each part without the newline that
     *     precedes the next boundary; empty when boundary is missing
     */
    static _splitMultipart(input, boundary, new_line_length) {
        const output = [];
        if (!input || !boundary) {
            return output;
        }
        const newline = new_line_length === 2 ? "\r\n" : "\n";
        const delimiter = "--".concat(boundary, newline);
        const closing = "--".concat(boundary, "--");

        let closeIndex = input.indexOf(closing.concat(newline));
        if (closeIndex < 0) {
            closeIndex = input.indexOf(closing);
        }
        // The newline in front of the closing boundary is not part of the data.
        const last = closeIndex >= 0 ? Math.max(closeIndex - new_line_length, 0) : input.length;

        let cursor = 0;
        for (;;) {
            const delimiterIndex = input.indexOf(delimiter, cursor);
            if (delimiterIndex < 0) {
                break;
            }
            const start = delimiterIndex + delimiter.length;
            const next = input.indexOf(delimiter, start);
            if (next < 0) {
                // No further boundary line: this is the final part.
                output.push(input.substring(start, Math.max(start, last)));
                break;
            }
            output.push(input.substring(start, Math.max(start, next - new_line_length)));
            cursor = next;
        }
        return output;
    }

    /**
     * Own-property test that is safe for objects whose keys come from the
     * parsed message.
     *
     * @param {object} obj
     * @param {string} key
     * @returns {boolean}
     */
    static _has(obj, key) {
        return obj != null && Object.prototype.hasOwnProperty.call(obj, key);
    }
}
|
||||||
|
|
||||||
|
export default Mime;
|
|
@ -10,21 +10,20 @@ import cptable from "../vendor/js-codepage/cptable.js";
|
||||||
import {fromBase64} from "../lib/Base64";
|
import {fromBase64} from "../lib/Base64";
|
||||||
import {decodeQuotedPrintable} from "../lib/QuotedPrintable";
|
import {decodeQuotedPrintable} from "../lib/QuotedPrintable";
|
||||||
import {MIME_FORMAT} from "../lib/ChrEnc";
|
import {MIME_FORMAT} from "../lib/ChrEnc";
|
||||||
|
import Mime from "../lib/Mime";
|
||||||
import Utils from "../Utils";
|
import Utils from "../Utils";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Return the conetent encoding for a mime section from a header object.
|
*
|
||||||
* CONTENT_TYPE returns the content type of a mime header from a header object.
|
*
|
||||||
* Returns the filename from a mime header object.
|
|
||||||
* Returns the boundary value for the mime section from a header object.
|
|
||||||
* @constant
|
* @constant
|
||||||
* @default
|
* @default
|
||||||
*/
|
*
|
||||||
const BODY_FILE_TYPE = {
|
const BODY_FILE_TYPE = {
|
||||||
"text/plain": "txt",
|
"text/plain": "txt",
|
||||||
"text/html": "htm",
|
"text/html": "htm",
|
||||||
"application/rtf": "rtf",
|
"application/rtf": "rtf",
|
||||||
}
|
} */
|
||||||
|
|
||||||
class ParseIMF extends Operation {
|
class ParseIMF extends Operation {
|
||||||
|
|
||||||
|
@ -52,6 +51,11 @@ class ParseIMF extends Operation {
|
||||||
];
|
];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
run(input, args) {
|
||||||
|
let mimeObj = new Mime(input);
|
||||||
|
return mimeObj.decodeMime(args[0]);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Basic Email Parser that displays the header and mime sections as files.
|
* Basic Email Parser that displays the header and mime sections as files.
|
||||||
* Args 0 boolean decode quoted words
|
* Args 0 boolean decode quoted words
|
||||||
|
@ -59,7 +63,7 @@ class ParseIMF extends Operation {
|
||||||
* @param {string} input
|
* @param {string} input
|
||||||
* @param {Object[]} args
|
* @param {Object[]} args
|
||||||
* @returns {File[]}
|
* @returns {File[]}
|
||||||
*/
|
*
|
||||||
// NOTE: Liberties taken include:
|
// NOTE: Liberties taken include:
|
||||||
// No checks are made to verify quoted words are valid encodings e.g. underscore vs escape
|
// No checks are made to verify quoted words are valid encodings e.g. underscore vs escape
|
||||||
// This attempts to decode mime reguardless if it is \r\n (correct newline) or \n (incorrect)
|
// This attempts to decode mime reguardless if it is \r\n (correct newline) or \n (incorrect)
|
||||||
|
@ -99,7 +103,7 @@ class ParseIMF extends Operation {
|
||||||
retval.push(file);
|
retval.push(file);
|
||||||
});
|
});
|
||||||
return retval;
|
return retval;
|
||||||
}
|
} */
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Displays the files in HTML for web apps.
|
* Displays the files in HTML for web apps.
|
||||||
|
@ -117,7 +121,7 @@ class ParseIMF extends Operation {
|
||||||
* @param {string} input
|
* @param {string} input
|
||||||
* @param {object} header
|
* @param {object} header
|
||||||
* @returns {object[]}
|
* @returns {object[]}
|
||||||
*/
|
*
|
||||||
static walkMime(parentObj, rn) {
|
static walkMime(parentObj, rn) {
|
||||||
let new_line_length = rn ? 2 : 1;
|
let new_line_length = rn ? 2 : 1;
|
||||||
let contType = null, fileName = null, charEnc = null, contDispoObj = null;
|
let contType = null, fileName = null, charEnc = null, contDispoObj = null;
|
||||||
|
@ -180,7 +184,7 @@ class ParseIMF extends Operation {
|
||||||
*
|
*
|
||||||
* @param {string} input
|
* @param {string} input
|
||||||
* @returns {string}
|
* @returns {string}
|
||||||
*/
|
*
|
||||||
static replaceDecodeWord(input) {
|
static replaceDecodeWord(input) {
|
||||||
return input.replace(/=\?([^?]+)\?(Q|B)\?([^?]+)\?=/g, function (a, charEnc, contEnc, input) {
|
return input.replace(/=\?([^?]+)\?(Q|B)\?([^?]+)\?=/g, function (a, charEnc, contEnc, input) {
|
||||||
contEnc = (contEnc === "B") ? "base64" : "quoted-printable";
|
contEnc = (contEnc === "B") ? "base64" : "quoted-printable";
|
||||||
|
@ -199,7 +203,7 @@ class ParseIMF extends Operation {
|
||||||
*
|
*
|
||||||
* @param {string} input
|
* @param {string} input
|
||||||
* @returns {object}
|
* @returns {object}
|
||||||
*/
|
*
|
||||||
static splitParse(input) {
|
static splitParse(input) {
|
||||||
const emlRegex = /(?:\r?\n){2}/g;
|
const emlRegex = /(?:\r?\n){2}/g;
|
||||||
let matchobj = emlRegex.exec(input);
|
let matchobj = emlRegex.exec(input);
|
||||||
|
@ -228,7 +232,7 @@ class ParseIMF extends Operation {
|
||||||
* @param {string} charEnc
|
* @param {string} charEnc
|
||||||
* @param {string} contEnc
|
* @param {string} contEnc
|
||||||
* @returns {string}
|
* @returns {string}
|
||||||
*/
|
*
|
||||||
static decodeMimeData(input, charEnc, contEnc) {
|
static decodeMimeData(input, charEnc, contEnc) {
|
||||||
switch (contEnc) {
|
switch (contEnc) {
|
||||||
case "base64":
|
case "base64":
|
||||||
|
@ -237,10 +241,6 @@ class ParseIMF extends Operation {
|
||||||
case "quoted-printable":
|
case "quoted-printable":
|
||||||
input = Utils.byteArrayToUtf8(decodeQuotedPrintable(input));
|
input = Utils.byteArrayToUtf8(decodeQuotedPrintable(input));
|
||||||
break;
|
break;
|
||||||
case "7bit":
|
|
||||||
case "8bit":
|
|
||||||
default:
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
if (charEnc && MIME_FORMAT.hasOwnProperty(charEnc.toLowerCase())) {
|
if (charEnc && MIME_FORMAT.hasOwnProperty(charEnc.toLowerCase())) {
|
||||||
input = cptable.utils.decode(MIME_FORMAT[charEnc.toLowerCase()], input);
|
input = cptable.utils.decode(MIME_FORMAT[charEnc.toLowerCase()], input);
|
||||||
|
@ -249,12 +249,13 @@ class ParseIMF extends Operation {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Parse a complex header field and return an object that contains normalized
|
* Parses a complex header field and returns an object that contains
|
||||||
* keys with corresponding values and single values under a value array.
|
* normalized keys with corresponding values along with single values under
|
||||||
|
* a value array.
|
||||||
*
|
*
|
||||||
* @param {string} field
|
* @param {string} field
|
||||||
* @returns {object}
|
* @returns {object}
|
||||||
*/
|
*
|
||||||
static decodeComplexField(field) {
|
static decodeComplexField(field) {
|
||||||
let fieldSplit = field.split(/;\s+/g);
|
let fieldSplit = field.split(/;\s+/g);
|
||||||
let retVal = {};
|
let retVal = {};
|
||||||
|
@ -285,14 +286,14 @@ class ParseIMF extends Operation {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Splits a Mime document by the current boundaries and try to account for
|
* Splits a Mime document by the current boundaries and attempts to account
|
||||||
* the current new line size which can be either the standard \r\n or \n.
|
* for the current new line size which can be either the standard \r\n or \n.
|
||||||
*
|
*
|
||||||
* @param {string} input
|
* @param {string} input
|
||||||
* @param {string} boundary
|
* @param {string} boundary
|
||||||
* @param {string} new_line_length
|
* @param {string} new_line_length
|
||||||
* @return {string[]}
|
* @return {string[]}
|
||||||
*/
|
*
|
||||||
static splitMultipart(input, boundary, new_line_length) {
|
static splitMultipart(input, boundary, new_line_length) {
|
||||||
let output = [];
|
let output = [];
|
||||||
let newline = new_line_length === 2 ? "\r\n" : "\n";
|
let newline = new_line_length === 2 ? "\r\n" : "\n";
|
||||||
|
@ -319,7 +320,7 @@ class ParseIMF extends Operation {
|
||||||
start = end;
|
start = end;
|
||||||
}
|
}
|
||||||
return output;
|
return output;
|
||||||
}
|
} */
|
||||||
}
|
}
|
||||||
|
|
||||||
export default ParseIMF
|
export default ParseIMF
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue