Start of eml

2025-07-03 11:22:22 -04:00 · 2018-11-20 06:40:43 -05:00 · 2018-11-20 06:40:43 -05:00 · 7ae0b08b4d
commit 7ae0b08b4d
parent 42b956e402
3 changed files with 230 additions and 1 deletions
--- a/src/core/config/Categories.json
+++ b/src/core/config/Categories.json
@ -54,7 +54,8 @@
            "From MessagePack",
            "To Braille",
            "From Braille",
-            "Parse TLV"
+            "Parse TLV",
            "Parse Internet Message Format"
        ]
    },
    {
--- a/src/core/lib/ChrEnc.mjs
+++ b/src/core/lib/ChrEnc.mjs
@ -56,3 +56,50 @@ export const IO_FORMAT = {
    "Simplified Chinese GB18030 (54936)": 54936,
 };
 /**
 * Preferred MIME encoding format mappings.
 *
 * Maps a lower-case MIME charset label to the numeric code page identifier
 * used for decoding. Look-ups must lower-case the label first.
 *
 * "euc-jp" and "iso-2022-jp" are the registered MIME names for the code pages
 * that were previously only reachable through the non-standard "x-euc" and
 * "iso-2022" labels; the older labels are kept for backward compatibility.
 */
 export const MIME_FORMAT = {
    "utf-8": 65001,
    "utf-7": 65000,
    "unicode": 1200,
    "ibm500": 500,
    "ebcdic-cp-us": 37,
    "windows-874": 874,
    "shift_jis": 932,
    "gbk": 936,
    "gb2312": 936,
    "ks_c_5601-1987": 949,
    "big5": 950,
    "windows-1250": 1250,
    "windows-1251": 1251,
    "windows-1252": 1252,
    "windows-1253": 1253,
    "windows-1254": 1254,
    "windows-1255": 1255,
    "windows-1256": 1256,
    "windows-1257": 1257,
    "windows-1258": 1258,
    "us-ascii": 20127,
    "koi8-r": 20866,
    "koi8-u": 21866,
    "iso-8859-1": 28591,
    "iso-8859-2": 28592,
    "iso-8859-3": 28593,
    "iso-8859-4": 28594,
    "iso-8859-5": 28595,
    "iso-8859-6": 28596,
    "iso-8859-7": 28597,
    "iso-8859-8": 28598,
    "iso-8859-9": 28599,
    "iso-8859-10": 28600,
    "iso-8859-11": 28601,
    "iso-8859-13": 28603,
    "iso-8859-14": 28604,
    "iso-8859-15": 28605,
    "iso-8859-16": 28606,
    "iso-2022": 50222,
    "iso-2022-jp": 50222,
    "x-euc": 51932,
    "euc-jp": 51932,
    "euc-kr": 51949,
    "gb18030": 54936,
 };
--- a/src/core/operations/ParseIMF.mjs
+++ b/src/core/operations/ParseIMF.mjs
@ -0,0 +1,181 @@
 /**
 * @author bwhitn [brian.m.whitney@outlook.com]
 * @copyright Crown Copyright 2016
 * @license Apache-2.0
 */
 import Operation from "../Operation";
 import OperationError from "../errors/OperationError";
 import cptable from "../vendor/js-codepage/cptable.js";
 import {fromBase64} from "../lib/Base64";
 import {MIME_FORMAT} from "../lib/ChrEnc";
 /**
  * Extraction patterns for individual items inside parsed MIME header fields.
  *
  * Every entry is a pair [pattern, fieldName]: fieldName is the lower-case
  * header field the pattern is applied to, and capture group 1 of the pattern
  * is the extracted item.
  *
  * FILENAME          - last path component of a quoted filename parameter.
  * CONTENT_TYPE      - the media type (everything before the first ";").
  * BOUNDARY          - the quoted multipart boundary value.
  * CHARSET           - the charset parameter, quoted or unquoted, in any case.
  * TRANSFER_ENCODING - the content transfer encoding token.
  * TRANSER_ENCODING  - misspelled legacy key, kept so existing users still work.
  *
  * Parameter names are matched case-insensitively because MIME parameter
  * names are not case-sensitive.
  *
  * @constant
  * @default
  */
 const FIELD_ITEM = {
     FILENAME: [/filename=".*?([^~#%&*\][\\:<>?/|]+)"/i, "content-disposition"],
     CONTENT_TYPE: [/\s*([^;\s]+)/, "content-type"],
     BOUNDARY: [/boundary="(.+?)"/i, "content-type"],
     // Accept optional quotes, upper case and "_" so that values such as
     // charset="UTF-8" or charset=Shift_JIS are captured in full.
     CHARSET: [/charset="?([a-z0-9_.:+-]+)"?/i, "content-type"],
     TRANSFER_ENCODING: [/\s*([A-Za-z0-9-]+)\s*/, "content-transfer-encoding"],
     TRANSER_ENCODING: [/\s*([A-Za-z0-9-]+)\s*/, "content-transfer-encoding"],
 };
 /**
  * Content-Transfer-Encoding decoders, keyed by lower-case encoding name.
  * Each decoder takes the encoded text and returns the decoded data as a
  * string.
  *
  * "7bit", "8bit" and "binary" are identity encodings (RFC 2045 section 6.2),
  * so they are all passed through unchanged.
  *
  * @constant
  * @default
  */
 const DECODER = {
     "base64": function (input) {
         // Uses the fromBase64 helper imported at the top of this file.
         // NOTE(review): the alphabet argument assumes lib/Base64 accepts its
         // usual range notation for the standard alphabet - confirm.
         return fromBase64(input, "A-Za-z0-9+/=", "string", true);
     },
     "quoted-printable": function (input) {
         return input
             // A "=" at the end of a line is a soft line break: drop it.
             .replace(/=\r?\n/g, "")
             // "=XX" is one byte written as two hexadecimal digits.
             .replace(/=([0-9A-Fa-f]{2})/g, function (match, hex) {
                 return String.fromCharCode(parseInt(hex, 16));
             });
     },
     "7bit": function (input) {
         return input;
     },
     "8bit": function (input) {
         return input;
     },
     "binary": function (input) {
         return input;
     },
 };
 /**
  * Parse Internet Message Format operation.
  *
  * Splits an RFC 5322 message into header and body and outputs the parsed
  * header fields as JSON.
  */
 class ParseIMF extends Operation {

     /**
      * ParseIMF constructor
      */
     constructor() {
         super();

         this.name = "Parse Internet Message Format";
         this.module = "Default";
         this.description = [
             "Parses an IMF formatted message following RFC5322.",
             "<br><br>",
             "Parses an IMF formatted message. These often have the file extension &quot;.eml&quot; and contain the email headers and body. The output will be a file list of the headers and mime parts.",
         ].join("\n");
         this.infoURL = "https://tools.ietf.org/html/rfc5322";
         this.inputType = "string";
         this.outputType = "string";
         this.args = [];
     }

     /**
      * Basic email parser: returns the header fields of the message as JSON.
      *
      * The output maps each lower-case field name to the array of values seen
      * for that field. When args[0] is truthy, MIME encoded words
      * (=?charset?Q|B?text?=) inside the values are decoded.
      *
      * @param {string} input
      * @param {Object[]} args
      * @returns {string}
      * @throws {OperationError} if no header/body separator can be found
      */
     run(input, args) {
         if (!input) {
             return "";
         }

         const headerBody = this.splitHeaderFromBody(input);
         if (!headerBody) {
             throw new OperationError("Unable to find the end of the message header.");
         }

         const header = this.parseHeader(headerBody[0]);
         if (args && args[0]) {
             // Decode after parsing: decoded text may contain characters that
             // the header grammar used by parseHeader does not accept.
             Object.keys(header).forEach((fieldName) => {
                 header[fieldName] = header[fieldName].map((value) => this.replaceDecodeWord(value));
             });
         }
         return JSON.stringify(header);
     }

     /**
      * Breaks the header from the body and returns [header, body].
      *
      * The header must consist of printable ASCII, tabs and line breaks and is
      * terminated by the first empty line. The body is everything after that
      * empty line and may contain any characters.
      *
      * @param {string} input
      * @returns {string[]|undefined} undefined when no header/body split is found
      */
     splitHeaderFromBody(input) {
         const emlRegex = /^([\x20-\x7e\n\r\t]+?)(?:\r?\n){2}([\s\S]*)/;
         const splitEmail = emlRegex.exec(input);
         if (!splitEmail) {
             return undefined;
         }
         return [splitEmail[1], splitEmail[2]];
     }

     /**
      * Takes a string and decodes the MIME encoded words inside it.
      * These take the form of =?utf-8?Q?Hello?=
      *
      * @param {string} input
      * @returns {string}
      */
     replaceDecodeWord(input) {
         // Arrow callback so that `this` still refers to the operation.
         return input.replace(/=\?([^?]+)\?(Q|B)\?([^?]+)\?=/gi, (word, charEnc, contEnc, text) => {
             if (contEnc.toUpperCase() === "B") {
                 return this.decodeMimeData(text, charEnc, "base64");
             }
             // Q encoding is quoted-printable except that "_" always stands
             // for a space (RFC 2047 section 4.2).
             return this.decodeMimeData(text.replace(/_/g, "=20"), charEnc, "quoted-printable");
         });
     }

     /**
      * Breaks a header into an object to be used by other functions.
      *
      * Keys are lower-case field names; each value is the array of values for
      * that field, in order of appearance. Line feeds and carriage returns
      * inside a (folded) value are each replaced with a space.
      *
      * @param {string} input
      * @returns {object}
      */
     parseHeader(input) {
         // A field starts at the beginning of a line; its name is any run of
         // printable ASCII other than space and ":" (RFC 5322 section 2.2).
         // The value runs until the next line that does not start with
         // whitespace, i.e. the next field.
         const sectionRegex = /(?:^|\n)([\x21-\x39\x3b-\x7e]+):[ \t]*([\x20-\x7e\r\n\t]*?)(?=$|\r?\n\S)/g;
         // No prototype, so field names such as "constructor" are plain keys.
         const header = Object.create(null);
         let section;
         while ((section = sectionRegex.exec(input)) !== null) {
             const fieldName = section[1].toLowerCase();
             const fieldValue = section[2].replace(/\n|\r/g, " ").replace(/^\s+/, "");
             if (fieldName in header) {
                 header[fieldName].push(fieldValue);
             } else {
                 header[fieldName] = [fieldValue];
             }
         }
         return header;
     }

     /**
      * Return decoded MIME data given the character encoding and content encoding.
      *
      * @param {string} input
      * @param {string} charEnc - MIME charset label; falsy to skip charset decoding
      * @param {string} contEnc - content transfer encoding name
      * @returns {string}
      * @throws {OperationError} if contEnc or charEnc is not supported
      */
     decodeMimeData(input, charEnc, contEnc) {
         const hasOwn = Object.prototype.hasOwnProperty;

         const decoderName = String(contEnc).toLowerCase();
         if (!hasOwn.call(DECODER, decoderName)) {
             throw new OperationError(`Unsupported content transfer encoding: ${contEnc}`);
         }
         let output = DECODER[decoderName](input);

         if (charEnc) {
             const charsetName = String(charEnc).toLowerCase();
             if (!hasOwn.call(MIME_FORMAT, charsetName)) {
                 throw new OperationError(`Unsupported character encoding: ${charEnc}`);
             }
             output = cptable.utils.decode(MIME_FORMAT[charsetName], output);
         }
         return output;
     }

     /**
      * Returns a header item given a header object, a field item and an index.
      *
      * @param {object} header - object as returned by parseHeader
      * @param {Array} fieldItem - [pattern, fieldName] pair, e.g. FIELD_ITEM.CHARSET
      * @param {integer} fieldNum - which occurrence of the field to read
      * @returns {string|undefined} capture group 1 of the pattern, or undefined
      *     when the field is absent or the pattern does not match
      */
     getHeaderItem(header, fieldItem, fieldNum = 0) {
         const [pattern, fieldName] = fieldItem;
         const values = header[fieldName];
         if (!Array.isArray(values) || values.length <= fieldNum) {
             return undefined;
         }
         const item = pattern.exec(values[fieldNum]);
         return item ? item[1] : undefined;
     }
 }
 export default ParseIMF