2018-11-20 06:40:43 -05:00
/**
 * @author bwhitn [brian.m.whitney@outlook.com]
 * @copyright Crown Copyright 2016
 * @license Apache-2.0
 */
2018-11-20 22:36:29 -05:00
import Operation from "../Operation" ;
import OperationError from "../errors/OperationError" ;
import cptable from "../vendor/js-codepage/cptable.js" ;
import { fromBase64 } from "../lib/Base64" ;
import { decodeQuotedPrintable } from "../lib/QuotedPrintable" ;
import { MIME _FORMAT } from "../lib/ChrEnc" ;
import Utils from "../Utils" ;
2018-11-20 06:40:43 -05:00
2018-11-20 22:36:29 -05:00
/**
 * Extraction rules for individual items held inside parsed header fields.
 *
 * Every entry is a pair `[pattern, fieldName]`: `fieldName` is the lower-cased
 * header field to look in and capture group 1 of `pattern` is the item value.
 * The pairs are consumed by `ParseIMF.getHeaderItem`.
 *
 * FILENAME          - last path component of the quoted filename in content-disposition.
 * CONTENT_TYPE      - the media type (text up to the first ";" or whitespace) in content-type.
 * BOUNDARY          - the quoted multipart boundary value in content-type.
 * CHARSET           - the (optionally quoted) charset value in content-type.
 * TRANSFER_ENCODING - the encoding token in content-transfer-encoding.
 *
 * Parameter names are matched case-insensitively, as MIME attribute names are
 * not case-sensitive.
 *
 * @constant
 * @default
 */
const IMF_FIELD_ITEM = {
    FILENAME: [/filename=".*?([^~#%&*\][\\:<>?/|]+)"/i, "content-disposition"],
    CONTENT_TYPE: [/\s*([^;\s]+)/, "content-type"],
    BOUNDARY: [/boundary="(.+?)"/i, "content-type"],
    // Accept upper-case and quoted charset values, e.g. charset="UTF-8".
    CHARSET: [/charset="?([a-z0-9_-]+)"?/i, "content-type"],
    TRANSFER_ENCODING: [/\s*([A-Za-z0-9-]+)\s*/, "content-transfer-encoding"],
};
2018-11-20 06:40:43 -05:00
2018-11-20 22:36:29 -05:00
/**
 * Decoders keyed by lower-case MIME Content-Transfer-Encoding name.
 * Each decoder takes the encoded text and returns the decoded text.
 *
 * "base64" delegates to fromBase64; "quoted-printable" passes the result of
 * decodeQuotedPrintable through Utils.byteArrayToUtf8. "7bit", "8bit" and
 * "binary" are identity encodings, so the input is returned untouched.
 *
 * @constant
 * @default
 */
// TODO: should 8 bit and 7 bit be treated the same?
const IMF_DECODER = {
    "base64": (input) => fromBase64(input),
    "quoted-printable": (input) => Utils.byteArrayToUtf8(decodeQuotedPrintable(input)),
    "7bit": (input) => input,
    "8bit": (input) => input,
    "binary": (input) => input,
};
2018-11-20 06:40:43 -05:00
2018-11-20 22:36:29 -05:00
/**
 * Parse Internet Message Format operation.
 *
 * Splits an RFC 5322 message into its header and body and returns each as a
 * text file. The static helpers handle header parsing, header item lookup and
 * decoding of MIME encoded words/data.
 */
class ParseIMF extends Operation {

    /**
     * ParseIMF constructor. Sets the operation metadata and declares the
     * single boolean argument "Decode Quoted Words".
     */
    constructor() {
        super();

        this.name = "Parse Internet Message Format";
        this.module = "Default";
        this.description = [
            "Parses an IMF formatted message following RFC5322.",
            "<br><br>",
            "Parses an IMF formatted message. These often have the file extension &quot;.eml&quot; and contain the email headers and body. The output will be a file list of the headers and mime parts.",
        ].join("\n");
        this.infoURL = "https://tools.ietf.org/html/rfc5322";
        this.inputType = "string";
        this.outputType = "List<File>";
        this.presentType = "html";
        this.args = [
            {
                "name": "Decode Quoted Words",
                "type": "boolean",
                "value": false
            }
        ];
    }

    /**
     * Basic email parser that returns the header and the body as two files
     * (named "test0" and "test1", both typed text/plain).
     *
     * args[0] (boolean): when true, encoded words in the header are decoded
     * before the header file is created.
     *
     * @param {string} input
     * @param {Object[]} args
     * @returns {File[]} Empty list when the input is empty.
     * @throws {OperationError} If no header/body separator can be found.
     */
    run(input, args) {
        if (!input) {
            return [];
        }

        const headerBody = ParseIMF.splitHeaderFromBody(input);
        if (!headerBody) {
            throw new OperationError("Unable to split the message: no blank line separating the header from the body was found.");
        }

        let header = headerBody[0];
        const body = headerBody[1];
        if (args[0]) {
            header = ParseIMF.replaceDecodeWord(header);
        }

        // The (possibly decoded) header is what gets emitted, not the raw split.
        return [header, body].map(
            (text, i) => new File([text], "test" + String(i), {type: "text/plain"})
        );
    }

    /**
     * Breaks the header from the body at the first blank line and returns
     * [header, body].
     *
     * @param {string} input
     * @returns {string[]|undefined} undefined when the input does not match
     *     the header / blank line / body layout.
     */
    static splitHeaderFromBody(input) {
        const emlRegex = /^([\x20-\xff\n\r\t]+?)(?:\r?\n){2}([\x20-\xff\t\n\r]*)/;
        const splitEmail = emlRegex.exec(input);
        if (!splitEmail) {
            return undefined;
        }
        return [splitEmail[1], splitEmail[2]];
    }

    /**
     * Takes a string and decodes the encoded words inside it.
     * These take the form of =?utf-8?Q?Hello?= where the middle letter is
     * Q (quoted-printable) or B (base64), in either case.
     *
     * @param {string} input
     * @returns {string}
     * @throws {OperationError} If a word uses an unsupported charset.
     */
    static replaceDecodeWord(input) {
        const encodedWord = /=\?([^?]+)\?(Q|B)\?([^?]+)\?=/gi;
        return input.replace(encodedWord, (match, charEnc, contEnc, encodedText) => {
            if (contEnc.toUpperCase() === "B") {
                return ParseIMF.decodeMimeData(encodedText, charEnc, "base64");
            }
            // In the "Q" encoding an underscore stands for a space.
            const text = encodedText.replace(/_/g, " ");
            return ParseIMF.decodeMimeData(text, charEnc, "quoted-printable");
        });
    }

    /**
     * Breaks a header into an object to be used by other functions.
     * Keys are lower-cased field names; each value is the list of values seen
     * for that field, in order of appearance. Every line feed or carriage
     * return inside a value is replaced with a space.
     *
     * @param {string} input
     * @returns {object}
     */
    static parseHeader(input) {
        const sectionRegex = /([A-Z0-9-]+):\s+([\x20-\x7e\r\n\t]+?)(?=$|\r?\n\S)/gi;
        const header = {};
        let section;
        while ((section = sectionRegex.exec(input))) {
            const fieldName = section[1].toLowerCase();
            const fieldValue = section[2].replace(/\n|\r/g, " ");
            // Own-property check so a field such as "Constructor" is not
            // confused with a property inherited from Object.prototype.
            if (Object.prototype.hasOwnProperty.call(header, fieldName)) {
                header[fieldName].push(fieldValue);
            } else {
                header[fieldName] = [fieldValue];
            }
        }
        return header;
    }

    /**
     * Returns decoded MIME data given the character encoding and content
     * transfer encoding. The transfer encoding is looked up case-insensitively
     * in IMF_DECODER and defaults to "7bit" when not supplied. When a charset
     * is supplied it is looked up (lower-cased) in MIME_FORMAT and the result
     * is handed to cptable.utils.decode.
     *
     * @param {string} input
     * @param {string} charEnc
     * @param {string} contEnc
     * @returns {string}
     * @throws {OperationError} If the transfer encoding or charset is unknown.
     */
    static decodeMimeData(input, charEnc, contEnc) {
        const encoding = contEnc ? String(contEnc).toLowerCase() : "7bit";
        if (!Object.prototype.hasOwnProperty.call(IMF_DECODER, encoding)) {
            throw new OperationError("Unsupported content transfer encoding: " + contEnc);
        }

        let decoded = IMF_DECODER[encoding](input);
        if (charEnc) {
            const charset = charEnc.toLowerCase();
            if (!Object.prototype.hasOwnProperty.call(MIME_FORMAT, charset)) {
                throw new OperationError("Unsupported character encoding: " + charEnc);
            }
            decoded = cptable.utils.decode(MIME_FORMAT[charset], decoded);
        }
        return decoded;
    }

    /**
     * Returns a header item given a header object, a field item rule and the
     * index of the field occurrence to inspect.
     *
     * @param {object} header - Object as produced by parseHeader.
     * @param {Array} fieldItem - Pair [pattern, fieldName]; capture group 1
     *     of the pattern is the value returned.
     * @param {integer} fieldNum - Which occurrence of the field to use.
     * @returns {string|undefined} undefined when the field, the occurrence or
     *     the item is not present.
     */
    static getHeaderItem(header, fieldItem, fieldNum = 0) {
        const [pattern, fieldName] = fieldItem;
        if (!Object.prototype.hasOwnProperty.call(header, fieldName)) {
            return undefined;
        }

        const values = header[fieldName];
        if (fieldNum < 0 || fieldNum >= values.length) {
            return undefined;
        }

        const item = pattern.exec(values[fieldNum]);
        return item ? item[1] : undefined;
    }
}
export default ParseIMF