2018-11-20 06:40:43 -05:00
/**
 * @author bwhitn [brian.m.whitney@outlook.com]
 * @copyright Crown Copyright 2016
 * @license Apache-2.0
 */
2018-11-20 22:36:29 -05:00
import Operation from "../Operation" ;
import OperationError from "../errors/OperationError" ;
import cptable from "../vendor/js-codepage/cptable.js" ;
import { fromBase64 } from "../lib/Base64" ;
import { decodeQuotedPrintable } from "../lib/QuotedPrintable" ;
import { MIME _FORMAT } from "../lib/ChrEnc" ;
import Utils from "../Utils" ;
2018-11-20 06:40:43 -05:00
2018-11-20 22:36:29 -05:00
/**
 * Maps a MIME content type to the file extension used when naming a MIME
 * part that does not carry a file name of its own. Content types that are
 * not listed here are given the extension "bin" by the caller.
 *
 * The table is frozen because it is shared, read-only lookup data.
 *
 * @constant
 * @default
 */
const FILE_TYPE_SUFFIX = Object.freeze({
    "text/plain": "txt",
    "text/html": "htm",
    "application/rtf": "rtf",
});
2018-11-20 06:40:43 -05:00
2018-11-20 22:36:29 -05:00
/**
 * Parse Internet Message Format operation.
 *
 * Splits a message into its header block and its (possibly nested) MIME
 * parts, decodes each part and returns everything as a list of files.
 */
class ParseIMF extends Operation {

    /**
     * Internet MessageFormat constructor
     */
    constructor() {
        super();
        this.name = "Parse Internet Message Format";
        this.module = "Default";
        this.description = [
            "Parser an IMF formatted messages following RFC5322.",
            "<br><br>",
            "Parses an IMF formated message. These often have the file extention &quot;.eml&quot; and contain the email headers and body. The output will be a file list of the headers and mime parts.",
        ].join("\n");
        this.infoURL = "https://tools.ietf.org/html/rfc5322";
        this.inputType = "string";
        this.outputType = "List<File>";
        this.presentType = "html";
        this.args = [
            {
                "name": "Decode Quoted Words",
                "type": "boolean",
                "value": false
            }
        ];
    }

    /**
     * Basic Email Parser that displays the header and mime sections as files.
     * Args 0 boolean decode quoted words
     *
     * Liberties taken include:
     *  - header normalisation by lowercasing field names and certain header values;
     *  - no checks are made to verify quoted words are valid encodings,
     *    e.g. underscore vs escape;
     *  - MIME is decoded regardless of whether the message uses \r\n (correct
     *    newline) or \n (incorrect);
     *  - only Base64 and Quoted-Printable are decoded. UUEncode is a
     *    standardised encoding format but is not available right now.
     *
     * The first returned file is always the raw header block, named "Header".
     * Parts without a file name are named after the Subject header (or
     * "Untitled" when there is none) with an extension looked up in
     * FILE_TYPE_SUFFIX, falling back to "bin".
     *
     * @param {string} input
     * @param {Object[]} args
     * @returns {File[]}
     * @throws {OperationError} if the header cannot be separated from the body
     */
    run(input, args) {
        // TODO Later: no uuencode function. See if we can fix this.
        // TODO: may want to do base64 decode of binary to bytearray.
        // TODO Later: need to look at binhex decoder maybe.
        if (!input) {
            return [];
        }
        const headerBody = ParseIMF.splitHeaderFromBody(input);
        if (!headerBody) {
            // Previously this surfaced as an opaque TypeError on headerBody[0].
            throw new OperationError("Unable to separate the message header from its body");
        }
        const headerArray = ParseIMF.parseHeader(headerBody[0]);
        const decodeWords = Boolean(args && args[0]);
        const headerText = decodeWords ? ParseIMF.replaceDecodeWord(headerBody[0]) : headerBody[0];
        const retval = [new File([headerText], "Header", {type: "text/plain"})];

        // A message without a Subject header must not crash the naming of anonymous parts.
        const baseName = headerArray.hasOwnProperty("subject") ? headerArray.subject[0] : "Untitled";
        // Any carriage return in the input is taken to mean the message uses CRLF line endings.
        const usesCrlf = input.indexOf("\r") >= 0;

        ParseIMF.walkMime(headerBody[1], headerArray, usesCrlf).forEach(function(fileObj) {
            let name = fileObj.name;
            if (name === null) {
                // Own-property lookup only: "in" would also match inherited keys such as "constructor".
                const suffix = FILE_TYPE_SUFFIX.hasOwnProperty(fileObj.type) ?
                    FILE_TYPE_SUFFIX[fileObj.type] : "bin";
                name = baseName.concat(".", suffix);
            }
            retval.push(new File([fileObj.data], name, {type: fileObj.type}));
        });
        return retval;
    }

    /**
     * Displays the files in HTML for web apps.
     *
     * @param {File[]} files
     * @returns {html}
     */
    async present(files) {
        return await Utils.displayFilesAsHTML(files);
    }

    /**
     * Walks a MIME document and returns an array of Mime data and header objects.
     *
     * Multipart sections are split on their boundary and walked recursively.
     * Any other section is decoded and returned as a single entry. Following
     * RFC 2045, a missing Content-Type is treated as "text/plain" and a
     * missing Content-Transfer-Encoding as "7bit".
     *
     * @param {string} input - body of the section
     * @param {object} header - parsed header of the section (see parseHeader)
     * @param {boolean} rn - true if the message uses \r\n line endings
     * @returns {object[]} entries of the form {type, data, name}; name is null
     *     when the section does not declare a file name
     * @throws {OperationError} if a multipart section has no boundary or one
     *     of its parts cannot be split into header and body
     */
    static walkMime(input, header, rn) {
        const newLineLength = rn ? 2 : 1;
        const contTypeObj = header.hasOwnProperty("content-type") ?
            ParseIMF.decodeComplexField(header["content-type"][0]) : {};
        // decodeComplexField lowercases bare values, so the checks below are case-insensitive.
        const contType = contTypeObj.hasOwnProperty("value") ? contTypeObj.value[0] : "text/plain";

        if (contType.startsWith("multipart/")) {
            if (!contTypeObj.hasOwnProperty("boundary")) {
                throw new OperationError("Invalid Mime section");
            }
            const outputSections = [];
            const mimeParts = ParseIMF.splitMultipart(input, contTypeObj.boundary, newLineLength);
            mimeParts.forEach(function(mimePart) {
                const headerBody = ParseIMF.splitHeaderFromBody(mimePart);
                if (!headerBody) {
                    throw new OperationError("Invalid Mime section");
                }
                const partHeader = ParseIMF.parseHeader(headerBody[0]);
                ParseIMF.walkMime(headerBody[1], partHeader, rn).forEach(function(part) {
                    outputSections.push(part);
                });
            });
            return outputSections;
        }

        // Content-Disposition "filename" wins over the Content-Type "name" parameter.
        let fileName = null;
        if (header.hasOwnProperty("content-disposition")) {
            const contDispoObj = ParseIMF.decodeComplexField(header["content-disposition"][0]);
            if (contDispoObj.hasOwnProperty("filename")) {
                fileName = contDispoObj.filename;
            }
        }
        if (fileName === null && contTypeObj.hasOwnProperty("name")) {
            fileName = contTypeObj.name;
        }

        const charEnc = contTypeObj.hasOwnProperty("charset") ? contTypeObj.charset : null;

        let contTran = "7bit";
        if (header.hasOwnProperty("content-transfer-encoding")) {
            const contEncObj = ParseIMF.decodeComplexField(header["content-transfer-encoding"][0]);
            if (contEncObj.hasOwnProperty("value")) {
                contTran = contEncObj.value[0];
            }
        }

        const data = ParseIMF.decodeMimeData(input, charEnc, contTran);
        return [{type: contType, data: data, name: fileName}];
    }

    /**
     * Breaks the header from the body and returns [header, body]
     *
     * The header is everything before the first blank line and may only
     * contain characters in \x20-\xff plus tab, CR and LF. The body is
     * everything after that blank line, whatever characters it contains.
     *
     * @param {string} input
     * @returns {string[]|undefined} [header, body], or undefined when the
     *     input has no non-empty header followed by a blank line
     */
    static splitHeaderFromBody(input) {
        // The body group is anchored to the end of the input; the previous
        // unanchored character class silently truncated the body at the first
        // character outside its range.
        const emlRegex = /^([\x20-\xff\n\r\t]+?)(?:\r?\n){2}([\s\S]*)$/;
        const splitEmail = emlRegex.exec(input);
        if (!splitEmail) {
            return undefined;
        }
        return [splitEmail[1], splitEmail[2]];
    }

    /**
     * Takes a string and decodes quoted words inside them
     * These take the form of =?utf-8?Q?Hello?=
     *
     * The encoding letter (Q or B) is matched case-insensitively, as
     * required by RFC 2047.
     *
     * @param {string} input
     * @returns {string}
     */
    static replaceDecodeWord(input) {
        return input.replace(/=\?([^?]+)\?(Q|B)\?([^?]+)\?=/gi, function(match, charEnc, contEnc, encoded) {
            if (contEnc.toUpperCase() === "B") {
                return ParseIMF.decodeMimeData(encoded, charEnc, "base64");
            }
            // In the Q encoding an underscore stands for a space.
            return ParseIMF.decodeMimeData(encoded.replace(/_/g, " "), charEnc, "quoted-printable");
        });
    }

    /**
     * Breaks a header into a object to be used by other functions.
     * It removes any line feeds or carriage returns from the values and
     * replaces it with a space.
     *
     * Field names are lowercased. Every field maps to an array holding its
     * values in order of appearance, with quoted words already decoded.
     *
     * @param {string} input
     * @returns {object}
     */
    static parseHeader(input) {
        const sectionRegex = /([A-Za-z-]+):\s+([\x20-\xff\r\n\t]+?)(?=$|\r?\n\S)/g;
        const header = {};
        let section;
        while ((section = sectionRegex.exec(input))) {
            const fieldName = section[1].toLowerCase();
            const fieldValue = ParseIMF.replaceDecodeWord(section[2].replace(/\n|\r/g, " "));
            // Own-property check: a truthiness test would hit the inherited
            // "constructor" property and fail for a header of that name.
            if (header.hasOwnProperty(fieldName)) {
                header[fieldName].push(fieldValue);
            } else {
                header[fieldName] = [fieldValue];
            }
        }
        return header;
    }

    /**
     * Return decoded MIME data given the character encoding and content encoding.
     *
     * "base64" and "quoted-printable" are decoded; every other content
     * encoding (including "7bit" and "8bit") leaves the data untouched. The
     * result is then passed through cptable when the character encoding is
     * listed in MIME_FORMAT.
     *
     * @param {string} input
     * @param {string} charEnc
     * @param {string} contEnc
     * @returns {string}
     */
    static decodeMimeData(input, charEnc, contEnc) {
        let decoded = input;
        if (contEnc === "base64") {
            decoded = fromBase64(decoded);
        } else if (contEnc === "quoted-printable") {
            decoded = Utils.byteArrayToUtf8(decodeQuotedPrintable(decoded));
        }
        if (charEnc && MIME_FORMAT.hasOwnProperty(charEnc.toLowerCase())) {
            decoded = cptable.utils.decode(MIME_FORMAT[charEnc.toLowerCase()], decoded);
        }
        return decoded;
    }

    /**
     * Parses a structured header value such as
     * 'multipart/mixed; boundary="abc"' into an object.
     *
     * Items are separated by semicolons; a semicolon inside a double-quoted
     * string does not separate. Items of the form key=value are stored under
     * the lowercased key, with one pair of matching surrounding quotes
     * (single or double) removed from the value. Items without "=" are
     * lowercased and collected, in order, in the "value" array. Empty items
     * are ignored.
     *
     * @param {string} field
     * @returns {object}
     */
    static decodeComplexField(field) {
        const retVal = {};
        const items = field.match(/(?:"[^"]*"|[^;])+/g) || [];
        items.forEach(function(rawItem) {
            const item = rawItem.trim();
            const eq = item.indexOf("=");
            if (eq >= 0) {
                const key = item.substring(0, eq).trim().toLowerCase();
                let value = item.substring(eq + 1).trim();
                const quote = value.charAt(0);
                // Needs at least two characters so a lone quote is not stripped to nothing by mistake.
                if (value.length >= 2 && (quote === "'" || quote === "\"") && value.endsWith(quote)) {
                    value = value.slice(1, -1);
                }
                retVal[key] = value;
            } else if (item !== "") {
                const bare = item.toLowerCase();
                if (retVal.hasOwnProperty("value")) {
                    retVal.value.push(bare);
                } else {
                    retVal.value = [bare];
                }
            }
        });
        return retVal;
    }

    /**
     * Splits the body of a multipart section into its parts.
     *
     * A part starts after a "--boundary" delimiter line and ends before the
     * newline preceding the next delimiter. The last part ends before the
     * closing "--boundary--" delimiter, or at the end of the input when that
     * delimiter is missing. Text before the first delimiter and after the
     * closing delimiter is discarded.
     *
     * @param {string} input - body of the multipart section
     * @param {string} boundary - boundary parameter of the section's Content-Type
     * @param {number} new_line_length - 2 if lines end in \r\n, otherwise \n is assumed
     * @returns {string[]} the raw parts (headers and body, still unparsed)
     */
    static splitMultipart(input, boundary, new_line_length) {
        const output = [];
        const newline = new_line_length === 2 ? "\r\n" : "\n";
        const delimiter = "--".concat(boundary, newline);
        // No newline is required after the closing delimiter: it may be the very end of the message.
        const closeIndex = input.indexOf("--".concat(boundary, "--"));
        const lastPartEnd = closeIndex >= 0 ? closeIndex - new_line_length : input.length;

        let start = input.indexOf(delimiter);
        while (start >= 0 && (closeIndex < 0 || start < closeIndex)) {
            const partStart = start + delimiter.length;
            const next = input.indexOf(delimiter, partStart);
            const hasNext = next >= 0 && (closeIndex < 0 || next < closeIndex);
            const partEnd = hasNext ? next - new_line_length : lastPartEnd;
            // Clamp so an empty part yields "" instead of substring() swapping its arguments.
            output.push(input.substring(partStart, Math.max(partEnd, partStart)));
            start = hasNext ? next : -1;
        }
        return output;
    }
}

export default ParseIMF;