2018-11-20 06:40:43 -05:00
/**
 * @author bwhitn [brian.m.whitney@outlook.com]
 * @copyright Crown Copyright 2016
 * @license Apache-2.0
 */
2018-11-20 22:36:29 -05:00
import Operation from "../Operation" ;
import OperationError from "../errors/OperationError" ;
import cptable from "../vendor/js-codepage/cptable.js" ;
import { fromBase64 } from "../lib/Base64" ;
import { decodeQuotedPrintable } from "../lib/QuotedPrintable" ;
import { MIME _FORMAT } from "../lib/ChrEnc" ;
import Utils from "../Utils" ;
2018-11-20 06:40:43 -05:00
2018-11-20 22:36:29 -05:00
/**
 * Maps a MIME content type to the file extension used when a file name has
 * to be generated for a section that does not carry one of its own.
 * Content types that are not listed here fall back to "bin" at the call site.
 *
 * @constant
 * @default
 */
const FILE_TYPE_SUFFIX = Object.freeze({
    "text/plain": "txt",
    "text/html": "htm",
    "application/rtf": "rtf",
});
2018-11-20 06:40:43 -05:00
2018-11-20 22:36:29 -05:00
/**
 * Parse Internet Message Format operation.
 *
 * Splits an RFC 5322 message into its header and body, walks the MIME
 * structure of the body and returns every leaf section as a File.
 */
class ParseIMF extends Operation {

    /**
     * ParseIMF constructor
     */
    constructor() {
        super();
        this.name = "Parse Internet Message Format";
        this.module = "Default";
        this.description = ["Parser an IMF formatted messages following RFC5322.",
            "<br><br>",
            "Parses an IMF formated message. These often have the file extention &quot;.eml&quot; and contain the email headers and body. The output will be a file list of the headers and mime parts.",
        ].join("\n");
        this.infoURL = "https://tools.ietf.org/html/rfc5322";
        this.inputType = "string";
        this.outputType = "List<File>";
        this.presentType = "html";
        this.args = [
            {
                "name": "Decode Quoted Words",
                "type": "boolean",
                "value": false
            }
        ];
    }

    /**
     * Basic email parser that returns the MIME sections of a message as files.
     * Args 0 boolean decode quoted words
     *
     * @param {string} input
     * @param {Object[]} args
     * @returns {File[]}
     * @throws {OperationError} if the header cannot be separated from the body
     *     or the MIME structure is invalid
     */
    run(input, args) {
        // TODO: need to add Non-Mime emails
        // TODO: need to add header info to output
        // TODO: no uuencode function. see if we can fix this
        if (!input) {
            return [];
        }
        const headerBody = ParseIMF.splitHeaderFromBody(input);
        if (!headerBody) {
            throw new OperationError("Unable to find the blank line separating the message header from its body");
        }
        const headerArray = ParseIMF.parseHeader(headerBody[0]);
        if (args[0] && headerBody.length > 0) {
            // The decoded header text is not part of the output yet (see the
            // TODO above); it is kept ready for when the header is emitted.
            headerBody[0] = ParseIMF.replaceDecodeWord(headerBody[0]);
        }
        // A carriage return anywhere in the input is taken to mean CRLF line endings.
        const sections = ParseIMF.walkMime(headerBody[1], headerArray, input.indexOf("\r") >= 0);
        return sections.map(function (fileObj) {
            if (fileObj.name !== null) {
                return new File([fileObj.data], fileObj.name, {type: fileObj.type});
            }
            // Unnamed section: build "<subject>.<suffix>". A message without a
            // Subject header is valid, so fall back to a fixed base name.
            const hasSubject = Object.prototype.hasOwnProperty.call(headerArray, "subject");
            const baseName = hasSubject ? ParseIMF.replaceDecodeWord(headerArray.subject[0]) : "message";
            const hasSuffix = Object.prototype.hasOwnProperty.call(FILE_TYPE_SUFFIX, fileObj.type);
            const suffix = hasSuffix ? FILE_TYPE_SUFFIX[fileObj.type] : "bin";
            return new File([fileObj.data], baseName.concat(".", suffix), {type: fileObj.type});
        });
    }

    /**
     * Displays the files in HTML for web apps.
     *
     * @param {File[]} files
     * @returns {html}
     */
    async present(files) {
        return await Utils.displayFilesAsHTML(files);
    }

    /**
     * Walks a MIME document and returns one object per leaf section.
     * Multipart sections are split on their boundary and walked recursively.
     *
     * @param {string} input - body of the section
     * @param {object} header - parsed header of the section (see parseHeader)
     * @param {boolean} rn - true when the message uses CRLF line endings
     * @returns {object[]} objects of the form {type, data, name}; name is null
     *     when the section does not declare a file name
     * @throws {OperationError} if a section is neither multipart nor a leaf with
     *     both a content type and a content transfer encoding
     */
    static walkMime(input, header, rn) {
        const hasField = function (name) {
            return Object.prototype.hasOwnProperty.call(header, name);
        };
        const contentType = hasField("content-type") ? header["content-type"][0] : null;

        // Only the content type decides whether this is a container; a
        // MIME-Version header is also present on single-part messages.
        if (contentType !== null && ParseIMF.getMediaType(contentType).startsWith("multipart/")) {
            const boundary = ParseIMF.getHeaderParam(contentType, "boundary");
            if (!boundary) {
                throw new OperationError("Multipart section is missing its boundary parameter");
            }
            const outputSections = [];
            ParseIMF.splitMultipart(input, boundary, rn ? 2 : 1).forEach(function (mimePart) {
                const headerBody = ParseIMF.splitHeaderFromBody(mimePart);
                if (!headerBody) {
                    throw new OperationError("Invalid Mime section");
                }
                const partHeader = ParseIMF.parseHeader(headerBody[0]);
                outputSections.push(...ParseIMF.walkMime(headerBody[1], partHeader, rn));
            });
            return outputSections;
        }

        if (contentType !== null && hasField("content-transfer-encoding")) {
            // TODO: Remove path if it contains it.
            let fileName = null;
            let charEnc = null;
            if (hasField("content-disposition")) {
                fileName = ParseIMF.getHeaderParam(header["content-disposition"][0], "filename");
            }
            if (fileName === null) {
                // No attachment file name: the character set applies to the
                // data, and the content type may still carry a name parameter.
                charEnc = ParseIMF.getHeaderParam(contentType, "charset");
                fileName = ParseIMF.getHeaderParam(contentType, "name");
            }
            const dataValue = ParseIMF.decodeMimeData(input, charEnc, header["content-transfer-encoding"][0]);
            return [{type: ParseIMF.getMediaType(contentType), data: dataValue, name: fileName}];
        }

        throw new OperationError("Invalid Mime section");
    }

    /**
     * Returns the media type of a structured header value, i.e. the text in
     * front of the first ";", trimmed and lower-cased.
     *
     * @param {string} headerValue
     * @returns {string}
     */
    static getMediaType(headerValue) {
        return headerValue.split(";", 1)[0].trim().toLowerCase();
    }

    /**
     * Returns the value of a ";"-separated parameter of a structured header
     * value such as Content-Type or Content-Disposition. The parameter name is
     * matched case-insensitively, the parameters may appear in any order and
     * the value may be double-quoted, single-quoted or unquoted.
     *
     * @param {string} headerValue
     * @param {string} paramName
     * @returns {string|null} the parameter value without its quotes, or null
     *     if the parameter is not present
     */
    static getHeaderParam(headerValue, paramName) {
        const escapedName = paramName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
        const paramRegex = new RegExp(
            ";\\s*" + escapedName + "\\s*=\\s*(?:\"([^\"]*)\"|'([^']*)'|([^;\\s]+))", "i");
        const match = paramRegex.exec(headerValue);
        if (!match) {
            return null;
        }
        if (match[1] !== undefined) {
            return match[1];
        }
        return match[2] !== undefined ? match[2] : match[3];
    }

    /**
     * Breaks the header from the body at the first blank line and returns
     * [header, body].
     *
     * @param {string} input
     * @returns {string[]|undefined} undefined if the input has no blank line
     *     preceded by at least one character
     */
    static splitHeaderFromBody(input) {
        // [\s\S] rather than a character range: a range would silently cut
        // the body at the first character outside of it.
        const emlRegex = /^([\s\S]+?)(?:\r?\n){2}([\s\S]*)/;
        const splitEmail = emlRegex.exec(input);
        if (!splitEmail) {
            return undefined;
        }
        return [splitEmail[1], splitEmail[2]];
    }

    /**
     * Takes a string and decodes the encoded words inside it.
     * These take the form of =?utf-8?Q?Hello?=
     *
     * @param {string} input
     * @returns {string}
     */
    static replaceDecodeWord(input) {
        // The encoding token is case-insensitive, hence the "i" flag.
        return input.replace(/=\?([^?]+)\?(Q|B)\?([^?]+)\?=/gi, function (whole, charEnc, contEnc, text) {
            if (contEnc.toUpperCase() === "B") {
                return ParseIMF.decodeMimeData(text, charEnc, "base64");
            }
            // In a "Q" encoded word an underscore stands for a space.
            return ParseIMF.decodeMimeData(text.replace(/_/g, " "), charEnc, "quoted-printable");
        });
    }

    /**
     * Breaks a header into an object to be used by other functions.
     * Field names are lower-cased and each maps to an array holding the values
     * in order of appearance. Line feeds and carriage returns inside a value
     * are each replaced with a space.
     *
     * @param {string} input
     * @returns {object}
     */
    static parseHeader(input) {
        // A value runs until the end of the input or until a line break that
        // is followed by a non-whitespace character (the next field).
        const sectionRegex = /([A-Za-z-]+):\s+([\s\S]+?)(?=$|\r?\n\S)/g;
        const header = {};
        let section;
        while ((section = sectionRegex.exec(input))) {
            const fieldName = section[1].toLowerCase();
            const fieldValue = section[2].replace(/\n|\r/g, " ");
            // Own-property check: a field called e.g. "constructor" must not
            // be mistaken for an existing entry through the prototype chain.
            if (Object.prototype.hasOwnProperty.call(header, fieldName)) {
                header[fieldName].push(fieldValue);
            } else {
                header[fieldName] = [fieldValue];
            }
        }
        return header;
    }

    /**
     * Return decoded MIME data given the character encoding and content encoding.
     * The content encoding is matched case-insensitively; anything other than
     * "base64" and "quoted-printable" leaves the input as it is.
     *
     * @param {string} input
     * @param {string} charEnc - character set name, may be null
     * @param {string} contEnc - content transfer encoding
     * @returns {string}
     */
    static decodeMimeData(input, charEnc, contEnc) {
        const encoding = typeof contEnc === "string" ? contEnc.trim().toLowerCase() : "";
        let decoded = input;
        switch (encoding) {
            case "base64":
                decoded = fromBase64(input);
                break;
            case "quoted-printable":
                decoded = Utils.byteArrayToUtf8(decodeQuotedPrintable(input));
                break;
            case "7bit":
            case "8bit":
            default:
                break;
        }
        const charsetKey = charEnc ? charEnc.toLowerCase() : null;
        if (charsetKey !== null && Object.prototype.hasOwnProperty.call(MIME_FORMAT, charsetKey)) {
            decoded = cptable.utils.decode(MIME_FORMAT[charsetKey], decoded);
        }
        return decoded;
    }

    /**
     * Splits the body of a multipart section into its parts.
     * Text in front of the first delimiter and after the closing delimiter is
     * dropped, as is the line break that precedes each delimiter.
     *
     * @param {string} input - body of the multipart section
     * @param {string} boundary - boundary value without the leading "--"
     * @param {number} new_line_length - 2 for CRLF line endings, anything else for LF
     * @returns {string[]} the parts, each still consisting of header and body
     */
    static splitMultipart(input, boundary, new_line_length) {
        const newline = new_line_length === 2 ? "\r\n" : "\n";
        const delimiter = "--".concat(boundary, newline);
        // The closing delimiter is not required to be followed by a line break.
        const closeDelimiter = "--".concat(boundary, "--");
        const output = [];
        let found = input.indexOf(delimiter);
        while (found >= 0) {
            const start = found + delimiter.length;
            const next = input.indexOf(delimiter, start);
            if (next < 0) {
                // Last part: ends at the closing delimiter, or at the end of
                // the input if the closing delimiter is missing.
                const close = input.indexOf(closeDelimiter, start);
                const last = close >= 0 ? close - new_line_length : input.length;
                output.push(input.substring(start, Math.max(start, last)));
                break;
            }
            output.push(input.substring(start, Math.max(start, next - new_line_length)));
            found = next;
        }
        return output;
    }
}
export default ParseIMF