2017-03-23 17:52:20 +00:00
import Utils from "../Utils.js" ;
2017-03-06 12:45:51 +00:00
2016-11-28 10:42:58 +00:00
/**
 * String utility operations.
 *
 * @author n1474335 [n1474335@gmail.com]
 * @copyright Crown Copyright 2016
 * @license Apache-2.0
 *
 * @namespace
 */
2017-03-23 17:52:20 +00:00
const StrUtils = {

    /**
     * Scopes selectable for the case-changing operation; read by runUpper as args[0].
     *
     * @constant
     * @default
     */
    CASE_SCOPE: ["All", "Word", "Sentence", "Paragraph"],


    /**
     * To Upper case operation.
     *
     * Depending on the scope in args[0], upper-cases the whole input or only
     * the first word character of each word, sentence or paragraph. Any
     * unrecognised scope is treated like "All".
     *
     * @param {string} input
     * @param {Object[]} args - [scope]
     * @returns {string}
     */
    runUpper: function (input, args) {
        const scope = args[0];

        switch (scope) {
            case "Word":
                return input.replace(/(\b\w)/gi, function (m) {
                    return m.toUpperCase();
                });
            case "Sentence":
                // The whole match (full stop, whitespace and letter) is
                // upper-cased; only the letter is affected by that.
                return input.replace(/(?:\.|^)\s*(\b\w)/gi, function (m) {
                    return m.toUpperCase();
                });
            case "Paragraph":
                return input.replace(/(?:\n|^)\s*(\b\w)/gi, function (m) {
                    return m.toUpperCase();
                });
            case "All":
                /* falls through */
            default:
                return input.toUpperCase();
        }
    },


    /**
     * To Lower case operation.
     *
     * @param {string} input
     * @param {Object[]} args - unused
     * @returns {string}
     */
    runLower: function (input, args) {
        return input.toLowerCase();
    },


    /**
     * Delimiter presets offered for the split side of the Split operation.
     *
     * @constant
     * @default
     */
    SPLIT_DELIM_OPTIONS: [
        {name: "Line feed", value: "\\n"},
        {name: "CRLF", value: "\\r\\n"},
        {name: "Space", value: " "},
        {name: "Comma", value: ","},
        {name: "Semi-colon", value: ";"},
        {name: "Colon", value: ":"},
        {name: "Nothing (separate chars)", value: ""}
    ],

    /**
     * Delimiter presets offered for the join side of the Split operation.
     *
     * @constant
     * @default
     */
    JOIN_DELIM_OPTIONS: [
        {name: "Line feed", value: "\\n"},
        {name: "CRLF", value: "\\r\\n"},
        {name: "Space", value: " "},
        {name: "Comma", value: ","},
        {name: "Semi-colon", value: ";"},
        {name: "Colon", value: ":"},
        {name: "Nothing (join chars)", value: ""}
    ],


    /**
     * Split operation.
     *
     * Splits the input on args[0] and re-joins the pieces with args[1].
     *
     * @param {string} input
     * @param {Object[]} args - [splitDelim, joinDelim]
     * @returns {string}
     */
    runSplit: function (input, args) {
        const splitDelim = args[0];
        const joinDelim = args[1];

        return input.split(splitDelim).join(joinDelim);
    },


    /**
     * Filter operation.
     *
     * Splits the input on the delimiter looked up from args[0], keeps the
     * pieces that match the regular expression in args[1] (or those that do
     * not match when args[2] is set) and joins them with the same delimiter.
     *
     * @author Mikescher (https://github.com/Mikescher | https://mikescher.com)
     * @param {string} input
     * @param {Object[]} args - [delimiter name, regex source, invert flag]
     * @returns {string} the filtered input, or an error message if the regex
     *     cannot be compiled
     */
    runFilter: function (input, args) {
        const delim = Utils.charRep[args[0]];
        const reverse = args[2];
        let regex;

        try {
            regex = new RegExp(args[1]);
        } catch (err) {
            return "Invalid regex. Details: " + err.message;
        }

        // XOR flips the test result when `reverse` is set; filter() only
        // needs a truthy/falsy value.
        const regexFilter = function (value) {
            return reverse ^ regex.test(value);
        };

        return input.split(delim).filter(regexFilter).join(delim);
    },


    /**
     * Default sample delimiter for the Offset checker operation.
     *
     * @constant
     * @default
     */
    OFF_CHK_SAMPLE_DELIMITER: "\\n\\n",


    /**
     * Offset checker operation.
     *
     * Splits the input into samples on args[0] and wraps, in every sample,
     * each run of characters that is identical at the same offset in all
     * samples in <span class='hl5'>. Offsets are compared against the first
     * sample; text of a sample beyond the first sample's length is appended
     * unhighlighted. All sample text is passed through Utils.escapeHtml.
     *
     * Each output is built independently so that every opened span is closed
     * exactly once.
     *
     * @param {string} input
     * @param {Object[]} args - [sampleDelim]
     * @returns {html}
     */
    runOffsetChecker: function (input, args) {
        const sampleDelim = args[0];
        const samples = input.split(sampleDelim);

        if (samples.length < 2) {
            return "Not enough samples, perhaps you need to modify the sample delimiter or add more data?";
        }

        const reference = samples[0];
        const others = samples.slice(1);

        // isCommon[i] is true when every sample has the same character as the
        // first sample at offset i (a sample that is too short never matches).
        const isCommon = [];
        for (let i = 0; i < reference.length; i++) {
            isCommon.push(others.every(sample => sample[i] === reference[i]));
        }

        const outputs = samples.map(sample => {
            const comparable = Math.min(sample.length, reference.length);
            let out = "";
            let inMatch = false;

            for (let i = 0; i < comparable; i++) {
                // Open or close the highlight whenever the match state flips
                if (isCommon[i] !== inMatch) {
                    out += inMatch ? "</span>" : "<span class='hl5'>";
                    inMatch = !inMatch;
                }
                out += Utils.escapeHtml(sample[i]);
            }

            if (inMatch) out += "</span>";

            // Anything past the end of the first sample cannot be compared
            return out + Utils.escapeHtml(sample.substring(reference.length));
        });

        return outputs.join(sampleDelim);
    },


    /**
     * Pairs of escaped/unescaped sequences used by runEscape and runUnescape.
     * They are applied in array order, so the backslash entry must stay first.
     *
     * @constant
     * @default
     */
    ESCAPE_REPLACEMENTS: [
        {"escaped": "\\\\", "unescaped": "\\"}, // Must be first
        {"escaped": "\\'", "unescaped": "'"},
        {"escaped": "\\\"", "unescaped": "\""},
        {"escaped": "\\n", "unescaped": "\n"},
        {"escaped": "\\r", "unescaped": "\r"},
        {"escaped": "\\t", "unescaped": "\t"},
        {"escaped": "\\b", "unescaped": "\b"},
        {"escaped": "\\f", "unescaped": "\f"},
    ],

    /**
     * Escape string operation.
     *
     * @author Vel0x [dalemy@microsoft.com]
     *
     * @param {string} input
     * @param {Object[]} args - unused
     * @returns {string}
     *
     * @example
     * StrUtils.runEscape("Don't do that", [])
     * > "Don\'t do that"
     * StrUtils.runEscape(`Hello
     * World`, [])
     * > "Hello\nWorld"
     */
    runEscape: function (input, args) {
        return StrUtils._replaceByKeys(input, "unescaped", "escaped");
    },


    /**
     * Unescape string operation.
     *
     * @author Vel0x [dalemy@microsoft.com]
     *
     * @param {string} input
     * @param {Object[]} args - unused
     * @returns {string}
     *
     * @example
     * StrUtils.runUnescape("Don\'t do that", [])
     * > "Don't do that"
     * StrUtils.runUnescape("Hello\nWorld", [])
     * > `Hello
     * World`
     */
    runUnescape: function (input, args) {
        return StrUtils._replaceByKeys(input, "escaped", "unescaped");
    },


    /**
     * Replaces all matching tokens in ESCAPE_REPLACEMENTS with the correction. The
     * direction is determined by the patternKey and the replacementKey.
     *
     * When unescaping (patternKey === "escaped") the input is first passed
     * through Utils.parseEscapedChars.
     *
     * NOTE(review): the replacements are applied one after another over the
     * whole string, so text produced by an earlier replacement can be matched
     * by a later one - confirm this is acceptable for the unescape direction.
     *
     * @author Vel0x [dalemy@microsoft.com]
     * @author Matt C [matt@artemisbot.uk]
     *
     * @param {string} input
     * @param {string} patternKey - key of ESCAPE_REPLACEMENTS entries to search for
     * @param {string} replacementKey - key of ESCAPE_REPLACEMENTS entries to substitute
     * @returns {string}
     */
    _replaceByKeys: function (input, patternKey, replacementKey) {
        let output = input;

        // Catch the \\x encoded characters
        if (patternKey === "escaped") output = Utils.parseEscapedChars(input);

        StrUtils.ESCAPE_REPLACEMENTS.forEach(replacement => {
            // split/join replaces every literal occurrence
            output = output.split(replacement[patternKey]).join(replacement[replacementKey]);
        });
        return output;
    },


    /**
     * Head lines operation.
     *
     * Keeps the first args[1] delimiter-separated items. A negative number
     * keeps everything except that many items at the end.
     *
     * @param {string} input
     * @param {Object[]} args - [delimiter name, number]
     * @returns {string}
     */
    runHead: function (input, args) {
        const delimiter = Utils.charRep[args[0]];
        const number = args[1];
        const splitInput = input.split(delimiter);

        return splitInput
            .filter((line, lineIndex) => {
                const position = lineIndex + 1; // 1-based position of the item

                if (number < 0) {
                    return position <= splitInput.length + number;
                } else {
                    return position <= number;
                }
            })
            .join(delimiter);
    },


    /**
     * Tail lines operation.
     *
     * Keeps the last args[1] delimiter-separated items. A negative number
     * keeps everything except that many items at the start.
     *
     * @param {string} input
     * @param {Object[]} args - [delimiter name, number]
     * @returns {string}
     */
    runTail: function (input, args) {
        const delimiter = Utils.charRep[args[0]];
        const number = args[1];
        const splitInput = input.split(delimiter);

        return splitInput
            .filter((line, lineIndex) => {
                const position = lineIndex + 1; // 1-based position of the item

                if (number < 0) {
                    return position > -number;
                } else {
                    return position > splitInput.length - number;
                }
            })
            .join(delimiter);
    },


    /**
     * Default sample delimiter for the Hamming Distance operation.
     *
     * @constant
     * @default
     */
    HAMMING_DELIM: "\\n\\n",

    /**
     * Input encodings selectable for the Hamming Distance operation.
     *
     * @constant
     * @default
     */
    HAMMING_INPUT_TYPE: ["Raw string", "Hex"],

    /**
     * Units selectable for the Hamming Distance operation.
     *
     * @constant
     * @default
     */
    HAMMING_UNIT: ["Byte", "Bit"],


    /**
     * Hamming Distance operation.
     *
     * Splits the input into exactly two samples on args[0] and counts the
     * positions (args[1] === "Byte") or the bits (any other unit) at which
     * they differ. Samples are decoded with Utils.fromHex when args[2] is
     * "Hex", otherwise with Utils.strToByteArray.
     *
     * @author GCHQ Contributor [2]
     *
     * @param {string} input
     * @param {Object[]} args - [delimiter, unit, input type]
     * @returns {string} the distance as a decimal string, or an error message
     */
    runHamming: function (input, args) {
        const delim = args[0];
        const byByte = args[1] === "Byte";
        const inputType = args[2];
        const samples = input.split(delim);

        if (samples.length !== 2) {
            return "Error: You can only calculate the edit distance between 2 strings. Please ensure exactly two inputs are provided, separated by the specified delimiter.";
        }

        // Lengths are compared on the raw sample text, before decoding
        if (samples[0].length !== samples[1].length) {
            return "Error: Both inputs must be of the same length.";
        }

        if (inputType === "Hex") {
            samples[0] = Utils.fromHex(samples[0]);
            samples[1] = Utils.fromHex(samples[1]);
        } else {
            samples[0] = Utils.strToByteArray(samples[0]);
            samples[1] = Utils.strToByteArray(samples[1]);
        }

        let dist = 0;

        for (let i = 0; i < samples[0].length; i++) {
            const lhs = samples[0][i];
            const rhs = samples[1][i];

            if (byByte && lhs !== rhs) {
                dist++;
            } else if (!byByte) {
                let xord = lhs ^ rhs;

                // Count set bits: each step clears the lowest set bit
                while (xord) {
                    dist++;
                    xord &= xord - 1;
                }
            }
        }

        return dist.toString();
    },

};
2017-03-23 17:52:20 +00:00
export default StrUtils ;