2018-09-24 22:47:00 +02:00
/**
 * @author edouard hinard []
 * @copyright Crown Copyright 2018
 * @license Apache-2.0
 */
import Operation from "../Operation" ;
2018-09-27 23:17:57 +02:00
// Maps the language names offered in the "Language" option to the
// identifiers that run() compares against.
const LANGUAGES = {
    C: "c",
    Go: "go",
    Python: "python",
};
2018-09-24 22:47:00 +02:00
/**
 * To Byte String Literal operation
 */
2018-09-27 23:17:57 +02:00
class ToByteStringLiteral extends Operation {

    /**
     * ToByteStringLiteral constructor
     *
     * Declares the operation metadata and its single "Language" option,
     * whose choices are the keys of LANGUAGES.
     */
    constructor() {
        super();

        this.name = "To Byte String Literal";
        this.module = "Default";
        this.description = "Converts the input data to byte string literal in common languages.<br><br>e.g. for python, the UTF-8 encoded string <code>ça ma couté 20€</code> becomes <code>b'\\xc3\\xa7a ma cout\\xc3\\xa9 20\\xe2\\x82\\xac'</code>";
        this.inputType = "ArrayBuffer";
        this.outputType = "string";
        this.infoURL = "https://en.wikipedia.org/wiki/String_(computer_science)#Non-text_strings";
        this.args = [
            {
                "name": "Language",
                "type": "option",
                "value": Object.keys(LANGUAGES)
            },
        ];
    }

    /**
     * Renders the input bytes as a byte string literal in the selected language.
     *
     * @param {ArrayBuffer} input
     * @param {Object[]} args - args[0] is the language name (a key of LANGUAGES)
     * @returns {string} the literal, or an empty string for an unknown language
     */
    run(input, args) {
        const data = new Uint8Array(input);
        const language = LANGUAGES[args[0]];

        // Every lookup table is merged into a fresh object: Object.assign
        // writes into its first argument, so passing a shared module-level
        // table as the target would permanently modify it.
        if (language === "c") {
            const sequences = Object.assign({}, DOUBLEQUOTE_SEQUENCE, COMMON_SEQUENCES, C_EXTRA_SEQUENCES);
            // A C "\x" escape can have more than 2 digits, so \xa7a must be
            // emitted as \xa7""a to keep the following hex digit out of it.
            const escaped = this.escape(data, sequences)
                .replace(/(\\x[0-9a-f]{2})([0-9a-f])/gi, '$1""$2');
            return '"' + escaped + '"';
        }

        if (language === "go") {
            const sequences = Object.assign({}, DOUBLEQUOTE_SEQUENCE, COMMON_SEQUENCES, GO_EXTRA_SEQUENCES);
            return '([]byte)("' + this.escape(data, sequences) + '")';
        }

        if (language === "python") {
            const [quote, quoteSequence] = this.preferedQuote(data);
            const sequences = Object.assign({}, quoteSequence, COMMON_SEQUENCES, PYTHON_EXTRA_SEQUENCES);
            return "b" + quote + this.escape(data, sequences) + quote;
        }

        return "";
    }

    /**
     * Picks the quote character that needs the fewest quote escapes.
     *
     * Double quotes are chosen only when the data contains at least one
     * single quote and no double quote; otherwise single quotes are used.
     *
     * @param {Uint8Array} data
     * @returns {Array} a pair [quote, sequences]: the delimiter character and
     *     the escape table for that delimiter
     */
    preferedQuote(data) {
        const hasDoubleQuote = data.includes(0x22);
        const hasSingleQuote = data.includes(0x27);

        if (hasSingleQuote && !hasDoubleQuote) {
            return ['"', DOUBLEQUOTE_SEQUENCE];
        }
        return ["'", SINGLEQUOTE_SEQUENCE];
    }

    /**
     * Escapes each byte of data for use inside a string literal.
     *
     * A byte with an entry in sequences is replaced by that entry. Any other
     * byte below 0x20 or above 0x7e becomes a two-digit "\x" hex escape, and
     * the remaining bytes are emitted as the corresponding character.
     *
     * @param {Uint8Array} data
     * @param {object} sequences - byte value to escape sequence
     * @returns {string}
     */
    escape(data, sequences) {
        let output = "";
        for (const byte of data) {
            const sequence = sequences[byte];
            if (sequence) {
                output += sequence;
            } else if (byte < 0x20 || byte > 0x7e) {
                output += "\\x" + byte.toString(16).padStart(2, "0");
            } else {
                output += String.fromCharCode(byte);
            }
        }
        return output;
    }

}
2018-10-02 20:55:12 +02:00
// Escape for the quote character of a single-quoted literal.
const SINGLEQUOTE_SEQUENCE = { 0x27: "\\'" };

// Escape for the quote character of a double-quoted literal.
const DOUBLEQUOTE_SEQUENCE = { 0x22: "\\\"" };

// Escapes merged into the table of every supported language.
const COMMON_SEQUENCES = {
    0x08: "\\b",    // backspace
    0x09: "\\t",    // horizontal tab
    0x0a: "\\n",    // line feed
    0x0b: "\\v",    // vertical tab
    0x0c: "\\f",    // form feed
    0x0d: "\\r",    // carriage return
    0x5c: "\\\\"    // backslash
};

// https://en.wikipedia.org/wiki/Escape_sequences_in_C
const C_EXTRA_SEQUENCES = { 0x07: "\\a" };

// https://golang.org/ref/spec#Rune_literals
const GO_EXTRA_SEQUENCES = { 0x07: "\\a" };

// https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals
const PYTHON_EXTRA_SEQUENCES = { 0x07: "\\a" };
2018-09-27 23:17:57 +02:00
export default ToByteStringLiteral ;