From 02ec4a3bfdb89ece796921bda609397e2e10d3a3 Mon Sep 17 00:00:00 2001 From: n1073645 Date: Mon, 18 Nov 2019 13:21:05 +0000 Subject: [PATCH 01/66] ToCaseInsensitiveRegex improvements --- .../operations/ToCaseInsensitiveRegex.mjs | 51 ++++++++++++++++++- 1 file changed, 50 insertions(+), 1 deletion(-) diff --git a/src/core/operations/ToCaseInsensitiveRegex.mjs b/src/core/operations/ToCaseInsensitiveRegex.mjs index 28bd3dc9..4850846d 100644 --- a/src/core/operations/ToCaseInsensitiveRegex.mjs +++ b/src/core/operations/ToCaseInsensitiveRegex.mjs @@ -32,7 +32,56 @@ class ToCaseInsensitiveRegex extends Operation { * @returns {string} */ run(input, args) { - return input.replace(/[a-z]/ig, m => `[${m.toLowerCase()}${m.toUpperCase()}]`); + /** + * Simulates look behind behaviour since javascript doesn't support it. + * + * @param {string} input + * @returns {string} + */ + function preProcess(input) { + let result = ""; + for (let i = 0; i < input.length; i++) { + const temp = input.charAt(i); + if (temp.match(/[a-zA-Z]/g) && (input.charAt(i-1) !== "-") && (input.charAt(i+1) !== "-")) + result += "[" + temp.toLowerCase() + temp.toUpperCase() + "]"; + else + result += temp; + } + return result; + } + + input = preProcess(input); + + // Example: [a-z] -> [a-zA-Z] + input = input.replace(/[a-z]-[a-z]/g, m => `${m}${m[0].toUpperCase()}-${m[2].toUpperCase()}`); + + // Example: [a-z] -> [a-zA-Z] + input = input.replace(/[A-Z]-[A-Z]/g, m => `${m}${m[0].toLowerCase()}-${m[2].toLowerCase()}`); + + // Example: [H-d] -> [A-DH-dh-z] + input = input.replace(/[A-Z]-[a-z]/g, m => `A-${m[2].toUpperCase()}${m}${m[0].toLowerCase()}-z`); + + // Example: [!-D] -> [!-Da-d] + input = input.replace(/[ -@]-[A-Z]/g, m => `${m}a-${m[2].toLowerCase()}`); + + // Example: [%-^] -> [%-^a-z] + input = input.replace(/[ -@]-[[-`]/g, m => `${m}a-z`); + + // Example: [K-`] -> [K-`k-z] + input = input.replace(/[A-Z]-[[-`]/g, m => `${m}${m[0].toLowerCase()}-z`); + + // Example: [[-}] -> [[-}A-Z] + input = 
input.replace(/[[-`]-[{-~]/g, m => `${m}A-Z`); + + // Example: [b-}] -> [b-}B-Z] + input = input.replace(/[a-z]-[{-~]/g, m => `${m}${m[0].toUpperCase()}-Z`); + + // Example: [<-j] -> [<-z] + input = input.replace(/[ -@]-[a-z]/g, m => `${m[0]}-z`); + + // Example: [^-j] -> [A-J^-j] + input = input.replace(/[[-`]-[a-z]/g, m => `A-${m[2].toUpperCase()}${m}`); + return input; } } From 40d3c8b071de12afc627ad6654d654fc72873da8 Mon Sep 17 00:00:00 2001 From: n1073645 Date: Mon, 18 Nov 2019 13:31:19 +0000 Subject: [PATCH 02/66] ToCaseInsensitiveRegex improvements --- src/core/operations/ToCaseInsensitiveRegex.mjs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/core/operations/ToCaseInsensitiveRegex.mjs b/src/core/operations/ToCaseInsensitiveRegex.mjs index 4850846d..044c350c 100644 --- a/src/core/operations/ToCaseInsensitiveRegex.mjs +++ b/src/core/operations/ToCaseInsensitiveRegex.mjs @@ -54,31 +54,31 @@ class ToCaseInsensitiveRegex extends Operation { // Example: [a-z] -> [a-zA-Z] input = input.replace(/[a-z]-[a-z]/g, m => `${m}${m[0].toUpperCase()}-${m[2].toUpperCase()}`); - + // Example: [a-z] -> [a-zA-Z] input = input.replace(/[A-Z]-[A-Z]/g, m => `${m}${m[0].toLowerCase()}-${m[2].toLowerCase()}`); - + // Example: [H-d] -> [A-DH-dh-z] input = input.replace(/[A-Z]-[a-z]/g, m => `A-${m[2].toUpperCase()}${m}${m[0].toLowerCase()}-z`); - + // Example: [!-D] -> [!-Da-d] input = input.replace(/[ -@]-[A-Z]/g, m => `${m}a-${m[2].toLowerCase()}`); - + // Example: [%-^] -> [%-^a-z] input = input.replace(/[ -@]-[[-`]/g, m => `${m}a-z`); - + // Example: [K-`] -> [K-`k-z] input = input.replace(/[A-Z]-[[-`]/g, m => `${m}${m[0].toLowerCase()}-z`); - + // Example: [[-}] -> [[-}A-Z] input = input.replace(/[[-`]-[{-~]/g, m => `${m}A-Z`); - + // Example: [b-}] -> [b-}B-Z] input = input.replace(/[a-z]-[{-~]/g, m => `${m}${m[0].toUpperCase()}-Z`); - + // Example: [<-j] -> [<-z] input = input.replace(/[ -@]-[a-z]/g, m => `${m[0]}-z`); - + // Example: 
[^-j] -> [A-J^-j] input = input.replace(/[[-`]-[a-z]/g, m => `A-${m[2].toUpperCase()}${m}`); return input; From 6d77fe6eb3ae6e1bc72a0c448ae8f680f21ef7d7 Mon Sep 17 00:00:00 2001 From: n1073645 Date: Wed, 20 Nov 2019 09:28:34 +0000 Subject: [PATCH 03/66] Combined two rules into one case insensitive rule --- .../operations/ToCaseInsensitiveRegex.mjs | 23 +++++++++---------- 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/src/core/operations/ToCaseInsensitiveRegex.mjs b/src/core/operations/ToCaseInsensitiveRegex.mjs index 044c350c..dcf0758b 100644 --- a/src/core/operations/ToCaseInsensitiveRegex.mjs +++ b/src/core/operations/ToCaseInsensitiveRegex.mjs @@ -50,37 +50,36 @@ class ToCaseInsensitiveRegex extends Operation { return result; } + // Example: [test] -> [[tT][eE][sS][tT]] input = preProcess(input); - // Example: [a-z] -> [a-zA-Z] - input = input.replace(/[a-z]-[a-z]/g, m => `${m}${m[0].toUpperCase()}-${m[2].toUpperCase()}`); - - // Example: [a-z] -> [a-zA-Z] - input = input.replace(/[A-Z]-[A-Z]/g, m => `${m}${m[0].toLowerCase()}-${m[2].toLowerCase()}`); + // Example: [A-Z] -> [A-Za-z] + input = input.replace(/[A-Z]-[A-Z]/ig, m => `${m[0].toUpperCase()}-${m[2].toUpperCase()}${m[0].toLowerCase()}-${m[2].toLowerCase()}`); // Example: [H-d] -> [A-DH-dh-z] input = input.replace(/[A-Z]-[a-z]/g, m => `A-${m[2].toUpperCase()}${m}${m[0].toLowerCase()}-z`); // Example: [!-D] -> [!-Da-d] - input = input.replace(/[ -@]-[A-Z]/g, m => `${m}a-${m[2].toLowerCase()}`); + input = input.replace(/\\?[ -@]-[A-Z]/g, m => `${m}a-${m[2].toLowerCase()}`); // Example: [%-^] -> [%-^a-z] - input = input.replace(/[ -@]-[[-`]/g, m => `${m}a-z`); + input = input.replace(/\\?[ -@]-\\?[[-`]/g, m => `${m}a-z`); // Example: [K-`] -> [K-`k-z] - input = input.replace(/[A-Z]-[[-`]/g, m => `${m}${m[0].toLowerCase()}-z`); + input = input.replace(/[A-Z]-\\?[[-`]/g, m => `${m}${m[0].toLowerCase()}-z`); // Example: [[-}] -> [[-}A-Z] - input = input.replace(/[[-`]-[{-~]/g, m => `${m}A-Z`); 
+ input = input.replace(/\\?[[-`]-\\?[{-~]/g, m => `${m}A-Z`); // Example: [b-}] -> [b-}B-Z] - input = input.replace(/[a-z]-[{-~]/g, m => `${m}${m[0].toUpperCase()}-Z`); + input = input.replace(/[a-z]-\\?[{-~]/g, m => `${m}${m[0].toUpperCase()}-Z`); // Example: [<-j] -> [<-z] - input = input.replace(/[ -@]-[a-z]/g, m => `${m[0]}-z`); + input = input.replace(/\\?[ -@]-[a-z]/g, m => `${m[0]}-z`); // Example: [^-j] -> [A-J^-j] - input = input.replace(/[[-`]-[a-z]/g, m => `A-${m[2].toUpperCase()}${m}`); + input = input.replace(/\\?[[-`]-[a-z]/g, m => `A-${m[2].toUpperCase()}${m}`); + return input; } } From 7d41d4d030192844796dcaebfe7e8af7bc946247 Mon Sep 17 00:00:00 2001 From: n1073645 Date: Thu, 21 Nov 2019 09:11:12 +0000 Subject: [PATCH 04/66] Replaced the .replaces in regex operation --- .../operations/ToCaseInsensitiveRegex.mjs | 21 +++++++++---------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/src/core/operations/ToCaseInsensitiveRegex.mjs b/src/core/operations/ToCaseInsensitiveRegex.mjs index dcf0758b..32c14a3c 100644 --- a/src/core/operations/ToCaseInsensitiveRegex.mjs +++ b/src/core/operations/ToCaseInsensitiveRegex.mjs @@ -51,36 +51,35 @@ class ToCaseInsensitiveRegex extends Operation { } // Example: [test] -> [[tT][eE][sS][tT]] - input = preProcess(input); + return preProcess(input) // Example: [A-Z] -> [A-Za-z] - input = input.replace(/[A-Z]-[A-Z]/ig, m => `${m[0].toUpperCase()}-${m[2].toUpperCase()}${m[0].toLowerCase()}-${m[2].toLowerCase()}`); + .replace(/[A-Z]-[A-Z]/ig, m => `${m[0].toUpperCase()}-${m[2].toUpperCase()}${m[0].toLowerCase()}-${m[2].toLowerCase()}`) // Example: [H-d] -> [A-DH-dh-z] - input = input.replace(/[A-Z]-[a-z]/g, m => `A-${m[2].toUpperCase()}${m}${m[0].toLowerCase()}-z`); + .replace(/[A-Z]-[a-z]/g, m => `A-${m[2].toUpperCase()}${m}${m[0].toLowerCase()}-z`) // Example: [!-D] -> [!-Da-d] - input = input.replace(/\\?[ -@]-[A-Z]/g, m => `${m}a-${m[2].toLowerCase()}`); + .replace(/\\?[ -@]-[A-Z]/g, m => 
`${m}a-${m[2].toLowerCase()}`) // Example: [%-^] -> [%-^a-z] - input = input.replace(/\\?[ -@]-\\?[[-`]/g, m => `${m}a-z`); + .replace(/\\?[ -@]-\\?[[-`]/g, m => `${m}a-z`) // Example: [K-`] -> [K-`k-z] - input = input.replace(/[A-Z]-\\?[[-`]/g, m => `${m}${m[0].toLowerCase()}-z`); + .replace(/[A-Z]-\\?[[-`]/g, m => `${m}${m[0].toLowerCase()}-z`) // Example: [[-}] -> [[-}A-Z] - input = input.replace(/\\?[[-`]-\\?[{-~]/g, m => `${m}A-Z`); + .replace(/\\?[[-`]-\\?[{-~]/g, m => `${m}A-Z`) // Example: [b-}] -> [b-}B-Z] - input = input.replace(/[a-z]-\\?[{-~]/g, m => `${m}${m[0].toUpperCase()}-Z`); + .replace(/[a-z]-\\?[{-~]/g, m => `${m}${m[0].toUpperCase()}-Z`) // Example: [<-j] -> [<-z] - input = input.replace(/\\?[ -@]-[a-z]/g, m => `${m[0]}-z`); + .replace(/\\?[ -@]-[a-z]/g, m => `${m[0]}-z`) // Example: [^-j] -> [A-J^-j] - input = input.replace(/\\?[[-`]-[a-z]/g, m => `A-${m[2].toUpperCase()}${m}`); + .replace(/\\?[[-`]-[a-z]/g, m => `A-${m[2].toUpperCase()}${m}`); - return input; } } From c60ed2c4033dc04327bf9807decd822cd5be99ec Mon Sep 17 00:00:00 2001 From: n1073645 Date: Thu, 21 Nov 2019 09:56:52 +0000 Subject: [PATCH 05/66] Linting on regex operation --- .../operations/ToCaseInsensitiveRegex.mjs | 36 +++++++++---------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/src/core/operations/ToCaseInsensitiveRegex.mjs b/src/core/operations/ToCaseInsensitiveRegex.mjs index 32c14a3c..9467b8c8 100644 --- a/src/core/operations/ToCaseInsensitiveRegex.mjs +++ b/src/core/operations/ToCaseInsensitiveRegex.mjs @@ -53,32 +53,32 @@ class ToCaseInsensitiveRegex extends Operation { // Example: [test] -> [[tT][eE][sS][tT]] return preProcess(input) - // Example: [A-Z] -> [A-Za-z] - .replace(/[A-Z]-[A-Z]/ig, m => `${m[0].toUpperCase()}-${m[2].toUpperCase()}${m[0].toLowerCase()}-${m[2].toLowerCase()}`) + // Example: [A-Z] -> [A-Za-z] + .replace(/[A-Z]-[A-Z]/ig, m => `${m[0].toUpperCase()}-${m[2].toUpperCase()}${m[0].toLowerCase()}-${m[2].toLowerCase()}`) - // 
Example: [H-d] -> [A-DH-dh-z] - .replace(/[A-Z]-[a-z]/g, m => `A-${m[2].toUpperCase()}${m}${m[0].toLowerCase()}-z`) + // Example: [H-d] -> [A-DH-dh-z] + .replace(/[A-Z]-[a-z]/g, m => `A-${m[2].toUpperCase()}${m}${m[0].toLowerCase()}-z`) - // Example: [!-D] -> [!-Da-d] - .replace(/\\?[ -@]-[A-Z]/g, m => `${m}a-${m[2].toLowerCase()}`) + // Example: [!-D] -> [!-Da-d] + .replace(/\\?[ -@]-[A-Z]/g, m => `${m}a-${m[2].toLowerCase()}`) - // Example: [%-^] -> [%-^a-z] - .replace(/\\?[ -@]-\\?[[-`]/g, m => `${m}a-z`) + // Example: [%-^] -> [%-^a-z] + .replace(/\\?[ -@]-\\?[[-`]/g, m => `${m}a-z`) - // Example: [K-`] -> [K-`k-z] - .replace(/[A-Z]-\\?[[-`]/g, m => `${m}${m[0].toLowerCase()}-z`) + // Example: [K-`] -> [K-`k-z] + .replace(/[A-Z]-\\?[[-`]/g, m => `${m}${m[0].toLowerCase()}-z`) - // Example: [[-}] -> [[-}A-Z] - .replace(/\\?[[-`]-\\?[{-~]/g, m => `${m}A-Z`) + // Example: [[-}] -> [[-}A-Z] + .replace(/\\?[[-`]-\\?[{-~]/g, m => `${m}A-Z`) - // Example: [b-}] -> [b-}B-Z] - .replace(/[a-z]-\\?[{-~]/g, m => `${m}${m[0].toUpperCase()}-Z`) + // Example: [b-}] -> [b-}B-Z] + .replace(/[a-z]-\\?[{-~]/g, m => `${m}${m[0].toUpperCase()}-Z`) - // Example: [<-j] -> [<-z] - .replace(/\\?[ -@]-[a-z]/g, m => `${m[0]}-z`) + // Example: [<-j] -> [<-z] + .replace(/\\?[ -@]-[a-z]/g, m => `${m[0]}-z`) - // Example: [^-j] -> [A-J^-j] - .replace(/\\?[[-`]-[a-z]/g, m => `A-${m[2].toUpperCase()}${m}`); + // Example: [^-j] -> [A-J^-j] + .replace(/\\?[[-`]-[a-z]/g, m => `A-${m[2].toUpperCase()}${m}`); } } From 25ca8d85a6ddeb185b46ada43c0183972f5f73ba Mon Sep 17 00:00:00 2001 From: n1073645 Date: Thu, 21 Nov 2019 11:14:56 +0000 Subject: [PATCH 06/66] Added extractor for OLE2 and modified the PLIST one. 
--- src/core/lib/FileSignatures.mjs | 110 +++++++++++++++++++++++++++++++- 1 file changed, 107 insertions(+), 3 deletions(-) diff --git a/src/core/lib/FileSignatures.mjs b/src/core/lib/FileSignatures.mjs index dc7ced4d..e151a844 100644 --- a/src/core/lib/FileSignatures.mjs +++ b/src/core/lib/FileSignatures.mjs @@ -2914,15 +2914,119 @@ export function extractSQLITE(bytes, offset) { export function extractPListXML(bytes, offset) { const stream = new Stream(bytes.slice(offset)); - // Find closing tag () - stream.continueUntil([0x3c, 0x2f, 0x70, 0x6c, 0x69, 0x73, 0x74, 0x3e]); - stream.moveForwardsBy(8); + let braceCount = 0; + + // Continue to the first ( 0 && stream.hasMore()) { + if (stream.readInt(1) === 0x3c) { + + // If we hit an . + if (stream.getBytes(7).join("") === [0x2f, 0x70, 0x6c, 0x69, 0x73, 0x74, 0x3e].join("")) { + braceCount--; + } else { + stream.moveBackwardsBy(7); + } + } + } stream.consumeIf(0x0a); return stream.carve(); } +/** + * OLE2 extractor. + * + * @param {Uint8Array} bytes + * @param {number} offset + * @returns {Uint8Array} + */ +export function extractOLE2(bytes, offset) { + const stream = new Stream(bytes.slice(offset)); + const entries = [[[0x52, 0x00, 0x6f, 0x00, 0x6f, 0x00, 0x74, 0x00, 0x20, 0x00, 0x45, 0x00, 0x6e, 0x00, 0x74, 0x00, 0x72, 0x00, 0x79], 19, "Root Entry"], + [[0x57, 0x00, 0x6f, 0x00, 0x72, 0x00, 0x6b, 0x00, 0x62, 0x00, 0x6f, 0x00, 0x6f, 0x00, 0x6b], 15, "Workbook"], + [[0x43, 0x00, 0x75, 0x00, 0x72, 0x00, 0x72, 0x00, 0x65, 0x00, 0x6e, 0x00, 0x74, 0x00, 0x20, 0x00, 0x55, 0x00, 0x73, 0x00, 0x65, 0x00, 0x72], 23, "Current User"], + [[0x50, 0x00, 0x6f, 0x00, 0x77, 0x00, 0x65, 0x00, 0x72, 0x00, 0x50, 0x00, 0x6f, 0x00, 0x69, 0x00, 0x6e, 0x00, 0x74, 0x00, 0x20, 0x00, 0x44, 0x00, 0x6f, 0x00, 0x63, 0x00, 0x75, 0x00, 0x6d, 0x00, 0x65, 0x00, 0x6e, 0x00, 0x74], 37, "PowerPoint Document"], + [[0x57, 0x00, 0x6f, 0x00, 0x72, 0x00, 0x64, 0x00, 0x44, 0x00, 0x6f, 0x00, 0x63, 0x00, 0x75, 0x00, 0x6d, 0x00, 0x65, 0x00, 0x6e, 0x00, 0x74], 23, 
"WordDocument"], + [[0x44, 0x00, 0x61, 0x00, 0x74, 0x00, 0x61], 7, "Data"], + [[0x50, 0x00, 0x69, 0x00, 0x63, 0x00, 0x74, 0x00, 0x75, 0x00, 0x72, 0x00, 0x65, 0x00, 0x73], 15, "Pictures"], + [[0x31, 0x00, 0x54, 0x00, 0x61, 0x00, 0x62, 0x00, 0x6c, 0x00, 0x65], 11, "1Table"], + [[0x05, 0x00, 0x53, 0x00, 0x75, 0x00, 0x6d, 0x00, 0x6d, 0x00, 0x61, 0x00, 0x72, 0x00, 0x79, 0x00, 0x49, 0x00, 0x6e, 0x00, 0x66, 0x00, 0x6f, 0x00, 0x72, 0x00, 0x6d, 0x00, 0x61, 0x00, 0x74, 0x00, 0x69, 0x00, 0x6f, 0x00, 0x6e], 37, "SummaryInformation"], + [[0x05, 0x00, 0x44, 0x00, 0x6f, 0x00, 0x63, 0x00, 0x75, 0x00, 0x6d, 0x00, 0x65, 0x00, 0x6e, 0x00, 0x74, 0x00, 0x53, 0x00, 0x75, 0x00, 0x6d, 0x00, 0x6d, 0x00, 0x61, 0x00, 0x72, 0x00, 0x79, 0x00, 0x49, 0x00, 0x6e, 0x00, 0x66, 0x00, 0x6f, 0x00, 0x72, 0x00, 0x6d, 0x00, 0x61, 0x00, 0x74, 0x00, 0x69, 0x00, 0x6f, 0x00, 0x6e], 53, "DocumentSummaryInformation"], + [[0x43, 0x00, 0x6f, 0x00, 0x6d, 0x00, 0x70, 0x00, 0x4f, 0x00, 0x62, 0x00, 0x6a], 13, "Comp Obj"], + [[0x01, 0x00], 2, "Entry"]]; + let endianness = "le"; + + // Move to endianess field. + stream.moveForwardsBy(28); + if (stream.readInt(2, endianness) === 0xfffe) + endianness = "le"; + + // Calculate the size of the normal sectors. + const sizeOfSector = 2 ** stream.readInt(2, endianness); + + // Move to root directory offset field. + stream.moveTo(48); + + // Read root directory offset. + const rootStuff = stream.readInt(4, endianness); + + // Calculate root directory offset. + let total = 512 + (rootStuff * sizeOfSector); + stream.moveTo(total); + let found = true; + + // While valid directory entries. + while (found) { + found = false; + + // Attempt to determine what directory entry it is. + for (const element of entries) { + if (stream.getBytes(element[1]).join("") === element[0].join("")) { + stream.moveBackwardsBy(element[1]); + found = true; + + // Move forwards by the size of the comp obj. 
+ if (element[2] === "Comp Obj") { + total += (128*6); + stream.moveTo(total); + } else if (element[2] === "Entry") { + + // If there is an entry move backwards by 126 to then move forwards by 128. Hence a total displacement of 2. + stream.moveBackwardsBy(126); + } + break; + } + stream.moveBackwardsBy(element[1]); + } + + // If we have found a valid entry, move forwards by 128. + if (found) { + total += 128; + stream.moveForwardsBy(128); + } + } + + // Round up to a multiple of 512. + total = Math.ceil(total / 512) * 512; + + stream.moveTo(total); + return stream.carve(); +} + + /** * GZIP extractor. * From 7386c145ef8d22b8a1e7334c006d2ee58eee43f2 Mon Sep 17 00:00:00 2001 From: n1073645 Date: Thu, 21 Nov 2019 11:23:28 +0000 Subject: [PATCH 07/66] Comments for OLE2 extractor. --- src/core/lib/FileSignatures.mjs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/core/lib/FileSignatures.mjs b/src/core/lib/FileSignatures.mjs index e151a844..656df190 100644 --- a/src/core/lib/FileSignatures.mjs +++ b/src/core/lib/FileSignatures.mjs @@ -2994,6 +2994,8 @@ export function extractOLE2(bytes, offset) { // Attempt to determine what directory entry it is. for (const element of entries) { + + // If the byte pattern matches. if (stream.getBytes(element[1]).join("") === element[0].join("")) { stream.moveBackwardsBy(element[1]); found = true; From 071c1bdea6af3131e98faf86f5a00bf8418ebe8b Mon Sep 17 00:00:00 2001 From: n1073645 Date: Thu, 21 Nov 2019 11:29:45 +0000 Subject: [PATCH 08/66] Comments for OLE2 extractor. --- src/core/lib/FileSignatures.mjs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/core/lib/FileSignatures.mjs b/src/core/lib/FileSignatures.mjs index 656df190..1683a950 100644 --- a/src/core/lib/FileSignatures.mjs +++ b/src/core/lib/FileSignatures.mjs @@ -3016,6 +3016,8 @@ export function extractOLE2(bytes, offset) { // If we have found a valid entry, move forwards by 128. 
if (found) { + + // Every entry is at least 128 in size, some are bigger which is dealt with by the above if statement. total += 128; stream.moveForwardsBy(128); } From 725b0d42f89e15f1ca3727cafb094167d4c53b94 Mon Sep 17 00:00:00 2001 From: n1073645 Date: Thu, 21 Nov 2019 11:34:11 +0000 Subject: [PATCH 09/66] Comments to OLE2 extractor --- src/core/lib/FileSignatures.mjs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/core/lib/FileSignatures.mjs b/src/core/lib/FileSignatures.mjs index 1683a950..d997f823 100644 --- a/src/core/lib/FileSignatures.mjs +++ b/src/core/lib/FileSignatures.mjs @@ -3002,6 +3002,8 @@ export function extractOLE2(bytes, offset) { // Move forwards by the size of the comp obj. if (element[2] === "Comp Obj") { + + // The size of the Comp Obj entry - 128. Since we add 128 later. total += (128*6); stream.moveTo(total); } else if (element[2] === "Entry") { From 04036e001e3ca230bc19d3ed19d54c4ff17735ed Mon Sep 17 00:00:00 2001 From: n1073645 Date: Thu, 21 Nov 2019 12:13:34 +0000 Subject: [PATCH 10/66] Comments and linting for regex operation. --- src/core/operations/ToCaseInsensitiveRegex.mjs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/core/operations/ToCaseInsensitiveRegex.mjs b/src/core/operations/ToCaseInsensitiveRegex.mjs index 9467b8c8..5cb42d02 100644 --- a/src/core/operations/ToCaseInsensitiveRegex.mjs +++ b/src/core/operations/ToCaseInsensitiveRegex.mjs @@ -32,6 +32,7 @@ class ToCaseInsensitiveRegex extends Operation { * @returns {string} */ run(input, args) { + /** * Simulates look behind behaviour since javascript doesn't support it. 
* From 81d1007bb78db41e4581a8046976ae3ac2428b46 Mon Sep 17 00:00:00 2001 From: n1073645 Date: Fri, 22 Nov 2019 10:45:02 +0000 Subject: [PATCH 11/66] Added tests for regex operation and a slight bug fix --- .../operations/ToCaseInsensitiveRegex.mjs | 8 +- .../tests/ToFromInsensitiveRegex.mjs | 132 ++++++++++++++++++ 2 files changed, 139 insertions(+), 1 deletion(-) diff --git a/src/core/operations/ToCaseInsensitiveRegex.mjs b/src/core/operations/ToCaseInsensitiveRegex.mjs index 5cb42d02..58b21ef5 100644 --- a/src/core/operations/ToCaseInsensitiveRegex.mjs +++ b/src/core/operations/ToCaseInsensitiveRegex.mjs @@ -51,11 +51,17 @@ class ToCaseInsensitiveRegex extends Operation { return result; } + try { + RegExp(input); + } catch (error) { + return "Invalid Regular Expression (Please note this version of node does not support look behinds)."; + } + // Example: [test] -> [[tT][eE][sS][tT]] return preProcess(input) // Example: [A-Z] -> [A-Za-z] - .replace(/[A-Z]-[A-Z]/ig, m => `${m[0].toUpperCase()}-${m[2].toUpperCase()}${m[0].toLowerCase()}-${m[2].toLowerCase()}`) + .replace(/([A-Z]-[A-Z]|[a-z]-[a-z])/g, m => `${m[0].toUpperCase()}-${m[2].toUpperCase()}${m[0].toLowerCase()}-${m[2].toLowerCase()}`) // Example: [H-d] -> [A-DH-dh-z] .replace(/[A-Z]-[a-z]/g, m => `A-${m[2].toUpperCase()}${m}${m[0].toLowerCase()}-z`) diff --git a/tests/operations/tests/ToFromInsensitiveRegex.mjs b/tests/operations/tests/ToFromInsensitiveRegex.mjs index b74e9973..f33d6706 100644 --- a/tests/operations/tests/ToFromInsensitiveRegex.mjs +++ b/tests/operations/tests/ToFromInsensitiveRegex.mjs @@ -53,4 +53,136 @@ TestRegister.addTests([ }, ], }, + { + name: "To Case Insensitive Regex: [A-Z] -> [A-Za-z]", + input: "[A-Z]", + expectedOutput: "[A-Za-z]", + recipeConfig: [ + { + op: "To Case Insensitive Regex", + args: [], + }, + ], + }, + { + name: "To Case Insensitive Regex: [a-z] -> [A-Za-z]", + input: "[a-z]", + expectedOutput: "[A-Za-z]", + recipeConfig: [ + { + op: "To Case Insensitive Regex", + 
args: [], + }, + ], + }, + { + name: "To Case Insensitive Regex: [H-d] -> [A-DH-dh-z]", + input: "[H-d]", + expectedOutput: "[A-DH-dh-z]", + recipeConfig: [ + { + op: "To Case Insensitive Regex", + args: [], + }, + ], + }, + { + name: "To Case Insensitive Regex: [!-D] -> [!-Da-d]", + input: "[!-D]", + expectedOutput: "[!-Da-d]", + recipeConfig: [ + { + op: "To Case Insensitive Regex", + args: [], + }, + ], + }, + { + name: "To Case Insensitive Regex: [%-^] -> [%-^a-z]", + input: "[%-^]", + expectedOutput: "[%-^a-z]", + recipeConfig: [ + { + op: "To Case Insensitive Regex", + args: [], + }, + ], + }, + { + name: "To Case Insensitive Regex: [K-`] -> [K-`k-z]", + input: "[K-`]", + expectedOutput: "[K-`k-z]", + recipeConfig: [ + { + op: "To Case Insensitive Regex", + args: [], + }, + ], + }, + { + name: "To Case Insensitive Regex: [[-}] -> [[-}A-Z]", + input: "[[-}]", + expectedOutput: "[[-}A-Z]", + recipeConfig: [ + { + op: "To Case Insensitive Regex", + args: [], + }, + ], + }, + { + name: "To Case Insensitive Regex: [b-}] -> [b-}B-Z]", + input: "[b-}]", + expectedOutput: "[b-}B-Z]", + recipeConfig: [ + { + op: "To Case Insensitive Regex", + args: [], + }, + ], + }, + { + name: "To Case Insensitive Regex: [<-j] -> [<-z]", + input: "[<-j]", + expectedOutput: "[<-z]", + recipeConfig: [ + { + op: "To Case Insensitive Regex", + args: [], + }, + ], + }, + { + name: "To Case Insensitive Regex: [^-j] -> [A-J^-j]", + input: "[^-j]", + expectedOutput: "[A-J^-j]", + recipeConfig: [ + { + op: "To Case Insensitive Regex", + args: [], + }, + ], + }, + { + name: "To Case Insensitive Regex: not simple test", + input: "Mozilla[A-Z0-9]+[A-Z]Mozilla[0-9whatA-Z][H-d][!-H][a-~](.)+", + expectedOutput: "[mM][oO][zZ][iI][lL][lL][aA][A-Za-z0-9]+[A-Za-z][mM][oO][zZ][iI][lL][lL][aA][0-9[wW][hH][aA][tT]A-Za-z][A-DH-dh-z][!-Ha-h][a-~A-Z](.)+", + recipeConfig: [ + { + op: "To Case Insensitive Regex", + args: [], + }, + ], + }, + { + name: "To Case Insensitive Regex: erroneous test", + input: 
"Mozilla[A-Z", + expectedOutput: "Invalid Regular Expression (Please note this version of node does not support look behinds).", + recipeConfig: [ + { + op: "To Case Insensitive Regex", + args: [], + }, + ], + }, ]); From 4814922e67728dd6157056308487784540a70048 Mon Sep 17 00:00:00 2001 From: n1073645 Date: Fri, 22 Nov 2019 10:58:24 +0000 Subject: [PATCH 12/66] Linting for regex operation --- tests/operations/tests/ToFromInsensitiveRegex.mjs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/operations/tests/ToFromInsensitiveRegex.mjs b/tests/operations/tests/ToFromInsensitiveRegex.mjs index f33d6706..1394bfe8 100644 --- a/tests/operations/tests/ToFromInsensitiveRegex.mjs +++ b/tests/operations/tests/ToFromInsensitiveRegex.mjs @@ -184,5 +184,5 @@ TestRegister.addTests([ args: [], }, ], - }, + } ]); From 09e93b4639c407dc019fa786cfbd883159b42683 Mon Sep 17 00:00:00 2001 From: n1073645 Date: Mon, 25 Nov 2019 11:26:31 +0000 Subject: [PATCH 13/66] Added ICO extractor --- src/core/lib/FileSignatures.mjs | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/src/core/lib/FileSignatures.mjs b/src/core/lib/FileSignatures.mjs index dc7ced4d..41f870af 100644 --- a/src/core/lib/FileSignatures.mjs +++ b/src/core/lib/FileSignatures.mjs @@ -280,7 +280,7 @@ export const FILE_SIGNATURES = { 9: 0x0, 10: [0x0, 0x1] }, - extractor: null + extractor: extractICO }, { name: "Radiance High Dynamic Range image", @@ -2772,6 +2772,32 @@ export function extractBMP(bytes, offset) { } +/** + * ICO extractor. + * + * @param {Uint8Array} bytes + * @param {number} offset + */ +export function extractICO(bytes, offset) { + const stream = new Stream(bytes.slice(offset)); + + // Move to number of file there are. + stream.moveTo(4); + + // Read the number of files stored in the ICO + const numberFiles = stream.readInt(2, "le"); + + // Move forward to the last file header. 
+ stream.moveForwardsBy(8 + ((numberFiles-1) * 16)); + const fileSize = stream.readInt(4, "le"); + const fileOffset = stream.readInt(4, "le"); + + // Move to the end of the last file. + stream.moveTo(fileOffset + fileSize); + return stream.carve(); +} + + /** * WAV extractor. * From 1118ff598d6ecf93792a439609e6c5a4224652d9 Mon Sep 17 00:00:00 2001 From: n1073645 Date: Mon, 25 Nov 2019 13:43:31 +0000 Subject: [PATCH 14/66] From Base85 and From Braille signatures added for magic --- src/core/operations/FromBase85.mjs | 17 +++++++++++++++++ src/core/operations/FromBraille.mjs | 7 +++++++ 2 files changed, 24 insertions(+) diff --git a/src/core/operations/FromBase85.mjs b/src/core/operations/FromBase85.mjs index c874d5dc..001f4f69 100644 --- a/src/core/operations/FromBase85.mjs +++ b/src/core/operations/FromBase85.mjs @@ -33,6 +33,23 @@ class FromBase85 extends Operation { value: ALPHABET_OPTIONS }, ]; + this.patterns = [ + { + match: "^\\s*(<~)?([!-u]{4})+([!-u]{1,3})??(~>)?\\s*$", + flags: "", + args: ["!-u", true] + }, + { + match: "^\\s*(<~)?([0-9A-Z.-:+=^!/*?&<>()[]{}@%$#]{4})+([0-9A-Z.-:+=^!/*?&<>()[]{}@%$#]{1,3})??(~>)?\\s*$", + flags: "i", + args: ["0-9a-zA-Z.-:+=^!/*?&<>()[]{}@%$#", true] + }, + { + match: "^\\s*(<~)?([0-9A-Z.-:+=^!/*?&_<>()[]{}@%$#;`|~]{4})+([0-9A-Z.-:+=^!/*?&_<>()[]{}@%$#;`|~]{1,3})??(~>)?\\s*$", + flags: "i", + args: ["0-9A-Za-z!#$%&()*+-;<=>?@^_`{|~}", true] + } + ]; } /** diff --git a/src/core/operations/FromBraille.mjs b/src/core/operations/FromBraille.mjs index adbcff91..60e590a3 100644 --- a/src/core/operations/FromBraille.mjs +++ b/src/core/operations/FromBraille.mjs @@ -25,6 +25,13 @@ class FromBraille extends Operation { this.inputType = "string"; this.outputType = "string"; this.args = []; + this.patterns = [ + { + match: "^\\s*[⠀⠁⠂⠃⠄⠅⠆⠇⠈⠉⠊⠋⠌⠍⠎⠏⠐⠑⠒⠓⠔⠕⠖⠗⠘⠙⠚⠛⠜⠝⠞⠟⠠⠡⠢⠣⠤⠥⠦⠧⠨⠩⠪⠫⠬⠭⠮⠯⠰⠱⠲⠳⠴⠵⠶⠷⠸⠹⠺⠻⠼⠽⠾⠿]+\\s*$", + flags: "i", + args: [true] + } + ]; } /** From a6fa0628f2c003f7aa93a94e06abdab66156174a Mon Sep 17 00:00:00 2001 From: Matthieu 
Date: Mon, 25 Nov 2019 22:53:31 +0100 Subject: [PATCH 15/66] Add operation to normalise unicode --- package-lock.json | 5 ++ package.json | 1 + src/core/config/Categories.json | 1 + src/core/lib/ChrEnc.mjs | 12 +++++ src/core/operations/NormaliseUnicode.mjs | 60 +++++++++++++++++++++ tests/operations/index.mjs | 1 + tests/operations/tests/NormaliseUnicode.mjs | 54 +++++++++++++++++++ 7 files changed, 134 insertions(+) create mode 100644 src/core/operations/NormaliseUnicode.mjs create mode 100644 tests/operations/tests/NormaliseUnicode.mjs diff --git a/package-lock.json b/package-lock.json index 6c765b4e..15aa5451 100644 --- a/package-lock.json +++ b/package-lock.json @@ -14403,6 +14403,11 @@ "normalize-path": "^2.1.1" } }, + "unorm": { + "version": "1.6.0", + "resolved": "https://registry.npmjs.org/unorm/-/unorm-1.6.0.tgz", + "integrity": "sha512-b2/KCUlYZUeA7JFUuRJZPUtr4gZvBh7tavtv4fvk4+KV9pfGiR6CQAQAWl49ZpR3ts2dk4FYkP7EIgDJoiOLDA==" + }, "unpipe": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/unpipe/-/unpipe-1.0.0.tgz", diff --git a/package.json b/package.json index 7bdea20b..61136169 100644 --- a/package.json +++ b/package.json @@ -147,6 +147,7 @@ "ssdeep.js": "0.0.2", "tesseract.js": "^2.0.0-alpha.15", "ua-parser-js": "^0.7.20", + "unorm": "^1.6.0", "utf8": "^3.0.0", "vkbeautify": "^0.99.3", "xmldom": "^0.1.27", diff --git a/src/core/config/Categories.json b/src/core/config/Categories.json index f663e16d..53ca796d 100755 --- a/src/core/config/Categories.json +++ b/src/core/config/Categories.json @@ -39,6 +39,7 @@ "URL Decode", "Escape Unicode Characters", "Unescape Unicode Characters", + "Normalise Unicode", "To Quoted Printable", "From Quoted Printable", "To Punycode", diff --git a/src/core/lib/ChrEnc.mjs b/src/core/lib/ChrEnc.mjs index a472706b..d2d0625f 100644 --- a/src/core/lib/ChrEnc.mjs +++ b/src/core/lib/ChrEnc.mjs @@ -164,3 +164,15 @@ export const IO_FORMAT = { "Simplified Chinese GB18030 (54936)": 54936, }; +/** + * Unicode 
Normalisation Forms + * + * @author Matthieu [m@tthieu.xyz] + * @copyright Crown Copyright 2016 + * @license Apache-2.0 + */ + +/** + * Unicode Normalisation Forms offered by the Normalise Unicode operation. + */ +export const UNICODE_NORMALISATION_FORMS = ["NFD", "NFC", "NFKD", "NFKC"]; diff --git a/src/core/operations/NormaliseUnicode.mjs b/src/core/operations/NormaliseUnicode.mjs new file mode 100644 index 00000000..731a493f --- /dev/null +++ b/src/core/operations/NormaliseUnicode.mjs @@ -0,0 +1,60 @@ +/** + * @author Matthieu [m@tthieu.xyz] + * @copyright Crown Copyright 2019 + * @license Apache-2.0 + */ + +import Operation from "../Operation.mjs"; +import OperationError from "../errors/OperationError.mjs"; +import unorm from "unorm"; +import {UNICODE_NORMALISATION_FORMS} from "../lib/ChrEnc.mjs"; + +/** + * Normalise Unicode operation + */ +class NormaliseUnicode extends Operation { + + /** + * NormaliseUnicode constructor + */ + constructor() { + super(); + + this.name = "Normalise Unicode"; + this.module = "UnicodeNormalisation"; + this.description = "Transform Unicode to one of the Normalisation Form"; + this.infoURL = "http://www.unicode.org/reports/tr15/"; + this.inputType = "string"; + this.outputType = "string"; + this.args = [ + { + name: "Normal Form", + type: "option", + value: UNICODE_NORMALISATION_FORMS + } + ]; + } + + /** + * @param {string} input + * @param {Object[]} args - [normalForm], one of "NFD", "NFC", "NFKD", "NFKC"; anything else throws OperationError + * @returns {string} input normalised to the selected form + */ + run(input, args) { + const [normalForm] = args; + if (normalForm === "NFD") { + return unorm.nfd(input); + } else if (normalForm === "NFC") { + return unorm.nfc(input); + } else if (normalForm === "NFKD") { + return unorm.nfkd(input); + } else if (normalForm === "NFKC") { + return unorm.nfkc(input); + } + + throw new OperationError("Unknown Normalisation Form"); + } + +} + +export default NormaliseUnicode; diff --git a/tests/operations/index.mjs b/tests/operations/index.mjs index c54fa7ef..97745878 100644 --- a/tests/operations/index.mjs +++ b/tests/operations/index.mjs @@ -57,6 +57,7 @@ 
import "./tests/MS.mjs"; import "./tests/Magic.mjs"; import "./tests/MorseCode.mjs"; import "./tests/NetBIOS.mjs"; +import "./tests/NormaliseUnicode.mjs"; import "./tests/OTP.mjs"; import "./tests/PGP.mjs"; import "./tests/PHP.mjs"; diff --git a/tests/operations/tests/NormaliseUnicode.mjs b/tests/operations/tests/NormaliseUnicode.mjs new file mode 100644 index 00000000..d903e865 --- /dev/null +++ b/tests/operations/tests/NormaliseUnicode.mjs @@ -0,0 +1,54 @@ +/** + * Normalise Unicode tests. + * + * @author Matthieu [m@tthieux.xyz] + * + * @copyright Crown Copyright 2018 + * @license Apache-2.0 + */ +import TestRegister from "../../lib/TestRegister.mjs"; + +TestRegister.addTests([ + { + name: "Normalise Unicode - NFD", + input: "\u00c7\u0043\u0327\u2160", + expectedMatch: /C\u0327C\u0327\u2160/, + recipeConfig: [ + { + op: "Normalise Unicode", + args: ["NFD"], + }, + ], + }, { + name: "Normalise Unicode - NFC", + input: "\u00c7\u0043\u0327\u2160", + expectedMatch: /\u00C7\u00C7\u2160/, + recipeConfig: [ + { + op: "Normalise Unicode", + args: ["NFC"], + }, + ], + }, { + name: "Normalise Unicode - NFKD", + input: "\u00c7\u0043\u0327\u2160", + expectedMatch: /C\u0327C\u0327I/, + recipeConfig: [ + { + op: "Normalise Unicode", + args: ["NFKD"], + }, + ], + }, { + name: "Normalise Unicode - NFKC", + input: "\u00c7\u0043\u0327\u2160", + expectedMatch: /\u00C7\u00C7I/, + recipeConfig: [ + { + op: "Normalise Unicode", + args: ["NFKC"], + }, + ], + }, +]); + From 47ccafcbb29cd34dafd2c157ceda326144a6e9c2 Mon Sep 17 00:00:00 2001 From: n1073645 Date: Thu, 5 Dec 2019 09:47:32 +0000 Subject: [PATCH 16/66] Linting and tidy up --- src/core/operations/FromBase85.mjs | 17 ----------------- src/core/operations/FromBraille.mjs | 7 ------- 2 files changed, 24 deletions(-) diff --git a/src/core/operations/FromBase85.mjs b/src/core/operations/FromBase85.mjs index 001f4f69..c874d5dc 100644 --- a/src/core/operations/FromBase85.mjs +++ b/src/core/operations/FromBase85.mjs @@ 
-33,23 +33,6 @@ class FromBase85 extends Operation { value: ALPHABET_OPTIONS }, ]; - this.patterns = [ - { - match: "^\\s*(<~)?([!-u]{4})+([!-u]{1,3})??(~>)?\\s*$", - flags: "", - args: ["!-u", true] - }, - { - match: "^\\s*(<~)?([0-9A-Z.-:+=^!/*?&<>()[]{}@%$#]{4})+([0-9A-Z.-:+=^!/*?&<>()[]{}@%$#]{1,3})??(~>)?\\s*$", - flags: "i", - args: ["0-9a-zA-Z.-:+=^!/*?&<>()[]{}@%$#", true] - }, - { - match: "^\\s*(<~)?([0-9A-Z.-:+=^!/*?&_<>()[]{}@%$#;`|~]{4})+([0-9A-Z.-:+=^!/*?&_<>()[]{}@%$#;`|~]{1,3})??(~>)?\\s*$", - flags: "i", - args: ["0-9A-Za-z!#$%&()*+-;<=>?@^_`{|~}", true] - } - ]; } /** diff --git a/src/core/operations/FromBraille.mjs b/src/core/operations/FromBraille.mjs index 60e590a3..adbcff91 100644 --- a/src/core/operations/FromBraille.mjs +++ b/src/core/operations/FromBraille.mjs @@ -25,13 +25,6 @@ class FromBraille extends Operation { this.inputType = "string"; this.outputType = "string"; this.args = []; - this.patterns = [ - { - match: "^\\s*[⠀⠁⠂⠃⠄⠅⠆⠇⠈⠉⠊⠋⠌⠍⠎⠏⠐⠑⠒⠓⠔⠕⠖⠗⠘⠙⠚⠛⠜⠝⠞⠟⠠⠡⠢⠣⠤⠥⠦⠧⠨⠩⠪⠫⠬⠭⠮⠯⠰⠱⠲⠳⠴⠵⠶⠷⠸⠹⠺⠻⠼⠽⠾⠿]+\\s*$", - flags: "i", - args: [true] - } - ]; } /** From c06502cd7668ff35bee3814b0dd93ccce83057db Mon Sep 17 00:00:00 2001 From: Bart Date: Wed, 11 Dec 2019 22:58:33 +0100 Subject: [PATCH 17/66] Improve RTF detection Certain RTF files may attempt to thwart detection by having a malformed RTF header, such as **{\rt000**. Removing 0x66 will result in detecting these malformed yet valid RTFs as well. 
Additional reading: https://www.decalage.info/rtf_tricks#Trick_1:_Incomplete_RTF_Header --- src/core/lib/FileSignatures.mjs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/core/lib/FileSignatures.mjs b/src/core/lib/FileSignatures.mjs index 17b17f23..5f5c0d2f 100644 --- a/src/core/lib/FileSignatures.mjs +++ b/src/core/lib/FileSignatures.mjs @@ -1008,8 +1008,7 @@ export const FILE_SIGNATURES = { 0: 0x7b, 1: 0x5c, 2: 0x72, - 3: 0x74, - 4: 0x66 + 3: 0x74 }, extractor: extractRTF }, From 813a151524e35091fe0ef0ce1cbbc4e229c30501 Mon Sep 17 00:00:00 2001 From: n1474335 Date: Fri, 13 Dec 2019 12:59:59 +0000 Subject: [PATCH 18/66] Added 'Show all' button to output file overlay --- src/web/Manager.mjs | 1 + src/web/html/index.html | 10 ++++--- src/web/index.js | 2 +- src/web/stylesheets/components/_pane.css | 5 ++++ src/web/waiters/InputWaiter.mjs | 4 ++- src/web/waiters/OutputWaiter.mjs | 37 ++++++++++++++++++++++-- 6 files changed, 51 insertions(+), 8 deletions(-) diff --git a/src/web/Manager.mjs b/src/web/Manager.mjs index cb579721..e1e07dfd 100755 --- a/src/web/Manager.mjs +++ b/src/web/Manager.mjs @@ -197,6 +197,7 @@ class Manager { this.addMultiEventListener("#output-text", "mousedown dblclick select", this.highlighter.outputMousedown, this.highlighter); this.addMultiEventListener("#output-html", "mousedown dblclick select", this.highlighter.outputHtmlMousedown, this.highlighter); this.addDynamicListener("#output-file-download", "click", this.output.downloadFile, this.output); + this.addDynamicListener("#output-file-show-all", "click", this.output.showAllFile, this.output); this.addDynamicListener("#output-file-slice i", "click", this.output.displayFileSlice, this.output); document.getElementById("show-file-overlay").addEventListener("click", this.output.showFileOverlayClick.bind(this.output)); this.addDynamicListener(".extract-file,.extract-file i", "click", this.output.extractFileClick, this.output); diff --git a/src/web/html/index.html 
b/src/web/html/index.html index 81f5402b..2167c6b8 100755 --- a/src/web/html/index.html +++ b/src/web/html/index.html @@ -355,15 +355,17 @@
Size:
+
- - - +
to
- + +
KiB
diff --git a/src/web/index.js b/src/web/index.js index 736b512f..63591d89 100755 --- a/src/web/index.js +++ b/src/web/index.js @@ -49,7 +49,7 @@ function main() { attemptHighlight: true, theme: "classic", useMetaKey: false, - ioDisplayThreshold: 512, + ioDisplayThreshold: 2048, logLevel: "info", autoMagic: true, imagePreview: true, diff --git a/src/web/stylesheets/components/_pane.css b/src/web/stylesheets/components/_pane.css index 9ee8f46f..cf1b9342 100755 --- a/src/web/stylesheets/components/_pane.css +++ b/src/web/stylesheets/components/_pane.css @@ -98,6 +98,11 @@ .io-card.card input[type=number] { padding-right: 6px; padding-left: 6px; + height: unset; +} + +.io-card.card .input-group { + padding-top: 5px; } #files .card-header .float-right a:hover { diff --git a/src/web/waiters/InputWaiter.mjs b/src/web/waiters/InputWaiter.mjs index b9d78d5c..2efcb8a3 100644 --- a/src/web/waiters/InputWaiter.mjs +++ b/src/web/waiters/InputWaiter.mjs @@ -767,7 +767,9 @@ class InputWaiter { // and manually fire inputChange() inputText.value = val; inputText.setSelectionRange(selStart + pastedData.length, selStart + pastedData.length); - this.debounceInputChange(e); + // Don't debounce here otherwise the keyup event for the Ctrl key will cancel an autobake + // (at least for large inputs) + this.inputChange(e, true); } } diff --git a/src/web/waiters/OutputWaiter.mjs b/src/web/waiters/OutputWaiter.mjs index 11a78254..6ca6d526 100755 --- a/src/web/waiters/OutputWaiter.mjs +++ b/src/web/waiters/OutputWaiter.mjs @@ -1122,8 +1122,8 @@ class OutputWaiter { showFileOverlay = document.getElementById("show-file-overlay"), sliceFromEl = document.getElementById("output-file-slice-from"), sliceToEl = document.getElementById("output-file-slice-to"), - sliceFrom = parseInt(sliceFromEl.value, 10), - sliceTo = parseInt(sliceToEl.value, 10), + sliceFrom = parseInt(sliceFromEl.value, 10) * 1024, + sliceTo = parseInt(sliceToEl.value, 10) * 1024, output = 
this.outputs[this.manager.tabs.getActiveOutputTab()].data; let str; @@ -1137,6 +1137,39 @@ class OutputWaiter { showFileOverlay.style.display = "block"; outputText.value = Utils.printable(str, true); + outputText.style.display = "block"; + outputHtml.style.display = "none"; + outputFile.style.display = "none"; + outputHighlighter.display = "block"; + inputHighlighter.display = "block"; + + this.toggleLoader(false); + } + + /** + * Handler for showing an entire file at user's discretion (even if it's way too big) + */ + async showAllFile() { + document.querySelector("#output-loader .loading-msg").textContent = "Loading entire file at user instruction. This may cause a crash..."; + this.toggleLoader(true); + const outputText = document.getElementById("output-text"), + outputHtml = document.getElementById("output-html"), + outputFile = document.getElementById("output-file"), + outputHighlighter = document.getElementById("output-highlighter"), + inputHighlighter = document.getElementById("input-highlighter"), + showFileOverlay = document.getElementById("show-file-overlay"), + output = this.outputs[this.manager.tabs.getActiveOutputTab()].data; + + let str; + if (output.type === "ArrayBuffer") { + str = Utils.arrayBufferToStr(output.result); + } else { + str = Utils.arrayBufferToStr(await this.getDishBuffer(output.dish)); + } + + outputText.classList.remove("blur"); + showFileOverlay.style.display = "none"; + outputText.value = Utils.printable(str, true); outputText.style.display = "block"; outputHtml.style.display = "none"; From fd390bc61b812476f7fbc61e53b8e167f885263f Mon Sep 17 00:00:00 2001 From: n1474335 Date: Fri, 13 Dec 2019 14:45:13 +0000 Subject: [PATCH 19/66] Improved CR preservation logic - now based on entropy --- src/core/Utils.mjs | 38 ++++++++++++++++++++++++++++ src/web/html/index.html | 16 ++++++------ src/web/index.js | 3 +-- src/web/waiters/InputWaiter.mjs | 44 ++++++++++++++++----------------- 4 files changed, 69 insertions(+), 32 deletions(-) diff 
--git a/src/core/Utils.mjs b/src/core/Utils.mjs index 0f10cb8a..c99eccc9 100755 --- a/src/core/Utils.mjs +++ b/src/core/Utils.mjs @@ -591,6 +591,44 @@ class Utils { return utf8 ? Utils.byteArrayToUtf8(arr) : Utils.byteArrayToChars(arr); } + /** + * Calculates the Shannon entropy (base 2) of the byte-value distribution in the given data. + * + * @param {Uint8Array|ArrayBuffer} data - bytes to measure; an ArrayBuffer is wrapped in a Uint8Array first + * @returns {number} entropy in bits per byte + */ + static calculateShannonEntropy(data) { + if (data instanceof ArrayBuffer) { + data = new Uint8Array(data); + } + const prob = [], + occurrences = new Array(256).fill(0); + + // Count occurrences of each of the 256 possible byte values in the input + let i; + for (i = 0; i < data.length; i++) { + occurrences[data[i]]++; + } + + // Store the probability of each byte value that occurs; zero counts are skipped so log(0) is never taken + for (i = 0; i < occurrences.length; i++) { + if (occurrences[i] > 0) { + prob.push(occurrences[i] / data.length); + } + } + + // Calculate Shannon entropy: sum p * log2(p) over the probabilities, negated on return + let entropy = 0, + p; + + for (i = 0; i < prob.length; i++) { + p = prob[i]; + entropy += p * Math.log(p) / Math.log(2); + } + + return -entropy; + } + /** * Parses CSV data and returns it as a two dimensional array or strings. diff --git a/src/web/html/index.html b/src/web/html/index.html index 2167c6b8..121f0780 100755 --- a/src/web/html/index.html +++ b/src/web/html/index.html @@ -491,6 +491,15 @@ +
+ + +
+
@@ -575,13 +584,6 @@ Keep the current tab in sync between the input and output
- -
- -