diff --git a/doc/EscapeSmartCharacters.xlsx b/doc/EscapeSmartCharacters.xlsx
new file mode 100644
index 00000000..cafe9fb1
Binary files /dev/null and b/doc/EscapeSmartCharacters.xlsx differ
diff --git a/src/core/config/Categories.json b/src/core/config/Categories.json
index 09ee8d15..92d487d2 100644
--- a/src/core/config/Categories.json
+++ b/src/core/config/Categories.json
@@ -63,7 +63,8 @@
             "JSON to CSV",
             "Avro to JSON",
             "CBOR Encode",
-            "CBOR Decode"
+            "CBOR Decode",
+            "Escape Smart Characters"
         ]
     },
     {
diff --git a/src/core/operations/EscapeSmartCharacters.mjs b/src/core/operations/EscapeSmartCharacters.mjs
new file mode 100644
index 00000000..3701bdb0
--- /dev/null
+++ b/src/core/operations/EscapeSmartCharacters.mjs
@@ -0,0 +1,155 @@
+/**
+ * @author john19696 [john19696@protonmail.com]
+ * @copyright Crown Copyright 2021
+ * @license Apache-2.0
+ */
+
+import Operation from "../Operation.mjs";
+
+/**
+ * Plain-ASCII replacements for "smart" typographic characters, keyed by the
+ * single character they replace. See doc/EscapeSmartCharacters.xlsx.
+ */
+const ESCAPE_MAP = new Map([
+    ["‘", "'"],
+    ["’", "'"],
+    ["“", "\""],
+    ["”", "\""],
+    ["©", "(C)"],
+    ["®", "(R)"],
+    ["™", "(TM)"],
+    ["→", "-->"],
+    ["←", "<--"],
+    ["↔", "<->"],
+    ["‐", "-"],
+    ["‑", "-"],
+    ["‒", "-"],
+    ["–", "-"],
+    ["—", "--"],
+    ["―", "--"],
+    ["‖", "||"],
+    ["‗", "=="],
+    ["‚", "'"],
+    ["‛", "'"],
+    ["„", "\""],
+    ["‟", "\""],
+    ["•", "."],
+    ["‣", ">"],
+    ["․", "."],
+    ["‥", ".."],
+    ["…", "..."],
+    ["‧", "."],
+    ["‰", "%0"],
+    ["‱", "%00"],
+    ["′", "'"],
+    ["″", "''"],
+    ["‴", "'''"],
+    ["‵", "'"],
+    ["‶", "''"],
+    ["‷", "'''"],
+    ["‸", "^"],
+    ["‹", "<"],
+    ["›", ">"],
+    ["‼", "!!"],
+    ["‽", "?!"],
+    ["⁃", "-"],
+    ["⁄", "/"],
+    ["⁅", "[-"],
+    ["⁆", "-]"],
+    ["⁇", "??"],
+    ["⁈", "?!"],
+    ["⁉", "!?"],
+    ["⁌", "."],
+    ["⁍", "."],
+    ["⁎", "*"],
+    ["⁏", ";"],
+    ["⁒", "%"],
+    ["⁓", "~"],
+    ["⁕", "*"],
+    ["⁗", "''''"],
+    ["\u2062", "*"],
+    ["\u2064", "+"],
+]);
+
+// Bounds of the code point range 8592-8703 (U+2190 to U+21FF). Arrows in this
+// range without their own ESCAPE_MAP entry are all escaped to a generic "->".
+const ARROWS_START = 0x2190;
+const ARROWS_END = 0x21FF;
+
+/**
+ * Looks up the plain-ASCII equivalent of a single character.
+ *
+ * @param {string} char - A single character (one Unicode code point)
+ * @returns {string|undefined} The replacement, or undefined if char is not a smart character
+ */
+function escapeSmartChar(char) {
+    if (ESCAPE_MAP.has(char)) {
+        return ESCAPE_MAP.get(char);
+    }
+
+    const codePoint = char.codePointAt(0);
+    if (codePoint >= ARROWS_START && codePoint <= ARROWS_END) {
+        return "->";
+    }
+
+    return undefined;
+}
+
+/**
+ * Escape Smart Characters operation
+ */
+class EscapeSmartCharacters extends Operation {
+
+    /**
+     * EscapeSmartCharacters constructor
+     */
+    constructor() {
+        super();
+
+        this.name = "Escape Smart Characters";
+        this.module = "Default";
+        this.description = "Converts smart characters (quotes, dashes, apostrophes, arrows, copyright signs, ellipses etc.) back to plain ASCII.<br><br>Smart characters can instead be removed or replaced with '.'; all other characters are always left unchanged.";
+        this.infoURL = "http://unicode.scarfboy.com/?s=quotation+mark";
+        this.inputType = "string";
+        this.outputType = "string";
+        this.args = [
+            {
+                name: "Action",
+                type: "option",
+                value: ["Escape", "Remove", "Replace with '.'"]
+            }
+        ];
+    }
+
+    /**
+     * Escapes, removes or replaces every smart character in the input.
+     * All other characters are copied to the output unchanged.
+     *
+     * @param {string} input
+     * @param {Object[]} args
+     * @returns {string}
+     */
+    run(input, args) {
+        const [action] = args;
+
+        let result = "";
+        // for...of iterates by code point, so surrogate pairs are never split.
+        for (const char of input) {
+            const escaped = escapeSmartChar(char);
+
+            if (escaped === undefined) {
+                // Not a smart character: passed through whatever the action.
+                result += char;
+            } else if (action === "Replace with '.'") {
+                result += ".";
+            } else if (action !== "Remove") {
+                // "Escape" (and any unrecognised action) emits the ASCII form.
+                result += escaped;
+            }
+        }
+        return result;
+    }
+
+}
+
+export default EscapeSmartCharacters;
diff --git a/tests/operations/index.mjs b/tests/operations/index.mjs
index 9add20b9..cb7e7d43 100644
--- a/tests/operations/index.mjs
+++ b/tests/operations/index.mjs
@@ -107,6 +107,7 @@ import "./tests/CBORDecode.mjs";
 import "./tests/JA3Fingerprint.mjs";
 import "./tests/JA3SFingerprint.mjs";
 import "./tests/HASSH.mjs";
+import "./tests/EscapeSmartCharacters.mjs";
 
 
 // Cannot test operations that use the File type yet
diff --git a/tests/operations/tests/EscapeSmartCharacters.mjs b/tests/operations/tests/EscapeSmartCharacters.mjs
new file mode 100644
index 00000000..aeee315b
--- /dev/null
+++ b/tests/operations/tests/EscapeSmartCharacters.mjs
@@ -0,0 +1,43 @@
+/**
+ * @author john19696 [john19696@protonmail.com]
+ * @copyright Crown Copyright 2021
+ * @license Apache-2.0
+ */
+
+import TestRegister from "../../lib/TestRegister.mjs";
+
+TestRegister.addTests([
+    {
+        name: "Escape Smart Characters",
+        input: "“”—‘’ →©…",
+        expectedOutput: "\"\"--'' -->(C)...",
+        recipeConfig: [
+            {
+                op: "Escape Smart Characters",
+                args: ["Escape"],
+            },
+        ],
+    },
+    {
+        name: "Escape Smart Characters: remove",
+        input: "“Hello”—it’s→here…",
+        expectedOutput: "Helloitshere",
+        recipeConfig: [
+            {
+                op: "Escape Smart Characters",
+                args: ["Remove"],
+            },
+        ],
+    },
+    {
+        name: "Escape Smart Characters: replace with '.'",
+        input: "“Hello”—it’s→here…",
+        expectedOutput: ".Hello..it.s.here.",
+        recipeConfig: [
+            {
+                op: "Escape Smart Characters",
+                args: ["Replace with '.'"],
+            },
+        ],
+    },
+]);