From d726f1e1df65a3c9db32d5d0ce9acea1ca094a07 Mon Sep 17 00:00:00 2001 From: sw5678 <151949597+sw5678@users.noreply.github.com> Date: Mon, 12 May 2025 14:03:07 +0100 Subject: [PATCH 1/3] Added word count operation --- src/core/config/Categories.json | 3 +- src/core/operations/WordCount.mjs | 116 ++++++++++++++++++++++++++ tests/operations/index.mjs | 1 + tests/operations/tests/WordCount.mjs | 117 +++++++++++++++++++++++++++ 4 files changed, 236 insertions(+), 1 deletion(-) create mode 100644 src/core/operations/WordCount.mjs create mode 100644 tests/operations/tests/WordCount.mjs diff --git a/src/core/config/Categories.json b/src/core/config/Categories.json index 7f9591e0..0226e33b 100644 --- a/src/core/config/Categories.json +++ b/src/core/config/Categories.json @@ -337,7 +337,8 @@ "Sleep", "File Tree", "Take nth bytes", - "Drop nth bytes" + "Drop nth bytes", + "Word Count" ] }, { diff --git a/src/core/operations/WordCount.mjs b/src/core/operations/WordCount.mjs new file mode 100644 index 00000000..65260e53 --- /dev/null +++ b/src/core/operations/WordCount.mjs @@ -0,0 +1,116 @@ +/** + * @author sw5678 + * @copyright Crown Copyright 2016 + * @license Apache-2.0 + */ + +import Operation from "../Operation.mjs"; +import Utils from "../Utils.mjs"; +import {LETTER_DELIM_OPTIONS} from "../lib/Delim.mjs"; +import {caseInsensitiveSort} from "../lib/Sort.mjs"; + + +/** + * Word Count operation + */ +class WordCount extends Operation { + + /** + * Word Count constructor + */ + constructor() { + super(); + + this.name = "Word Count"; + this.module = "Default"; + this.description = "Provides a count of each word in a given text"; + this.inputType = "string"; + this.outputType = "string"; + this.args = [ + { + name: "Delimiter", + type: "option", + value: LETTER_DELIM_OPTIONS + }, + { + "name": "Include Total", + "type": "boolean", + "value": true + }, + { + "name": "Order", + "type": "option", + "value": ["Alphabetical", "Count"] + } + ]; + } + + /** + * @param {string} input + * @param {Object[]} args + * @returns {string} + */ + run(input, args) { + + const delimiter = Utils.charRep(args[0]); + + // Lower case and split + const inputArray = input.replace(/(?:\r\n|\r|\n)/g, delimiter).toLowerCase().split(delimiter); + + // Count up the words + const counter = {}; + let total = 0; + for (let j = 0; j < inputArray.length; j++) { + + // Trim whitespace and replace punctuation + const word = inputArray[j].replace(/(?:!|"|#|\$|%|&|\(|\)|\*|\+|,|-|\.|\/|:|;|<|=|>|\?|@|\[|\\|\]|\^|_|`|\{|\||\}|~|£)/g, "").trim(); + + // If empty string or ', then skip + if (word === "" || /[']+/.test(word)) { + continue; + } else if (word in counter) { + counter[word]++; + total++; + } else { + counter[word] = 1; + total++; + } + } + + // Sort results + let order; + if (args[2] === "Alphabetical") { + // Sort alphabetically + order = Object.keys(counter).sort(caseInsensitiveSort); + } else if (args[2] === "Count") { + // Sort by count + // Create the array of key-value pairs + order = Object.keys(counter).map((key) => { + return [key, counter[key]]; + }); + // Sort the array based on the second element (i.e. the value) + order.sort((first, second) => { + return second[1] - first[1]; + }); + // Obtain the list of keys in sorted order of the values. + order = order.map((e) => { + return e[0]; + }); + } + + // Process output to string + let output = "WORD,COUNT\n"; + for (let k = 0; k < order.length; k++) { + output = output + order[k] + "," + counter[order[k]] + "\n"; + } + + // Add total counter at the bottom + if (args[1]) { + output = output + "TOTAL," + total; + } + + return output; + } +} + +export default WordCount; \ No newline at end of file diff --git a/tests/operations/index.mjs b/tests/operations/index.mjs index ab1ceb8f..d0b5820d 100644 --- a/tests/operations/index.mjs +++ b/tests/operations/index.mjs @@ -170,6 +170,7 @@ import "./tests/JA3Fingerprint.mjs"; import "./tests/JA3SFingerprint.mjs"; import "./tests/HASSH.mjs"; import "./tests/JSONtoYAML.mjs"; +import "./tests/WordCount.mjs"; // Cannot test operations that use the File type yet // import "./tests/SplitColourChannels.mjs"; diff --git a/tests/operations/tests/WordCount.mjs b/tests/operations/tests/WordCount.mjs new file mode 100644 index 00000000..41f67048 --- /dev/null +++ b/tests/operations/tests/WordCount.mjs @@ -0,0 +1,117 @@ +/** + * @author sw5678 + * @copyright Crown Copyright 2023 + * @license Apache-2.0 + */ +import TestRegister from "../../lib/TestRegister.mjs"; + +TestRegister.addTests([ + { + "name": "Word Count: Empty test 1", + "input": "", + "expectedOutput": "WORD,COUNT\nTOTAL,0", + + "recipeConfig": [ + { + "op": "Word Count", + "args": ["Space", true, "Alphabetical"], + }, + ], + }, + { + "name": "Word Count: Empty test 2", + "input": "", + "expectedOutput": "WORD,COUNT\nTOTAL,0", + + "recipeConfig": [ + { + "op": "Word Count", + "args": ["Space", true, "Count"], + }, + ], + }, + { + "name": "Word Count: Empty test 3", + "input": "", + "expectedOutput": "WORD,COUNT\n", + + "recipeConfig": [ + { + "op": "Word Count", + "args": ["Space", false, "Alphabetical"], + }, + ], + }, + { + "name": "Word Count: Empty test 4", + "input": "", + "expectedOutput": "WORD,COUNT\n", + + "recipeConfig": [ + { + "op": "Word Count", + "args": ["Space", false, "Count"], + }, + ], + }, + { + "name": "Word Count: Count test 1", + "input": "Hello world. Hello. \n\n World, ''!@£$%^&*()_+=-[]{};'|:/.,<>? world", + "expectedOutput": "WORD,COUNT\nhello,2\nworld,3\nTOTAL,5", + + "recipeConfig": [ + { + "op": "Word Count", + "args": ["Space", true, "Alphabetical"], + }, + ], + }, + { + "name": "Word Count: Count test 2", + "input": "Hello world. Hello. \n\n World, ''!@£$%^&*()_+=-[]{};'|:/.,<>? world", + "expectedOutput": "WORD,COUNT\nworld,3\nhello,2\nTOTAL,5", + + "recipeConfig": [ + { + "op": "Word Count", + "args": ["Space", true, "Count"], + }, + ], + }, + { + "name": "Word Count: Count test 3", + "input": "Hello world. Hello. \n\n World, ''!@£$%^&*()_+=-[]{};'|:/.,<>? world", + "expectedOutput": "WORD,COUNT\nhello,2\nworld,3\n", + + "recipeConfig": [ + { + "op": "Word Count", + "args": ["Space", false, "Alphabetical"], + }, + ], + }, + { + "name": "Word Count: Count test 4", + "input": "Hello world. Hello. \n\n World, ''!@£$%^&*()_+=-[]{};'|:/.,<>? world", + "expectedOutput": "WORD,COUNT\nworld,3\nhello,2\n", + + "recipeConfig": [ + { + "op": "Word Count", + "args": ["Space", false, "Count"], + }, + ], + }, + { + "name": "Word Count: Different delimiter test", + "input": "Hello, World\nhello, world \n''!@£$%^&*()_+=-[]{};'|:/.,<>? world", + "expectedOutput": "WORD,COUNT\nworld,3\nhello,2\n", + + "recipeConfig": [ + { + "op": "Word Count", + "args": ["Comma", false, "Count"], + }, + ], + } +]); \ No newline at end of file From cbf8a7773ce741af8af71eddfb0191676b74896a Mon Sep 17 00:00:00 2001 From: sw5678 <151949597+sw5678@users.noreply.github.com> Date: Mon, 12 May 2025 14:06:57 +0100 Subject: [PATCH 2/3] Fixed linting errors --- src/core/operations/WordCount.mjs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/operations/WordCount.mjs b/src/core/operations/WordCount.mjs index 65260e53..a94eb0ac 100644 --- a/src/core/operations/WordCount.mjs +++ b/src/core/operations/WordCount.mjs @@ -113,4 +113,4 @@ class WordCount extends Operation { } } -export default WordCount; \ No newline at end of file +export default WordCount; From 3366fe3b026b6e4ab248f67cd7bc6c555efd71ae Mon Sep 17 00:00:00 2001 From: sw5678 <151949597+sw5678@users.noreply.github.com> Date: Mon, 12 May 2025 14:09:24 +0100 Subject: [PATCH 3/3] Fixed linting errors --- tests/operations/tests/WordCount.mjs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/operations/tests/WordCount.mjs b/tests/operations/tests/WordCount.mjs index 41f67048..bc518de2 100644 --- a/tests/operations/tests/WordCount.mjs +++ b/tests/operations/tests/WordCount.mjs @@ -114,4 +114,4 @@ TestRegister.addTests([ }, ], } -]); \ No newline at end of file +]);