diff --git a/src/core/config/Categories.json b/src/core/config/Categories.json
index 53ca796d..5852f686 100755
--- a/src/core/config/Categories.json
+++ b/src/core/config/Categories.json
@@ -228,6 +228,7 @@
             "Regular expression",
             "Offset checker",
             "Hamming Distance",
+            "Levenshtein Distance",
             "Convert distance",
             "Convert area",
             "Convert mass",
diff --git a/src/core/lib/Levenshtein.mjs b/src/core/lib/Levenshtein.mjs
new file mode 100644
index 00000000..9cc209be
--- /dev/null
+++ b/src/core/lib/Levenshtein.mjs
@@ -0,0 +1,53 @@
+/**
+ * Levenshtein distance library.
+ *
+ * @author n1073645 [n1073645@gmail.com]
+ * @copyright Crown Copyright 2020
+ * @license Apache-2.0
+ */
+
+/**
+ * Computes the Levenshtein distance between two strings.
+ *
+ * @param {string} firstString
+ * @param {string} secString
+ * @returns {number} the minimum number of single-character insertions, deletions or substitutions
+ */
+export function levenshteinDistance(firstString, secString) {
+
+    const firstStringLength = firstString.length;
+    const secStringLength = secString.length;
+
+    // If either string is empty, the distance is the length of the other one.
+    if (!firstStringLength)
+        return secStringLength;
+
+    if (!secStringLength)
+        return firstStringLength;
+
+    const distanceMatrix = Array(secStringLength + 1).fill(0).map(() => Array(firstStringLength + 1).fill(0));
+
+    // Fill in first row distances.
+    for (let i = 0; i <= firstStringLength; i++)
+        distanceMatrix[0][i] = i;
+
+    // Fill in column distances.
+    for (let i = 0; i <= secStringLength; i++)
+        distanceMatrix[i][0] = i;
+
+    // Propagate the values through the matrix; the bottom-right cell ends up holding the total distance.
+    for (let j = 1; j <= secStringLength; j++) {
+        for (let i = 1; i <= firstStringLength; i++) {
+            const indicator = firstString[i - 1] === secString[j - 1] ? 0 : 1;
+            distanceMatrix[j][i] = Math.min(
+                distanceMatrix[j][i - 1] + 1, // Deletion
+                distanceMatrix[j - 1][i] + 1, // Insertion
+                distanceMatrix[j - 1][i - 1] + indicator, // Substitution
+            );
+        }
+    }
+
+    return distanceMatrix[secStringLength][firstStringLength];
+}
+
+export default levenshteinDistance;
diff --git a/src/core/operations/LevenshteinDistance.mjs b/src/core/operations/LevenshteinDistance.mjs
new file mode 100644
index 00000000..af667492
--- /dev/null
+++ b/src/core/operations/LevenshteinDistance.mjs
@@ -0,0 +1,46 @@
+/**
+ * @author n1073645 [n1073645@gmail.com]
+ * @copyright Crown Copyright 2020
+ * @license Apache-2.0
+ */
+
+import Operation from "../Operation.mjs";
+import levenshteinDistance from "../lib/Levenshtein.mjs";
+import OperationError from "../errors/OperationError.mjs";
+
+
+/**
+ * Levenshtein Distance operation
+ */
+class LevenshteinDistance extends Operation {
+
+    /**
+     * LevenshteinDistance constructor
+     */
+    constructor() {
+        super();
+
+        this.name = "Levenshtein Distance";
+        this.module = "Utils";
+        this.description = "Computes the distance between two strings. For example 'the' and 'thy' have a distance of one since it takes one character substitution to transform one to the other. The two strings should be separated by a double newline.";
+        this.infoURL = "https://wikipedia.org/wiki/Levenshtein_distance";
+        this.inputType = "string";
+        this.outputType = "string";
+        this.args = [];
+    }
+
+    /**
+     * @param {string} input
+     * @param {Object[]} args
+     * @returns {string}
+     */
+    run(input, args) {
+        const index = input.indexOf("\n\n");
+        if (index === -1)
+            throw new OperationError("Error: double newline not present.");
+        return "Levenshtein Distance: " + levenshteinDistance(input.slice(0, index), input.slice(index+2)).toString(10);
+    }
+
+}
+
+export default LevenshteinDistance;
diff --git a/src/web/waiters/OperationsWaiter.mjs b/src/web/waiters/OperationsWaiter.mjs
index 4a591249..4f0bfe85 100755
--- a/src/web/waiters/OperationsWaiter.mjs
+++ b/src/web/waiters/OperationsWaiter.mjs
@@ -6,6 +6,7 @@
 
 import HTMLOperation from "../HTMLOperation.mjs";
 import Sortable from "sortablejs";
+import levenshteinDistance from "../../core/lib/Levenshtein.mjs";
 
 
 /**
@@ -107,29 +108,80 @@ class OperationsWaiter {
     filterOperations(inStr, highlight) {
         const matchedOps = [];
         const matchedDescs = [];
+        const levenOps = [[], [], []];
 
         const searchStr = inStr.toLowerCase();
 
         for (const opName in this.app.operations) {
             const op = this.app.operations[opName];
-            const namePos = opName.toLowerCase().indexOf(searchStr);
+            const opNameLower = opName.toLowerCase();
+            const namePos = opNameLower.indexOf(searchStr);
             const descPos = op.description.toLowerCase().indexOf(searchStr);
+            let operation, added = false;
 
             if (namePos >= 0 || descPos >= 0) {
-                const operation = new HTMLOperation(opName, this.app.operations[opName], this.app, this.manager);
+                operation = new HTMLOperation(opName, this.app.operations[opName], this.app, this.manager);
                 if (highlight) {
                     operation.highlightSearchString(searchStr, namePos, descPos);
                 }
 
-                if (namePos < 0) {
+                if (namePos >= 0) {
                     matchedOps.push(operation);
                 } else {
                     matchedDescs.push(operation);
                 }
+                added = true;
+            }
+
+            // No exact match: fall back to a fuzzy match on the name, bucketed by edit distance (0, 1 or 2).
+            if (!added) {
+                if (this.stringDistance(searchStr, opNameLower)) {
+                    operation = new HTMLOperation(opName, this.app.operations[opName], this.app, this.manager);
+                    levenOps[this.result].push(operation);
+                }
             }
         }
 
-        return matchedDescs.concat(matchedOps);
+        return matchedOps.concat(matchedDescs.concat(levenOps[0].concat(levenOps[1].concat(levenOps[2]))));
+    }
+
+
+    /**
+     * Decides whether a search string is a close enough fuzzy match for an operation name.
+     * The number of tolerated errors depends on the length of the strings.
+     *
+     * Side effect: stores the computed Levenshtein distance in this.result (-1 if it was not
+     * computed) so that filterOperations can rank the match.
+     *
+     * @param {string} inStr - lower-cased search string
+     * @param {string} opName - lower-cased operation name
+     * @returns {boolean} true if the operation name is within the allowed distance
+     */
+    stringDistance(inStr, opName) {
+        let opNameLength = opName.length;
+        const inStrLength = inStr.length;
+
+        // If the search string is shorter than the operation name, compare it against an equally long prefix of the name.
+        if (inStrLength < opNameLength) {
+            opName = opName.slice(0, inStrLength);
+            opNameLength = opName.length;
+        }
+
+        this.result = -1;
+
+        // If the search string is short then do not run the distance algorithm.
+        if (inStrLength <= 2 || opNameLength <= 2)
+            return false;
+
+        // If the search string is length 3 then the user is allowed 1 error.
+        if (inStrLength < 4 || opNameLength < 4) {
+            this.result = levenshteinDistance(inStr, opName);
+            return this.result < 2;
+        }
+
+        // If the search string is length 4 or above the user is allowed 2 errors.
+        this.result = levenshteinDistance(inStr, opName);
+        return this.result < 3;
     }
 
 
diff --git a/tests/operations/index.mjs b/tests/operations/index.mjs
index bf440414..7e986d17 100644
--- a/tests/operations/index.mjs
+++ b/tests/operations/index.mjs
@@ -96,7 +96,7 @@ import "./tests/DefangIP.mjs";
 import "./tests/ParseUDP.mjs";
 import "./tests/AvroToJSON.mjs";
 import "./tests/Lorenz.mjs";
-
+import "./tests/LevenshteinDistance.mjs";
 
 // Cannot test operations that use the File type yet
 // import "./tests/SplitColourChannels.mjs";
diff --git a/tests/operations/tests/LevenshteinDistance.mjs b/tests/operations/tests/LevenshteinDistance.mjs
new file mode 100644
index 00000000..a31fa686
--- /dev/null
+++ b/tests/operations/tests/LevenshteinDistance.mjs
@@ -0,0 +1,44 @@
+/**
+ * Levenshtein distance tests.
+ *
+ * @author n1073645 [n1073645@gmail.com]
+ * @copyright Crown Copyright 2020
+ * @license Apache-2.0
+ */
+import TestRegister from "../../lib/TestRegister.mjs";
+
+TestRegister.addTests([
+    {
+        name: "Levenshtein Distance",
+        input: "the quick brown fox\n\nthequick br0wn g0x",
+        expectedOutput: "Levenshtein Distance: 4",
+        recipeConfig: [
+            {
+                "op": "Levenshtein Distance",
+                "args": []
+            }
+        ],
+    },
+    {
+        name: "Levenshtein Distance",
+        input: "the quick brown fox\nthequick br0wn g0x",
+        expectedOutput: "Error: double newline not present.",
+        recipeConfig: [
+            {
+                "op": "Levenshtein Distance",
+                "args": []
+            }
+        ],
+    },
+    {
+        name: "Levenshtein Distance",
+        input: "the quick brown fox\n\n",
+        expectedOutput: "Levenshtein Distance: 19",
+        recipeConfig: [
+            {
+                "op": "Levenshtein Distance",
+                "args": []
+            }
+        ]
+    }
+]);