mirror of
https://github.com/gchq/CyberChef.git
synced 2025-05-11 16:51:31 -04:00
Levenshtein operation and operation search improved
This commit is contained in:
parent
ace8121d0e
commit
378c1ba07b
6 changed files with 195 additions and 6 deletions
|
@ -228,6 +228,7 @@
|
|||
"Regular expression",
|
||||
"Offset checker",
|
||||
"Hamming Distance",
|
||||
"Levenshtein Distance",
|
||||
"Convert distance",
|
||||
"Convert area",
|
||||
"Convert mass",
|
||||
|
|
52
src/core/lib/Levenshtein.mjs
Normal file
52
src/core/lib/Levenshtein.mjs
Normal file
|
@ -0,0 +1,52 @@
|
|||
/**
|
||||
* Levenshtein distance library.
|
||||
*
|
||||
* @author n1073645 [n1073645@gmail.com]
|
||||
* @copyright Crown Copyright 2020
|
||||
* @license Apache-2.0
|
||||
*/
|
||||
|
||||
/**
 * Computes the Levenshtein distance between two strings.
 *
 * The distance is the minimum number of single-character insertions, deletions
 * and substitutions needed to transform one string into the other. Strings are
 * compared by Unicode code point, so a character outside the Basic Multilingual
 * Plane (e.g. an emoji) counts as one character rather than two UTF-16 units.
 *
 * @param {string} firstString
 * @param {string} secString
 * @returns {number} The edit distance between the two strings.
 */
export function levenshteinDistance(firstString, secString) {
    // Array.from iterates a string by code point, keeping surrogate pairs together.
    const firstChars = Array.from(firstString);
    const secChars = Array.from(secString);
    const firstLength = firstChars.length;
    const secLength = secChars.length;

    // If either string is empty, the distance is the length of the other one.
    if (firstLength === 0) {
        return secLength;
    }
    if (secLength === 0) {
        return firstLength;
    }

    // Each matrix row only depends on the row above it, so two rows are
    // enough; there is no need to hold the full matrix in memory.
    // The first row is the distance from the empty string: 0, 1, 2, ...
    let previousRow = Array.from({length: firstLength + 1}, (_, i) => i);
    let currentRow = new Array(firstLength + 1).fill(0);

    // Propagate the values through the matrix one row at a time.
    for (let j = 1; j <= secLength; j++) {
        // First column: distance from the empty string.
        currentRow[0] = j;

        for (let i = 1; i <= firstLength; i++) {
            const indicator = firstChars[i - 1] === secChars[j - 1] ? 0 : 1;
            currentRow[i] = Math.min(
                currentRow[i - 1] + 1, // Deletion
                previousRow[i] + 1, // Insertion
                previousRow[i - 1] + indicator // Substitution
            );
        }

        // The row just computed becomes the "row above" for the next pass.
        const finishedRow = currentRow;
        currentRow = previousRow;
        previousRow = finishedRow;
    }

    // After the final swap the last computed row is in previousRow.
    return previousRow[firstLength];
}
|
||||
|
||||
export default levenshteinDistance;
|
46
src/core/operations/LevenshteinDistance.mjs
Normal file
46
src/core/operations/LevenshteinDistance.mjs
Normal file
|
@ -0,0 +1,46 @@
|
|||
/**
|
||||
* @author n1073645 [n1073645@gmail.com]
|
||||
* @copyright Crown Copyright 2020
|
||||
* @license Apache-2.0
|
||||
*/
|
||||
|
||||
import Operation from "../Operation.mjs";
|
||||
import levenshteinDistance from "../lib/Levenshtein.mjs";
|
||||
import OperationError from "../errors/OperationError.mjs";
|
||||
|
||||
|
||||
/**
 * Levenshtein Distance operation.
 *
 * Splits the input on the first double newline and reports the edit
 * distance between the two resulting strings.
 */
class LevenshteinDistance extends Operation {

    /**
     * LevenshteinDistance constructor. Sets the operation's metadata;
     * the operation takes no arguments.
     */
    constructor() {
        super();

        this.name = "Levenshtein Distance";
        this.module = "Utils";
        this.description = "Computes the distance between two strings. For example 'the' and 'thy' have a distance of one since it takes one character substitution to transform one to the other. The two strings should be separated by a double newline.";
        this.infoURL = "https://wikipedia.org/wiki/Levenshtein_distance";
        this.inputType = "string";
        this.outputType = "string";
        this.args = [];
    }

    /**
     * @param {string} input - Two strings separated by a double newline.
     * @param {Object[]} args - Unused; the operation has no arguments.
     * @returns {string} The distance, prefixed with "Levenshtein Distance: ".
     * @throws {OperationError} If the input contains no double newline.
     */
    run(input, args) {
        const separator = "\n\n";
        const splitAt = input.indexOf(separator);

        if (splitAt < 0) {
            throw new OperationError("Error: double newline not present.");
        }

        // Everything before the first separator is the first string,
        // everything after it (including later separators) is the second.
        const first = input.slice(0, splitAt);
        const second = input.slice(splitAt + separator.length);
        const distance = levenshteinDistance(first, second);

        return `Levenshtein Distance: ${distance}`;
    }

}
|
||||
|
||||
export default LevenshteinDistance;
|
|
@ -6,6 +6,7 @@
|
|||
|
||||
import HTMLOperation from "../HTMLOperation.mjs";
|
||||
import Sortable from "sortablejs";
|
||||
import levenshteinDistance from "../../core/lib/Levenshtein.mjs";
|
||||
|
||||
|
||||
/**
|
||||
|
@ -107,29 +108,74 @@ class OperationsWaiter {
|
|||
filterOperations(inStr, highlight) {
|
||||
const matchedOps = [];
|
||||
const matchedDescs = [];
|
||||
const levenOps = [[], [], []];
|
||||
|
||||
const searchStr = inStr.toLowerCase();
|
||||
|
||||
for (const opName in this.app.operations) {
|
||||
const op = this.app.operations[opName];
|
||||
const namePos = opName.toLowerCase().indexOf(searchStr);
|
||||
const descPos = op.description.toLowerCase().indexOf(searchStr);
|
||||
const opNameLower = opName.toLowerCase();
|
||||
const namePos = opNameLower.indexOf(searchStr);
|
||||
const descPos = op.description.indexOf(searchStr);
|
||||
let operation, added = false;
|
||||
|
||||
if (namePos >= 0 || descPos >= 0) {
|
||||
const operation = new HTMLOperation(opName, this.app.operations[opName], this.app, this.manager);
|
||||
operation = new HTMLOperation(opName, this.app.operations[opName], this.app, this.manager);
|
||||
if (highlight) {
|
||||
operation.highlightSearchString(searchStr, namePos, descPos);
|
||||
}
|
||||
|
||||
if (namePos < 0) {
|
||||
if (namePos >= 0) {
|
||||
matchedOps.push(operation);
|
||||
} else {
|
||||
matchedDescs.push(operation);
|
||||
}
|
||||
added = true;
|
||||
}
|
||||
|
||||
if (!added) {
|
||||
if (this.stringDistance(searchStr, opNameLower)) {
|
||||
operation = new HTMLOperation(opName, this.app.operations[opName], this.app, this.manager);
|
||||
levenOps[this.result].push(operation);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return matchedDescs.concat(matchedOps);
|
||||
return matchedOps.concat(matchedDescs.concat(levenOps[0].concat(levenOps[1].concat(levenOps[2]))));
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Controls how much distance there is between the strings based on the length of the strings.
|
||||
*
|
||||
* @param {string} inStr
|
||||
* @param {string} opName
|
||||
*/
|
||||
stringDistance(inStr, opName) {
|
||||
let opNameLength = opName.length;
|
||||
const inStrLength = inStr.length;
|
||||
|
||||
// If the input string is less than the length of the operation name then sub-sample the operation name and vice-versa.
|
||||
if (inStrLength < opNameLength) {
|
||||
opName = opName.slice(0, inStrLength);
|
||||
opNameLength = opName.length;
|
||||
}
|
||||
|
||||
this.result = -1;
|
||||
|
||||
// If the search string is short then do not run the distance algorithm.
|
||||
if (inStrLength <= 2 || opNameLength <= 2)
|
||||
return false;
|
||||
|
||||
// If the search string is length 3 then the user is allowed 1 error.
|
||||
if (inStrLength < 4 || opNameLength < 4) {
|
||||
this.result = levenshteinDistance(inStr, opName);
|
||||
return this.result < 2;
|
||||
}
|
||||
|
||||
// If the search string is length 4 or above the user is allowed 2 errors.
|
||||
this.result = levenshteinDistance(inStr, opName);
|
||||
return this.result < 3;
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -96,7 +96,7 @@ import "./tests/DefangIP.mjs";
|
|||
import "./tests/ParseUDP.mjs";
|
||||
import "./tests/AvroToJSON.mjs";
|
||||
import "./tests/Lorenz.mjs";
|
||||
|
||||
import "./tests/LevenshteinDistance.mjs";
|
||||
|
||||
// Cannot test operations that use the File type yet
|
||||
// import "./tests/SplitColourChannels.mjs";
|
||||
|
|
44
tests/operations/tests/LevenshteinDistance.mjs
Normal file
44
tests/operations/tests/LevenshteinDistance.mjs
Normal file
|
@ -0,0 +1,44 @@
|
|||
/**
|
||||
* Levenshtein distance tests.
|
||||
*
|
||||
* @author n1073645 [n1073645@gmail.com]
|
||||
* @copyright Crown Copyright 2020
|
||||
* @license Apache-2.0
|
||||
*/
|
||||
import TestRegister from "../../lib/TestRegister.mjs";
|
||||
|
||||
// Each case has a distinct name so that a failure report identifies which
// scenario broke.
TestRegister.addTests([
    {
        // Two non-empty strings separated by a double newline.
        name: "Levenshtein Distance: two strings",
        input: "the quick brown fox\n\nthequick br0wn g0x",
        expectedOutput: "Levenshtein Distance: 4",
        recipeConfig: [
            {
                "op": "Levenshtein Distance",
                "args": []
            }
        ],
    },
    {
        // A single newline is not a valid separator, so the operation errors.
        name: "Levenshtein Distance: missing double newline",
        input: "the quick brown fox\nthequick br0wn g0x",
        expectedOutput: "Error: double newline not present.",
        recipeConfig: [
            {
                "op": "Levenshtein Distance",
                "args": []
            }
        ],
    },
    {
        // An empty second string gives the length of the first string.
        name: "Levenshtein Distance: empty second string",
        input: "the quick brown fox\n\n",
        expectedOutput: "Levenshtein Distance: 19",
        recipeConfig: [
            {
                "op": "Levenshtein Distance",
                "args": []
            }
        ],
    }
]);
|
Loading…
Add table
Add a link
Reference in a new issue