Levenshtein operation and operation search improved

This commit is contained in:
n1073645 2020-01-21 16:36:44 +00:00
parent ace8121d0e
commit 378c1ba07b
6 changed files with 195 additions and 6 deletions

View file

@ -228,6 +228,7 @@
"Regular expression",
"Offset checker",
"Hamming Distance",
"Levenshtein Distance",
"Convert distance",
"Convert area",
"Convert mass",

View file

@ -0,0 +1,52 @@
/**
* Levenshtein distance library.
*
* @author n1073645 [n1073645@gmail.com]
* @copyright Crown Copyright 2020
* @license Apache-2.0
*/
/**
 * Computes the Levenshtein distance between two strings.
 *
 * The distance is the minimum number of single-character insertions,
 * deletions and substitutions needed to turn one string into the other.
 * Strings are compared by Unicode code point, so a character outside the
 * Basic Multilingual Plane (e.g. an emoji) counts as one character rather
 * than as two UTF-16 code units.
 *
 * @param {string} firstString
 * @param {string} secString
 * @returns {number} The edit distance between the two strings.
 */
export function levenshteinDistance(firstString, secString) {
    // Split by code point so that surrogate pairs are treated as one character.
    const firstChars = Array.from(firstString);
    const secChars = Array.from(secString);
    const firstLength = firstChars.length;
    const secLength = secChars.length;

    // If either string is empty the distance is the length of the other one.
    if (!firstLength)
        return secLength;
    if (!secLength)
        return firstLength;

    // Each row of the distance matrix depends only on the row above it, so
    // two rows are kept instead of the full (secLength + 1) x (firstLength + 1) matrix.
    // The first row holds the distances from the empty prefix of secString.
    let previousRow = Array.from({length: firstLength + 1}, (_, i) => i);
    let currentRow = new Array(firstLength + 1).fill(0);

    // Propagate the values row by row; the last cell of the last row holds the total distance.
    for (let j = 1; j <= secLength; j++) {
        // First column: distance from the empty prefix of firstString.
        currentRow[0] = j;
        for (let i = 1; i <= firstLength; i++) {
            const indicator = firstChars[i - 1] === secChars[j - 1] ? 0 : 1;
            currentRow[i] = Math.min(
                currentRow[i - 1] + 1, // Deletion
                previousRow[i] + 1, // Insertion
                previousRow[i - 1] + indicator // Substitution
            );
        }
        // The row just computed becomes the "previous" row of the next iteration.
        [previousRow, currentRow] = [currentRow, previousRow];
    }
    return previousRow[firstLength];
}
export default levenshteinDistance;

View file

@ -0,0 +1,46 @@
/**
* @author n1073645 [n1073645@gmail.com]
* @copyright Crown Copyright 2020
* @license Apache-2.0
*/
import Operation from "../Operation.mjs";
import levenshteinDistance from "../lib/Levenshtein.mjs";
import OperationError from "../errors/OperationError.mjs";
/**
 * Operation that reports the Levenshtein distance between two strings
 * supplied in a single input, separated by a blank line.
 */
class LevenshteinDistance extends Operation {

    /**
     * Sets up the operation metadata (name, module, description, I/O types).
     */
    constructor() {
        super();

        this.name = "Levenshtein Distance";
        this.module = "Utils";
        this.description = "Computes the distance between two strings. For example 'the' and 'thy' have a distance of one since it takes one character substitution to transform one to the other. The two strings should be separated by a double newline.";
        this.infoURL = "https://wikipedia.org/wiki/Levenshtein_distance";
        this.inputType = "string";
        this.outputType = "string";
        this.args = [];
    }

    /**
     * Splits the input at the first double newline and computes the
     * distance between the two halves.
     *
     * @param {string} input
     * @param {Object[]} args
     * @returns {string}
     * @throws {OperationError} if the input contains no double newline
     */
    run(input, args) {
        const separator = "\n\n";
        const splitAt = input.indexOf(separator);

        if (splitAt < 0) {
            throw new OperationError("Error: double newline not present.");
        }

        // Everything before the first separator is the first string,
        // everything after it (including later separators) is the second.
        const first = input.slice(0, splitAt);
        const second = input.slice(splitAt + separator.length);
        const distance = levenshteinDistance(first, second);

        return `Levenshtein Distance: ${distance}`;
    }
}
export default LevenshteinDistance;

View file

@ -6,6 +6,7 @@
import HTMLOperation from "../HTMLOperation.mjs";
import Sortable from "sortablejs";
import levenshteinDistance from "../../core/lib/Levenshtein.mjs";
/**
@ -107,29 +108,74 @@ class OperationsWaiter {
filterOperations(inStr, highlight) {
// NOTE(review): this span looks like a rendered diff hunk in which removed and
// added lines are interleaved (namePos and descPos are declared twice, there are
// two consecutive `if (namePos ...)` tests and two return statements). Confirm
// against the real file before relying on the comments below.
const matchedOps = [];
const matchedDescs = [];
// Three buckets for fuzzy matches, indexed by the value stringDistance() leaves in this.result.
const levenOps = [[], [], []];
const searchStr = inStr.toLowerCase();
for (const opName in this.app.operations) {
const op = this.app.operations[opName];
const namePos = opName.toLowerCase().indexOf(searchStr);
const descPos = op.description.toLowerCase().indexOf(searchStr);
const opNameLower = opName.toLowerCase();
const namePos = opNameLower.indexOf(searchStr);
// NOTE(review): unlike the descPos line two lines above, the description is not
// lower-cased here, while searchStr is; a description containing the term with
// different capitalisation would not match. Confirm this is intended.
const descPos = op.description.indexOf(searchStr);
let operation, added = false;
// Substring match on either the name or the description.
if (namePos >= 0 || descPos >= 0) {
const operation = new HTMLOperation(opName, this.app.operations[opName], this.app, this.manager);
operation = new HTMLOperation(opName, this.app.operations[opName], this.app, this.manager);
if (highlight) {
operation.highlightSearchString(searchStr, namePos, descPos);
}
if (namePos < 0) {
if (namePos >= 0) {
matchedOps.push(operation);
} else {
matchedDescs.push(operation);
}
added = true;
}
// No substring match: fall back to the edit-distance check on the lower-cased name.
if (!added) {
if (this.stringDistance(searchStr, opNameLower)) {
operation = new HTMLOperation(opName, this.app.operations[opName], this.app, this.manager);
// stringDistance() stored the distance in this.result; it returns true only
// for distances below 3, so the index selects one of the three buckets.
levenOps[this.result].push(operation);
}
}
}
return matchedDescs.concat(matchedOps);
// Order of this result: name matches, then description matches, then fuzzy
// matches in bucket order 0, 1, 2.
return matchedOps.concat(matchedDescs.concat(levenOps[0].concat(levenOps[1].concat(levenOps[2]))));
}
/**
* Controls how much distance there is between the strings based on the length of the strings.
*
* @param {string} inStr
* @param {string} opName
*/
stringDistance(inStr, opName) {
let opNameLength = opName.length;
const inStrLength = inStr.length;
// If the input string is less than the length of the operation name then sub-sample the operation name and vice-versa.
if (inStrLength < opNameLength) {
opName = opName.slice(0, inStrLength);
opNameLength = opName.length;
}
this.result = -1;
// If the search string is short then do not run the distance algorithm.
if (inStrLength <= 2 || opNameLength <= 2)
return false;
// If the search string is length 3 then the user is allowed 1 error.
if (inStrLength < 4 || opNameLength < 4) {
this.result = levenshteinDistance(inStr, opName);
return this.result < 2;
}
// If the search string is length 4 or above the user is allowed 2 errors.
this.result = levenshteinDistance(inStr, opName);
return this.result < 3;
}

View file

@ -96,7 +96,7 @@ import "./tests/DefangIP.mjs";
import "./tests/ParseUDP.mjs";
import "./tests/AvroToJSON.mjs";
import "./tests/Lorenz.mjs";
import "./tests/LevenshteinDistance.mjs";
// Cannot test operations that use the File type yet
// import "./tests/SplitColourChannels.mjs";

View file

@ -0,0 +1,44 @@
/**
* Levenshtein distance tests.
*
* @author n1073645 [n1073645@gmail.com]
* @copyright Crown Copyright 2020
* @license Apache-2.0
*/
import TestRegister from "../../lib/TestRegister.mjs";
// Each case has a distinct name so that a failure report identifies which one broke.
TestRegister.addTests([
    {
        // Two non-empty strings separated by a double newline: one missing
        // space plus three substituted characters gives a distance of 4.
        name: "Levenshtein Distance: two strings",
        input: "the quick brown fox\n\nthequick br0wn g0x",
        expectedOutput: "Levenshtein Distance: 4",
        recipeConfig: [
            {
                "op": "Levenshtein Distance",
                "args": []
            }
        ],
    },
    {
        // Only a single newline between the strings: the operation reports an error.
        name: "Levenshtein Distance: missing double newline",
        input: "the quick brown fox\nthequick br0wn g0x",
        expectedOutput: "Error: double newline not present.",
        recipeConfig: [
            {
                "op": "Levenshtein Distance",
                "args": []
            }
        ],
    },
    {
        // Empty second string: the distance is the length of the first string (19).
        name: "Levenshtein Distance: empty second string",
        input: "the quick brown fox\n\n",
        expectedOutput: "Levenshtein Distance: 19",
        recipeConfig: [
            {
                "op": "Levenshtein Distance",
                "args": []
            }
        ],
    }
]);