Remove all diacritics using latinize

This commit is contained in:
Wojciech Szostak 2021-10-15 17:51:27 +02:00
parent ae1b12c120
commit 2f60c4f03c
4 changed files with 19 additions and 1 deletions

5
package-lock.json generated
View file

@ -8326,6 +8326,11 @@
"integrity": "sha512-ZRbnvdg/NxqzC7L9Uyqzf4psi1OM4Cuc+sJAkQPjO6XkQIJTNbfK2Rsmbw8fx1p2mkZdp2FZYo2+LwXYY/uwIA==",
"dev": true
},
"latinize": {
"version": "0.5.0",
"resolved": "https://registry.npmjs.org/latinize/-/latinize-0.5.0.tgz",
"integrity": "sha512-SHzxgdcFP/64lUEfX3183QALY2KdSQxad3gmhCc/b03QN1mbx0AnJWvsQjqoJLbucY9pJuK+NMbnasUIocDmnQ=="
},
"levn": {
"version": "0.3.0",
"resolved": "https://registry.npmjs.org/levn/-/levn-0.3.0.tgz",

View file

@ -128,6 +128,7 @@
"jsqr": "^1.4.0",
"jsrsasign": "^10.4.0",
"kbpgp": "2.1.15",
"latinize": "0.5.0",
"libbzip2-wasm": "0.0.4",
"libyara-wasm": "^1.1.0",
"lodash": "^4.17.21",

View file

@ -5,6 +5,7 @@
*/
import Operation from "../Operation.mjs";
import latinize from "latinize";
/**
* Remove Diacritics operation
@ -33,7 +34,7 @@ class RemoveDiacritics extends Operation {
*/
run(input, args) {
// reference: https://stackoverflow.com/questions/990904/remove-accents-diacritics-in-a-string-in-javascript/37511463
return input.normalize("NFD").replace(/[\u0300-\u036f]/g, "");
return latinize(input.normalize("NFD").replace(/[\u0300-\u036f]/g, ""));
}
}

View file

@ -80,4 +80,15 @@ TestRegister.addTests([
},
],
},
{
name: "Remove Diacritics: polish letter ł",
input: "zażółć gęślą jaźń ZAŻÓŁĆ GĘŚLĄ JAŹŃ",
expectedOutput: "zazolc gesla jazn ZAZOLC GESLA JAZN",
recipeConfig: [
{
"op": "Remove Diacritics",
"args": []
},
],
},
]);