Remove all diacritics using latinize

This commit is contained in:
Wojciech Szostak 2021-10-15 17:51:27 +02:00
parent ae1b12c120
commit 2f60c4f03c
4 changed files with 19 additions and 1 deletions

5
package-lock.json generated
View file

@ -8326,6 +8326,11 @@
"integrity": "sha512-ZRbnvdg/NxqzC7L9Uyqzf4psi1OM4Cuc+sJAkQPjO6XkQIJTNbfK2Rsmbw8fx1p2mkZdp2FZYo2+LwXYY/uwIA==", "integrity": "sha512-ZRbnvdg/NxqzC7L9Uyqzf4psi1OM4Cuc+sJAkQPjO6XkQIJTNbfK2Rsmbw8fx1p2mkZdp2FZYo2+LwXYY/uwIA==",
"dev": true "dev": true
}, },
"latinize": {
"version": "0.5.0",
"resolved": "https://registry.npmjs.org/latinize/-/latinize-0.5.0.tgz",
"integrity": "sha512-SHzxgdcFP/64lUEfX3183QALY2KdSQxad3gmhCc/b03QN1mbx0AnJWvsQjqoJLbucY9pJuK+NMbnasUIocDmnQ=="
},
"levn": { "levn": {
"version": "0.3.0", "version": "0.3.0",
"resolved": "https://registry.npmjs.org/levn/-/levn-0.3.0.tgz", "resolved": "https://registry.npmjs.org/levn/-/levn-0.3.0.tgz",

View file

@ -128,6 +128,7 @@
"jsqr": "^1.4.0", "jsqr": "^1.4.0",
"jsrsasign": "^10.4.0", "jsrsasign": "^10.4.0",
"kbpgp": "2.1.15", "kbpgp": "2.1.15",
"latinize": "0.5.0",
"libbzip2-wasm": "0.0.4", "libbzip2-wasm": "0.0.4",
"libyara-wasm": "^1.1.0", "libyara-wasm": "^1.1.0",
"lodash": "^4.17.21", "lodash": "^4.17.21",

View file

@ -5,6 +5,7 @@
*/ */
import Operation from "../Operation.mjs"; import Operation from "../Operation.mjs";
import latinize from "latinize";
/** /**
* Remove Diacritics operation * Remove Diacritics operation
@ -33,7 +34,7 @@ class RemoveDiacritics extends Operation {
*/ */
run(input, args) { run(input, args) {
// reference: https://stackoverflow.com/questions/990904/remove-accents-diacritics-in-a-string-in-javascript/37511463 // reference: https://stackoverflow.com/questions/990904/remove-accents-diacritics-in-a-string-in-javascript/37511463
return input.normalize("NFD").replace(/[\u0300-\u036f]/g, ""); return latinize(input.normalize("NFD").replace(/[\u0300-\u036f]/g, ""));
} }
} }

View file

@ -80,4 +80,15 @@ TestRegister.addTests([
}, },
], ],
}, },
{
name: "Remove Diacritics: polish letter ł",
input: "zażółć gęślą jaźń ZAŻÓŁĆ GĘŚLĄ JAŹŃ",
expectedOutput: "zazolc gesla jazn ZAZOLC GESLA JAZN",
recipeConfig: [
{
"op": "Remove Diacritics",
"args": []
},
],
},
]); ]);