From 2f60c4f03c512fb593ab57d28639ad932c0d82c7 Mon Sep 17 00:00:00 2001 From: Wojciech Szostak Date: Fri, 15 Oct 2021 17:51:27 +0200 Subject: [PATCH] Remove all diacritics using latinize --- package-lock.json | 5 +++++ package.json | 1 + src/core/operations/RemoveDiacritics.mjs | 3 ++- tests/operations/tests/Unicode.mjs | 11 +++++++++++ 4 files changed, 19 insertions(+), 1 deletion(-) diff --git a/package-lock.json b/package-lock.json index 6fa9016e..ff66cb26 100644 --- a/package-lock.json +++ b/package-lock.json @@ -8326,6 +8326,11 @@ "integrity": "sha512-ZRbnvdg/NxqzC7L9Uyqzf4psi1OM4Cuc+sJAkQPjO6XkQIJTNbfK2Rsmbw8fx1p2mkZdp2FZYo2+LwXYY/uwIA==", "dev": true }, + "latinize": { + "version": "0.5.0", + "resolved": "https://registry.npmjs.org/latinize/-/latinize-0.5.0.tgz", + "integrity": "sha512-SHzxgdcFP/64lUEfX3183QALY2KdSQxad3gmhCc/b03QN1mbx0AnJWvsQjqoJLbucY9pJuK+NMbnasUIocDmnQ==" + }, "levn": { "version": "0.3.0", "resolved": "https://registry.npmjs.org/levn/-/levn-0.3.0.tgz", diff --git a/package.json b/package.json index 78a8696b..1dd82d87 100644 --- a/package.json +++ b/package.json @@ -128,6 +128,7 @@ "jsqr": "^1.4.0", "jsrsasign": "^10.4.0", "kbpgp": "2.1.15", + "latinize": "0.5.0", "libbzip2-wasm": "0.0.4", "libyara-wasm": "^1.1.0", "lodash": "^4.17.21", diff --git a/src/core/operations/RemoveDiacritics.mjs b/src/core/operations/RemoveDiacritics.mjs index 859d86d7..e74cfd82 100644 --- a/src/core/operations/RemoveDiacritics.mjs +++ b/src/core/operations/RemoveDiacritics.mjs @@ -5,6 +5,7 @@ */ import Operation from "../Operation.mjs"; +import latinize from "latinize"; /** * Remove Diacritics operation @@ -33,7 +34,7 @@ class RemoveDiacritics extends Operation { */ run(input, args) { // reference: https://stackoverflow.com/questions/990904/remove-accents-diacritics-in-a-string-in-javascript/37511463 - return input.normalize("NFD").replace(/[\u0300-\u036f]/g, ""); + return latinize(input.normalize("NFD").replace(/[\u0300-\u036f]/g, "")); } } diff --git a/tests/operations/tests/Unicode.mjs b/tests/operations/tests/Unicode.mjs index 2603768f..f801b765 100644 --- a/tests/operations/tests/Unicode.mjs +++ b/tests/operations/tests/Unicode.mjs @@ -80,4 +80,15 @@ TestRegister.addTests([ }, ], }, + { + name: "Remove Diacritics: polish letter ł", + input: "zażółć gęślą jaźń ZAŻÓŁĆ GĘŚLĄ JAŹŃ", + expectedOutput: "zazolc gesla jazn ZAZOLC GESLA JAZN", + recipeConfig: [ + { + "op": "Remove Diacritics", + "args": [] + }, + ], + }, ]);