From 825dd6910027a1d0957649cdfc7e9c296c937f01 Mon Sep 17 00:00:00 2001 From: wszostak Date: Tue, 5 Oct 2021 14:05:28 +0200 Subject: [PATCH] Convert LATIN LETTER L WITH STROKE to LATIN L MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Because the Polish letter "ł" is not an accented letter but a separate character with no canonical decomposition, `normalize("NFD")` leaves it unchanged, so we need another approach. See also: - https://www.fileformat.info/info/unicode/char/0142/index.htm - https://www.fileformat.info/info/unicode/char/0141/index.htm --- src/core/operations/RemoveDiacritics.mjs | 4 +++- tests/operations/tests/Unicode.mjs | 11 +++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/src/core/operations/RemoveDiacritics.mjs b/src/core/operations/RemoveDiacritics.mjs index 859d86d7..4ab2df2c 100644 --- a/src/core/operations/RemoveDiacritics.mjs +++ b/src/core/operations/RemoveDiacritics.mjs @@ -33,7 +33,9 @@ class RemoveDiacritics extends Operation { */ run(input, args) { // reference: https://stackoverflow.com/questions/990904/remove-accents-diacritics-in-a-string-in-javascript/37511463 - return input.normalize("NFD").replace(/[\u0300-\u036f]/g, ""); + return input.normalize("NFD") + .replace(/\u0142/g, "l").replace(/\u0141/g, "L") + .replace(/[\u0300-\u036f]/g, ""); } } diff --git a/tests/operations/tests/Unicode.mjs b/tests/operations/tests/Unicode.mjs index 2603768f..f801b765 100644 --- a/tests/operations/tests/Unicode.mjs +++ b/tests/operations/tests/Unicode.mjs @@ -80,4 +80,15 @@ TestRegister.addTests([ }, ], }, + { + name: "Remove Diacritics: polish letter ł", + input: "zażółć gęślą jaźń ZAŻÓŁĆ GĘŚLĄ JAŹŃ", + expectedOutput: "zazolc gesla jazn ZAZOLC GESLA JAZN", + recipeConfig: [ + { + "op": "Remove Diacritics", + "args": [] + }, + ], + }, ]);