Add operation to normalise unicode

This commit is contained in:
Matthieu 2019-11-25 22:53:31 +01:00
parent 610d46a1a4
commit a6fa0628f2
7 changed files with 134 additions and 0 deletions

View file

@ -39,6 +39,7 @@
"URL Decode",
"Escape Unicode Characters",
"Unescape Unicode Characters",
"Normalise Unicode",
"To Quoted Printable",
"From Quoted Printable",
"To Punycode",

View file

@ -164,3 +164,15 @@ export const IO_FORMAT = {
"Simplified Chinese GB18030 (54936)": 54936,
};
/**
* Unicode Normalisation Forms
*
* @author Matthieu [m@tthieu.xyz]
* @copyright Crown Copyright 2016
* @license Apache-2.0
*/
/**
 * Names of the Unicode Normalisation Forms, in the order they are listed
 * to the user: canonical decomposition (NFD), canonical composition (NFC),
 * compatibility decomposition (NFKD) and compatibility composition (NFKC).
 */
export const UNICODE_NORMALISATION_FORMS = [
    "NFD",
    "NFC",
    "NFKD",
    "NFKC"
];

View file

@ -0,0 +1,60 @@
/**
* @author Matthieu [m@tthieu.xyz]
* @copyright Crown Copyright 2019
* @license Apache-2.0
*/
import Operation from "../Operation.mjs";
import OperationError from "../errors/OperationError.mjs";
import unorm from "unorm";
import {UNICODE_NORMALISATION_FORMS} from "../lib/ChrEnc";
/**
 * Normalise Unicode operation
 *
 * Converts the input string to one of the four Unicode Normalisation Forms
 * (NFD, NFC, NFKD, NFKC) using the `unorm` library.
 */
class NormaliseUnicode extends Operation {

    /**
     * NormaliseUnicode constructor
     */
    constructor() {
        super();

        this.name = "Normalise Unicode";
        this.module = "UnicodeNormalisation";
        this.description = "Transform Unicode to one of the Normalisation Form";
        this.infoURL = "http://www.unicode.org/reports/tr15/";
        this.inputType = "string";
        this.outputType = "string";
        this.args = [
            {
                name: "Normal Form",
                type: "option",
                value: UNICODE_NORMALISATION_FORMS
            }
        ];
    }

    /**
     * Normalises the input to the requested Normalisation Form.
     *
     * @param {string} input
     * @param {Object[]} args - args[0] is the form name: "NFD", "NFC", "NFKD" or "NFKC"
     * @returns {string}
     * @throws {OperationError} if the requested form is not one of the four known names
     */
    run(input, args) {
        const [normalForm] = args;

        switch (normalForm) {
            case "NFD":
                return unorm.nfd(input);
            case "NFC":
                return unorm.nfc(input);
            case "NFKD":
                return unorm.nfkd(input);
            case "NFKC":
                // Compatibility composition: must be nfkc, not nfc, otherwise
                // compatibility characters (e.g. ligatures) are left untouched.
                return unorm.nfkc(input);
            default:
                throw new OperationError("Unknown Normalisation Form");
        }
    }

}
export default NormaliseUnicode;