diff --git a/src/tools/text-to-unicode/text-to-unicode.service.test.ts b/src/tools/text-to-unicode/text-to-unicode.service.test.ts
index 94a6fc62..7b4e7f74 100644
--- a/src/tools/text-to-unicode/text-to-unicode.service.test.ts
+++ b/src/tools/text-to-unicode/text-to-unicode.service.test.ts
@@ -3,7 +3,7 @@ import { type ConverterId, SKIP_ASCII_HTML, SKIP_ASCII_JS, converters } from './
 
 describe('text-to-unicode (legacy tests)', () => {
   const convertTextToUnicode = (text: string) => converters.decimalEntities.escape(text, false);
-  const convertUnicodeToText = converters.decimalEntities.unescape;
+  const convertUnicodeToText = (escaped: string) => converters.decimalEntities.unescape(escaped);
 
   describe('convertTextToUnicode', () => {
     it('a text string is converted to unicode representation', () => {
diff --git a/src/tools/text-to-unicode/text-to-unicode.service.ts b/src/tools/text-to-unicode/text-to-unicode.service.ts
index 22568044..ac07fd31 100644
--- a/src/tools/text-to-unicode/text-to-unicode.service.ts
+++ b/src/tools/text-to-unicode/text-to-unicode.service.ts
@@ -11,70 +11,83 @@ function codePoints(text: string): number[] {
   return [...text].map(char => char.codePointAt(0));
 }
 
-export interface Converter {
+/** Declarative description of one escape/unescape scheme. */
+export interface ConverterConfig {
   name: string
-  escape(text: string, skipAscii: boolean): string
-  unescape(text: string): string
-};
+  escape: {
+    /** Splits a chunk into numeric values; falls back to `codePoints` when omitted. */
+    charValues?(text: string): number[]
+    /** Renders one numeric value as its escaped form. */
+    mapper(charValue: number): string
+    /** @prop regular expression for default content to skip. Must have exactly 1 capture group. */
+    asciiSkipper: RegExp
+  }
+  unescape: {
+    /** Matches one escape sequence in escaped text. */
+    regex: RegExp
+    /** Radix used to parse the digits of a matched sequence. */
+    radix: number
+  }
+}
+
+/**
+ * Escapes text to, and unescapes it from, the notation described by `config`.
+ * Exported so the `Converter` symbol this module used to expose as an interface stays importable.
+ */
+export class Converter {
+  constructor(public readonly config: ConverterConfig) {}
 
-interface EscapeConfig {
-  charValues?(text: string): number[]
-  mapper(charValue: number): string
-  /** @prop regular expression for default content to skip. Must have exactly 1 capture group. */
-  asciiSkipper: RegExp
-};
-
-function escaper({ charValues: getCharValues, mapper, asciiSkipper: skipper }: EscapeConfig) {
-  return (text: string, skip: boolean): string => {
-    getCharValues ??= codePoints;
+  /**
+   * Escapes `text`. When `skipAscii` is true, chunks captured by `asciiSkipper` are emitted verbatim.
+   */
+  escape(text: string, skipAscii: boolean): string {
+    const { asciiSkipper, charValues, mapper } = this.config.escape;
+    const getCharValues = charValues ?? codePoints;
 
     return text
-      .split(skip ? skipper : SKIP_NOTHING_RE)
-      .flatMap((x, i) => {
-        if (i % 2) {
-          return x;
-        }
-        return getCharValues(x).map(mapper);
-      })
+      .split(skipAscii ? asciiSkipper : SKIP_NOTHING_RE)
+      // Splitting on a single capture group leaves the captured (skipped) chunks at odd indices.
+      .flatMap((x, i) => i % 2 ? x : getCharValues(x).map(mapper))
       .join('');
-  };
-}
+  }
 
-interface UnescapeConfig {
-  regex: RegExp
-  radix: number
-};
+  /**
+   * Replaces every sequence matched by `regex` in `escaped` with the character it encodes.
+   * Sequences whose value is not a valid code point are left as-is.
+   */
+  unescape(escaped: string): string {
+    const { regex, radix } = this.config.unescape;
 
-function unescaper({ regex, radix }: UnescapeConfig) {
-  return (escaped: string): string => {
     return escaped.replace(regex, (match) => {
-      return String.fromCodePoint(Number.parseInt(match.replace(/\P{AHex}/gu, ''), radix));
+      const codePoint = Number.parseInt(match.replace(/\P{AHex}/gu, ''), radix);
+      // String.fromCodePoint throws a RangeError above U+10FFFF; keep such sequences untouched.
+      return codePoint <= 0x10FFFF ? String.fromCodePoint(codePoint) : match;
     });
-  };
+  }
 }
 
 export type ConverterId = keyof typeof converters;
 const converters = {
-  fullUnicode: {
+  fullUnicode: new Converter({
     name: 'Full Unicode',
-    escape: escaper({ mapper: convertCodePointToUnicode, asciiSkipper: SKIP_ASCII_JS }),
-    unescape: unescaper({ regex: /\\u\p{AHex}{4}|\\u\{\p{AHex}{1,6}\}/gu, radix: 16 }),
-  },
-  utf16: {
+    escape: { mapper: convertCodePointToUnicode, asciiSkipper: SKIP_ASCII_JS },
+    unescape: { regex: /\\u\p{AHex}{4}|\\u\{\p{AHex}{1,6}\}/gu, radix: 16 },
+  }),
+  utf16: new Converter({
     name: 'UTF-16 Code Units',
-    escape: escaper({ charValues: codeUnits, mapper: convertCodePointToUnicode, asciiSkipper: SKIP_ASCII_JS }),
-    unescape: unescaper({ regex: /\\u\p{AHex}{4}/gu, radix: 16 }),
-  },
-  hexEntities: {
+    escape: { charValues: codeUnits, mapper: convertCodePointToUnicode, asciiSkipper: SKIP_ASCII_JS },
+    unescape: { regex: /\\u\p{AHex}{4}/gu, radix: 16 },
+  }),
+  hexEntities: new Converter({
     name: 'HTML Entities (Hex)',
-    escape: escaper({ mapper: toHexEntities, asciiSkipper: SKIP_ASCII_HTML }),
-    unescape: unescaper({ regex: /&#x\p{AHex}{1,6};/gu, radix: 16 }),
-  },
-  decimalEntities: {
+    escape: { mapper: toHexEntities, asciiSkipper: SKIP_ASCII_HTML },
+    unescape: { regex: /&#x\p{AHex}{1,6};/gu, radix: 16 },
+  }),
+  decimalEntities: new Converter({
     name: 'HTML Entities (Decimal)',
-    escape: escaper({ mapper: toDecimalEntities, asciiSkipper: SKIP_ASCII_HTML }),
-    unescape: unescaper({ regex: /&#\d+;/gu, radix: 10 }),
-  },
+    escape: { mapper: toDecimalEntities, asciiSkipper: SKIP_ASCII_HTML },
+    unescape: { regex: /&#\d+;/gu, radix: 10 },
+  }),
 } satisfies Record<string, Converter>;
 
 function convertCodePointToUnicode(codePoint: number): string {