Convert Converter to class

This commit is contained in:
lionel-rowe 2024-05-15 11:09:06 +08:00
parent 1dc965d9a8
commit d75473b2e8
No known key found for this signature in database
2 changed files with 40 additions and 47 deletions

View file

@ -3,7 +3,7 @@ import { type ConverterId, SKIP_ASCII_HTML, SKIP_ASCII_JS, converters } from './
describe('text-to-unicode (legacy tests)', () => { describe('text-to-unicode (legacy tests)', () => {
const convertTextToUnicode = (text: string) => converters.decimalEntities.escape(text, false); const convertTextToUnicode = (text: string) => converters.decimalEntities.escape(text, false);
const convertUnicodeToText = converters.decimalEntities.unescape; const convertUnicodeToText = (escaped: string) => converters.decimalEntities.unescape(escaped);
describe('convertTextToUnicode', () => { describe('convertTextToUnicode', () => {
it('a text string is converted to unicode representation', () => { it('a text string is converted to unicode representation', () => {

View file

@ -11,70 +11,63 @@ function codePoints(text: string): number[] {
return [...text].map(char => char.codePointAt(0)); return [...text].map(char => char.codePointAt(0));
} }
export interface Converter { interface ConverterConfig {
name: string name: string
escape(text: string, skipAscii: boolean): string escape: {
unescape(text: string): string charValues?(text: string): number[]
}; mapper(charValue: number): string
/** Regular expression for default content to skip (leave unescaped). Must have exactly 1 capture group, so that `split` puts the skipped spans at odd indices. */
asciiSkipper: RegExp
}
unescape: {
regex: RegExp
radix: number
}
}
class Converter {
constructor(public config: ConverterConfig) {}
interface EscapeConfig { escape(text: string, skipAscii: boolean): string {
charValues?(text: string): number[] const { asciiSkipper, charValues, mapper } = this.config.escape;
mapper(charValue: number): string const getCharValues = charValues ?? codePoints;
/** Regular expression for default content to skip (leave unescaped). Must have exactly 1 capture group, so that `split` puts the skipped spans at odd indices. */
asciiSkipper: RegExp
};
function escaper({ charValues: getCharValues, mapper, asciiSkipper: skipper }: EscapeConfig) {
return (text: string, skip: boolean): string => {
getCharValues ??= codePoints;
return text return text
.split(skip ? skipper : SKIP_NOTHING_RE) .split(skipAscii ? asciiSkipper : SKIP_NOTHING_RE)
.flatMap((x, i) => { .flatMap((x, i) => i % 2 ? x : getCharValues(x).map(mapper))
if (i % 2) {
return x;
}
return getCharValues(x).map(mapper);
})
.join(''); .join('');
}; }
}
interface UnescapeConfig { unescape(escaped: string): string {
regex: RegExp const { regex, radix } = this.config.unescape;
radix: number
};
function unescaper({ regex, radix }: UnescapeConfig) {
return (escaped: string): string => {
return escaped.replace(regex, (match) => { return escaped.replace(regex, (match) => {
return String.fromCodePoint(Number.parseInt(match.replace(/\P{AHex}/gu, ''), radix)); return String.fromCodePoint(Number.parseInt(match.replace(/\P{AHex}/gu, ''), radix));
}); });
}; }
} }
export type ConverterId = keyof typeof converters; export type ConverterId = keyof typeof converters;
const converters = { const converters = {
fullUnicode: { fullUnicode: new Converter({
name: 'Full Unicode', name: 'Full Unicode',
escape: escaper({ mapper: convertCodePointToUnicode, asciiSkipper: SKIP_ASCII_JS }), escape: { mapper: convertCodePointToUnicode, asciiSkipper: SKIP_ASCII_JS },
unescape: unescaper({ regex: /\\u\p{AHex}{4}|\\u\{\p{AHex}{1,6}\}/gu, radix: 16 }), unescape: { regex: /\\u\p{AHex}{4}|\\u\{\p{AHex}{1,6}\}/gu, radix: 16 },
}, }),
utf16: { utf16: new Converter({
name: 'UTF-16 Code Units', name: 'UTF-16 Code Units',
escape: escaper({ charValues: codeUnits, mapper: convertCodePointToUnicode, asciiSkipper: SKIP_ASCII_JS }), escape: { charValues: codeUnits, mapper: convertCodePointToUnicode, asciiSkipper: SKIP_ASCII_JS },
unescape: unescaper({ regex: /\\u\p{AHex}{4}/gu, radix: 16 }), unescape: { regex: /\\u\p{AHex}{4}/gu, radix: 16 },
}, }),
hexEntities: { hexEntities: new Converter({
name: 'HTML Entities (Hex)', name: 'HTML Entities (Hex)',
escape: escaper({ mapper: toHexEntities, asciiSkipper: SKIP_ASCII_HTML }), escape: { mapper: toHexEntities, asciiSkipper: SKIP_ASCII_HTML },
unescape: unescaper({ regex: /&#x\p{AHex}{1,6};/gu, radix: 16 }), unescape: { regex: /&#x\p{AHex}{1,6};/gu, radix: 16 },
}, }),
decimalEntities: { decimalEntities: new Converter({
name: 'HTML Entities (Decimal)', name: 'HTML Entities (Decimal)',
escape: escaper({ mapper: toDecimalEntities, asciiSkipper: SKIP_ASCII_HTML }), escape: { mapper: toDecimalEntities, asciiSkipper: SKIP_ASCII_HTML },
unescape: unescaper({ regex: /&#\d+;/gu, radix: 10 }), unescape: { regex: /&#\d+;/gu, radix: 10 },
}, }),
} satisfies Record<string, Converter>; } satisfies Record<string, Converter>;
function convertCodePointToUnicode(codePoint: number): string { function convertCodePointToUnicode(codePoint: number): string {