mirror of
https://github.com/CorentinTh/it-tools.git
synced 2025-05-04 21:37:11 -04:00
Convert Converter to class
This commit is contained in:
parent
1dc965d9a8
commit
d75473b2e8
2 changed files with 40 additions and 47 deletions
|
@@ -3,7 +3,7 @@ import { type ConverterId, SKIP_ASCII_HTML, SKIP_ASCII_JS, converters } from './
|
||||||
|
|
||||||
describe('text-to-unicode (legacy tests)', () => {
|
describe('text-to-unicode (legacy tests)', () => {
|
||||||
const convertTextToUnicode = (text: string) => converters.decimalEntities.escape(text, false);
|
const convertTextToUnicode = (text: string) => converters.decimalEntities.escape(text, false);
|
||||||
const convertUnicodeToText = converters.decimalEntities.unescape;
|
const convertUnicodeToText = (escaped: string) => converters.decimalEntities.unescape(escaped);
|
||||||
|
|
||||||
describe('convertTextToUnicode', () => {
|
describe('convertTextToUnicode', () => {
|
||||||
it('a text string is converted to unicode representation', () => {
|
it('a text string is converted to unicode representation', () => {
|
||||||
|
|
|
@@ -11,70 +11,63 @@ function codePoints(text: string): number[] {
|
||||||
return [...text].map(char => char.codePointAt(0));
|
return [...text].map(char => char.codePointAt(0));
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface Converter {
|
interface ConverterConfig {
|
||||||
name: string
|
name: string
|
||||||
escape(text: string, skipAscii: boolean): string
|
escape: {
|
||||||
unescape(text: string): string
|
charValues?(text: string): number[]
|
||||||
};
|
mapper(charValue: number): string
|
||||||
|
/** @prop regular expression for default content to skip. Must have exactly 1 capture group. */
|
||||||
|
asciiSkipper: RegExp
|
||||||
|
}
|
||||||
|
unescape: {
|
||||||
|
regex: RegExp
|
||||||
|
radix: number
|
||||||
|
}
|
||||||
|
}
|
||||||
|
class Converter {
|
||||||
|
constructor(public config: ConverterConfig) {}
|
||||||
|
|
||||||
interface EscapeConfig {
|
escape(text: string, skipAscii: boolean): string {
|
||||||
charValues?(text: string): number[]
|
const { asciiSkipper, charValues, mapper } = this.config.escape;
|
||||||
mapper(charValue: number): string
|
const getCharValues = charValues ?? codePoints;
|
||||||
/** @prop regular expression for default content to skip. Must have exactly 1 capture group. */
|
|
||||||
asciiSkipper: RegExp
|
|
||||||
};
|
|
||||||
|
|
||||||
function escaper({ charValues: getCharValues, mapper, asciiSkipper: skipper }: EscapeConfig) {
|
|
||||||
return (text: string, skip: boolean): string => {
|
|
||||||
getCharValues ??= codePoints;
|
|
||||||
|
|
||||||
return text
|
return text
|
||||||
.split(skip ? skipper : SKIP_NOTHING_RE)
|
.split(skipAscii ? asciiSkipper : SKIP_NOTHING_RE)
|
||||||
.flatMap((x, i) => {
|
.flatMap((x, i) => i % 2 ? x : getCharValues(x).map(mapper))
|
||||||
if (i % 2) {
|
|
||||||
return x;
|
|
||||||
}
|
|
||||||
return getCharValues(x).map(mapper);
|
|
||||||
})
|
|
||||||
.join('');
|
.join('');
|
||||||
};
|
}
|
||||||
}
|
|
||||||
|
|
||||||
interface UnescapeConfig {
|
unescape(escaped: string): string {
|
||||||
regex: RegExp
|
const { regex, radix } = this.config.unescape;
|
||||||
radix: number
|
|
||||||
};
|
|
||||||
|
|
||||||
function unescaper({ regex, radix }: UnescapeConfig) {
|
|
||||||
return (escaped: string): string => {
|
|
||||||
return escaped.replace(regex, (match) => {
|
return escaped.replace(regex, (match) => {
|
||||||
return String.fromCodePoint(Number.parseInt(match.replace(/\P{AHex}/gu, ''), radix));
|
return String.fromCodePoint(Number.parseInt(match.replace(/\P{AHex}/gu, ''), radix));
|
||||||
});
|
});
|
||||||
};
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
export type ConverterId = keyof typeof converters;
|
export type ConverterId = keyof typeof converters;
|
||||||
const converters = {
|
const converters = {
|
||||||
fullUnicode: {
|
fullUnicode: new Converter({
|
||||||
name: 'Full Unicode',
|
name: 'Full Unicode',
|
||||||
escape: escaper({ mapper: convertCodePointToUnicode, asciiSkipper: SKIP_ASCII_JS }),
|
escape: { mapper: convertCodePointToUnicode, asciiSkipper: SKIP_ASCII_JS },
|
||||||
unescape: unescaper({ regex: /\\u\p{AHex}{4}|\\u\{\p{AHex}{1,6}\}/gu, radix: 16 }),
|
unescape: { regex: /\\u\p{AHex}{4}|\\u\{\p{AHex}{1,6}\}/gu, radix: 16 },
|
||||||
},
|
}),
|
||||||
utf16: {
|
utf16: new Converter({
|
||||||
name: 'UTF-16 Code Units',
|
name: 'UTF-16 Code Units',
|
||||||
escape: escaper({ charValues: codeUnits, mapper: convertCodePointToUnicode, asciiSkipper: SKIP_ASCII_JS }),
|
escape: { charValues: codeUnits, mapper: convertCodePointToUnicode, asciiSkipper: SKIP_ASCII_JS },
|
||||||
unescape: unescaper({ regex: /\\u\p{AHex}{4}/gu, radix: 16 }),
|
unescape: { regex: /\\u\p{AHex}{4}/gu, radix: 16 },
|
||||||
},
|
}),
|
||||||
hexEntities: {
|
hexEntities: new Converter({
|
||||||
name: 'HTML Entities (Hex)',
|
name: 'HTML Entities (Hex)',
|
||||||
escape: escaper({ mapper: toHexEntities, asciiSkipper: SKIP_ASCII_HTML }),
|
escape: { mapper: toHexEntities, asciiSkipper: SKIP_ASCII_HTML },
|
||||||
unescape: unescaper({ regex: /&#x\p{AHex}{1,6};/gu, radix: 16 }),
|
unescape: { regex: /&#x\p{AHex}{1,6};/gu, radix: 16 },
|
||||||
},
|
}),
|
||||||
decimalEntities: {
|
decimalEntities: new Converter({
|
||||||
name: 'HTML Entities (Decimal)',
|
name: 'HTML Entities (Decimal)',
|
||||||
escape: escaper({ mapper: toDecimalEntities, asciiSkipper: SKIP_ASCII_HTML }),
|
escape: { mapper: toDecimalEntities, asciiSkipper: SKIP_ASCII_HTML },
|
||||||
unescape: unescaper({ regex: /&#\d+;/gu, radix: 10 }),
|
unescape: { regex: /&#\d+;/gu, radix: 10 },
|
||||||
},
|
}),
|
||||||
} satisfies Record<string, Converter>;
|
} satisfies Record<string, Converter>;
|
||||||
|
|
||||||
function convertCodePointToUnicode(codePoint: number): string {
|
function convertCodePointToUnicode(codePoint: number): string {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue