mirror of
https://github.com/CorentinTh/it-tools.git
synced 2025-05-04 13:29:13 -04:00
Convert Converter to class
This commit is contained in:
parent
1dc965d9a8
commit
d75473b2e8
2 changed files with 40 additions and 47 deletions
|
@ -3,7 +3,7 @@ import { type ConverterId, SKIP_ASCII_HTML, SKIP_ASCII_JS, converters } from './
|
|||
|
||||
describe('text-to-unicode (legacy tests)', () => {
|
||||
const convertTextToUnicode = (text: string) => converters.decimalEntities.escape(text, false);
|
||||
const convertUnicodeToText = converters.decimalEntities.unescape;
|
||||
const convertUnicodeToText = (escaped: string) => converters.decimalEntities.unescape(escaped);
|
||||
|
||||
describe('convertTextToUnicode', () => {
|
||||
it('a text string is converted to unicode representation', () => {
|
||||
|
|
|
@ -11,70 +11,63 @@ function codePoints(text: string): number[] {
|
|||
return [...text].map(char => char.codePointAt(0));
|
||||
}
|
||||
|
||||
export interface Converter {
|
||||
interface ConverterConfig {
|
||||
name: string
|
||||
escape(text: string, skipAscii: boolean): string
|
||||
unescape(text: string): string
|
||||
};
|
||||
escape: {
|
||||
charValues?(text: string): number[]
|
||||
mapper(charValue: number): string
|
||||
/** @prop regular expression for default content to skip. Must have exactly 1 capture group. */
|
||||
asciiSkipper: RegExp
|
||||
}
|
||||
unescape: {
|
||||
regex: RegExp
|
||||
radix: number
|
||||
}
|
||||
}
|
||||
class Converter {
|
||||
constructor(public config: ConverterConfig) {}
|
||||
|
||||
interface EscapeConfig {
|
||||
charValues?(text: string): number[]
|
||||
mapper(charValue: number): string
|
||||
/** @prop regular expression for default content to skip. Must have exactly 1 capture group. */
|
||||
asciiSkipper: RegExp
|
||||
};
|
||||
|
||||
function escaper({ charValues: getCharValues, mapper, asciiSkipper: skipper }: EscapeConfig) {
|
||||
return (text: string, skip: boolean): string => {
|
||||
getCharValues ??= codePoints;
|
||||
escape(text: string, skipAscii: boolean): string {
|
||||
const { asciiSkipper, charValues, mapper } = this.config.escape;
|
||||
const getCharValues = charValues ?? codePoints;
|
||||
|
||||
return text
|
||||
.split(skip ? skipper : SKIP_NOTHING_RE)
|
||||
.flatMap((x, i) => {
|
||||
if (i % 2) {
|
||||
return x;
|
||||
}
|
||||
return getCharValues(x).map(mapper);
|
||||
})
|
||||
.split(skipAscii ? asciiSkipper : SKIP_NOTHING_RE)
|
||||
.flatMap((x, i) => i % 2 ? x : getCharValues(x).map(mapper))
|
||||
.join('');
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
interface UnescapeConfig {
|
||||
regex: RegExp
|
||||
radix: number
|
||||
};
|
||||
unescape(escaped: string): string {
|
||||
const { regex, radix } = this.config.unescape;
|
||||
|
||||
function unescaper({ regex, radix }: UnescapeConfig) {
|
||||
return (escaped: string): string => {
|
||||
return escaped.replace(regex, (match) => {
|
||||
return String.fromCodePoint(Number.parseInt(match.replace(/\P{AHex}/gu, ''), radix));
|
||||
});
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
export type ConverterId = keyof typeof converters;
|
||||
const converters = {
|
||||
fullUnicode: {
|
||||
fullUnicode: new Converter({
|
||||
name: 'Full Unicode',
|
||||
escape: escaper({ mapper: convertCodePointToUnicode, asciiSkipper: SKIP_ASCII_JS }),
|
||||
unescape: unescaper({ regex: /\\u\p{AHex}{4}|\\u\{\p{AHex}{1,6}\}/gu, radix: 16 }),
|
||||
},
|
||||
utf16: {
|
||||
escape: { mapper: convertCodePointToUnicode, asciiSkipper: SKIP_ASCII_JS },
|
||||
unescape: { regex: /\\u\p{AHex}{4}|\\u\{\p{AHex}{1,6}\}/gu, radix: 16 },
|
||||
}),
|
||||
utf16: new Converter({
|
||||
name: 'UTF-16 Code Units',
|
||||
escape: escaper({ charValues: codeUnits, mapper: convertCodePointToUnicode, asciiSkipper: SKIP_ASCII_JS }),
|
||||
unescape: unescaper({ regex: /\\u\p{AHex}{4}/gu, radix: 16 }),
|
||||
},
|
||||
hexEntities: {
|
||||
escape: { charValues: codeUnits, mapper: convertCodePointToUnicode, asciiSkipper: SKIP_ASCII_JS },
|
||||
unescape: { regex: /\\u\p{AHex}{4}/gu, radix: 16 },
|
||||
}),
|
||||
hexEntities: new Converter({
|
||||
name: 'HTML Entities (Hex)',
|
||||
escape: escaper({ mapper: toHexEntities, asciiSkipper: SKIP_ASCII_HTML }),
|
||||
unescape: unescaper({ regex: /&#x\p{AHex}{1,6};/gu, radix: 16 }),
|
||||
},
|
||||
decimalEntities: {
|
||||
escape: { mapper: toHexEntities, asciiSkipper: SKIP_ASCII_HTML },
|
||||
unescape: { regex: /&#x\p{AHex}{1,6};/gu, radix: 16 },
|
||||
}),
|
||||
decimalEntities: new Converter({
|
||||
name: 'HTML Entities (Decimal)',
|
||||
escape: escaper({ mapper: toDecimalEntities, asciiSkipper: SKIP_ASCII_HTML }),
|
||||
unescape: unescaper({ regex: /&#\d+;/gu, radix: 10 }),
|
||||
},
|
||||
escape: { mapper: toDecimalEntities, asciiSkipper: SKIP_ASCII_HTML },
|
||||
unescape: { regex: /&#\d+;/gu, radix: 10 },
|
||||
}),
|
||||
} satisfies Record<string, Converter>;
|
||||
|
||||
function convertCodePointToUnicode(codePoint: number): string {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue