This commit is contained in:
lionel-rowe 2025-04-06 18:42:00 -07:00 committed by GitHub
commit a39a04299e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 283 additions and 32 deletions

View file

@ -10,16 +10,18 @@ test.describe('Tool - Text to Unicode', () => {
});
// Types text into the "text to unicode" input and checks the escaped value shown in the output field.
test('Text to unicode conversion', async ({ page }) => {
// NOTE(review): the next `fill('it-tools')` and the `toEqual('it-tools')` assertion below look like the
// pre-change lines of this commit kept by the diff rendering; they contradict the lines that follow them — confirm.
await page.getByTestId('text-to-unicode-input').fill('it-tools');
// Input mixes ASCII quotes (which the expectation below shows escaped) with non-ASCII characters.
await page.getByTestId('text-to-unicode-input').fill('"it-tools" 文字');
const unicode = await page.getByTestId('text-to-unicode-output').inputValue();
expect(unicode).toEqual('it-tools');
// String.raw keeps the backslashes literal, so this compares against the escape sequences themselves.
// eslint-disable-next-line unicorn/escape-case
expect(unicode).toEqual(String.raw`\u0022it-tools\u0022 \u6587\u5b57`);
});
// Types escape sequences into the "unicode to text" input and checks the decoded text in the output field.
test('Unicode to text conversion', async ({ page }) => {
// NOTE(review): the next `fill('it-tools')` and the `toEqual('it-tools')` assertion below look like the
// pre-change lines of this commit kept by the diff rendering; they contradict the lines that follow them — confirm.
await page.getByTestId('unicode-to-text-input').fill('it-tools');
// String.raw keeps the backslashes literal, so the field receives the escape sequences, not the decoded characters.
// eslint-disable-next-line unicorn/escape-case
await page.getByTestId('unicode-to-text-input').fill(String.raw`\u0022it-tools\u0022 \u6587\u5b57`);
const text = await page.getByTestId('unicode-to-text-output').inputValue();
expect(text).toEqual('it-tools');
expect(text).toEqual('"it-tools" 文字');
});
});

View file

@ -1,7 +1,10 @@
import { describe, expect, it } from 'vitest';
import { convertTextToUnicode, convertUnicodeToText } from './text-to-unicode.service';
import { type ConverterId, SKIP_ASCII_HTML, SKIP_ASCII_JS, converters } from './text-to-unicode.service';
describe('text-to-unicode (legacy tests)', () => {
const convertTextToUnicode = (text: string) => converters.decimalEntities.escape(text, false);
const convertUnicodeToText = (escaped: string) => converters.decimalEntities.unescape(escaped);
describe('text-to-unicode', () => {
describe('convertTextToUnicode', () => {
it('a text string is converted to unicode representation', () => {
expect(convertTextToUnicode('A')).toBe('A');
@ -18,3 +21,100 @@ describe('text-to-unicode', () => {
});
});
});
const ALL_PRINTABLE_ASCII = ' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~';
// Guards the hand-written ASCII-skipping regexes exported by the service against their
// set-subtraction (`v`-flag) equivalents, which state the intent more directly.
describe('text-to-unicode regexes', () => {
  // eslint-disable-next-line prefer-regex-literals
  const skipAsciiJs = new RegExp(String.raw`([[ -~]--['"\\]]+)`, 'gv');
  // eslint-disable-next-line prefer-regex-literals
  const skipAsciiHtml = new RegExp(String.raw`([[ -~]--[<>&'"]]+)`, 'gv');

  it('regexes are equivalent to `v`-flag versions', () => {
    // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/unicodeSets
    // regexes in `text-to-unicode.service.ts` can be replaced with `v`-flag versions once unicodeSets reaches
    // sufficient in-browser support
    const regexPairs = [
      [skipAsciiJs, SKIP_ASCII_JS],
      [skipAsciiHtml, SKIP_ASCII_HTML],
    ] as const;

    for (const [vFlagRegex, shippedRegex] of regexPairs) {
      const expectedRuns = ALL_PRINTABLE_ASCII.match(shippedRegex);
      expect(ALL_PRINTABLE_ASCII.match(vFlagRegex)).toStrictEqual(expectedRuns);
    }
  });
});
// Table-driven round-trip tests: every fixture lists the expected escaped form per converter,
// and each converter must both produce that form and decode it back to the original text.
describe('text-to-unicode', () => {
  /** One scenario: the plain text, whether plain ASCII is left unescaped, and the expected output per converter. */
  interface TestConfig {
    text: string
    results: Record<ConverterId, string>
    skipAscii?: boolean
  }

  const tests: TestConfig[] = [
    {
      text: 'ABC',
      results: {
        fullUnicode: String.raw`\u0041\u0042\u0043`,
        utf16: String.raw`\u0041\u0042\u0043`,
        hexEntities: String.raw`&#x41;&#x42;&#x43;`,
        decimalEntities: String.raw`&#65;&#66;&#67;`,
      },
    },
    {
      text: 'ABC',
      skipAscii: true,
      results: {
        fullUnicode: 'ABC',
        utf16: 'ABC',
        hexEntities: 'ABC',
        decimalEntities: 'ABC',
      },
    },
    {
      text: ALL_PRINTABLE_ASCII,
      skipAscii: true,
      results: {
        // eslint-disable-next-line unicorn/escape-case
        fullUnicode: String.raw` !\u0022#$%&\u0027()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\u005c]^_${'`'}abcdefghijklmnopqrstuvwxyz{|}~`,
        // eslint-disable-next-line unicorn/escape-case
        utf16: String.raw` !\u0022#$%&\u0027()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\u005c]^_${'`'}abcdefghijklmnopqrstuvwxyz{|}~`,
        hexEntities: String.raw` !&#x22;#$%&#x26;&#x27;()*+,-./0123456789:;&#x3c;=&#x3e;?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_${'`'}abcdefghijklmnopqrstuvwxyz{|}~`,
        decimalEntities: String.raw` !&#34;#$%&#38;&#39;()*+,-./0123456789:;&#60;=&#62;?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_${'`'}abcdefghijklmnopqrstuvwxyz{|}~`,
      },
    },
    {
      text: '文字',
      results: {
        // eslint-disable-next-line unicorn/escape-case
        fullUnicode: String.raw`\u6587\u5b57`,
        // eslint-disable-next-line unicorn/escape-case
        utf16: String.raw`\u6587\u5b57`,
        hexEntities: String.raw`&#x6587;&#x5b57;`,
        decimalEntities: String.raw`&#25991;&#23383;`,
      },
    },
    {
      text: 'a 💩 b',
      skipAscii: true,
      results: {
        // eslint-disable-next-line unicorn/escape-case
        fullUnicode: String.raw`a \u{1f4a9} b`,
        // eslint-disable-next-line unicorn/escape-case
        utf16: String.raw`a \ud83d\udca9 b`,
        hexEntities: String.raw`a &#x1f4a9; b`,
        decimalEntities: String.raw`a &#128169; b`,
      },
    },
  ];

  tests.forEach(({ text, skipAscii, results }) => {
    describe(`${text} (skipAscii=${skipAscii})`, () => {
      (Object.keys(results) as ConverterId[]).forEach((converterId) => {
        const expected = results[converterId];

        describe(converterId, () => {
          const converter = converters[converterId];

          it('Escaping', () => {
            // An omitted `skipAscii` in the fixture means "escape everything".
            expect(converter.escape(text, skipAscii ?? false)).toBe(expected);
          });

          it('Unescaping', () => {
            expect(converter.unescape(expected)).toBe(text);
          });
        });
      });
    });
  });
});

View file

@ -1,9 +1,86 @@
function convertTextToUnicode(text: string): string {
return text.split('').map(value => `&#${value.charCodeAt(0)};`).join('');
// Regex that never matches: a position cannot be both a word boundary (`\b`) and a non-boundary (`\B`).
// It still has one capture group, like the skip regexes below, so it can be passed to the same `split` call.
const SKIP_NOTHING_RE = /(\b\B)/;
/**
 * Matches runs of printable ASCII (space through `~`) excluding `"`, `'` and `\`.
 * The whole run is captured in the single capture group.
 */
export const SKIP_ASCII_JS = /([ -!#-&(-\[\]-~]+)/g;
/**
 * Matches runs of printable ASCII (space through `~`) excluding `"`, `&`, `'`, `<` and `>`.
 * The whole run is captured in the single capture group.
 */
export const SKIP_ASCII_HTML = /([ -!#-%(-;=?-~]+)/g;
/**
 * Lists the UTF-16 code units of `text`, in order.
 *
 * Characters outside the Basic Multilingual Plane contribute two entries
 * (their high and low surrogate).
 *
 * @param text - The string to decompose.
 * @returns One number per UTF-16 code unit.
 */
function codeUnits(text: string): number[] {
  const units: number[] = [];
  for (let index = 0; index < text.length; index += 1) {
    units.push(text.charCodeAt(index));
  }
  return units;
}
function convertUnicodeToText(unicodeStr: string): string {
return unicodeStr.replace(/&#(\d+);/g, (match, dec) => String.fromCharCode(dec));
/**
 * Lists the Unicode code points of `text`, in order.
 *
 * Iteration is by code point, so a surrogate pair yields a single entry.
 *
 * @param text - The string to decompose.
 * @returns One number per code point.
 */
function codePoints(text: string): number[] {
  return Array.from(text, symbol => symbol.codePointAt(0)!);
}
export { convertTextToUnicode, convertUnicodeToText };
/** Describes one escape format: how text is escaped and how its escape sequences are recognised and parsed. */
interface ConverterConfig {
/** Human-readable name of the format. */
name: string
/** Settings used when turning plain text into escape sequences. */
escape: {
/** Splits text into the numeric values to escape. Optional; `Converter.escape` falls back to `codePoints`. */
charValues?(text: string): number[]
/** Renders a single numeric value as its escape sequence. */
mapper(charValue: number): string
/**
 * Regular expression matching the content to leave unescaped when ASCII skipping is enabled.
 * Must have exactly 1 capture group.
 */
asciiSkipper: RegExp
}
/** Settings used when turning escape sequences back into text. */
unescape: {
/** Matches one complete escape sequence. */
regex: RegExp
/** Numeric base passed to `Number.parseInt` for the digits of a matched sequence. */
radix: number
}
}
/**
 * Escapes text into, and unescapes text from, the escape format described by its config.
 */
class Converter {
  /** Largest value `String.fromCodePoint` accepts; anything above it throws a `RangeError`. */
  private static readonly MAX_CODE_POINT = 0x10FFFF;

  constructor(public config: ConverterConfig) {}

  /**
   * Replaces characters of `text` with escape sequences.
   *
   * @param text - Plain text to escape.
   * @param skipAscii - When true, runs matched by the config's `asciiSkipper` are copied through unescaped.
   * @returns The escaped text.
   */
  escape(text: string, skipAscii: boolean): string {
    const { asciiSkipper, charValues, mapper } = this.config.escape;
    const getCharValues = charValues ?? codePoints;

    return text
      // Splitting on a regex with one capture group interleaves the result: even indices hold the text
      // between matches (to be escaped), odd indices hold the captured runs (kept verbatim).
      .split(skipAscii ? asciiSkipper : SKIP_NOTHING_RE)
      .flatMap((x, i) => i % 2 ? x : getCharValues(x).map(mapper))
      .join('');
  }

  /**
   * Replaces every escape sequence matched by the config's `unescape.regex` with the character it encodes.
   *
   * Sequences whose number is not a valid Unicode code point (above U+10FFFF, e.g. `\u{110000}` or
   * `&#99999999;`) are left in the output unchanged instead of throwing.
   *
   * @param escaped - Text containing escape sequences.
   * @returns The text with all valid escape sequences decoded.
   */
  unescape(escaped: string): string {
    const { regex, radix } = this.config.unescape;

    return escaped.replace(regex, (match) => {
      // Strip the escape's punctuation/prefix so only the digits remain.
      const codePoint = Number.parseInt(match.replace(/\P{AHex}/gu, ''), radix);
      // The comparison is also false for NaN, so unparsable input falls through to `match` as well.
      return codePoint <= Converter.MAX_CODE_POINT ? String.fromCodePoint(codePoint) : match;
    });
  }
}
/** Union of the keys of `converters`, i.e. the identifiers of the available escape formats. */
export type ConverterId = keyof typeof converters;
/**
 * Registry of the available escape formats.
 * `satisfies` validates every entry as a `Converter` while keeping the literal keys for `ConverterId`.
 */
const converters = {
// JavaScript-style escapes per code point; decoding accepts `\uXXXX` (4 hex digits) and `\u{X...}` (1-6 hex digits).
fullUnicode: new Converter({
name: 'Full Unicode',
escape: { mapper: convertCodePointToUnicode, asciiSkipper: SKIP_ASCII_JS },
unescape: { regex: /\\u\p{AHex}{4}|\\u\{\p{AHex}{1,6}\}/gu, radix: 16 },
}),
// JavaScript-style escapes per UTF-16 code unit (`charValues: codeUnits`); decoding accepts only the 4-digit `\uXXXX` form.
utf16: new Converter({
name: 'UTF-16 Code Units',
escape: { charValues: codeUnits, mapper: convertCodePointToUnicode, asciiSkipper: SKIP_ASCII_JS },
unescape: { regex: /\\u\p{AHex}{4}/gu, radix: 16 },
}),
// Hexadecimal numeric character references: `&#x...;` with 1-6 hex digits.
hexEntities: new Converter({
name: 'HTML Entities (Hex)',
escape: { mapper: toHexEntities, asciiSkipper: SKIP_ASCII_HTML },
unescape: { regex: /&#x\p{AHex}{1,6};/gu, radix: 16 },
}),
// Decimal numeric character references: `&#...;` with one or more decimal digits.
decimalEntities: new Converter({
name: 'HTML Entities (Decimal)',
escape: { mapper: toDecimalEntities, asciiSkipper: SKIP_ASCII_HTML },
unescape: { regex: /&#\d+;/gu, radix: 10 },
}),
} satisfies Record<string, Converter>;
/**
 * Formats a numeric value as a JavaScript-style Unicode escape.
 *
 * Values whose lowercase hex form fits in four digits become `\uXXXX` (zero-padded);
 * longer ones use the braced form `\u{X...}`.
 *
 * @param codePoint - The value to format.
 * @returns The escape sequence as literal text (starting with a backslash).
 */
function convertCodePointToUnicode(codePoint: number): string {
  const digits = codePoint.toString(16);
  if (digits.length <= 4) {
    return `\\u${digits.padStart(4, '0')}`;
  }
  return `\\u{${digits}}`;
}
/**
 * Formats a numeric value as a hexadecimal HTML numeric character reference.
 *
 * @param codePoint - The value to format.
 * @returns `&#x` + lowercase hex digits + `;`.
 */
function toHexEntities(codePoint: number): string {
  const hexDigits = codePoint.toString(16);
  return '&#x' + hexDigits + ';';
}
/**
 * Formats a numeric value as a decimal HTML numeric character reference.
 *
 * @param codePoint - The value to format.
 * @returns `&#` + decimal digits + `;`.
 */
function toDecimalEntities(codePoint: number): string {
  return '&#' + String(codePoint) + ';';
}
export { converters };

View file

@ -1,34 +1,106 @@
<script setup lang="ts">
import { convertTextToUnicode, convertUnicodeToText } from './text-to-unicode.service';
import { type ConverterId, converters } from './text-to-unicode.service';
import { useCopy } from '@/composable/copy';
const converterId = ref<ConverterId>('fullUnicode');
const skipAscii = ref(true);
const inputText = ref('');
const unicodeFromText = computed(() => inputText.value.trim() === '' ? '' : convertTextToUnicode(inputText.value));
// Escaped form of the input text, recomputed when the text, the selected converter or the skip-ASCII switch changes.
// Empty or whitespace-only input yields an empty output instead of being escaped.
const unicodeFromText = computed(() =>
inputText.value.trim() === ''
? ''
: converters[converterId.value].escape(inputText.value, skipAscii.value),
);
// Copy handler wired to the "Copy unicode to clipboard" button.
const { copy: copyUnicode } = useCopy({ source: unicodeFromText });
const inputUnicode = ref('');
const textFromUnicode = computed(() => inputUnicode.value.trim() === '' ? '' : convertUnicodeToText(inputUnicode.value));
// Decoded form of the escaped input, using the same selected converter as the escaping direction.
// Empty or whitespace-only input yields an empty output.
const textFromUnicode = computed(() =>
inputUnicode.value.trim() === '' ? '' : converters[converterId.value].unescape(inputUnicode.value),
);
// Copy handler wired to the "Copy text to clipboard" button.
const { copy: copyText } = useCopy({ source: textFromUnicode });
</script>
<template>
<c-card title="Text to Unicode">
<c-input-text v-model:value="inputText" multiline placeholder="e.g. 'Hello Avengers'" label="Enter text to convert to unicode" autosize autofocus raw-text test-id="text-to-unicode-input" />
<c-input-text v-model:value="unicodeFromText" label="Unicode from your text" multiline raw-text readonly mt-2 placeholder="The unicode representation of your text will be here" test-id="text-to-unicode-output" />
<div mt-2 flex justify-center>
<c-button :disabled="!unicodeFromText" @click="copyUnicode()">
Copy unicode to clipboard
</c-button>
<div class="outer" flex flex-col gap-6>
<div class="controls">
<c-select
v-model:value="converterId"
searchable
label="Conversion type:"
:options="Object.entries(converters).map(([key, val]) => ({ label: val.config.name, value: key }))"
/>
</div>
</c-card>
<c-card title="Unicode to Text">
<c-input-text v-model:value="inputUnicode" multiline placeholder="Input Unicode" label="Enter unicode to convert to text" autosize raw-text test-id="unicode-to-text-input" />
<c-input-text v-model:value="textFromUnicode" label="Text from your Unicode" multiline raw-text readonly mt-2 placeholder="The text representation of your unicode will be here" test-id="unicode-to-text-output" />
<div mt-2 flex justify-center>
<c-button :disabled="!textFromUnicode" @click="copyText()">
Copy text to clipboard
</c-button>
</div>
</c-card>
<c-card class="card" title="Text to Unicode">
<c-input-text
v-model:value="inputText"
multiline
placeholder="e.g. 'Hello Avengers'"
label="Enter text to convert to Unicode"
autosize
autofocus
raw-text
test-id="text-to-unicode-input"
/>
<c-input-text
v-model:value="unicodeFromText"
label="Unicode from your text"
multiline
raw-text
readonly
mt-2
placeholder="The unicode representation of your text will be here"
test-id="text-to-unicode-output"
/>
<div mt-2 flex justify-start>
<n-form-item label="Skip ASCII chars with no special meaning?" :show-feedback="false" label-placement="left">
<n-switch v-model:value="skipAscii" />
</n-form-item>
</div>
<div mt-2 flex justify-center>
<c-button :disabled="!unicodeFromText" @click="copyUnicode()"> Copy unicode to clipboard </c-button>
</div>
</c-card>
<c-card class="card" title="Unicode to Text">
<c-input-text
v-model:value="inputUnicode"
multiline
placeholder="Input Unicode"
label="Enter unicode to convert to text"
autosize
raw-text
test-id="unicode-to-text-input"
/>
<c-input-text
v-model:value="textFromUnicode"
label="Text from your Unicode"
multiline
raw-text
readonly
mt-2
placeholder="The text representation of your unicode will be here"
test-id="unicode-to-text-output"
/>
<div mt-2 flex justify-center>
<c-button :disabled="!textFromUnicode" @click="copyText()"> Copy text to clipboard </c-button>
</div>
</c-card>
</div>
</template>
<style lang="less" scoped>
/* Wrapping row container for the controls and the two cards: may shrink but not grow past a 1200px basis. */
/* NOTE(review): the template also puts a `flex-col` attribute on this element while this rule sets */
/* `flex-direction: row`; which one applies depends on how that attribute is turned into CSS — confirm. */
.outer {
flex: 0 1 1200px;
margin-inline: 50px;
display: flex;
flex-direction: row;
flex-wrap: wrap;
}
/* Full-width basis, so the controls occupy a line of their own inside the wrapping container. */
.controls {
flex: 0 1 100%;
}
/* Cards grow to share a line but never shrink below max(40%, 500px); narrower containers make them wrap. */
.card {
flex: 1 0 max(40%, 500px);
}
</style>