mirror of
https://github.com/CorentinTh/it-tools.git
synced 2025-05-05 05:47:10 -04:00
Always escape ASCII chars with special meaning
This commit is contained in:
parent
b0ae8d7b3d
commit
1dc965d9a8
4 changed files with 57 additions and 28 deletions
|
@ -10,16 +10,18 @@ test.describe('Tool - Text to Unicode', () => {
|
||||||
});
|
});
|
||||||
|
|
||||||
test('Text to unicode conversion', async ({ page }) => {
|
test('Text to unicode conversion', async ({ page }) => {
|
||||||
await page.getByTestId('text-to-unicode-input').fill('it-tools');
|
await page.getByTestId('text-to-unicode-input').fill('"it-tools" 文字');
|
||||||
const unicode = await page.getByTestId('text-to-unicode-output').inputValue();
|
const unicode = await page.getByTestId('text-to-unicode-output').inputValue();
|
||||||
|
|
||||||
expect(unicode).toEqual('it-tools');
|
// eslint-disable-next-line unicorn/escape-case
|
||||||
|
expect(unicode).toEqual(String.raw`\u0022it-tools\u0022 \u6587\u5b57`);
|
||||||
});
|
});
|
||||||
|
|
||||||
test('Unicode to text conversion', async ({ page }) => {
|
test('Unicode to text conversion', async ({ page }) => {
|
||||||
await page.getByTestId('unicode-to-text-input').fill('it-tools');
|
// eslint-disable-next-line unicorn/escape-case
|
||||||
|
await page.getByTestId('unicode-to-text-input').fill(String.raw`\u0022it-tools\u0022 \u6587\u5b57`);
|
||||||
const text = await page.getByTestId('unicode-to-text-output').inputValue();
|
const text = await page.getByTestId('unicode-to-text-output').inputValue();
|
||||||
|
|
||||||
expect(text).toEqual('it-tools');
|
expect(text).toEqual('"it-tools" 文字');
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
import { describe, expect, it } from 'vitest';
|
import { describe, expect, it } from 'vitest';
|
||||||
import { type ConverterId, SKIP_PRINTABLE_ASCII_RE, converters } from './text-to-unicode.service';
|
import { type ConverterId, SKIP_ASCII_HTML, SKIP_ASCII_JS, converters } from './text-to-unicode.service';
|
||||||
|
|
||||||
describe('text-to-unicode (legacy tests)', () => {
|
describe('text-to-unicode (legacy tests)', () => {
|
||||||
const convertTextToUnicode = converters.decimalEntities.escape;
|
const convertTextToUnicode = (text: string) => converters.decimalEntities.escape(text, false);
|
||||||
const convertUnicodeToText = converters.decimalEntities.unescape;
|
const convertUnicodeToText = converters.decimalEntities.unescape;
|
||||||
|
|
||||||
describe('convertTextToUnicode', () => {
|
describe('convertTextToUnicode', () => {
|
||||||
|
@ -22,6 +22,23 @@ describe('text-to-unicode (legacy tests)', () => {
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
const ALL_PRINTABLE_ASCII = ' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~';
|
||||||
|
|
||||||
|
describe('text-to-unicode regexes', () => {
|
||||||
|
// eslint-disable-next-line prefer-regex-literals
|
||||||
|
const skipAsciiJs = new RegExp(String.raw`([[ -~]--['"\\]]+)`, 'gv');
|
||||||
|
// eslint-disable-next-line prefer-regex-literals
|
||||||
|
const skipAsciiHtml = new RegExp(String.raw`([[ -~]--[<>&'"]]+)`, 'gv');
|
||||||
|
|
||||||
|
it('regexes are equivalent to `v`-flag versions', () => {
|
||||||
|
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/unicodeSets
|
||||||
|
// regexes in `text-to-unicode.service.ts` can be replaced with `v`-flag versions once unicodeSets reaches
|
||||||
|
// sufficient in-browser support
|
||||||
|
expect(ALL_PRINTABLE_ASCII.match(skipAsciiJs)).toStrictEqual(ALL_PRINTABLE_ASCII.match(SKIP_ASCII_JS));
|
||||||
|
expect(ALL_PRINTABLE_ASCII.match(skipAsciiHtml)).toStrictEqual(ALL_PRINTABLE_ASCII.match(SKIP_ASCII_HTML));
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
describe('text-to-unicode', () => {
|
describe('text-to-unicode', () => {
|
||||||
interface TestConfig {
|
interface TestConfig {
|
||||||
text: string
|
text: string
|
||||||
|
@ -48,6 +65,18 @@ describe('text-to-unicode', () => {
|
||||||
decimalEntities: 'ABC',
|
decimalEntities: 'ABC',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
text: ALL_PRINTABLE_ASCII,
|
||||||
|
skipPrintableAscii: true,
|
||||||
|
results: {
|
||||||
|
// eslint-disable-next-line unicorn/escape-case
|
||||||
|
fullUnicode: String.raw` !\u0022#$%&\u0027()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\u005c]^_${'`'}abcdefghijklmnopqrstuvwxyz{|}~`,
|
||||||
|
// eslint-disable-next-line unicorn/escape-case
|
||||||
|
utf16: String.raw` !\u0022#$%&\u0027()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\u005c]^_${'`'}abcdefghijklmnopqrstuvwxyz{|}~`,
|
||||||
|
hexEntities: String.raw` !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_${'`'}abcdefghijklmnopqrstuvwxyz{|}~`,
|
||||||
|
decimalEntities: String.raw` !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_${'`'}abcdefghijklmnopqrstuvwxyz{|}~`,
|
||||||
|
},
|
||||||
|
},
|
||||||
{
|
{
|
||||||
text: '文字',
|
text: '文字',
|
||||||
results: {
|
results: {
|
||||||
|
@ -79,7 +108,7 @@ describe('text-to-unicode', () => {
|
||||||
describe(key, () => {
|
describe(key, () => {
|
||||||
const converter = converters[key as ConverterId];
|
const converter = converters[key as ConverterId];
|
||||||
it('Escaping', () => {
|
it('Escaping', () => {
|
||||||
expect(converter.escape(text, skipAscii ? SKIP_PRINTABLE_ASCII_RE : undefined)).toBe(result);
|
expect(converter.escape(text, skipAscii)).toBe(result);
|
||||||
});
|
});
|
||||||
it('Unescaping', () => {
|
it('Unescaping', () => {
|
||||||
expect(converter.unescape(result)).toBe(text);
|
expect(converter.unescape(result)).toBe(text);
|
||||||
|
|
|
@ -1,37 +1,35 @@
|
||||||
// regex that never matches
|
// regex that never matches; used as the split pattern when nothing should be skipped
|
||||||
const SKIP_NOTHING_RE = /(\b\B)/;
|
const SKIP_NOTHING_RE = /(\b\B)/;
|
||||||
export const SKIP_PRINTABLE_ASCII_RE = /([ -~]+)/g;
|
export const SKIP_ASCII_JS = /([ -!#-&(-\[\]-~]+)/g;
|
||||||
|
export const SKIP_ASCII_HTML = /([ -!#-%(-;=?-~]+)/g;
|
||||||
|
|
||||||
function _codeUnits(text: string): number[] {
|
function codeUnits(text: string): number[] {
|
||||||
return text.split('').map(char => char.codePointAt(0));
|
return text.split('').map(char => char.codePointAt(0));
|
||||||
}
|
}
|
||||||
|
|
||||||
function _codePoints(text: string): number[] {
|
function codePoints(text: string): number[] {
|
||||||
return [...text].map(char => char.codePointAt(0));
|
return [...text].map(char => char.codePointAt(0));
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface Converter {
|
export interface Converter {
|
||||||
name: string
|
name: string
|
||||||
escape(text: string, skip: RegExp): string
|
escape(text: string, skipAscii: boolean): string
|
||||||
unescape(text: string): string
|
unescape(text: string): string
|
||||||
};
|
};
|
||||||
|
|
||||||
interface EscapeConfig {
|
interface EscapeConfig {
|
||||||
getCharValues?(text: string): number[]
|
charValues?(text: string): number[]
|
||||||
mapper(charValue: number): string
|
mapper(charValue: number): string
|
||||||
|
/** Regular expression matching the ASCII content to leave unescaped when skipping is enabled. Must have exactly 1 capture group. */
|
||||||
|
asciiSkipper: RegExp
|
||||||
};
|
};
|
||||||
|
|
||||||
function escaper({ getCharValues, mapper }: EscapeConfig) {
|
function escaper({ charValues: getCharValues, mapper, asciiSkipper: skipper }: EscapeConfig) {
|
||||||
/**
|
return (text: string, skip: boolean): string => {
|
||||||
* @param text text input to escape
|
getCharValues ??= codePoints;
|
||||||
* @param skipper regular expression for content _not_ to escape. Must have exactly 1 capture group.
|
|
||||||
*/
|
|
||||||
return (text: string, skipper?: RegExp): string => {
|
|
||||||
skipper ??= SKIP_NOTHING_RE;
|
|
||||||
getCharValues ??= _codePoints;
|
|
||||||
|
|
||||||
return text
|
return text
|
||||||
.split(skipper)
|
.split(skip ? skipper : SKIP_NOTHING_RE)
|
||||||
.flatMap((x, i) => {
|
.flatMap((x, i) => {
|
||||||
if (i % 2) {
|
if (i % 2) {
|
||||||
return x;
|
return x;
|
||||||
|
@ -59,22 +57,22 @@ export type ConverterId = keyof typeof converters;
|
||||||
const converters = {
|
const converters = {
|
||||||
fullUnicode: {
|
fullUnicode: {
|
||||||
name: 'Full Unicode',
|
name: 'Full Unicode',
|
||||||
escape: escaper({ mapper: convertCodePointToUnicode }),
|
escape: escaper({ mapper: convertCodePointToUnicode, asciiSkipper: SKIP_ASCII_JS }),
|
||||||
unescape: unescaper({ regex: /\\u\p{AHex}{4}|\\u\{\p{AHex}{1,6}\}/gu, radix: 16 }),
|
unescape: unescaper({ regex: /\\u\p{AHex}{4}|\\u\{\p{AHex}{1,6}\}/gu, radix: 16 }),
|
||||||
},
|
},
|
||||||
utf16: {
|
utf16: {
|
||||||
name: 'UTF-16 Code Units',
|
name: 'UTF-16 Code Units',
|
||||||
escape: escaper({ getCharValues: _codeUnits, mapper: convertCodePointToUnicode }),
|
escape: escaper({ charValues: codeUnits, mapper: convertCodePointToUnicode, asciiSkipper: SKIP_ASCII_JS }),
|
||||||
unescape: unescaper({ regex: /\\u\p{AHex}{4}/gu, radix: 16 }),
|
unescape: unescaper({ regex: /\\u\p{AHex}{4}/gu, radix: 16 }),
|
||||||
},
|
},
|
||||||
hexEntities: {
|
hexEntities: {
|
||||||
name: 'HTML Entities (Hex)',
|
name: 'HTML Entities (Hex)',
|
||||||
escape: escaper({ mapper: toHexEntities }),
|
escape: escaper({ mapper: toHexEntities, asciiSkipper: SKIP_ASCII_HTML }),
|
||||||
unescape: unescaper({ regex: /&#x\p{AHex}{1,6};/gu, radix: 16 }),
|
unescape: unescaper({ regex: /&#x\p{AHex}{1,6};/gu, radix: 16 }),
|
||||||
},
|
},
|
||||||
decimalEntities: {
|
decimalEntities: {
|
||||||
name: 'HTML Entities (Decimal)',
|
name: 'HTML Entities (Decimal)',
|
||||||
escape: escaper({ mapper: toDecimalEntities }),
|
escape: escaper({ mapper: toDecimalEntities, asciiSkipper: SKIP_ASCII_HTML }),
|
||||||
unescape: unescaper({ regex: /&#\d+;/gu, radix: 10 }),
|
unescape: unescaper({ regex: /&#\d+;/gu, radix: 10 }),
|
||||||
},
|
},
|
||||||
} satisfies Record<string, Converter>;
|
} satisfies Record<string, Converter>;
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
<script setup lang="ts">
|
<script setup lang="ts">
|
||||||
import { type ConverterId, SKIP_PRINTABLE_ASCII_RE, converters } from './text-to-unicode.service';
|
import { type ConverterId, converters } from './text-to-unicode.service';
|
||||||
import { useCopy } from '@/composable/copy';
|
import { useCopy } from '@/composable/copy';
|
||||||
|
|
||||||
const converterId = ref<ConverterId>('fullUnicode');
|
const converterId = ref<ConverterId>('fullUnicode');
|
||||||
|
@ -9,7 +9,7 @@ const inputText = ref('');
|
||||||
const unicodeFromText = computed(() =>
|
const unicodeFromText = computed(() =>
|
||||||
inputText.value.trim() === ''
|
inputText.value.trim() === ''
|
||||||
? ''
|
? ''
|
||||||
: converters[converterId.value].escape(inputText.value, skipAscii.value ? SKIP_PRINTABLE_ASCII_RE : undefined),
|
: converters[converterId.value].escape(inputText.value, skipAscii.value),
|
||||||
);
|
);
|
||||||
const { copy: copyUnicode } = useCopy({ source: unicodeFromText });
|
const { copy: copyUnicode } = useCopy({ source: unicodeFromText });
|
||||||
|
|
||||||
|
@ -52,7 +52,7 @@ const { copy: copyText } = useCopy({ source: textFromUnicode });
|
||||||
test-id="text-to-unicode-output"
|
test-id="text-to-unicode-output"
|
||||||
/>
|
/>
|
||||||
<div mt-2 flex justify-start>
|
<div mt-2 flex justify-start>
|
||||||
<n-form-item label="Skip ASCII?" :show-feedback="false" label-placement="left">
|
<n-form-item label="Skip ASCII chars with no special meaning?" :show-feedback="false" label-placement="left">
|
||||||
<n-switch v-model:value="skipAscii" />
|
<n-switch v-model:value="skipAscii" />
|
||||||
</n-form-item>
|
</n-form-item>
|
||||||
</div>
|
</div>
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue