mirror of
https://github.com/CorentinTh/it-tools.git
synced 2025-05-05 05:47:10 -04:00
fix(text-to-unicode): handle non-BMP + more conversion options
This commit is contained in:
parent
e876d03608
commit
b0ae8d7b3d
3 changed files with 257 additions and 28 deletions
|
@ -1,7 +1,10 @@
|
||||||
import { describe, expect, it } from 'vitest';
|
import { describe, expect, it } from 'vitest';
|
||||||
import { convertTextToUnicode, convertUnicodeToText } from './text-to-unicode.service';
|
import { type ConverterId, SKIP_PRINTABLE_ASCII_RE, converters } from './text-to-unicode.service';
|
||||||
|
|
||||||
|
describe('text-to-unicode (legacy tests)', () => {
|
||||||
|
const convertTextToUnicode = converters.decimalEntities.escape;
|
||||||
|
const convertUnicodeToText = converters.decimalEntities.unescape;
|
||||||
|
|
||||||
describe('text-to-unicode', () => {
|
|
||||||
describe('convertTextToUnicode', () => {
|
describe('convertTextToUnicode', () => {
|
||||||
it('a text string is converted to unicode representation', () => {
|
it('a text string is converted to unicode representation', () => {
|
||||||
expect(convertTextToUnicode('A')).toBe('A');
|
expect(convertTextToUnicode('A')).toBe('A');
|
||||||
|
@ -18,3 +21,71 @@ describe('text-to-unicode', () => {
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Table-driven round-trip tests: every converter must escape `text` into the
// expected representation and unescape that representation back into `text`.
describe('text-to-unicode', () => {
  interface TestConfig {
    /** Plain text fed to `escape` and expected back from `unescape`. */
    text: string
    /** Expected escaped output, keyed by converter id. */
    results: Record<ConverterId, string>
    /** When true, printable ASCII is passed through unescaped. */
    skipPrintableAscii?: boolean
  }

  // NOTE: the HTML-entity expectations must be the literal numeric character
  // references (e.g. `&#x41;`), not the characters they decode to.
  const tests: TestConfig[] = [
    {
      text: 'ABC',
      results: {
        fullUnicode: String.raw`\u0041\u0042\u0043`,
        utf16: String.raw`\u0041\u0042\u0043`,
        hexEntities: String.raw`&#x41;&#x42;&#x43;`,
        decimalEntities: String.raw`&#65;&#66;&#67;`,
      },
    },
    {
      text: 'ABC',
      skipPrintableAscii: true,
      results: {
        fullUnicode: 'ABC',
        utf16: 'ABC',
        hexEntities: 'ABC',
        decimalEntities: 'ABC',
      },
    },
    {
      text: '文字',
      results: {
        // eslint-disable-next-line unicorn/escape-case
        fullUnicode: String.raw`\u6587\u5b57`,
        // eslint-disable-next-line unicorn/escape-case
        utf16: String.raw`\u6587\u5b57`,
        hexEntities: String.raw`&#x6587;&#x5b57;`,
        decimalEntities: String.raw`&#25991;&#23383;`,
      },
    },
    {
      // Non-BMP character: one code point, two UTF-16 code units.
      text: 'a 💩 b',
      skipPrintableAscii: true,
      results: {
        // eslint-disable-next-line unicorn/escape-case
        fullUnicode: String.raw`a \u{1f4a9} b`,
        // eslint-disable-next-line unicorn/escape-case
        utf16: String.raw`a \ud83d\udca9 b`,
        hexEntities: String.raw`a &#x1f4a9; b`,
        decimalEntities: String.raw`a &#128169; b`,
      },
    },
  ];

  for (const { text, skipPrintableAscii: skipAscii, results } of tests) {
    describe(`${text} (skipAscii=${skipAscii})`, () => {
      for (const [key, result] of Object.entries(results)) {
        describe(key, () => {
          const converter = converters[key as ConverterId];
          it('Escaping', () => {
            expect(converter.escape(text, skipAscii ? SKIP_PRINTABLE_ASCII_RE : undefined)).toBe(result);
          });
          it('Unescaping', () => {
            expect(converter.unescape(result)).toBe(text);
          });
        });
      }
    });
  }
});
|
||||||
|
|
|
@ -1,9 +1,95 @@
|
||||||
function convertTextToUnicode(text: string): string {
|
// regex that never matches
|
||||||
return text.split('').map(value => `&#${value.charCodeAt(0)};`).join('');
|
const SKIP_NOTHING_RE = /(\b\B)/;
|
||||||
|
export const SKIP_PRINTABLE_ASCII_RE = /([ -~]+)/g;
|
||||||
|
|
||||||
|
/**
 * Splits `text` into its UTF-16 code units.
 *
 * A character outside the Basic Multilingual Plane produces two values (its
 * high and low surrogate), one per code unit.
 *
 * @param text input string
 * @returns one number per UTF-16 code unit, in order; empty for `''`
 */
function _codeUnits(text: string): number[] {
  // charCodeAt is the code-unit accessor and is typed `number`, whereas
  // codePointAt is typed `number | undefined` and would not fit `number[]`.
  return Array.from({ length: text.length }, (_, index) => text.charCodeAt(index));
}
|
||||||
|
|
||||||
function convertUnicodeToText(unicodeStr: string): string {
|
/**
 * Splits `text` into its Unicode code points.
 *
 * Iterating a string walks it by code point, so a surrogate pair is reported
 * as a single value above 0xFFFF.
 *
 * @param text input string
 * @returns one number per code point, in order; empty for `''`
 */
function _codePoints(text: string): number[] {
  const points: number[] = [];
  for (const char of text) {
    // codePointAt is typed `number | undefined`; narrow explicitly instead of
    // letting `undefined` leak into the `number[]` result.
    const point = char.codePointAt(0);
    if (point !== undefined) {
      points.push(point);
    }
  }
  return points;
}
|
||||||
|
|
||||||
export { convertTextToUnicode, convertUnicodeToText };
|
/**
 * A bidirectional text <-> escaped-representation converter.
 */
export interface Converter {
  /** Human-readable label for the conversion type. */
  name: string
  /**
   * Escapes `text`.
   *
   * @param text text to escape
   * @param skip optional regular expression describing content that must be
   *   left unescaped; callers may omit it or pass `undefined`
   */
  escape(text: string, skip?: RegExp): string
  /** Converts an escaped string back into plain text. */
  unescape(text: string): string
}
|
||||||
|
|
||||||
|
interface EscapeConfig {
  /** Splits text into numeric character values; `_codePoints` is used when omitted. */
  getCharValues?(text: string): number[]
  /** Renders one numeric character value as its escaped form. */
  mapper(charValue: number): string
}

/**
 * Builds an escape function from a character splitter and a per-value mapper.
 *
 * @param config splitter (optional) and mapper used by the returned function
 * @returns a function that escapes a text, optionally leaving some content as-is
 */
function escaper({ getCharValues, mapper }: EscapeConfig) {
  // Resolve the default once, into a const, so the value is known to be
  // defined inside the callback below (a reassigned parameter is not narrowed
  // across function boundaries).
  const toCharValues = getCharValues ?? _codePoints;

  /**
   * @param text text input to escape
   * @param skipper regular expression for content _not_ to escape. Must have exactly 1 capture group.
   */
  return (text: string, skipper?: RegExp): string => {
    return text
      // Splitting on a regex with one capture group yields alternating
      // entries: even indexes are the text between matches, odd indexes are
      // the captured (to-be-skipped) matches.
      .split(skipper ?? SKIP_NOTHING_RE)
      .flatMap((segment, index) => {
        if (index % 2) {
          return segment;
        }
        return toCharValues(segment).map(mapper);
      })
      .join('');
  };
}
|
||||||
|
|
||||||
|
interface UnescapeConfig {
  /** Matches one complete escape sequence; expected to carry the `g` flag so every occurrence is replaced. */
  regex: RegExp
  /** Base of the digits inside the escape sequence (e.g. 10 or 16). */
  radix: number
}

/**
 * Builds an unescape function that replaces every `regex` match with the
 * character whose code point is encoded in the match.
 *
 * @param config pattern for one escape sequence and the radix of its digits
 * @returns a function mapping an escaped string back to plain text; escape
 *   sequences that do not denote a valid code point are left unchanged
 */
function unescaper({ regex, radix }: UnescapeConfig) {
  return (escaped: string): string => {
    return escaped.replace(regex, (match) => {
      // Drop everything that is not an ASCII hex digit (prefix, braces,
      // terminator) so only the numeric payload is parsed.
      const codePoint = Number.parseInt(match.replace(/\P{AHex}/gu, ''), radix);

      // String.fromCodePoint throws a RangeError for NaN and for values
      // outside 0..0x10FFFF; keep such input verbatim instead of throwing.
      if (!Number.isInteger(codePoint) || codePoint < 0 || codePoint > 0x10FFFF) {
        return match;
      }
      return String.fromCodePoint(codePoint);
    });
  };
}
|
||||||
|
|
||||||
|
// Patterns recognising one escape sequence of each supported notation.
const FULL_UNICODE_ESCAPE_RE = /\\u\p{AHex}{4}|\\u\{\p{AHex}{1,6}\}/gu;
const UTF16_ESCAPE_RE = /\\u\p{AHex}{4}/gu;
const HEX_ENTITY_RE = /&#x\p{AHex}{1,6};/gu;
const DECIMAL_ENTITY_RE = /&#\d+;/gu;

/**
 * Registry of the available conversions, keyed by converter id. Each entry
 * pairs an escape function with the matching unescape function.
 */
const converters = {
  fullUnicode: {
    name: 'Full Unicode',
    escape: escaper({ mapper: convertCodePointToUnicode }),
    unescape: unescaper({ regex: FULL_UNICODE_ESCAPE_RE, radix: 16 }),
  },
  utf16: {
    name: 'UTF-16 Code Units',
    escape: escaper({ getCharValues: _codeUnits, mapper: convertCodePointToUnicode }),
    unescape: unescaper({ regex: UTF16_ESCAPE_RE, radix: 16 }),
  },
  hexEntities: {
    name: 'HTML Entities (Hex)',
    escape: escaper({ mapper: toHexEntities }),
    unescape: unescaper({ regex: HEX_ENTITY_RE, radix: 16 }),
  },
  decimalEntities: {
    name: 'HTML Entities (Decimal)',
    escape: escaper({ mapper: toDecimalEntities }),
    unescape: unescaper({ regex: DECIMAL_ENTITY_RE, radix: 10 }),
  },
} satisfies Record<string, Converter>;

/** Union of the keys of `converters`. */
export type ConverterId = keyof typeof converters;
|
||||||
|
|
||||||
|
/**
 * Formats a numeric character value as a backslash-u escape.
 *
 * Values whose hexadecimal form fits in four digits become `\uXXXX`
 * (zero-padded); longer ones use the braced form `\u{...}`.
 *
 * @param codePoint numeric character value to format
 * @returns the escape sequence, with lowercase hex digits
 */
function convertCodePointToUnicode(codePoint: number): string {
  const digits = codePoint.toString(16);
  if (digits.length <= 4) {
    return `\\u${digits.padStart(4, '0')}`;
  }
  return `\\u{${digits}}`;
}
|
||||||
|
|
||||||
|
/**
 * Formats a numeric character value as a hexadecimal HTML character reference.
 *
 * @param codePoint numeric character value to format
 * @returns `&#x`, the lowercase hex digits, then `;`
 */
function toHexEntities(codePoint: number): string {
  const hexDigits = codePoint.toString(16);
  return ['&#x', hexDigits, ';'].join('');
}
|
||||||
|
|
||||||
|
/**
 * Formats a numeric character value as a decimal HTML character reference.
 *
 * @param codePoint numeric character value to format
 * @returns `&#`, the decimal digits, then `;`
 */
function toDecimalEntities(codePoint: number): string {
  const decimalDigits = String(codePoint);
  return ['&#', decimalDigits, ';'].join('');
}
|
||||||
|
|
||||||
|
export { converters };
|
||||||
|
|
|
@ -1,34 +1,106 @@
|
||||||
<script setup lang="ts">
|
<script setup lang="ts">
|
||||||
import { convertTextToUnicode, convertUnicodeToText } from './text-to-unicode.service';
|
import { type ConverterId, SKIP_PRINTABLE_ASCII_RE, converters } from './text-to-unicode.service';
|
||||||
import { useCopy } from '@/composable/copy';
|
import { useCopy } from '@/composable/copy';
|
||||||
|
|
||||||
|
const converterId = ref<ConverterId>('fullUnicode');
|
||||||
|
const skipAscii = ref(true);
|
||||||
|
|
||||||
const inputText = ref('');
|
const inputText = ref('');
|
||||||
const unicodeFromText = computed(() => inputText.value.trim() === '' ? '' : convertTextToUnicode(inputText.value));
|
const unicodeFromText = computed(() =>
|
||||||
|
inputText.value.trim() === ''
|
||||||
|
? ''
|
||||||
|
: converters[converterId.value].escape(inputText.value, skipAscii.value ? SKIP_PRINTABLE_ASCII_RE : undefined),
|
||||||
|
);
|
||||||
const { copy: copyUnicode } = useCopy({ source: unicodeFromText });
|
const { copy: copyUnicode } = useCopy({ source: unicodeFromText });
|
||||||
|
|
||||||
const inputUnicode = ref('');
|
const inputUnicode = ref('');
|
||||||
const textFromUnicode = computed(() => inputUnicode.value.trim() === '' ? '' : convertUnicodeToText(inputUnicode.value));
|
const textFromUnicode = computed(() =>
|
||||||
|
inputUnicode.value.trim() === '' ? '' : converters[converterId.value].unescape(inputUnicode.value),
|
||||||
|
);
|
||||||
const { copy: copyText } = useCopy({ source: textFromUnicode });
|
const { copy: copyText } = useCopy({ source: textFromUnicode });
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
<template>
|
<template>
|
||||||
<c-card title="Text to Unicode">
|
<div class="outer" flex flex-col gap-6>
|
||||||
<c-input-text v-model:value="inputText" multiline placeholder="e.g. 'Hello Avengers'" label="Enter text to convert to unicode" autosize autofocus raw-text test-id="text-to-unicode-input" />
|
<div class="controls">
|
||||||
<c-input-text v-model:value="unicodeFromText" label="Unicode from your text" multiline raw-text readonly mt-2 placeholder="The unicode representation of your text will be here" test-id="text-to-unicode-output" />
|
<c-select
|
||||||
|
v-model:value="converterId"
|
||||||
|
searchable
|
||||||
|
label="Conversion type:"
|
||||||
|
:options="Object.entries(converters).map(([key, val]) => ({ label: val.name, value: key }))"
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
<c-card class="card" title="Text to Unicode">
|
||||||
|
<c-input-text
|
||||||
|
v-model:value="inputText"
|
||||||
|
multiline
|
||||||
|
placeholder="e.g. 'Hello Avengers'"
|
||||||
|
label="Enter text to convert to Unicode"
|
||||||
|
autosize
|
||||||
|
autofocus
|
||||||
|
raw-text
|
||||||
|
test-id="text-to-unicode-input"
|
||||||
|
/>
|
||||||
|
<c-input-text
|
||||||
|
v-model:value="unicodeFromText"
|
||||||
|
label="Unicode from your text"
|
||||||
|
multiline
|
||||||
|
raw-text
|
||||||
|
readonly
|
||||||
|
mt-2
|
||||||
|
placeholder="The unicode representation of your text will be here"
|
||||||
|
test-id="text-to-unicode-output"
|
||||||
|
/>
|
||||||
|
<div mt-2 flex justify-start>
|
||||||
|
<n-form-item label="Skip ASCII?" :show-feedback="false" label-placement="left">
|
||||||
|
<n-switch v-model:value="skipAscii" />
|
||||||
|
</n-form-item>
|
||||||
|
</div>
|
||||||
<div mt-2 flex justify-center>
|
<div mt-2 flex justify-center>
|
||||||
<c-button :disabled="!unicodeFromText" @click="copyUnicode()">
|
<c-button :disabled="!unicodeFromText" @click="copyUnicode()"> Copy unicode to clipboard </c-button>
|
||||||
Copy unicode to clipboard
|
|
||||||
</c-button>
|
|
||||||
</div>
|
</div>
|
||||||
</c-card>
|
</c-card>
|
||||||
|
<c-card class="card" title="Unicode to Text">
|
||||||
<c-card title="Unicode to Text">
|
<c-input-text
|
||||||
<c-input-text v-model:value="inputUnicode" multiline placeholder="Input Unicode" label="Enter unicode to convert to text" autosize raw-text test-id="unicode-to-text-input" />
|
v-model:value="inputUnicode"
|
||||||
<c-input-text v-model:value="textFromUnicode" label="Text from your Unicode" multiline raw-text readonly mt-2 placeholder="The text representation of your unicode will be here" test-id="unicode-to-text-output" />
|
multiline
|
||||||
|
placeholder="Input Unicode"
|
||||||
|
label="Enter unicode to convert to text"
|
||||||
|
autosize
|
||||||
|
raw-text
|
||||||
|
test-id="unicode-to-text-input"
|
||||||
|
/>
|
||||||
|
<c-input-text
|
||||||
|
v-model:value="textFromUnicode"
|
||||||
|
label="Text from your Unicode"
|
||||||
|
multiline
|
||||||
|
raw-text
|
||||||
|
readonly
|
||||||
|
mt-2
|
||||||
|
placeholder="The text representation of your unicode will be here"
|
||||||
|
test-id="unicode-to-text-output"
|
||||||
|
/>
|
||||||
<div mt-2 flex justify-center>
|
<div mt-2 flex justify-center>
|
||||||
<c-button :disabled="!textFromUnicode" @click="copyText()">
|
<c-button :disabled="!textFromUnicode" @click="copyText()"> Copy text to clipboard </c-button>
|
||||||
Copy text to clipboard
|
|
||||||
</c-button>
|
|
||||||
</div>
|
</div>
|
||||||
</c-card>
|
</c-card>
|
||||||
|
</div>
|
||||||
</template>
|
</template>
|
||||||
|
|
||||||
|
<style lang="less" scoped>
|
||||||
|
.outer {
|
||||||
|
flex: 0 1 1200px;
|
||||||
|
margin-inline: 50px;
|
||||||
|
display: flex;
|
||||||
|
flex-direction: row;
|
||||||
|
flex-wrap: wrap;
|
||||||
|
}
|
||||||
|
|
||||||
|
.controls {
|
||||||
|
flex: 0 1 100%;
|
||||||
|
}
|
||||||
|
|
||||||
|
.card {
|
||||||
|
flex: 1 0 max(40%, 500px);
|
||||||
|
}
|
||||||
|
</style>
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue