fix: better display format (char, U+XXXX and name)

This commit is contained in:
ShareVB 2024-07-07 14:35:26 +02:00
parent 0b84e266fd
commit e220d4a074
3 changed files with 8 additions and 8 deletions

View file

@ -1,23 +1,23 @@
import { describe, expect, it } from 'vitest';
import { convertTextToUnicodeNames } from './text-to-unicode-names.service';
// Test suite for convertTextToUnicodeNames. Every character is expected to be rendered as
// `<char> (U+XXXX: <NAME>)`, with the entries joined by the separator (a single space by default).
//
// Expected values are assembled from one entry per character so that whitespace stays explicit:
// a space character renders as '  (U+0020: SPACE)' (the character itself, then ' (').
describe('text-to-unicode-names', () => {
  describe('convertTextToUnicodeNames', () => {
    it('a text string is converted to its unicode names representation', () => {
      expect(convertTextToUnicodeNames('A')).toBe(
        'A (U+0041: LATIN CAPITAL LETTER A)');

      expect(convertTextToUnicodeNames('hello')).toBe(
        [
          'h (U+0068: LATIN SMALL LETTER H)',
          'e (U+0065: LATIN SMALL LETTER E)',
          'l (U+006C: LATIN SMALL LETTER L)',
          'l (U+006C: LATIN SMALL LETTER L)',
          'o (U+006F: LATIN SMALL LETTER O)',
        ].join(' '));

      // Empty input produces no entries at all.
      expect(convertTextToUnicodeNames('')).toBe(
        '');

      expect(convertTextToUnicodeNames('être 1 $ ¤ …')).toBe(
        [
          'ê (U+00EA: LATIN SMALL LETTER E WITH CIRCUMFLEX)',
          't (U+0074: LATIN SMALL LETTER T)',
          'r (U+0072: LATIN SMALL LETTER R)',
          'e (U+0065: LATIN SMALL LETTER E)',
          '  (U+0020: SPACE)',
          '1 (U+0031: DIGIT ONE)',
          '  (U+0020: SPACE)',
          '$ (U+0024: DOLLAR SIGN)',
          '  (U+0020: SPACE)',
          '¤ (U+00A4: CURRENCY SIGN)',
          '  (U+0020: SPACE)',
          '… (U+2026: HORIZONTAL ELLIPSIS)',
        ].join(' '));

      // Characters outside the Basic Multilingual Plane must yield one entry each.
      expect(convertTextToUnicodeNames('⁇ 𥆧 💩')).toBe(
        [
          '⁇ (U+2047: DOUBLE QUESTION MARK)',
          '  (U+0020: SPACE)',
          '𥆧 (U+251A7: CJK Ideograph Extension B)',
          '  (U+0020: SPACE)',
          '💩 (U+1F4A9: PILE OF POO)',
        ].join(' '));
    });

    it('the separator between characters can be changed', () => {
      expect(convertTextToUnicodeNames('hello', { separator: ' ; ' })).toBe(
        [
          'h (U+0068: LATIN SMALL LETTER H)',
          'e (U+0065: LATIN SMALL LETTER E)',
          'l (U+006C: LATIN SMALL LETTER L)',
          'l (U+006C: LATIN SMALL LETTER L)',
          'o (U+006F: LATIN SMALL LETTER O)',
        ].join(' ; '));
    });
  });
});

View file

@ -2,6 +2,6 @@ import unicode from '@unicode/unicode-15.1.0/Names/index.js';
/**
 * Describes every character of `text` as `<char> (U+XXXX: <UNICODE NAME>)`.
 *
 * The text is split by code point rather than by UTF-16 code unit, so a character
 * outside the Basic Multilingual Plane (e.g. an emoji) produces a single entry.
 *
 * @param text - The text to describe; an empty string yields an empty string.
 * @param options - Formatting options.
 * @param options.separator - String inserted between two entries (a single space by default).
 * @returns The entries joined by `separator`. A code point for which the name lookup
 * returns nothing is labelled `UNKNOWN CHARACTER`.
 */
export function convertTextToUnicodeNames(text: string, { separator = ' ' }: { separator?: string } = {}): string {
  return [...text]
    .map((char) => {
      // Spreading a string never yields an empty element, so a code point is always
      // present; the fallback only narrows the `number | undefined` return type.
      const codePoint = char.codePointAt(0) ?? 0;
      // At least four uppercase hex digits, as in the conventional U+XXXX notation.
      const hex = codePoint.toString(16).toUpperCase().padStart(4, '0');
      const name = unicode.get(codePoint) || 'UNKNOWN CHARACTER';
      return `${char} (U+${hex}: ${name})`;
    })
    .join(separator);
}

View file

@ -2,7 +2,7 @@
import { convertTextToUnicodeNames } from './text-to-unicode-names.service';
const inputText = ref('');
const unicodeNamesFromText = computed(() => convertTextToUnicodeNames(inputText.value));
const unicodeNamesFromText = computed(() => convertTextToUnicodeNames(inputText.value, { separator: '\n' }));
</script>
<template>