feat: refactor text stats to service

This commit is contained in:
sharevb 2024-04-28 13:04:53 +02:00 committed by ShareVB
parent b7debece4d
commit 7f2bf11bd9
3 changed files with 128 additions and 1 deletions

View file

@ -1,5 +1,5 @@
import { describe, expect, it } from 'vitest';
import { getStringSizeInBytes } from './text-statistics.service';
import { getStringSizeInBytes, textStatistics } from './text-statistics.service';
describe('text-statistics', () => {
describe('getStringSizeInBytes', () => {
@ -11,4 +11,114 @@ describe('text-statistics', () => {
expect(getStringSizeInBytes('aaaaaaaaaa')).toEqual(10);
});
});
// Fixture-based checks for textStatistics: each call compares the complete
// statistics object returned for one input against hand-computed values.
describe('textStatistics', () => {
it('should return text statistics', () => {
// Single lowercase letter.
expect(textStatistics('a')).toEqual({
chars: 1,
chars_digits: 0,
chars_lower: 1,
chars_no_spaces: 1,
chars_puncts: 0,
chars_spaces: 0,
chars_upper: 0,
lines: 1,
sentences: 1,
words: 1,
words_no_puncs: 1,
});
// Single uppercase letter: only the lower/upper counters swap.
expect(textStatistics('A')).toEqual({
chars: 1,
chars_digits: 0,
chars_lower: 0,
chars_no_spaces: 1,
chars_puncts: 0,
chars_spaces: 0,
chars_upper: 1,
lines: 1,
sentences: 1,
words: 1,
words_no_puncs: 1,
});
// Two words separated by one space.
expect(textStatistics('a a')).toEqual({
chars: 3,
chars_digits: 0,
chars_lower: 2,
chars_no_spaces: 2,
chars_puncts: 0,
chars_spaces: 1,
chars_upper: 0,
lines: 1,
sentences: 1,
words: 2,
words_no_puncs: 2,
});
// Mixed upper / lower / punctuation / digit tokens: the lone ';' counts as a
// word (words: 4) but not as a word without punctuation (words_no_puncs: 3).
expect(textStatistics('A a ; 1')).toEqual({
chars: 7,
chars_digits: 1,
chars_lower: 1,
chars_no_spaces: 4,
chars_puncts: 1,
chars_spaces: 3,
chars_upper: 1,
lines: 1,
sentences: 1,
words: 4,
words_no_puncs: 3,
});
// Three sentences on one line, with French-style spacing before '?' and '!'
// and guillemets around the last sentence.
expect(textStatistics('Some sentence! Une autre phrase ? « et avec des chiffres 1234 ! »')).toEqual({
chars: 65,
chars_digits: 4,
chars_lower: 41,
chars_no_spaces: 52,
chars_puncts: 5,
chars_spaces: 13,
chars_upper: 2,
lines: 1,
sentences: 3,
words: 14,
words_no_puncs: 10,
});
// Same text split over two lines (lines: 2).
// NOTE(review): the expected chars (72) and chars_spaces (20) are larger than
// the whitespace visible in this literal, so the continuation line presumably
// carries leading indentation in the real file — confirm before reformatting.
expect(textStatistics(`Some sentence! Une autre phrase ?
« et avec des chiffres 1234 ! »`)).toEqual({
chars: 72,
chars_digits: 4,
chars_lower: 41,
chars_no_spaces: 52,
chars_puncts: 5,
chars_spaces: 20,
chars_upper: 2,
lines: 2,
sentences: 3,
words: 14,
words_no_puncs: 10,
});
// Digits only: two numeric words, no letters.
expect(textStatistics('12 35')).toEqual({
chars: 5,
chars_digits: 4,
chars_lower: 0,
chars_no_spaces: 4,
chars_puncts: 0,
chars_spaces: 1,
chars_upper: 0,
lines: 1,
sentences: 1,
words: 2,
words_no_puncs: 2,
});
// Leading and trailing spaces are ignored for word counts; the '.' after the
// digit ends the first sentence (sentences: 2).
expect(textStatistics(' 1 2 3. Other ')).toEqual({
chars: 14,
chars_digits: 3,
chars_lower: 4,
chars_no_spaces: 9,
chars_puncts: 1,
chars_spaces: 5,
chars_upper: 1,
lines: 1,
sentences: 2,
words: 4,
words_no_puncs: 4,
});
});
});
});

View file

@ -1,3 +1,19 @@
/**
 * Measures how many bytes `text` occupies once encoded with `TextEncoder`
 * (which always produces UTF-8).
 *
 * @param text - String to measure.
 * @returns Byte length of the buffer backing the encoded output.
 */
export function getStringSizeInBytes(text: string) {
  const encoder = new TextEncoder();
  const { buffer } = encoder.encode(text);

  return buffer.byteLength;
}
/**
 * Sentence boundary: a letter, combining mark, digit or underscore, optional
 * whitespace, a terminator (`.`, `!` or `?`), any run of whitespace or
 * punctuation (closing quotes, guillemets, ...), then one whitespace character.
 * Unicode properties are used instead of `\w`, which only matches ASCII and
 * would miss sentences ending in an accented letter.
 */
const SENTENCE_BOUNDARY = /[\p{L}\p{M}\p{N}_]\s*[.!?][\s\p{P}]*\s/u;

/** Counts whitespace-separated tokens; empty or blank text has zero words. */
function countWords(text: string): number {
  const trimmed = text.trim();
  // ''.split(/\s+/) yields [''], so the empty case must be handled explicitly.
  return trimmed === '' ? 0 : trimmed.split(/\s+/).length;
}

/** Counts the non-blank fragments left after splitting on sentence boundaries. */
function countSentences(text: string): number {
  // A trailing space is appended so that a terminator at the very end of the
  // text is still followed by the whitespace the boundary pattern requires.
  return `${text} `
    .split(SENTENCE_BOUNDARY)
    .filter(fragment => fragment.trim().length > 0)
    .length;
}

/**
 * Computes basic statistics about a piece of text.
 *
 * All character counts are measured in UTF-16 code units (`String.length`).
 *
 * @param text - Text to analyse.
 * @returns An object with:
 * - `chars`: total length of the text;
 * - `chars_no_spaces`: length once all whitespace is removed;
 * - `chars_upper` / `chars_lower`: Unicode uppercase / lowercase letters;
 * - `chars_digits`: ASCII digits (`0`-`9`);
 * - `chars_puncts`: Unicode punctuation characters;
 * - `chars_spaces`: whitespace characters (including line breaks);
 * - `words`: whitespace-separated tokens (0 for empty or blank text);
 * - `words_no_puncs`: same, after removing punctuation;
 * - `sentences`: non-blank fragments between sentence terminators
 *   (0 for empty or blank text);
 * - `lines`: segments separated by `\r\n`, `\r` or `\n` (1 for empty text).
 */
export function textStatistics(text: string) {
  return {
    chars: text.length,
    chars_no_spaces: text.replace(/\s+/ug, '').length,
    chars_upper: text.replace(/[^\p{Lu}]/ug, '').length,
    chars_lower: text.replace(/[^\p{Ll}]/ug, '').length,
    chars_digits: text.replace(/\D+/ug, '').length,
    chars_puncts: text.replace(/[^\p{P}]/ug, '').length,
    chars_spaces: text.replace(/\S/ug, '').length,
    words: countWords(text),
    words_no_puncs: countWords(text.replace(/\p{P}/ug, '')),
    sentences: countSentences(text),
    lines: text.split(/\r\n|\r|\n/).length,
  };
}