feat: add unique words count and read time

Fix #1214
This commit is contained in:
ShareVB 2024-08-25 22:06:55 +02:00
parent af1013da36
commit 1cf54a84a4
3 changed files with 64 additions and 9 deletions

View file

@ -14,7 +14,7 @@ describe('text-statistics', () => {
describe('textStatistics', () => { describe('textStatistics', () => {
it('should return text statistics', () => { it('should return text statistics', () => {
expect(textStatistics('a')).toEqual({ expect(textStatistics('a')).to.deep.eq({
chars: 1, chars: 1,
chars_digits: 0, chars_digits: 0,
chars_lower: 1, chars_lower: 1,
@ -26,8 +26,11 @@ describe('text-statistics', () => {
sentences: 1, sentences: 1,
words: 1, words: 1,
words_no_puncs: 1, words_no_puncs: 1,
read_time: 0.3,
words_uniques: 1,
words_uniques_ci: 1,
}); });
expect(textStatistics('A')).toEqual({ expect(textStatistics('A')).to.deep.eq({
chars: 1, chars: 1,
chars_digits: 0, chars_digits: 0,
chars_lower: 0, chars_lower: 0,
@ -39,8 +42,11 @@ describe('text-statistics', () => {
sentences: 1, sentences: 1,
words: 1, words: 1,
words_no_puncs: 1, words_no_puncs: 1,
read_time: 0.3,
words_uniques: 1,
words_uniques_ci: 1,
}); });
expect(textStatistics('a a')).toEqual({ expect(textStatistics('a a')).to.deep.eq({
chars: 3, chars: 3,
chars_digits: 0, chars_digits: 0,
chars_lower: 2, chars_lower: 2,
@ -52,8 +58,11 @@ describe('text-statistics', () => {
sentences: 1, sentences: 1,
words: 2, words: 2,
words_no_puncs: 2, words_no_puncs: 2,
read_time: 0.6,
words_uniques: 1,
words_uniques_ci: 1,
}); });
expect(textStatistics('A a ; 1')).toEqual({ expect(textStatistics('A a ; 1')).to.deep.eq({
chars: 7, chars: 7,
chars_digits: 1, chars_digits: 1,
chars_lower: 1, chars_lower: 1,
@ -65,8 +74,11 @@ describe('text-statistics', () => {
sentences: 1, sentences: 1,
words: 4, words: 4,
words_no_puncs: 3, words_no_puncs: 3,
read_time: 0.8999999999999999,
words_uniques: 3,
words_uniques_ci: 2,
}); });
expect(textStatistics('Some sentence! Une autre phrase ? « et avec des chiffres 1234 ! »')).toEqual({ expect(textStatistics('Some sentence! Une autre phrase ? « et avec des chiffres 1234 ! »')).to.deep.eq({
chars: 65, chars: 65,
chars_digits: 4, chars_digits: 4,
chars_lower: 41, chars_lower: 41,
@ -78,9 +90,12 @@ describe('text-statistics', () => {
sentences: 3, sentences: 3,
words: 14, words: 14,
words_no_puncs: 10, words_no_puncs: 10,
read_time: 3,
words_uniques: 10,
words_uniques_ci: 10,
}); });
expect(textStatistics(`Some sentence! Une autre phrase ? expect(textStatistics(`Some sentence! Une autre phrase ?
« et avec des chiffres 1234 ! »`)).toEqual({ « et avec des chiffres 1234 ! »`)).to.deep.eq({
chars: 72, chars: 72,
chars_digits: 4, chars_digits: 4,
chars_lower: 41, chars_lower: 41,
@ -92,8 +107,11 @@ describe('text-statistics', () => {
sentences: 3, sentences: 3,
words: 14, words: 14,
words_no_puncs: 10, words_no_puncs: 10,
read_time: 3,
words_uniques: 10,
words_uniques_ci: 10,
}); });
expect(textStatistics('12 35')).toEqual({ expect(textStatistics('12 35')).to.deep.eq({
chars: 5, chars: 5,
chars_digits: 4, chars_digits: 4,
chars_lower: 0, chars_lower: 0,
@ -105,8 +123,11 @@ describe('text-statistics', () => {
sentences: 1, sentences: 1,
words: 2, words: 2,
words_no_puncs: 2, words_no_puncs: 2,
read_time: 0.6,
words_uniques: 2,
words_uniques_ci: 2,
}); });
expect(textStatistics(' 1 2 3. Other ')).toEqual({ expect(textStatistics(' 1 2 3. Other ')).to.deep.eq({
chars: 14, chars: 14,
chars_digits: 3, chars_digits: 3,
chars_lower: 4, chars_lower: 4,
@ -118,6 +139,26 @@ describe('text-statistics', () => {
sentences: 2, sentences: 2,
words: 4, words: 4,
words_no_puncs: 4, words_no_puncs: 4,
read_time: 1.2,
words_uniques: 4,
words_uniques_ci: 4,
});
expect(textStatistics('Az az er')).to.deep.eq({
chars: 8,
chars_digits: 0,
chars_lower: 5,
chars_no_spaces: 6,
chars_puncts: 0,
chars_spaces: 2,
chars_upper: 1,
lines: 1,
read_time: 0.8999999999999999,
sentences: 1,
words: 3,
words_no_puncs: 3,
words_uniques: 3,
words_uniques_ci: 2,
}); });
}); });
}); });

View file

@ -3,6 +3,8 @@ export function getStringSizeInBytes(text: string) {
} }
export function textStatistics(text: string) { export function textStatistics(text: string) {
const words_no_puncts = text.replace(/\p{P}/ug, '').trim().split(/\s+/);
const read_word_per_minutes = 200;
return { return {
chars: text.length, chars: text.length,
chars_no_spaces: text.replace(/\s+/ug, '').length, chars_no_spaces: text.replace(/\s+/ug, '').length,
@ -12,7 +14,10 @@ export function textStatistics(text: string) {
chars_puncts: text.replace(/[^\p{P}]/ug, '').length, chars_puncts: text.replace(/[^\p{P}]/ug, '').length,
chars_spaces: text.replace(/\S/ug, '').length, chars_spaces: text.replace(/\S/ug, '').length,
words: text.trim().split(/\s+/).length, words: text.trim().split(/\s+/).length,
words_no_puncs: text.replace(/\p{P}/ug, '').trim().split(/\s+/).length, read_time: words_no_puncts.length / read_word_per_minutes * 60,
words_no_puncs: words_no_puncts.length,
words_uniques: (new Set(words_no_puncts)).size,
words_uniques_ci: (new Set(words_no_puncts.map(s => s.toLowerCase()))).size,
sentences: (`${text} `).split(/\w\s*[\.!\?][\s\p{P}]*\s/u).filter(s => s && s?.length > 0).length, sentences: (`${text} `).split(/\w\s*[\.!\?][\s\p{P}]*\s/u).filter(s => s && s?.length > 0).length,
lines: text.split(/\r\n|\r|\n/).length, lines: text.split(/\r\n|\r|\n/).length,
}; };

View file

@ -1,4 +1,5 @@
<script setup lang="ts"> <script setup lang="ts">
import { formatMsDuration } from '../eta-calculator/eta-calculator.service';
import { getStringSizeInBytes, textStatistics } from './text-statistics.service'; import { getStringSizeInBytes, textStatistics } from './text-statistics.service';
import { formatBytes } from '@/utils/convert'; import { formatBytes } from '@/utils/convert';
@ -20,6 +21,14 @@ const stats = computed(() => textStatistics(text.value));
<n-divider /> <n-divider />
<n-space mt-3>
<n-statistic label="Unique Word count" :value="stats.words_uniques" />
<n-statistic label="Unique Word count (case insensitive)" :value="stats.words_uniques_ci" />
<n-statistic label="Read Time" :value="formatMsDuration(stats.read_time * 1000)" />
</n-space>
<n-divider />
<n-space> <n-space>
<n-statistic label="Chars (no spaces)" :value="stats.chars_no_spaces" /> <n-statistic label="Chars (no spaces)" :value="stats.chars_no_spaces" />
<n-statistic label="Uppercase chars" :value="stats.chars_upper" /> <n-statistic label="Uppercase chars" :value="stats.chars_upper" />