From 7a1beb5dd376f1edc037f55ffd5ff9794922002d Mon Sep 17 00:00:00 2001 From: ShareVB Date: Sun, 28 Apr 2024 14:57:17 +0200 Subject: [PATCH] feat: add OCRized text statistics --- components.d.ts | 11 +---------- src/tools/ocr-image/ocr-image.vue | 28 +++++++++++++++++++++++++++- 2 files changed, 28 insertions(+), 11 deletions(-) diff --git a/components.d.ts b/components.d.ts index 4a61d5b6..2e08fd8e 100644 --- a/components.d.ts +++ b/components.d.ts @@ -132,25 +132,16 @@ declare module '@vue/runtime-core' { NConfigProvider: typeof import('naive-ui')['NConfigProvider'] NDivider: typeof import('naive-ui')['NDivider'] NEllipsis: typeof import('naive-ui')['NEllipsis'] - NFormItem: typeof import('naive-ui')['NFormItem'] - NGi: typeof import('naive-ui')['NGi'] - NGrid: typeof import('naive-ui')['NGrid'] NH1: typeof import('naive-ui')['NH1'] NH3: typeof import('naive-ui')['NH3'] NIcon: typeof import('naive-ui')['NIcon'] - NInputNumber: typeof import('naive-ui')['NInputNumber'] - NLabel: typeof import('naive-ui')['NLabel'] NLayout: typeof import('naive-ui')['NLayout'] NLayoutSider: typeof import('naive-ui')['NLayoutSider'] NMenu: typeof import('naive-ui')['NMenu'] NScrollbar: typeof import('naive-ui')['NScrollbar'] - NSlider: typeof import('naive-ui')['NSlider'] + NSpace: typeof import('naive-ui')['NSpace'] NSpin: typeof import('naive-ui')['NSpin'] NStatistic: typeof import('naive-ui')['NStatistic'] - NSwitch: typeof import('naive-ui')['NSwitch'] - NTable: typeof import('naive-ui')['NTable'] - NTag: typeof import('naive-ui')['NTag'] - NSpin: typeof import('naive-ui')['NSpin'] NumeronymGenerator: typeof import('./src/tools/numeronym-generator/numeronym-generator.vue')['default'] OcrImage: typeof import('./src/tools/ocr-image/ocr-image.vue')['default'] OtpCodeGeneratorAndValidator: typeof import('./src/tools/otp-code-generator-and-validator/otp-code-generator-and-validator.vue')['default'] diff --git a/src/tools/ocr-image/ocr-image.vue b/src/tools/ocr-image/ocr-image.vue index bbde03b4..9609d7e2 100644 --- a/src/tools/ocr-image/ocr-image.vue +++ b/src/tools/ocr-image/ocr-image.vue @@ -4,6 +4,7 @@ import { createWorker } from 'tesseract.js'; import { getDocument } from 'pdfjs-dist'; import * as pdfJS from 'pdfjs-dist'; import pdfJSWorkerURL from 'pdfjs-dist/build/pdf.worker?url'; +import { textStatistics } from '../text-statistics/text-statistics.service'; import TextareaCopyable from '@/components/TextareaCopyable.vue'; import { useQueryParamOrStorage } from '@/composable/queryParams'; @@ -115,6 +116,7 @@ const languagesOptions = Array.from(languages.map(l => ({ const language = useQueryParamOrStorage({ name: 'lang', storageName: 'ocr-image:lang', defaultValue: 'eng' }); +const pageSeparator = '\n=============\n'; const ocrInProgress = ref(false); const fileInput = ref() as Ref; const ocrText = computedAsync(async () => { @@ -125,6 +127,8 @@ const ocrText = computedAsync(async () => { return e.toString(); } }); +const stats = computed(() => textStatistics(ocrText.value?.replace(new RegExp(pageSeparator, 'g'), ' ') || '')); +const pageCount = computed(() => ocrText.value?.split(new RegExp(pageSeparator, 'g')).length || 0); async function onUpload(file: File) { if (file) { @@ -180,7 +184,7 @@ async function ocr(file: File, language: string) { } await worker.terminate(); ocrInProgress.value = false; - return allTexts.join('\n=============\n'); + return allTexts.join(pageSeparator); }; @@ -215,6 +219,28 @@ async function ocr(file: File, language: string) { size="small" /> + + + + + + + + + + + + + + + + + + + + + +