mirror of
https://github.com/CorentinTh/it-tools.git
synced 2025-05-04 21:37:11 -04:00
feat(new tool): Text extractor from HTML
Fix https://github.com/CorentinTh/it-tools/issues/1035
This commit is contained in:
parent
b59942ad9f
commit
089853e05b
7 changed files with 128 additions and 4 deletions
10
components.d.ts
vendored
10
components.d.ts
vendored
|
@ -77,6 +77,7 @@ declare module '@vue/runtime-core' {
|
||||||
EmojiPicker: typeof import('./src/tools/emoji-picker/emoji-picker.vue')['default']
|
EmojiPicker: typeof import('./src/tools/emoji-picker/emoji-picker.vue')['default']
|
||||||
Encryption: typeof import('./src/tools/encryption/encryption.vue')['default']
|
Encryption: typeof import('./src/tools/encryption/encryption.vue')['default']
|
||||||
EtaCalculator: typeof import('./src/tools/eta-calculator/eta-calculator.vue')['default']
|
EtaCalculator: typeof import('./src/tools/eta-calculator/eta-calculator.vue')['default']
|
||||||
|
ExtractTextFromHtml: typeof import('./src/tools/extract-text-from-html/extract-text-from-html.vue')['default']
|
||||||
FavoriteButton: typeof import('./src/components/FavoriteButton.vue')['default']
|
FavoriteButton: typeof import('./src/components/FavoriteButton.vue')['default']
|
||||||
FormatTransformer: typeof import('./src/components/FormatTransformer.vue')['default']
|
FormatTransformer: typeof import('./src/components/FormatTransformer.vue')['default']
|
||||||
GitMemo: typeof import('./src/tools/git-memo/git-memo.vue')['default']
|
GitMemo: typeof import('./src/tools/git-memo/git-memo.vue')['default']
|
||||||
|
@ -126,25 +127,26 @@ declare module '@vue/runtime-core' {
|
||||||
MenuLayout: typeof import('./src/components/MenuLayout.vue')['default']
|
MenuLayout: typeof import('./src/components/MenuLayout.vue')['default']
|
||||||
MetaTagGenerator: typeof import('./src/tools/meta-tag-generator/meta-tag-generator.vue')['default']
|
MetaTagGenerator: typeof import('./src/tools/meta-tag-generator/meta-tag-generator.vue')['default']
|
||||||
MimeTypes: typeof import('./src/tools/mime-types/mime-types.vue')['default']
|
MimeTypes: typeof import('./src/tools/mime-types/mime-types.vue')['default']
|
||||||
|
NAlert: typeof import('naive-ui')['NAlert']
|
||||||
NavbarButtons: typeof import('./src/components/NavbarButtons.vue')['default']
|
NavbarButtons: typeof import('./src/components/NavbarButtons.vue')['default']
|
||||||
NCode: typeof import('naive-ui')['NCode']
|
NCode: typeof import('naive-ui')['NCode']
|
||||||
NCollapseTransition: typeof import('naive-ui')['NCollapseTransition']
|
NCollapseTransition: typeof import('naive-ui')['NCollapseTransition']
|
||||||
|
NColorPicker: typeof import('naive-ui')['NColorPicker']
|
||||||
NConfigProvider: typeof import('naive-ui')['NConfigProvider']
|
NConfigProvider: typeof import('naive-ui')['NConfigProvider']
|
||||||
NDivider: typeof import('naive-ui')['NDivider']
|
NDivider: typeof import('naive-ui')['NDivider']
|
||||||
NEllipsis: typeof import('naive-ui')['NEllipsis']
|
NEllipsis: typeof import('naive-ui')['NEllipsis']
|
||||||
NFormItem: typeof import('naive-ui')['NFormItem']
|
NFormItem: typeof import('naive-ui')['NFormItem']
|
||||||
NGi: typeof import('naive-ui')['NGi']
|
|
||||||
NGrid: typeof import('naive-ui')['NGrid']
|
|
||||||
NH1: typeof import('naive-ui')['NH1']
|
NH1: typeof import('naive-ui')['NH1']
|
||||||
NH3: typeof import('naive-ui')['NH3']
|
NH3: typeof import('naive-ui')['NH3']
|
||||||
NIcon: typeof import('naive-ui')['NIcon']
|
NIcon: typeof import('naive-ui')['NIcon']
|
||||||
|
NInputGroup: typeof import('naive-ui')['NInputGroup']
|
||||||
|
NInputGroupLabel: typeof import('naive-ui')['NInputGroupLabel']
|
||||||
NInputNumber: typeof import('naive-ui')['NInputNumber']
|
NInputNumber: typeof import('naive-ui')['NInputNumber']
|
||||||
NLabel: typeof import('naive-ui')['NLabel']
|
|
||||||
NLayout: typeof import('naive-ui')['NLayout']
|
NLayout: typeof import('naive-ui')['NLayout']
|
||||||
NLayoutSider: typeof import('naive-ui')['NLayoutSider']
|
NLayoutSider: typeof import('naive-ui')['NLayoutSider']
|
||||||
NMenu: typeof import('naive-ui')['NMenu']
|
NMenu: typeof import('naive-ui')['NMenu']
|
||||||
NScrollbar: typeof import('naive-ui')['NScrollbar']
|
NScrollbar: typeof import('naive-ui')['NScrollbar']
|
||||||
NSpin: typeof import('naive-ui')['NSpin']
|
NSwitch: typeof import('naive-ui')['NSwitch']
|
||||||
NumeronymGenerator: typeof import('./src/tools/numeronym-generator/numeronym-generator.vue')['default']
|
NumeronymGenerator: typeof import('./src/tools/numeronym-generator/numeronym-generator.vue')['default']
|
||||||
OtpCodeGeneratorAndValidator: typeof import('./src/tools/otp-code-generator-and-validator/otp-code-generator-and-validator.vue')['default']
|
OtpCodeGeneratorAndValidator: typeof import('./src/tools/otp-code-generator-and-validator/otp-code-generator-and-validator.vue')['default']
|
||||||
PasswordStrengthAnalyser: typeof import('./src/tools/password-strength-analyser/password-strength-analyser.vue')['default']
|
PasswordStrengthAnalyser: typeof import('./src/tools/password-strength-analyser/password-strength-analyser.vue')['default']
|
||||||
|
|
|
@ -0,0 +1,17 @@
|
||||||
|
import { test, expect } from '@playwright/test';
|
||||||
|
|
||||||
|
// End-to-end checks for the "Extract text from HTML" tool page.
test.describe('Tool - Extract text from html', () => {
  test.beforeEach(async ({ page }) => {
    await page.goto('/extract-text-from-html');
  });

  test('Has correct title', async ({ page }) => {
    await expect(page).toHaveTitle('Extract text from HTML');
  });

  test('Extract text from HTML', async ({ page }) => {
    await page.getByTestId('input').fill('<p>Paste your HTML in the input form on the left</p>');

    // Web-first assertion: it retries until the output area shows the expected text,
    // instead of reading the element once right after the input was filled.
    await expect(page.getByTestId('area-content')).toHaveText('Paste your HTML in the input form on the left');
  });
});
|
|
@ -0,0 +1,36 @@
|
||||||
|
import { expect, describe, it } from 'vitest';
|
||||||
|
import { getTextFromHtml, validateHtml } from './extract-text-from-html.service';
|
||||||
|
|
||||||
|
// Unit tests for the HTML validation and text-extraction helpers.
describe('extract-text-from-html service', () => {
  describe('validateHtml', () => {
    it('check if the value is valid html', () => {
      expect(validateHtml('<p>Paste your HTML in the input form on the left</p>')).toBeTruthy();
      expect(validateHtml('<div>Paste your HTML in the input form on the left</div>')).toBeTruthy();
      expect(validateHtml('<div><p>Paste your HTML in the input form on the left</p></div>')).toBeTruthy();
      expect(validateHtml('<body><div><p>Paste your HTML in the input form on the left</p></div></body>')).toBeTruthy();
    });

    it('check if the value is an html invlid', () => {
      expect(validateHtml('<p>Paste your HTML in the input form on the left<p>')).toBeFalsy();
      expect(validateHtml('Paste your HTML in the input form on the left<p>')).toBeFalsy();
      expect(validateHtml('<p>Paste your HTML in the input form on the left')).toBeFalsy();
      expect(validateHtml('<p>Paste your HTML in the input form on the left<>')).toBeFalsy();
      expect(validateHtml('<>Paste your HTML in the input form on the left<>')).toBeFalsy();
      expect(validateHtml('<p>Paste your HTML in the input form on the left</a>')).toBeFalsy();

      // The outer <div> is never closed, yet the value is accepted because it
      // still contains one balanced <p>…</p> pair.
      expect(validateHtml('<div><p>Paste your HTML in the input form on the left</p>')).toBeTruthy();
    });
  });

  describe('getTextFromHtml', () => {
    it('must be return a string', () => {
      // `expect(x).toString()` asserts nothing; check the runtime type explicitly.
      expect(typeof getTextFromHtml('<p>Paste your HTML in the input form on the left</p>')).toBe('string');
    });

    it('must be return text from html', () => {
      expect(getTextFromHtml('<p>Paste your HTML in the input form on the left</p>')).toStrictEqual(
        'Paste your HTML in the input form on the left',
      );
    });
  });
});
|
|
@ -0,0 +1,21 @@
|
||||||
|
/**
 * Checks whether a string looks like HTML markup.
 *
 * This is a lightweight heuristic, not a conformance check: the value is
 * accepted as soon as it contains at least one balanced `<tag>…</tag>` pair
 * (same tag name on both sides) or one self-closing `<tag … />` element.
 *
 * No parser round-trip is performed: the `text/html` parser is error-tolerant
 * and does not throw on malformed markup, so it cannot reject anything here.
 *
 * @param value - Raw text typed or pasted by the user.
 * @returns `true` when at least one balanced or self-closing element is found.
 */
function validateHtml(value: string): boolean {
  // - `[\s\S]*?` lets the element content span several lines (`.` stops at newlines).
  // - `[^>]*` keeps the self-closing alternative inside a single tag while still
  //   allowing `/` inside attribute values such as src="a/b.png".
  // - No `g` flag: a single yes/no answer is needed and `test` stays stateless.
  const elementPattern = /<([a-z][a-z0-9]*)\b[^>]*>[\s\S]*?<\/\1>|<[a-z][a-z0-9]*\b[^>]*\/>/i;

  return elementPattern.test(value);
}
|
||||||
|
|
||||||
|
/**
 * Extracts the text of an HTML string and collapses every run of whitespace
 * into a single space. The result is not trimmed.
 *
 * @param value - HTML markup to read the text from.
 * @returns The text content with whitespace runs collapsed.
 */
function getTextFromHtml(value: string): string {
  // Parse into a separate document rather than assigning to `innerHTML` on an
  // element of the live document: inline handlers in pasted markup
  // (e.g. <img onerror=…>) must not get a chance to run in the page.
  const parsed = new DOMParser().parseFromString(value, 'text/html');

  // documentElement covers both <head> and <body>, so text that the parser
  // places in the head (e.g. <title>) is kept as well.
  const text = parsed.documentElement.textContent ?? '';

  return text.replace(/\s+/g, ' ');
}
|
||||||
|
|
||||||
|
export { validateHtml, getTextFromHtml };
|
33
src/tools/extract-text-from-html/extract-text-from-html.vue
Normal file
33
src/tools/extract-text-from-html/extract-text-from-html.vue
Normal file
|
@ -0,0 +1,33 @@
|
||||||
|
<script setup lang="ts">
|
||||||
|
import { getTextFromHtml, validateHtml } from './extract-text-from-html.service';
|
||||||
|
import { withDefaultOnError } from '@/utils/defaults';
|
||||||
|
import type { UseValidationRule } from '@/composable/validation';
|
||||||
|
|
||||||
|
// Turns the raw HTML input into plain text. An empty input maps to an empty
// output, and '' is passed to withDefaultOnError as the fallback value.
function transformer(value: string) {
  const extractText = () => (value === '' ? '' : getTextFromHtml(value));

  return withDefaultOnError(extractText, '');
}
|
||||||
|
|
||||||
|
// Input validation: an empty value is accepted, anything else must pass validateHtml.
const rules: UseValidationRule<string>[] = [
  {
    message: 'Provided HTML is not valid.',
    validator: (value: string) => (value === '' ? true : validateHtml(value)),
  },
];
|
||||||
|
</script>
|
||||||
|
|
||||||
|
<template>
|
||||||
|
<format-transformer
|
||||||
|
input-label="Your raw HTML"
|
||||||
|
input-placeholder="Paste your raw HTML here..."
|
||||||
|
output-label="Text from your HTML"
|
||||||
|
:input-validation-rules="rules"
|
||||||
|
:transformer="transformer"
|
||||||
|
/>
|
||||||
|
</template>
|
||||||
|
|
||||||
|
<style lang="less" scoped></style>
|
13
src/tools/extract-text-from-html/index.ts
Normal file
13
src/tools/extract-text-from-html/index.ts
Normal file
|
@ -0,0 +1,13 @@
|
||||||
|
import { CursorText } from '@vicons/tabler';
|
||||||
|
import { defineTool } from '../tool';
|
||||||
|
|
||||||
|
// Long-form description passed to defineTool for this tool.
const toolDescription
  = 'Paste your HTML in the input form on the left and you will get text instantly. Occasionally, you may need to extract plain text from an HTML page where CSS properties (like user-select: none;) prevent text selection. The typical workaround involves using the DevTools (F12) to select "Copy → outer HTML". The proposed tool would simplify this process by extracting the "inner Text" directly from the copied HTML.';

// Definition of the "Extract text from HTML" tool; the component is loaded lazily.
export const tool = defineTool({
  name: 'Extract text from HTML',
  path: '/extract-text-from-html',
  description: toolDescription,
  keywords: ['extract', 'text', 'from', 'html'],
  component: () => import('./extract-text-from-html.vue'),
  icon: CursorText,
  createdAt: new Date('2024-05-10'),
});
|
|
@ -1,6 +1,7 @@
|
||||||
import { tool as base64FileConverter } from './base64-file-converter';
|
import { tool as base64FileConverter } from './base64-file-converter';
|
||||||
import { tool as base64StringConverter } from './base64-string-converter';
|
import { tool as base64StringConverter } from './base64-string-converter';
|
||||||
import { tool as basicAuthGenerator } from './basic-auth-generator';
|
import { tool as basicAuthGenerator } from './basic-auth-generator';
|
||||||
|
import { tool as extractTextFromHtml } from './extract-text-from-html';
|
||||||
|
|
||||||
import { tool as asciiTextDrawer } from './ascii-text-drawer';
|
import { tool as asciiTextDrawer } from './ascii-text-drawer';
|
||||||
|
|
||||||
|
@ -148,6 +149,7 @@ export const toolsByCategory: ToolCategory[] = [
|
||||||
dockerRunToDockerComposeConverter,
|
dockerRunToDockerComposeConverter,
|
||||||
xmlFormatter,
|
xmlFormatter,
|
||||||
yamlViewer,
|
yamlViewer,
|
||||||
|
extractTextFromHtml,
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue