From 5673c11c5d7ca3de5befb34babefa1d08393af1b Mon Sep 17 00:00:00 2001 From: Michael Rowley Date: Thu, 30 Dec 2021 23:23:13 +0000 Subject: [PATCH] Added function body. --- src/core/operations/HTMLToText.mjs | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/src/core/operations/HTMLToText.mjs b/src/core/operations/HTMLToText.mjs index ff90572a..7e42f5c7 100644 --- a/src/core/operations/HTMLToText.mjs +++ b/src/core/operations/HTMLToText.mjs @@ -33,7 +33,24 @@ class HTMLToText extends Operation { * @returns {string} */ run(input, args) { - return input; + // TODO: Add blacklisted tags via args. + // TODO: Extract from HTML comments. + let output = ""; + const blacklistedTags = [ "script", "style" ]; + const tagRegex = /<\w+>[\s?!\-_().,\/#{}*"£$%^&;:a-z]*/gis; + const tagMatches = input.match(tagRegex); + tagMatches.forEach((iterativeMatch) => { + const closingTagOffset = iterativeMatch.indexOf('>'); + const tag = iterativeMatch.substring(1, closingTagOffset); + for (let i = 0; i < blacklistedTags.length; i++) { + if (tag == blacklistedTags[i]) { + return; // This is why a forEach(...) loop couldn't be used for this nested one. + } + } + // The tag has been validated, extract all text. + output += iterativeMatch.substring(closingTagOffset + 1); + }); + return output; } }