diff --git a/src/core/operations/HTMLToText.mjs b/src/core/operations/HTMLToText.mjs
index 17b19838..42393546 100644
--- a/src/core/operations/HTMLToText.mjs
+++ b/src/core/operations/HTMLToText.mjs
@@ -1,6 +1,7 @@
/**
* @author tlwr [toby@toby.codes]
* @author Matt C [me@mitt.dev]
+ * @author Michael Rowley [michaellrowley@protonmail.com]
* @copyright Crown Copyright 2019
* @license Apache-2.0
*/
@@ -33,8 +34,6 @@ class HTMLToText extends Operation {
* @returns {string}
*/
run(input, args) {
- // TODO: Add blacklisted tags via args.
- // TODO: Extract from HTML comments.
let output = "";
const blacklistedTags = ["script", "style"];
const tagRegex = /<\w+>[\s?!\-_().,/#{}*"£$%^&;:a-z]*/gis;
@@ -43,7 +42,8 @@ class HTMLToText extends Operation {
const closingTagOffset = iterativeMatch.indexOf(">");
const tag = iterativeMatch.substring(1, closingTagOffset);
for (let i = 0; i < blacklistedTags.length; i++) {
- if (tag === blacklistedTags[i]) {
+ if (tag === blacklistedTags[i] ||
+ tag.split(" ")[0] === blacklistedTags[i]) {
return; // This is why a forEach(...) loop couldn't be used for this nested one.
}
}