Encode HTML entities and ensure no script tags slip through

This commit is contained in:
Glenn R. Martin 2025-06-09 00:02:01 -04:00
parent dd583a4943
commit 0b913d070a

View file

@ -95,7 +95,8 @@ class ParseAITokens extends Operation {
const tokenHtml = tokens.map((t, i) => { const tokenHtml = tokens.map((t, i) => {
const tok = const tok =
t.replaceAll(" ", "\u00A0") t.replace(/[\u00A0-\u9999<>&]/g, i => "&#"+i.charCodeAt(0)+";")
.replaceAll(" ", "\u00A0")
.replaceAll("\n", "<newline>"); .replaceAll("\n", "<newline>");
const css = [ const css = [
@ -127,21 +128,29 @@ class ParseAITokens extends Operation {
}; };
/** /**
* Replace all space not starting within the HTML tag. * Replace spaces outside HTML tags and sanitize <script> tags.
* @param {string} htmlString * @param {string} htmlString - The input HTML string.
* @returns {string} * @returns {string} - The sanitized and formatted HTML string.
*/ */
replaceSpacesOutsideTags(htmlString) { replaceSpacesOutsideTags(htmlString) {
return htmlString.replace(/(<[^>]*?>)|(\s+)/g, function(match, tag, spaces) { return htmlString
if (tag) { .replace(/(<script\b[^>]*>.*?<\/script>)|(<[^>]*?>)|(\s+)/gi, (match, scriptTag, htmlTag, spaces) => {
return tag; if (scriptTag) {
} else if (spaces) { // Sanitize the <script> tag by escaping it
return ""; return scriptTag
} .replace(/</g, "&lt;")
}).replace(/[\r\n]/g, ""); .replace(/>/g, "&gt;");
}; } else if (htmlTag) {
// Leave other HTML tags unchanged
return htmlTag;
} else if (spaces) {
// Replace spaces outside tags
return "";
}
})
.replace(/[\r\n]/g, "");
}
} }
export default ParseAITokens; export default ParseAITokens;