HTML outputs can now be selected and handle control characters correctly

This commit is contained in:
n1474335 2022-07-18 18:39:41 +01:00
parent 0dc2322269
commit 7c8a185a3d
16 changed files with 319 additions and 124 deletions

View file

@ -0,0 +1,125 @@
/**
* @author n1474335 [n1474335@gmail.com]
* @copyright Crown Copyright 2022
* @license Apache-2.0
*
* In order to render whitespace characters as control character pictures in the output, even
* when they are the designated line separator, CyberChef sometimes chooses to represent them
* internally using the Unicode Private Use Area (https://en.wikipedia.org/wiki/Private_Use_Areas).
* See `Utils.escapeWhitespace()` for an example of this.
*
* The `renderSpecialChar()` function understands that it should display these characters as
* control pictures. When copying data from the Output, we need to replace these PUA characters
* with their original values, so we override the DOM "copy" event and modify the copied data
* if required. This handler is based closely on the built-in CodeMirror handler and defers to the
* built-in handler if PUA characters are not present in the copied data, in order to minimise the
* impact of breaking changes.
*/
import {EditorView} from "@codemirror/view";
/**
 * Collects the text that a copy operation should place on the clipboard.
 * Trimmed-down port of the helper CodeMirror uses internally:
 * https://github.com/codemirror/view/blob/7d9c3e54396242d17b3164a0e244dcc234ee50ee/src/input.ts#L604
 *
 * Every non-empty selection range contributes its sliced text. When all ranges are empty
 * (cursors only), the text of each line holding a cursor is used instead and the result is
 * flagged as line-wise.
 *
 * @param {EditorState} state
 * @returns {Object} `{text, linewise}` - the pieces joined with `state.lineBreak`, and whether
 *     the line-wise fallback was taken
 */
function copiedRange(state) {
    const pieces = [];

    for (const sel of state.selection.ranges) {
        if (sel.empty) continue;
        pieces.push(state.sliceDoc(sel.from, sel.to));
    }

    // No selected text at all means we fall back to copying whole lines
    const linewise = pieces.length === 0;
    if (linewise) {
        let previousLineNo = -1;
        for (const sel of state.selection.ranges) {
            const line = state.doc.lineAt(sel.from);
            // Only take a line when it lies below the previous cursor's line, so several
            // cursors on one line do not duplicate it
            if (line.number > previousLineNo) pieces.push(line.text);
            previousLineNo = line.number;
        }
    }

    return {text: pieces.join(state.lineBreak), linewise};
}
/**
 * Regexes matching characters in the Private Use Area of the Unicode table (U+E000 - U+F8FF).
 *
 * The non-global form is used with `test()` to detect PUA characters and the global form with
 * `replace()` to rewrite all of them. Keeping a separate non-global regex for `test()` avoids
 * the `lastIndex` state that a global regex carries between calls.
 */
const PUARegex = new RegExp("[\ue000-\uf8ff]");
const PUARegexG = new RegExp("[\ue000-\uf8ff]", "g");
/**
 * Regexes matching characters in the Unicode Control Pictures block (U+2400 - U+243F).
 *
 * As above: the non-global form is for detection with `test()`, the global form for `replace()`.
 */
const CPRegex = new RegExp("[\u2400-\u243f]");
const CPRegexG = new RegExp("[\u2400-\u243f]", "g");
/**
 * Builds a CodeMirror extension that replaces the editor's DOM "copy" handler so that control
 * characters which were moved into the Unicode Private Use Area for display purposes are
 * copied with their original values.
 * Modelled on the built-in copy handler:
 * https://github.com/codemirror/view/blob/7d9c3e54396242d17b3164a0e244dcc234ee50ee/src/input.ts#L629
 *
 * When the copied text holds no PUA characters the handler reports the event as unhandled,
 * leaving the built-in handler to do the work.
 *
 * @returns {Extension}
 */
export function copyOverride() {
    return EditorView.domEventHandlers({
        copy(event, view) {
            const copied = copiedRange(view.state);

            // Neither selected text nor a line-wise copy: nothing to do
            if (!(copied.text || copied.linewise)) return;

            // Without PUA characters there is nothing to restore, so return false and let the
            // built-in copy handler fire
            const hasEscapedChars = PUARegex.test(copied.text);
            if (!hasEscapedChars) return false;

            // Shift every PUA character back down to the value it stands in for
            const restored = copied.text.replace(
                PUARegexG,
                ch => String.fromCharCode(ch.charCodeAt(0) - 0xe000)
            );

            event.preventDefault();
            const {clipboardData} = event;
            clipboardData.clearData();
            clipboardData.setData("text/plain", restored);

            // Returning true prevents CodeMirror default handlers from firing
            return true;
        }
    });
}
/**
 * Handler for copy events in output-html decorations. If the selection contains control
 * pictures that stand for control characters, they are converted back to their raw form
 * before copying. Otherwise the handler does nothing and defers to the default browser
 * handler.
 *
 * Only U+2400 - U+2420 are converted: these are the pictures that sit at a fixed 0x2400 offset
 * from the character they depict (0x00 - 0x20). The remaining code points of the Control
 * Pictures block (for example U+2421 SYMBOL FOR DELETE or U+2423 OPEN BOX) have no such
 * relationship, so subtracting the offset would silently turn them into unrelated printable
 * ASCII ("!", "#", ...). They are copied unchanged.
 *
 * @param {ClipboardEvent} event
 * @returns {boolean} true if the clipboard contents were overridden, false otherwise
 */
export function htmlCopyOverride(event) {
    const text = window.getSelection().toString();
    if (!text) return false;

    // If there are no control picture chars in the copied text, return false and allow the built-in
    // copy handler to fire
    if (!CPRegex.test(text)) return false;

    // Map the offset pictures back to their original values, leaving all other pictures alone
    const rawText = text.replace(CPRegexG, function(c) {
        const code = c.charCodeAt(0) - 0x2400;
        return code <= 0x20 ? String.fromCharCode(code) : c;
    });

    // Nothing was actually converted, so the default handler can deal with the selection
    if (rawText === text) return false;

    event.preventDefault();
    event.clipboardData.clearData();
    event.clipboardData.setData("text/plain", rawText);

    return true;
}

View file

@ -6,12 +6,41 @@
* @license Apache-2.0
*/
import Utils from "../../core/Utils.mjs";
// Descriptions for named control characters.
// Keys are decimal character codes (e.g. 10 is line feed, 8203 is U+200B). The table is looked
// up by character code when building the "Control character ..." description; codes without
// an entry are described by their hexadecimal value instead.
const Names = {
0: "null",
7: "bell",
8: "backspace",
10: "line feed",
11: "vertical tab",
13: "carriage return",
27: "escape",
8203: "zero width space",
8204: "zero width non-joiner",
8205: "zero width joiner",
8206: "left-to-right mark",
8207: "right-to-left mark",
8232: "line separator",
8237: "left-to-right override",
8238: "right-to-left override",
8233: "paragraph separator",
65279: "zero width no-break space",
65532: "object replacement"
};
// Regex for Special Characters to be replaced.
// The "u" flag is only added when the runtime exposes the RegExp `unicode` property.
const UnicodeRegexpSupport = /x/.unicode != null ? "gu" : "g";
// Matches control and invisible formatting characters plus the whole Private Use Area
// (U+E000 - U+F8FF). Note that U+0009 (tab) is deliberately outside the first range.
const Specials = new RegExp("[\u0000-\u0008\u000a-\u001f\u007f-\u009f\u00ad\u061c\u200b\u200e\u200f\u2028\u2029\u202d\u202e\ufeff\ufff9-\ufffc\ue000-\uf8ff]", UnicodeRegexpSupport);
/**
* Override for rendering special characters.
* Should mirror the toDOM function in
* https://github.com/codemirror/view/blob/main/src/special-chars.ts#L150
* But reverts the replacement of line feeds with newline control pictures.
*
* @param {number} code
* @param {string} desc
* @param {string} placeholder
@ -19,10 +48,47 @@
*/
export function renderSpecialChar(code, desc, placeholder) {
    const s = document.createElement("span");

    // CodeMirror changes 0x0a to "NL" instead of "LF". We change it back along with its description.
    if (code === 0x0a) {
        placeholder = "\u240a";
        desc = desc.replace("newline", "line feed");
    }

    // Render CyberChef escaped characters correctly - see Utils.escapeWhitespace
    // A Private Use Area code stands for the character 0xe000 below it, so show that
    // character's control picture and describe the original character.
    if (code >= 0xe000 && code <= 0xf8ff) {
        code = code - 0xe000;
        placeholder = String.fromCharCode(0x2400 + code);
        desc = "Control character " + (Names[code] || "0x" + code.toString(16));
    }

    // The text is written exactly once, after all adjustments have been applied
    s.textContent = placeholder;
    s.title = desc;
    s.setAttribute("aria-label", desc);
    s.className = "cm-specialChar";
    return s;
}
/**
 * Replaces every special character in a string with the HTML of its rendered control
 * picture, as produced by `renderSpecialChar()`.
 *
 * Unless `preserveWs` is set, the string is first passed through `Utils.escapeWhitespace()`.
 * Characters that are part of `lineBreak` are always left as they are.
 *
 * @param {string} str
 * @param {boolean} [preserveWs=false]
 * @param {string} [lineBreak="\n"]
 * @returns {html}
 */
export function escapeControlChars(str, preserveWs=false, lineBreak="\n") {
    const input = preserveWs ? str : Utils.escapeWhitespace(str);

    return input.replace(Specials, match => {
        // The line separator has to survive unchanged
        if (lineBreak.includes(match)) return match;

        const codeUnit = match.charCodeAt(0);
        const label = Names[codeUnit] || "0x" + codeUnit.toString(16);
        // Codes above 32 have no control picture of their own and get a bullet instead
        const glyph = codeUnit > 32 ? "\u2022" : String.fromCharCode(9216 + codeUnit);

        return renderSpecialChar(codeUnit, "Control character " + label, glyph).outerHTML;
    });
}

View file

@ -5,6 +5,9 @@
*/
import {WidgetType, Decoration, ViewPlugin} from "@codemirror/view";
import {escapeControlChars} from "./editorUtils.mjs";
import {htmlCopyOverride} from "./copyOverride.mjs";
/**
* Adds an HTML widget to the Code Mirror editor
@ -14,9 +17,10 @@ class HTMLWidget extends WidgetType {
/**
* HTMLWidget consructor
*/
constructor(html) {
constructor(html, view) {
super();
this.html = html;
this.view = view;
}
/**
@ -27,9 +31,45 @@ class HTMLWidget extends WidgetType {
const wrap = document.createElement("span");
wrap.setAttribute("id", "output-html");
wrap.innerHTML = this.html;
// Find text nodes and replace unprintable chars with control codes
this.walkTextNodes(wrap);
// Add a handler for copy events to ensure the control codes are copied correctly
wrap.addEventListener("copy", htmlCopyOverride);
return wrap;
}
/**
* Walks all text nodes in a given element
* @param {DOMNode} el
*/
walkTextNodes(el) {
for (const node of el.childNodes) {
switch (node.nodeType) {
case Node.TEXT_NODE:
this.replaceControlChars(node);
break;
default:
if (node.nodeName !== "SCRIPT" &&
node.nodeName !== "STYLE")
this.walkTextNodes(node);
break;
}
}
}
/**
* Renders control characters in text nodes
* @param {DOMNode} textNode
*/
replaceControlChars(textNode) {
const val = escapeControlChars(textNode.nodeValue, true, this.view.state.lineBreak);
const node = document.createElement("null");
node.innerHTML = val;
textNode.parentNode.replaceChild(node, textNode);
}
}
/**
@ -42,7 +82,7 @@ function decorateHTML(view, html) {
const widgets = [];
if (html.length) {
const deco = Decoration.widget({
widget: new HTMLWidget(html),
widget: new HTMLWidget(html, view),
side: 1
});
widgets.push(deco.range(0));
@ -79,7 +119,8 @@ export function htmlPlugin(htmlOutput) {
}
}
}, {
decorations: v => v.decorations
decorations: v => v.decorations,
}
);