mirror of
https://github.com/gchq/CyberChef.git
synced 2025-04-25 01:06:16 -04:00
Automatically detect UTF8 character encoding in output
This commit is contained in:
parent
16dfb3fac6
commit
65ffd8d65d
7 changed files with 270 additions and 147 deletions
|
@ -62,7 +62,8 @@ class InputWaiter {
|
|||
|
||||
this.inputTextEl = document.getElementById("input-text");
|
||||
this.inputChrEnc = 0;
|
||||
this.eolSetManually = false;
|
||||
this.eolState = 0; // 0 = unset, 1 = detected, 2 = manual
|
||||
this.encodingState = 0; // 0 = unset, 1 = detected, 2 = manual
|
||||
this.initEditor();
|
||||
|
||||
this.inputWorker = null;
|
||||
|
@ -116,7 +117,9 @@ class InputWaiter {
|
|||
label: "Input",
|
||||
eolHandler: this.eolChange.bind(this),
|
||||
chrEncHandler: this.chrEncChange.bind(this),
|
||||
chrEncGetter: this.getChrEnc.bind(this)
|
||||
chrEncGetter: this.getChrEnc.bind(this),
|
||||
getEncodingState: this.getEncodingState.bind(this),
|
||||
getEOLState: this.getEOLState.bind(this)
|
||||
}),
|
||||
|
||||
// Mutable state
|
||||
|
@ -156,6 +159,8 @@ class InputWaiter {
|
|||
]
|
||||
});
|
||||
|
||||
|
||||
if (this.inputEditorView) this.inputEditorView.destroy();
|
||||
this.inputEditorView = new EditorView({
|
||||
state: initialState,
|
||||
parent: this.inputTextEl
|
||||
|
@ -166,30 +171,18 @@ class InputWaiter {
|
|||
* Handler for EOL change events
|
||||
* Sets the line separator
|
||||
* @param {string} eol
|
||||
* @param {boolean} manual - a flag for whether this was set by the user or automatically
|
||||
* @param {boolean} [manual=false]
|
||||
*/
|
||||
eolChange(eol, manual=false) {
|
||||
const eolVal = eolCodeToSeq[eol];
|
||||
if (eolVal === undefined) return;
|
||||
|
||||
const eolBtn = document.querySelector("#input-text .eol-value");
|
||||
if (manual) {
|
||||
this.eolSetManually = true;
|
||||
eolBtn.classList.remove("font-italic");
|
||||
} else {
|
||||
eolBtn.classList.add("font-italic");
|
||||
}
|
||||
this.eolState = manual ? 2 : this.eolState;
|
||||
if (this.eolState < 2 && eolVal === this.getEOLSeq()) return;
|
||||
|
||||
if (eolVal === this.getEOLSeq()) return;
|
||||
|
||||
if (!manual) {
|
||||
// Pulse
|
||||
eolBtn.classList.add("pulse");
|
||||
setTimeout(() => {
|
||||
eolBtn.classList.remove("pulse");
|
||||
}, 2000);
|
||||
if (this.eolState === 1) {
|
||||
// Alert
|
||||
this.app.alert(`Input EOL separator has been changed to ${eolCodeToName[eol]}`, 5000);
|
||||
this.app.alert(`Input end of line separator has been detected and changed to ${eolCodeToName[eol]}`, 5000);
|
||||
}
|
||||
|
||||
// Update the EOL value
|
||||
|
@ -210,14 +203,24 @@ class InputWaiter {
|
|||
return this.inputEditorView.state.lineBreak;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns whether the input EOL sequence was set manually or has been detected automatically
|
||||
* @returns {number} - 0 = unset, 1 = detected, 2 = manual
|
||||
*/
|
||||
getEOLState() {
|
||||
return this.eolState;
|
||||
}
|
||||
|
||||
/**
|
||||
* Handler for Chr Enc change events
|
||||
* Sets the input character encoding
|
||||
* @param {number} chrEncVal
|
||||
* @param {boolean} [manual=false]
|
||||
*/
|
||||
chrEncChange(chrEncVal) {
|
||||
chrEncChange(chrEncVal, manual=false) {
|
||||
if (typeof chrEncVal !== "number") return;
|
||||
this.inputChrEnc = chrEncVal;
|
||||
this.encodingState = manual ? 2 : this.encodingState;
|
||||
this.inputChange();
|
||||
}
|
||||
|
||||
|
@ -229,6 +232,14 @@ class InputWaiter {
|
|||
return this.inputChrEnc;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns whether the input character encoding was set manually or has been detected automatically
|
||||
* @returns {number} - 0 = unset, 1 = detected, 2 = manual
|
||||
*/
|
||||
getEncodingState() {
|
||||
return this.encodingState;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets word wrap on the input editor
|
||||
* @param {boolean} wrap
|
||||
|
@ -908,7 +919,7 @@ class InputWaiter {
|
|||
*/
|
||||
afterPaste(e) {
|
||||
// If EOL has been fixed, skip this.
|
||||
if (this.eolSetManually) return;
|
||||
if (this.eolState > 1) return;
|
||||
|
||||
const inputText = this.getInput();
|
||||
|
||||
|
@ -930,17 +941,23 @@ class InputWaiter {
|
|||
}, 0);
|
||||
if (total === 0) return;
|
||||
|
||||
// If CRLF not zero and more than half the highest alternative, choose CRLF
|
||||
// Find most prevalent line ending sequence
|
||||
const highest = Object.entries(eolCharCounts).reduce((acc, curr) => {
|
||||
return curr[1] > acc[1] ? curr : acc;
|
||||
}, ["LF", 0]);
|
||||
let choice = highest[0];
|
||||
|
||||
// If CRLF not zero and more than half the highest alternative, choose CRLF
|
||||
if ((eolCharCounts.CRLF * 2) > highest[1]) {
|
||||
this.eolChange("CRLF");
|
||||
return;
|
||||
choice = "CRLF";
|
||||
}
|
||||
|
||||
// Else choose max
|
||||
this.eolChange(highest[0]);
|
||||
const eolVal = eolCodeToSeq[choice];
|
||||
if (eolVal === this.getEOLSeq()) return;
|
||||
|
||||
// Setting automatically
|
||||
this.eolState = 1;
|
||||
this.eolChange(choice);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1276,8 +1293,13 @@ class InputWaiter {
|
|||
this.manager.output.removeAllOutputs();
|
||||
this.manager.output.terminateZipWorker();
|
||||
|
||||
this.eolSetManually = false;
|
||||
this.manager.output.eolSetManually = false;
|
||||
this.eolState = 0;
|
||||
this.encodingState = 0;
|
||||
this.manager.output.eolState = 0;
|
||||
this.manager.output.encodingState = 0;
|
||||
|
||||
this.initEditor();
|
||||
this.manager.output.initEditor();
|
||||
|
||||
const tabsList = document.getElementById("input-tabs");
|
||||
const tabsListChildren = tabsList.children;
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
|
||||
import Utils, {debounce} from "../../core/Utils.mjs";
|
||||
import Dish from "../../core/Dish.mjs";
|
||||
import {isUTF8, CHR_ENC_SIMPLE_REVERSE_LOOKUP} from "../../core/lib/ChrEnc.mjs";
|
||||
import {detectFileType} from "../../core/lib/FileType.mjs";
|
||||
import FileSaver from "file-saver";
|
||||
import ZipWorker from "worker-loader?inline=no-fallback!../workers/ZipWorker.mjs";
|
||||
|
@ -70,7 +71,8 @@ class OutputWaiter {
|
|||
this.zipWorker = null;
|
||||
this.maxTabs = this.manager.tabs.calcMaxTabs();
|
||||
this.tabTimeout = null;
|
||||
this.eolSetManually = false;
|
||||
this.eolState = 0; // 0 = unset, 1 = detected, 2 = manual
|
||||
this.encodingState = 0; // 0 = unset, 1 = detected, 2 = manual
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -110,6 +112,8 @@ class OutputWaiter {
|
|||
eolHandler: this.eolChange.bind(this),
|
||||
chrEncHandler: this.chrEncChange.bind(this),
|
||||
chrEncGetter: this.getChrEnc.bind(this),
|
||||
getEncodingState: this.getEncodingState.bind(this),
|
||||
getEOLState: this.getEOLState.bind(this),
|
||||
htmlOutput: this.htmlOutput
|
||||
}),
|
||||
htmlPlugin(this.htmlOutput),
|
||||
|
@ -138,6 +142,7 @@ class OutputWaiter {
|
|||
]
|
||||
});
|
||||
|
||||
if (this.outputEditorView) this.outputEditorView.destroy();
|
||||
this.outputEditorView = new EditorView({
|
||||
state: initialState,
|
||||
parent: this.outputTextEl
|
||||
|
@ -148,30 +153,18 @@ class OutputWaiter {
|
|||
* Handler for EOL change events
|
||||
* Sets the line separator
|
||||
* @param {string} eol
|
||||
* @param {boolean} manual - a flag for whether this was set by the user or automatically
|
||||
* @param {boolean} [manual=false]
|
||||
*/
|
||||
async eolChange(eol, manual=false) {
|
||||
const eolVal = eolCodeToSeq[eol];
|
||||
if (eolVal === undefined) return;
|
||||
|
||||
const eolBtn = document.querySelector("#output-text .eol-value");
|
||||
if (manual) {
|
||||
this.eolSetManually = true;
|
||||
eolBtn.classList.remove("font-italic");
|
||||
} else {
|
||||
eolBtn.classList.add("font-italic");
|
||||
}
|
||||
this.eolState = manual ? 2 : this.eolState;
|
||||
if (this.eolState < 2 && eolVal === this.getEOLSeq()) return;
|
||||
|
||||
if (eolVal === this.getEOLSeq()) return;
|
||||
|
||||
if (!manual) {
|
||||
// Pulse
|
||||
eolBtn.classList.add("pulse");
|
||||
setTimeout(() => {
|
||||
eolBtn.classList.remove("pulse");
|
||||
}, 2000);
|
||||
if (this.eolState === 1) {
|
||||
// Alert
|
||||
this.app.alert(`Output EOL separator has been changed to ${eolCodeToName[eol]}`, 5000);
|
||||
this.app.alert(`Output end of line separator has been detected and changed to ${eolCodeToName[eol]}`, 5000);
|
||||
}
|
||||
|
||||
const currentTabNum = this.manager.tabs.getActiveTab("output");
|
||||
|
@ -205,13 +198,23 @@ class OutputWaiter {
|
|||
return this.outputs[currentTabNum].eolSequence;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns whether the output EOL sequence was set manually or has been detected automatically
|
||||
* @returns {number} - 0 = unset, 1 = detected, 2 = manual
|
||||
*/
|
||||
getEOLState() {
|
||||
return this.eolState;
|
||||
}
|
||||
|
||||
/**
|
||||
* Handler for Chr Enc change events
|
||||
* Sets the output character encoding
|
||||
* @param {number} chrEncVal
|
||||
* @param {boolean} [manual=false]
|
||||
*/
|
||||
async chrEncChange(chrEncVal) {
|
||||
async chrEncChange(chrEncVal, manual=false) {
|
||||
if (typeof chrEncVal !== "number") return;
|
||||
const currentEnc = this.getChrEnc();
|
||||
|
||||
const currentTabNum = this.manager.tabs.getActiveTab("output");
|
||||
if (currentTabNum >= 0) {
|
||||
|
@ -220,10 +223,17 @@ class OutputWaiter {
|
|||
throw new Error(`Cannot change output ${currentTabNum} chrEnc to ${chrEncVal}`);
|
||||
}
|
||||
|
||||
// Reset the output, forcing it to re-decode the data with the new character encoding
|
||||
await this.setOutput(this.currentOutputCache, true);
|
||||
// Update the URL manually since we aren't firing a statechange event
|
||||
this.app.updateURL(true);
|
||||
this.encodingState = manual ? 2 : this.encodingState;
|
||||
|
||||
if (this.encodingState > 1) {
|
||||
// Reset the output, forcing it to re-decode the data with the new character encoding
|
||||
await this.setOutput(this.currentOutputCache, true);
|
||||
// Update the URL manually since we aren't firing a statechange event
|
||||
this.app.updateURL(true);
|
||||
} else if (currentEnc !== chrEncVal) {
|
||||
// Alert
|
||||
this.app.alert(`Output character encoding has been detected and changed to ${CHR_ENC_SIMPLE_REVERSE_LOOKUP[chrEncVal] || "Raw Bytes"}`, 5000);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -238,6 +248,14 @@ class OutputWaiter {
|
|||
return this.outputs[currentTabNum].encoding;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns whether the output character encoding was set manually or has been detected automatically
|
||||
* @returns {number} - 0 = unset, 1 = detected, 2 = manual
|
||||
*/
|
||||
getEncodingState() {
|
||||
return this.encodingState;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets word wrap on the output editor
|
||||
* @param {boolean} wrap
|
||||
|
@ -273,6 +291,7 @@ class OutputWaiter {
|
|||
const tabNum = this.manager.tabs.getActiveTab("output");
|
||||
this.manager.timing.recordTime("outputDecodingStart", tabNum);
|
||||
if (data instanceof ArrayBuffer) {
|
||||
await this.detectEncoding(data);
|
||||
data = await this.bufferToStr(data);
|
||||
}
|
||||
this.manager.timing.recordTime("outputDecodingEnd", tabNum);
|
||||
|
@ -380,7 +399,7 @@ class OutputWaiter {
|
|||
*/
|
||||
detectEOLSequence(data) {
|
||||
// If EOL has been fixed, skip this.
|
||||
if (this.eolSetManually) return;
|
||||
if (this.eolState > 1) return;
|
||||
// If data is too long, skip this.
|
||||
if (data.length > 1000000) return;
|
||||
|
||||
|
@ -402,17 +421,54 @@ class OutputWaiter {
|
|||
}, 0);
|
||||
if (total === 0) return;
|
||||
|
||||
// If CRLF not zero and more than half the highest alternative, choose CRLF
|
||||
// Find most prevalent line ending sequence
|
||||
const highest = Object.entries(eolCharCounts).reduce((acc, curr) => {
|
||||
return curr[1] > acc[1] ? curr : acc;
|
||||
}, ["LF", 0]);
|
||||
let choice = highest[0];
|
||||
|
||||
// If CRLF not zero and more than half the highest alternative, choose CRLF
|
||||
if ((eolCharCounts.CRLF * 2) > highest[1]) {
|
||||
this.eolChange("CRLF");
|
||||
return;
|
||||
choice = "CRLF";
|
||||
}
|
||||
|
||||
// Else choose max
|
||||
this.eolChange(highest[0]);
|
||||
const eolVal = eolCodeToSeq[choice];
|
||||
if (eolVal === this.getEOLSeq()) return;
|
||||
|
||||
// Setting automatically
|
||||
this.eolState = 1;
|
||||
this.eolChange(choice);
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks whether the character encoding should be updated.
|
||||
*
|
||||
* @param {ArrayBuffer} data
|
||||
*/
|
||||
async detectEncoding(data) {
|
||||
// If encoding has been fixed, skip this.
|
||||
if (this.encodingState > 1) return;
|
||||
// If data is too long, skip this.
|
||||
if (data.byteLength > 1000000) return;
|
||||
|
||||
const enc = isUTF8(data); // 0 = not UTF8, 1 = ASCII, 2 = UTF8
|
||||
|
||||
switch (enc) {
|
||||
case 0: // not UTF8
|
||||
// Set to Raw Bytes
|
||||
this.encodingState = 1;
|
||||
await this.chrEncChange(0, false);
|
||||
break;
|
||||
case 2: // UTF8
|
||||
// Set to UTF8
|
||||
this.encodingState = 1;
|
||||
await this.chrEncChange(65001, false);
|
||||
break;
|
||||
case 1: // ASCII
|
||||
default:
|
||||
// Ignore
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue