Input and output character encodings can now be set

This commit is contained in:
n1474335 2022-09-02 12:56:04 +01:00
parent 7c8a185a3d
commit e93aa42697
15 changed files with 482 additions and 423 deletions

View file

@ -68,16 +68,10 @@ class Chef {
// Present the raw result
await recipe.present(this.dish);
// Depending on the size of the output, we may send it back as a string or an ArrayBuffer.
// This can prevent unnecessary casting as an ArrayBuffer can be easily downloaded as a file.
// The threshold is specified in KiB.
const threshold = (options.ioDisplayThreshold || 1024) * 1024;
const returnType =
this.dish.type === Dish.HTML ?
Dish.HTML :
this.dish.size > threshold ?
Dish.ARRAY_BUFFER :
Dish.STRING;
Dish.ARRAY_BUFFER;
return {
dish: rawDish,

View file

@ -101,14 +101,17 @@ async function bake(data) {
// Ensure the relevant modules are loaded
self.loadRequiredModules(data.recipeConfig);
try {
self.inputNum = (data.inputNum !== undefined) ? data.inputNum : -1;
self.inputNum = data.inputNum === undefined ? -1 : data.inputNum;
const response = await self.chef.bake(
data.input, // The user's input
data.recipeConfig, // The configuration of the recipe
data.options // Options set by the user
);
const transferable = (data.input instanceof ArrayBuffer) ? [data.input] : undefined;
const transferable = (response.dish.value instanceof ArrayBuffer) ?
[response.dish.value] :
undefined;
self.postMessage({
action: "bakeComplete",
data: Object.assign(response, {

View file

@ -406,6 +406,7 @@ class Utils {
* Utils.strToArrayBuffer("你好");
*/
static strToArrayBuffer(str) {
log.debug("Converting string to array buffer");
const arr = new Uint8Array(str.length);
let i = str.length, b;
while (i--) {
@ -432,6 +433,7 @@ class Utils {
* Utils.strToUtf8ArrayBuffer("你好");
*/
static strToUtf8ArrayBuffer(str) {
log.debug("Converting string to UTF8 array buffer");
const utf8Str = utf8.encode(str);
if (str.length !== utf8Str.length) {
@ -461,6 +463,7 @@ class Utils {
* Utils.strToByteArray("你好");
*/
static strToByteArray(str) {
log.debug("Converting string to byte array");
const byteArray = new Array(str.length);
let i = str.length, b;
while (i--) {
@ -487,6 +490,7 @@ class Utils {
* Utils.strToUtf8ByteArray("你好");
*/
static strToUtf8ByteArray(str) {
log.debug("Converting string to UTF8 byte array");
const utf8Str = utf8.encode(str);
if (str.length !== utf8Str.length) {
@ -515,6 +519,7 @@ class Utils {
* Utils.strToCharcode("你好");
*/
static strToCharcode(str) {
log.debug("Converting string to charcode");
const charcode = [];
for (let i = 0; i < str.length; i++) {
@ -549,6 +554,7 @@ class Utils {
* Utils.byteArrayToUtf8([228,189,160,229,165,189]);
*/
static byteArrayToUtf8(byteArray) {
log.debug("Converting byte array to UTF8");
const str = Utils.byteArrayToChars(byteArray);
try {
const utf8Str = utf8.decode(str);
@ -581,6 +587,7 @@ class Utils {
* Utils.byteArrayToChars([20320,22909]);
*/
static byteArrayToChars(byteArray) {
log.debug("Converting byte array to chars");
if (!byteArray) return "";
let str = "";
// String concatenation appears to be faster than an array join
@ -603,6 +610,7 @@ class Utils {
* Utils.arrayBufferToStr(Uint8Array.from([104,101,108,108,111]).buffer);
*/
static arrayBufferToStr(arrayBuffer, utf8=true) {
log.debug("Converting array buffer to str");
const arr = new Uint8Array(arrayBuffer);
return utf8 ? Utils.byteArrayToUtf8(arr) : Utils.byteArrayToChars(arr);
}

View file

@ -9,7 +9,7 @@
/**
* Character encoding format mappings.
*/
export const IO_FORMAT = {
export const CHR_ENC_CODE_PAGES = {
"UTF-8 (65001)": 65001,
"UTF-7 (65000)": 65000,
"UTF-16LE (1200)": 1200,
@ -164,6 +164,17 @@ export const IO_FORMAT = {
"Simplified Chinese GB18030 (54936)": 54936,
};
export const CHR_ENC_SIMPLE_LOOKUP = {};
export const CHR_ENC_SIMPLE_REVERSE_LOOKUP = {};
for (const name in CHR_ENC_CODE_PAGES) {
const simpleName = name.match(/(^.+)\([\d/]+\)$/)[1];
CHR_ENC_SIMPLE_LOOKUP[simpleName] = CHR_ENC_CODE_PAGES[name];
CHR_ENC_SIMPLE_REVERSE_LOOKUP[CHR_ENC_CODE_PAGES[name]] = simpleName;
}
/**
* Unicode Normalisation Forms
*

View file

@ -6,7 +6,7 @@
import Operation from "../Operation.mjs";
import cptable from "codepage";
import {IO_FORMAT} from "../lib/ChrEnc.mjs";
import {CHR_ENC_CODE_PAGES} from "../lib/ChrEnc.mjs";
/**
* Decode text operation
@ -26,7 +26,7 @@ class DecodeText extends Operation {
"<br><br>",
"Supported charsets are:",
"<ul>",
Object.keys(IO_FORMAT).map(e => `<li>${e}</li>`).join("\n"),
Object.keys(CHR_ENC_CODE_PAGES).map(e => `<li>${e}</li>`).join("\n"),
"</ul>",
].join("\n");
this.infoURL = "https://wikipedia.org/wiki/Character_encoding";
@ -36,7 +36,7 @@ class DecodeText extends Operation {
{
"name": "Encoding",
"type": "option",
"value": Object.keys(IO_FORMAT)
"value": Object.keys(CHR_ENC_CODE_PAGES)
}
];
}
@ -47,7 +47,7 @@ class DecodeText extends Operation {
* @returns {string}
*/
run(input, args) {
const format = IO_FORMAT[args[0]];
const format = CHR_ENC_CODE_PAGES[args[0]];
return cptable.utils.decode(format, new Uint8Array(input));
}

View file

@ -6,7 +6,7 @@
import Operation from "../Operation.mjs";
import cptable from "codepage";
import {IO_FORMAT} from "../lib/ChrEnc.mjs";
import {CHR_ENC_CODE_PAGES} from "../lib/ChrEnc.mjs";
/**
* Encode text operation
@ -26,7 +26,7 @@ class EncodeText extends Operation {
"<br><br>",
"Supported charsets are:",
"<ul>",
Object.keys(IO_FORMAT).map(e => `<li>${e}</li>`).join("\n"),
Object.keys(CHR_ENC_CODE_PAGES).map(e => `<li>${e}</li>`).join("\n"),
"</ul>",
].join("\n");
this.infoURL = "https://wikipedia.org/wiki/Character_encoding";
@ -36,7 +36,7 @@ class EncodeText extends Operation {
{
"name": "Encoding",
"type": "option",
"value": Object.keys(IO_FORMAT)
"value": Object.keys(CHR_ENC_CODE_PAGES)
}
];
}
@ -47,7 +47,7 @@ class EncodeText extends Operation {
* @returns {ArrayBuffer}
*/
run(input, args) {
const format = IO_FORMAT[args[0]];
const format = CHR_ENC_CODE_PAGES[args[0]];
const encoded = cptable.utils.encode(format, input);
return new Uint8Array(encoded).buffer;
}

View file

@ -8,7 +8,7 @@
import Operation from "../Operation.mjs";
import Utils from "../Utils.mjs";
import cptable from "codepage";
import {IO_FORMAT} from "../lib/ChrEnc.mjs";
import {CHR_ENC_CODE_PAGES} from "../lib/ChrEnc.mjs";
/**
* Text Encoding Brute Force operation
@ -28,7 +28,7 @@ class TextEncodingBruteForce extends Operation {
"<br><br>",
"Supported charsets are:",
"<ul>",
Object.keys(IO_FORMAT).map(e => `<li>${e}</li>`).join("\n"),
Object.keys(CHR_ENC_CODE_PAGES).map(e => `<li>${e}</li>`).join("\n"),
"</ul>"
].join("\n");
this.infoURL = "https://wikipedia.org/wiki/Character_encoding";
@ -51,15 +51,15 @@ class TextEncodingBruteForce extends Operation {
*/
run(input, args) {
const output = {},
charsets = Object.keys(IO_FORMAT),
charsets = Object.keys(CHR_ENC_CODE_PAGES),
mode = args[0];
charsets.forEach(charset => {
try {
if (mode === "Decode") {
output[charset] = cptable.utils.decode(IO_FORMAT[charset], input);
output[charset] = cptable.utils.decode(CHR_ENC_CODE_PAGES[charset], input);
} else {
output[charset] = Utils.arrayBufferToStr(cptable.utils.encode(IO_FORMAT[charset], input));
output[charset] = Utils.arrayBufferToStr(cptable.utils.encode(CHR_ENC_CODE_PAGES[charset], input));
}
} catch (err) {
output[charset] = "Could not decode.";