diff --git a/package-lock.json b/package-lock.json
index ef2da3f0..28ca1197 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -58,6 +58,8 @@
         "jsonwebtoken": "8.5.1",
         "jsqr": "^1.4.0",
         "jsrsasign": "^11.1.0",
+        "kaitai-struct": "^0.11.0-SNAPSHOT.3",
+        "kaitai-struct-compiler": "^0.11.0-SNAPSHOT20250330.110510.aa10f07",
         "kbpgp": "2.1.15",
         "libbzip2-wasm": "0.0.4",
         "libyara-wasm": "^1.2.1",
@@ -12562,6 +12564,18 @@
         "safe-buffer": "^5.0.1"
       }
     },
+    "node_modules/kaitai-struct": {
+      "version": "0.11.0-SNAPSHOT.3",
+      "resolved": "https://registry.npmjs.org/kaitai-struct/-/kaitai-struct-0.11.0-SNAPSHOT.3.tgz",
+      "integrity": "sha512-VyqB075FfON7M/ajgEhyTO49+fx40Sj2OaoUX02hISE7JrS8E8k94PubXtpg/E8PVaYcFrQXjtRc448mwgLMIQ==",
+      "license": "Apache-2.0"
+    },
+    "node_modules/kaitai-struct-compiler": {
+      "version": "0.11.0-SNAPSHOT20250330.110510.aa10f07",
+      "resolved": "https://registry.npmjs.org/kaitai-struct-compiler/-/kaitai-struct-compiler-0.11.0-SNAPSHOT20250330.110510.aa10f07.tgz",
+      "integrity": "sha512-/ab5yiAskn5fEskjlp/JIVSyalvueQL0OpIt9OIchTNMjWUiswtlArTszhhUdAEIM+J98c7jVq8DeM0e6A7tWg==",
+      "license": "GPL-3.0-or-later"
+    },
     "node_modules/kbpgp": {
       "version": "2.1.15",
       "resolved": "https://registry.npmjs.org/kbpgp/-/kbpgp-2.1.15.tgz",
diff --git a/package.json b/package.json
index b3492a8e..3a9f3998 100644
--- a/package.json
+++ b/package.json
@@ -144,6 +144,8 @@
     "jsonwebtoken": "8.5.1",
     "jsqr": "^1.4.0",
     "jsrsasign": "^11.1.0",
+    "kaitai-struct": "^0.11.0-SNAPSHOT.3",
+    "kaitai-struct-compiler": "^0.11.0-SNAPSHOT20250330.110510.aa10f07",
     "kbpgp": "2.1.15",
     "libbzip2-wasm": "0.0.4",
     "libyara-wasm": "^1.2.1",
diff --git a/src/core/config/Categories.json b/src/core/config/Categories.json
index 71b311e6..3bc42496 100644
--- a/src/core/config/Categories.json
+++ b/src/core/config/Categories.json
@@ -79,7 +79,8 @@
             "Rison Decode",
             "To Modhex",
             "From Modhex",
-            "MIME Decoding"
+            "MIME Decoding",
+            "Kaitai Struct Decode"
         ]
     },
     {
diff --git a/src/core/operations/KaitaiStructDecode.mjs b/src/core/operations/KaitaiStructDecode.mjs
new file mode 100644
index 00000000..43d1a1e9
--- /dev/null
+++ b/src/core/operations/KaitaiStructDecode.mjs
@@ -0,0 +1,238 @@
+/**
+ * @author kendallgoto [k@kgo.to]
+ * @copyright Crown Copyright 2025
+ * @license Apache-2.0
+ */
+
+import Operation from "../Operation.mjs";
+import OperationError from "../errors/OperationError.mjs";
+import KaitaiStructCompiler from "kaitai-struct-compiler";
+import { KaitaiStream } from "kaitai-struct";
+import YAML from "yaml";
+
+/**
+ * Attach a non-enumerable property, keeping the annotation out of Object.entries() and JSON output
+ *
+ * @param {Object} target Object to annotate
+ * @param {string} key Property name
+ * @param {*} value Property value
+ */
+function defineHidden(target, key, value) {
+    Object.defineProperty(target, key, { value, enumerable: false });
+}
+
+/**
+ * Kaitai Struct Decode operation
+ */
+class KaitaiStructDecode extends Operation {
+
+    /**
+     * KaitaiStructDecode constructor
+     */
+    constructor() {
+        super();
+
+        this.name = "Kaitai Struct Decode";
+        this.module = "Kaitai";
+        this.description = "Using a Kaitai Struct schema definition, read the provided input binary data into an annotated structure.";
+        this.infoURL = "https://kaitai.io/";
+        this.inputType = "ArrayBuffer";
+        this.outputType = "JSON";
+        this.presentType = "string";
+        this.args = [
+            {
+                name: "Kaitai definition (.ksy)",
+                type: "text",
+                value: "seq:\n- id: value\n  type: u2"
+            },
+            {
+                name: "Ignore errors",
+                type: "boolean",
+                value: false
+            }
+        ];
+    }
+
+    /**
+     * Compile the supplied schema, parse the input with the generated parser and return
+     * the cleaned, annotated structure
+     *
+     * @param {ArrayBuffer} input
+     * @param {Object[]} args
+     * @returns {Object}
+     * @throws {OperationError} if the schema is unusable, or if parsing fails while errors are not ignored
+     */
+    async run(input, args) {
+        const [ksyDef, errorsOk] = args;
+
+        let ksyDefObj;
+        try {
+            ksyDefObj = YAML.parse(ksyDef);
+        } catch (err) {
+            throw new OperationError(err);
+        }
+        // an empty, scalar or list document cannot carry a Kaitai definition
+        if (ksyDefObj === null || typeof ksyDefObj !== "object" || Array.isArray(ksyDefObj)) {
+            throw new OperationError("Kaitai definition must be a YAML mapping (e.g. with a 'seq' key)");
+        }
+        ksyDefObj.meta = {
+            // apply some default headers to simplify what the user has to provide
+            "file-extension": "none",
+            "endian": "le",
+            "bit-endian": "be",
+            ...ksyDefObj.meta,
+            // ensure id is always 'generated' for deterministic output class / file name
+            "id": "generated"
+        };
+
+        let parsed = {};
+        try {
+            // NOTE(review): last argument is taken to be the compiler's debug flag; cleanKaitai() reads the `_debug` records - confirm against the compiler API
+            const files = await KaitaiStructCompiler.compile("javascript", ksyDefObj, null, true);
+            const ctx = {
+                Generated: {},
+                KaitaiStream
+            };
+            // for dynamic include, modify the wrapper function to store our generated content in a well-defined context object
+            // SECURITY: this evaluates code generated from the user-supplied definition; keep it confined to this operation
+            // eslint-disable-next-line no-eval
+            eval(files["Generated.js"].replace(/\(root, factory\) \{/g, "(_, factory) { return factory(ctx.Generated, ctx.KaitaiStream);"));
+            parsed = new ctx.Generated.Generated(new KaitaiStream(input));
+            parsed._read();
+        } catch (err) {
+            // with "Ignore errors" set, whatever was read before the failure is still returned
+            if (!errorsOk) {
+                throw new OperationError(err);
+            }
+        }
+
+        return this.cleanKaitai(parsed.constructor, parsed);
+    }
+
+    /**
+     * Given a Kaitai Struct object, clean it up by removing Kaitai internal keys
+     * while annotating values using the underlying debug data
+     *
+     * @param {Function} baseobj Constructor of the root parsed object, used to resolve enum definitions
+     * @param {*} inp Raw Kaitai value (object, array, typed array or primitive)
+     * @param {Object} [debug=null] Kaitai debug record describing inp, when one exists
+     * @returns {*} Cleaned value
+     */
+    cleanKaitai(baseobj, inp, debug=null) {
+        if (inp === null || inp === undefined) {
+            // nothing to wrap or annotate
+            return inp;
+        }
+
+        if (typeof inp !== "object") {
+            // Replace primitives with wrapper objects so annotations can be attached to them.
+            // Object() builds the same String / Number / Boolean wrappers and also accepts other primitive types.
+            const wrapped = Object(inp);
+            // values that are assigned to enumerations should receive their enum type and string value as annotations
+            if (debug && "enumName" in debug) {
+                const enumPath = debug.enumName.split(".").slice(1);
+                const enumTypeName = enumPath.pop();
+                const enumParent = enumPath.reduce((parent, segment) => parent?.[segment], baseobj);
+                const label = enumParent?.[enumTypeName]?.[inp];
+                wrapped._type = enumTypeName;
+                // values outside the enumeration have no label; leave them to print as plain values
+                if (label !== undefined) wrapped._valstr = label;
+            }
+            wrapped.start = debug?.start;
+            wrapped.end = debug?.end;
+            return wrapped;
+        }
+
+        if (Array.isArray(inp) || ArrayBuffer.isView(inp)) { // Recursively clean arrays of elements
+            const cleanedList = [];
+            for (let i = 0; i < inp.length; i++) {
+                let elementDebug = {};
+                if (debug && "arr" in debug) {
+                    elementDebug = debug.arr[i] ?? {};
+                } else if (debug && ArrayBuffer.isView(inp)) {
+                    // for ArrayBuffers, Kaitai doesn't add debug arguments since all elements are fixed-size
+                    // instead, we can look at the ArrayBuffer parameters
+                    const elementStart = debug.start + (i * inp.BYTES_PER_ELEMENT);
+                    elementDebug = {
+                        start: elementStart,
+                        end: elementStart + inp.BYTES_PER_ELEMENT
+                    };
+                }
+                cleanedList.push(this.cleanKaitai(baseobj, inp[i], elementDebug));
+            }
+            defineHidden(cleanedList, "start", debug?.start);
+            defineHidden(cleanedList, "end", debug?.end);
+            return cleanedList;
+        }
+
+        // Recursively clean each key in objects
+        const cleaned = {};
+        defineHidden(cleaned, "_type", inp.constructor?.name);
+        if (debug) {
+            defineHidden(cleaned, "start", debug.start);
+            defineHidden(cleaned, "end", debug.end);
+        }
+        const fieldDebug = inp._debug ?? {};
+        for (const [key, value] of Object.entries(inp)) {
+            // debug structure contains all real keys; ignoring Kaitai internal objects or type parametrization values
+            if (!(key in fieldDebug)) continue;
+            cleaned[key] = this.cleanKaitai(baseobj, value, fieldDebug[key]);
+        }
+        return cleaned;
+    }
+
+    /**
+     * Given a Kaitai Struct object, walk the structure to provide printout with type annotations
+     *
+     * @param {Object} inp Cleaned Kaitai object, as returned by cleanKaitai
+     * @param {Number} indent Current depth in printout for prefixed whitespace
+     * @returns {string} Formatted printout text
+     */
+    printKaitai(inp, indent=0) {
+        if (inp === null || typeof inp !== "object") {
+            return "";
+        }
+
+        const pad = "\t".repeat(indent);
+        let out = "";
+        for (const [key, value] of Object.entries(inp)) {
+            if (value === null || typeof value !== "object") {
+                // bare value without annotations: the `in` checks below would throw on it, so print it as-is
+                out += `${pad}${key}: ${value}\n`;
+                continue;
+            }
+
+            const range = `[${value.start}:${value.end ?? ""}]`;
+            if (value.toString() !== "[object Object]" && !Array.isArray(value)) {
+                // wrapped primitive: show the enum label first when there is one
+                if ("_valstr" in value)
+                    out += `${pad}${key}${range}: ${value._valstr} (${value.valueOf()})\n`;
+                else
+                    out += `${pad}${key}${range}: ${value.valueOf()}\n`;
+            } else {
+                if ("_type" in value)
+                    out += `${pad}${key}${range}: [${value._type}]\n`;
+                else if ("start" in value)
+                    out += `${pad}${key}${range}:\n`;
+                else
+                    out += `${pad}${key}:\n`;
+                out += this.printKaitai(value, indent + 1);
+            }
+        }
+        return out;
+    }
+
+    /**
+     * Creates an annotated tree of a Kaitai object by walking the structure and expanding debug
+     * annotations including type hints, binary offsets, and enum strings
+     *
+     * @param {Object} o Kaitai result object with debug annotations applied
+     * @returns {string} Annotated tree of the Kaitai structure
+     */
+    present(o) {
+        return this.printKaitai(o, 0);
+    }
+
+}
+
+export default KaitaiStructDecode;
diff --git a/tests/operations/index.mjs b/tests/operations/index.mjs
index bb7016bb..9b4c846c 100644
--- a/tests/operations/index.mjs
+++ b/tests/operations/index.mjs
@@ -97,6 +97,7 @@ import "./tests/JWK.mjs";
 import "./tests/JWTDecode.mjs";
 import "./tests/JWTSign.mjs";
 import "./tests/JWTVerify.mjs";
+import "./tests/KaitaiStructDecode.mjs";
 import "./tests/LevenshteinDistance.mjs";
 import "./tests/Lorenz.mjs";
 import "./tests/LS47.mjs";
diff --git a/tests/operations/tests/KaitaiStructDecode.mjs b/tests/operations/tests/KaitaiStructDecode.mjs
new file mode 100644
index 00000000..74966c7a
--- /dev/null
+++ b/tests/operations/tests/KaitaiStructDecode.mjs
@@ -0,0 +1,66 @@
+/**
+ * @author kendallgoto [k@kgo.to]
+ * @copyright Crown Copyright 2025
+ * @license Apache-2.0
+ */
+
+import TestRegister from "../../lib/TestRegister.mjs";
+TestRegister.addTests([
+    {
+        "name": "Kaitai Struct Decode: Gif Decode",
+        "input": "R0lGODdhIAA0APABAP",
+        "expectedOutput": "[71,73,70]",
+        "recipeConfig": [
+            {
+                "op": "From Base64",
+                "args": ["A-Za-z0-9+/=", true]
+            },
+            {
+                "op": "Kaitai Struct Decode",
+                "args": [
+                    // https://kaitai.io/#quick-start
+                    "meta:\n  id: gif\n  file-extension: gif\n  endian: le\nseq:\n  - id: header\n    type: header\n  - id: logical_screen\n    type: logical_screen\ntypes:\n  header:\n    seq:\n      - id: magic\n        contents: 'GIF'\n      - id: version\n        size: 3\n  logical_screen:\n    seq:\n      - id: image_width\n        type: u2\n      - id: image_height\n        type: u2\n      - id: flags\n        type: u1\n      - id: bg_color_index\n        type: u1\n      - id: pixel_aspect_ratio\n        type: u1",
+                ],
+            },
+            {
+                "op": "Jq",
+                "args": [
+                    ".header.magic",
+                ],
+            },
+        ],
+    },
+    {
+        "name": "Kaitai Struct Decode: Incomplete Error",
+        "input": "",
+        "expectedOutput": "EOFError: requested 1 bytes, but only 0 bytes available",
+        "recipeConfig": [
+            {
+                "op": "Kaitai Struct Decode",
+                "args": [
+                    "seq:\n- id: entry\n  type: u1\n  repeat: expr\n  repeat-expr: 10", // read 10 uint8s, one by one
+                ],
+            }
+        ],
+    },
+    {
+        "name": "Kaitai Struct Decode: Incomplete Error (ignored)",
+        "input": "\x00\x01\x02\x03\x04",
+        "expectedOutput": "[0,1,2,3,4]",
+        "recipeConfig": [
+            {
+                "op": "Kaitai Struct Decode",
+                "args": [
+                    "seq:\n- id: entry\n  type: u1\n  repeat: expr\n  repeat-expr: 10", // read 10 uint8s, one by one
+                    true
+                ],
+            },
+            {
+                "op": "Jq",
+                "args": [
+                    ".entry",
+                ],
+            },
+        ],
+    }
+]);