Added 'Extract Files' operation and 'Forensics' category.

2025-06-17 03:35:07 -04:00 · 2018-12-14 16:43:03 +00:00 · 2018-12-14 16:43:03 +00:00 · 6aa9d2b492
commit 6aa9d2b492
parent 15fbe5a459
5 changed files with 572 additions and 74 deletions
--- a/src/core/config/Categories.json
+++ b/src/core/config/Categories.json
@ -245,7 +245,8 @@
            "XPath expression",
            "JPath expression",
            "CSS selector",
-            "Extract EXIF"
+            "Extract EXIF",
+            "Extract Files"
        ]
    },
    {
@ -336,14 +337,23 @@
            "From MessagePack"
        ]
    },
+    {
+        "name": "Forensics",
+        "ops": [
+            "Detect File Type",
+            "Scan for Embedded Files",
+            "Extract Files",
+            "Remove EXIF",
+            "Extract EXIF",
+            "Render Image"
+        ]
+    },
    {
        "name": "Other",
        "ops": [
            "Entropy",
            "Frequency distribution",
            "Chi Square",
-            "Detect File Type",
-            "Scan for Embedded Files",
            "Disassemble x86",
            "Pseudo-Random Number Generator",
            "Generate UUID",
@ -351,8 +361,6 @@
            "Generate HOTP",
            "Haversine distance",
            "Render Image",
-            "Remove EXIF",
-            "Extract EXIF",
            "Numberwang",
            "XKCD Random Number"
        ]
--- a/src/core/lib/FileExtraction.mjs
+++ b/src/core/lib/FileExtraction.mjs
@ -0,0 +1,231 @@
+/**
+ * File extraction functions
+ *
+ * @author n1474335 [n1474335@gmail.com]
+ * @copyright Crown Copyright 2018
+ * @license Apache-2.0
+ *
+ */
+import Stream from "./Stream";
+
+/**
+ * Carves a single embedded file out of a byte buffer, choosing the extraction
+ * routine from the detected mime type.
+ *
+ * @param {Uint8Array} bytes - Buffer that contains the embedded file
+ * @param {Object} fileDetail - Detection result describing the embedded file
+ * @param {string} fileDetail.mime - Mime type used to select the extractor
+ * @param {string} fileDetail.ext - Extension used in the resulting file name
+ * @param {number} fileDetail.offset - Index in bytes at which the file starts
+ * @returns {File} File named extracted_at_0x<hex offset>.<ext>
+ * @throws {Error} If no extractor exists for the mime type, or the extractor fails
+ */
+export function extractFile(bytes, fileDetail) {
+    const extractors = new Map([
+        ["image/jpeg", extractJPEG],
+        ["application/x-msdownload", extractMZPE],
+        ["application/pdf", extractPDF],
+        ["application/zip", extractZIP]
+    ]);
+
+    const extractor = extractors.get(fileDetail.mime);
+    if (extractor === undefined) {
+        throw new Error(`No extraction algorithm available for "${fileDetail.mime}" files`);
+    }
+
+    const carved = extractor(bytes, fileDetail.offset);
+    const hexOffset = fileDetail.offset.toString(16);
+
+    return new File([carved], `extracted_at_0x${hexOffset}.${fileDetail.ext}`);
+}
+
+
+/**
+ * JPEG extractor.
+ *
+ * Walks the marker structure starting at the given offset and returns
+ * everything up to and including the first End of Image (0xffd9) marker that
+ * is reached. Length-prefixed segments are skipped using their length field,
+ * so markers inside segment payloads (e.g. an embedded thumbnail) are ignored.
+ *
+ * @param {Uint8Array} bytes
+ * @param {number} offset - Index of the Start of Image marker
+ * @returns {Uint8Array}
+ * @throws {Error} If a marker or segment length is invalid, or the data ends
+ *     before an End of Image marker is found
+ */
+export function extractJPEG(bytes, offset) {
+    const stream = new Stream(bytes.slice(offset));
+
+    // Stream.continueUntil() always steps past the current byte before it starts
+    // comparing, so only call it when the cursor is not already on a 0xff.
+    // Otherwise a marker that directly follows another one would be skipped.
+    const seekNextMarker = () => {
+        if (stream.bytes[stream.position] !== 0xff) stream.continueUntil(0xff);
+    };
+
+    // Skips a segment whose first two bytes are its big endian length. The
+    // length counts its own two bytes, so anything below 2 is malformed and
+    // would move the cursor backwards.
+    const skipSegment = () => {
+        const segmentSize = stream.readInt(2, "be");
+        if (segmentSize < 2) throw new Error("Invalid JPEG segment length: " + segmentSize);
+        stream.position += segmentSize - 2;
+    };
+
+    while (stream.hasMore()) {
+        const marker = stream.getBytes(2);
+        if (marker[0] !== 0xff) throw new Error("Invalid JPEG marker: " + marker);
+
+        switch (marker[1]) {
+            // No length
+            case 0xd8: // Start of Image
+            case 0x01: // For temporary use in arithmetic coding
+                break;
+            case 0xd9: // End found
+                return stream.carve();
+
+            // Fill byte: the real marker code follows, so re-read from the
+            // second 0xff.
+            case 0xff:
+                stream.position--;
+                break;
+
+            // Length-prefixed segment
+            case 0xc0: // Start of frame (Baseline DCT)
+            case 0xc1: // Start of frame (Extended sequential DCT)
+            case 0xc2: // Start of frame (Progressive DCT)
+            case 0xc3: // Start of frame (Lossless sequential)
+            case 0xc4: // Define Huffman Table
+            case 0xc5: // Start of frame (Differential sequential DCT)
+            case 0xc6: // Start of frame (Differential progressive DCT)
+            case 0xc7: // Start of frame (Differential lossless)
+            case 0xc8: // Reserved for JPEG extensions
+            case 0xc9: // Start of frame (Extended sequential DCT)
+            case 0xca: // Start of frame (Progressive DCT)
+            case 0xcb: // Start of frame (Lossless sequential)
+            case 0xcc: // Define arithmetic conditioning table
+            case 0xcd: // Start of frame (Differential sequential DCT)
+            case 0xce: // Start of frame (Differential progressive DCT)
+            case 0xcf: // Start of frame (Differential lossless)
+            case 0xdb: // Define Quantization Table
+            case 0xdc: // Define number of lines (length field + 2 byte payload)
+            case 0xdd: // Define restart interval (length field + 2 byte payload)
+            case 0xde: // Define hierarchical progression
+            case 0xdf: // Expand reference image (length field + 1 byte payload)
+            case 0xe0: // Application-specific
+            case 0xe1: // Application-specific
+            case 0xe2: // Application-specific
+            case 0xe3: // Application-specific
+            case 0xe4: // Application-specific
+            case 0xe5: // Application-specific
+            case 0xe6: // Application-specific
+            case 0xe7: // Application-specific
+            case 0xe8: // Application-specific
+            case 0xe9: // Application-specific
+            case 0xea: // Application-specific
+            case 0xeb: // Application-specific
+            case 0xec: // Application-specific
+            case 0xed: // Application-specific
+            case 0xee: // Application-specific
+            case 0xef: // Application-specific
+            case 0xfe: // Comment
+                skipSegment();
+                break;
+
+            // Start of scan: a length-prefixed header followed by entropy-coded data
+            case 0xda:
+                skipSegment();
+                seekNextMarker();
+                break;
+
+            // Continue through encoded data
+            case 0x00: // Byte stuffing
+            case 0xd0: // Restart
+            case 0xd1: // Restart
+            case 0xd2: // Restart
+            case 0xd3: // Restart
+            case 0xd4: // Restart
+            case 0xd5: // Restart
+            case 0xd6: // Restart
+            case 0xd7: // Restart
+                seekNextMarker();
+                break;
+
+            // Unknown marker code: resynchronise on the next 0xff
+            default:
+                seekNextMarker();
+                break;
+        }
+    }
+
+    throw new Error("Unable to parse JPEG successfully");
+}
+
+
+/**
+ * Portable executable extractor.
+ * Assumes that the offset refers to an MZ header.
+ *
+ * The end of the file is taken to be the furthest point covered by the raw
+ * data of any section, and never earlier than the end of the section table.
+ * Data stored after the last section is not included.
+ *
+ * @param {Uint8Array} bytes
+ * @param {number} offset - Index of the MZ header
+ * @returns {Uint8Array}
+ * @throws {Error} If the PE header pointer or the computed end of file lies
+ *     outside the available data
+ */
+export function extractMZPE(bytes, offset) {
+    const stream = new Stream(bytes.slice(offset));
+
+    // Move to PE header pointer
+    stream.moveTo(0x3c);
+    const peAddress = stream.readInt(4, "le");
+
+    // Move to PE header
+    stream.moveTo(peAddress);
+
+    // Get number of sections
+    stream.moveForwardsBy(6);
+    const numSections = stream.readInt(2, "le");
+
+    // Get optional header size
+    stream.moveForwardsBy(12);
+    const optionalHeaderSize = stream.readInt(2, "le");
+
+    // Move past optional header to the first section header
+    stream.moveForwardsBy(2 + optionalHeaderSize);
+
+    // The headers are part of the file, so the section table is the minimum extent
+    let fileEnd = stream.position + numSections * 0x28;
+
+    // Inspect every section header (0x28 bytes each) rather than only the last
+    // one: the final section may have no raw data at all, or may not be the one
+    // that ends furthest into the file.
+    for (let i = 0; i < numSections; i++) {
+        // Skip name, virtual size and virtual address
+        stream.moveForwardsBy(16);
+        const rawDataSize = stream.readInt(4, "le");
+        const rawDataAddress = stream.readInt(4, "le");
+        // Skip the remaining fields of this header
+        stream.moveForwardsBy(16);
+
+        fileEnd = Math.max(fileEnd, rawDataAddress + rawDataSize);
+    }
+
+    // Move to end of the furthest section
+    stream.moveTo(fileEnd);
+
+    return stream.carve();
+}
+
+
+/**
+ * PDF extractor.
+ *
+ * Carves from the given offset up to and including the first end-of-file
+ * marker (%%EOF) found after it, plus an optional trailing CR and/or LF.
+ * Because the first marker is used, any data following it is not included.
+ *
+ * @param {Uint8Array} bytes
+ * @param {number} offset - Index at which the PDF starts
+ * @returns {Uint8Array}
+ * @throws {Error} If no end-of-file marker is found
+ */
+export function extractPDF(bytes, offset) {
+    const stream = new Stream(bytes.slice(offset));
+
+    // Find end-of-file marker (%%EOF)
+    stream.continueUntil([0x25, 0x25, 0x45, 0x4f, 0x46]);
+
+    // The search runs off the end of the stream when there is no marker. Fail
+    // like the JPEG extractor does instead of returning the rest of the buffer.
+    if (!stream.hasMore()) throw new Error("Unable to find PDF end-of-file marker");
+
+    stream.moveForwardsBy(5);
+    stream.consumeIf(0x0d);
+    stream.consumeIf(0x0a);
+
+    return stream.carve();
+}
+
+
+/**
+ * ZIP extractor.
+ *
+ * Carves from the given offset up to the end of the first End of central
+ * directory record found after it, including that record's comment.
+ *
+ * @param {Uint8Array} bytes
+ * @param {number} offset - Index at which the ZIP archive starts
+ * @returns {Uint8Array}
+ * @throws {Error} If no End of central directory record is found
+ */
+export function extractZIP(bytes, offset) {
+    const stream = new Stream(bytes.slice(offset));
+
+    // Find End of central directory record
+    stream.continueUntil([0x50, 0x4b, 0x05, 0x06]);
+
+    // The search runs off the end of the stream when there is no record. Fail
+    // like the JPEG extractor does instead of returning the rest of the buffer.
+    if (!stream.hasMore()) throw new Error("Unable to find ZIP End of central directory record");
+
+    // Get comment length and consume
+    stream.moveForwardsBy(20);
+    const commentLength = stream.readInt(2, "le");
+    stream.moveForwardsBy(commentLength);
+
+    return stream.carve();
+}
--- a/src/core/lib/Stream.mjs
+++ b/src/core/lib/Stream.mjs
@ -0,0 +1,164 @@
+/**
+ * Stream class for parsing binary protocols.
+ *
+ * @author n1474335 [n1474335@gmail.com]
+ * @author tlwr [toby@toby.codes]
+ * @copyright Crown Copyright 2018
+ * @license Apache-2.0
+ *
+ */
+
+/**
+ * A Stream can be used to traverse a binary blob, interpreting sections of it
+ * as various data types.
+ *
+ * The stream keeps a reference to the supplied bytes and a cursor (position)
+ * that the read and move methods advance. Apart from moveTo(), no method
+ * checks the cursor against the length of the data.
+ */
+export default class Stream {
+
+    /**
+     * Stream constructor.
+     *
+     * @param {Uint8Array} input
+     */
+    constructor(input) {
+        this.bytes = input;
+        this.position = 0;
+    }
+
+    /**
+     * Get a number of bytes from the current position and advance past them.
+     * Fewer bytes are returned if the end of the data is reached, but the
+     * position is still advanced by the full amount.
+     *
+     * @param {number} numBytes
+     * @returns {Uint8Array}
+     */
+    getBytes(numBytes) {
+        const newPosition = this.position + numBytes;
+        const bytes = this.bytes.slice(this.position, newPosition);
+        this.position = newPosition;
+        return bytes;
+    }
+
+    /**
+     * Interpret the following bytes as a string, stopping at the next null byte or
+     * the supplied limit. The position always advances by the supplied limit.
+     *
+     * @param {number} numBytes
+     * @returns {string}
+     */
+    readString(numBytes) {
+        let result = "";
+        for (let i = this.position; i < this.position + numBytes; i++) {
+            const currentByte = this.bytes[i];
+            if (currentByte === 0) break;
+            result += String.fromCharCode(currentByte);
+        }
+        this.position += numBytes;
+        return result;
+    }
+
+    /**
+     * Interpret the following bytes as an unsigned integer in big or little endian.
+     * Bytes beyond the end of the data are read as zero.
+     *
+     * @param {number} numBytes
+     * @param {string} [endianness="be"] - "be" for big endian, anything else for little endian
+     * @returns {number}
+     */
+    readInt(numBytes, endianness="be") {
+        // Accumulate arithmetically rather than with << and |, which operate on
+        // signed 32-bit values and turn a 4 byte value with its top bit set
+        // into a negative number.
+        let val = 0;
+        if (endianness === "be") {
+            for (let i = this.position; i < this.position + numBytes; i++) {
+                val = val * 256 + (this.bytes[i] || 0);
+            }
+        } else {
+            for (let i = this.position + numBytes - 1; i >= this.position; i--) {
+                val = val * 256 + (this.bytes[i] || 0);
+            }
+        }
+        this.position += numBytes;
+        return val;
+    }
+
+    /**
+     * Consume the stream until we reach the specified byte or sequence of bytes.
+     *
+     * The search starts at the byte AFTER the current position, so a match at
+     * the current position itself is not reported. On return the position is
+     * the index of the (first byte of the) match, or the end of the data if
+     * there is no match.
+     *
+     * @param {number|List<number>} val
+     */
+    continueUntil(val) {
+        if (typeof val === "number") {
+            while (++this.position < this.bytes.length && this.bytes[this.position] !== val) {
+                continue;
+            }
+            return;
+        }
+
+        // val is an array
+        let found = false;
+        while (!found && this.position < this.bytes.length) {
+            // Find the next candidate for the first byte of the sequence
+            while (++this.position < this.bytes.length && this.bytes[this.position] !== val[0]) {
+                continue;
+            }
+
+            // Only a position inside the data can start a match. Reads past the
+            // end give undefined, which never equals a byte of the sequence.
+            found = this.position < this.bytes.length;
+            for (let i = 1; found && i < val.length; i++) {
+                if (this.bytes[this.position + i] !== val[i])
+                    found = false;
+            }
+        }
+    }
+
+    /**
+     * Consume the next byte if it matches the supplied value.
+     *
+     * @param {number} val
+     */
+    consumeIf(val) {
+        if (this.bytes[this.position] === val)
+            this.position++;
+    }
+
+    /**
+     * Move forwards through the stream by the specified number of bytes.
+     * The resulting position is not bounds checked.
+     *
+     * @param {number} numBytes
+     */
+    moveForwardsBy(numBytes) {
+        this.position += numBytes;
+    }
+
+    /**
+     * Move to a specified position in the stream.
+     *
+     * The position one past the final byte is allowed: it is where the cursor
+     * sits once everything has been consumed, and carve() then returns all of
+     * the data.
+     *
+     * @param {number} pos
+     * @throws {Error} If pos is negative or beyond the end of the data
+     */
+    moveTo(pos) {
+        if (pos < 0 || pos > this.bytes.length)
+            throw new Error("Cannot move to position " + pos + " in stream. Out of bounds.");
+        this.position = pos;
+    }
+
+    /**
+     * Returns true if there are more bytes left in the stream.
+     *
+     * @returns {boolean}
+     */
+    hasMore() {
+        return this.position < this.bytes.length;
+    }
+
+    /**
+     * Returns a slice of the stream from its start up to, but not including,
+     * the current position.
+     *
+     * @returns {Uint8Array}
+     */
+    carve() {
+        return this.bytes.slice(0, this.position);
+    }
+
+}
--- a/src/core/operations/ExtractFiles.mjs
+++ b/src/core/operations/ExtractFiles.mjs
@ -0,0 +1,91 @@
+/**
+ * @author n1474335 [n1474335@gmail.com]
+ * @copyright Crown Copyright 2018
+ * @license Apache-2.0
+ */
+
+import Operation from "../Operation";
+// import OperationError from "../errors/OperationError";
+import Magic from "../lib/Magic";
+import Utils from "../Utils";
+import {extractFile} from "../lib/FileExtraction";
+
+/**
+ * Extract Files operation
+ *
+ * Scans the input for embedded files and carves out each one for which an
+ * extraction routine is available.
+ */
+class ExtractFiles extends Operation {
+
+    /**
+     * ExtractFiles constructor
+     */
+    constructor() {
+        super();
+
+        this.name = "Extract Files";
+        this.module = "Default";
+        this.description = "Performs file carving to attempt to extract files from the input.<br><br>This operation is currently capable of carving out the following formats:<ul><li>JPG</li><li>EXE</li><li>ZIP</li><li>PDF</li></ul>";
+        this.infoURL = "https://forensicswiki.org/wiki/File_Carving";
+        this.inputType = "ArrayBuffer";
+        this.outputType = "List<File>";
+        this.presentType = "html";
+        this.args = [];
+    }
+
+    /**
+     * Detects candidate files at every offset of the input and returns the
+     * ones that could be extracted.
+     *
+     * @param {ArrayBuffer} input
+     * @param {Object[]} args
+     * @returns {List<File>}
+     */
+    run(input, args) {
+        const bytes = new Uint8Array(input);
+
+        // Scan for embedded files
+        const fileDetails = scanForEmbeddedFiles(bytes);
+
+        // Extract each file that we support
+        const files = [];
+        for (const fileDetail of fileDetails) {
+            try {
+                files.push(extractFile(bytes, fileDetail));
+            } catch (err) {
+                // Deliberately best effort: extractFile throws both for types
+                // it has no extractor for and for candidates that fail to
+                // parse. Either way this candidate is skipped so that the
+                // remaining ones are still returned.
+            }
+        }
+
+        return files;
+    }
+
+    /**
+     * Displays the files in HTML for web apps.
+     *
+     * @param {File[]} files
+     * @returns {html}
+     */
+    async present(files) {
+        return await Utils.displayFilesAsHTML(files);
+    }
+
+}
+
+/**
+ * Runs file type detection against the data starting at every offset and
+ * records each offset at which a type is recognised.
+ *
+ * TODO refactor
+ *
+ * @param {Uint8Array} data
+ * @returns {Object[]} One entry per detection, holding the offset together
+ *     with the ext, mime and desc reported by the detector
+ */
+function scanForEmbeddedFiles(data) {
+    const types = [];
+
+    for (let i = 0; i < data.length; i++) {
+        // subarray() returns a view onto the same memory, whereas slice()
+        // would copy the whole remainder of the input at every offset.
+        // NOTE(review): assumes Magic.magicFileType only reads its argument
+        // by index/length and does not modify it - confirm.
+        const type = Magic.magicFileType(data.subarray(i));
+        if (type) {
+            types.push({
+                offset: i,
+                ext: type.ext,
+                mime: type.mime,
+                desc: type.desc
+            });
+        }
+    }
+
+    return types;
+}
+
+export default ExtractFiles;