Merge branch 'feature/bombe' into feature/typex

2025-07-05 12:22:21 -04:00 · 2019-03-12 18:21:34 +00:00 · 2019-03-12 18:21:34 +00:00 · 1a707eab86
commit 1a707eab86
parent 9a0b784153 e2efc3e8e8
52 changed files with 4257 additions and 750 deletions
--- a/src/core/lib/BCD.mjs
+++ b/src/core/lib/BCD.mjs
--- a/src/core/lib/Base58.mjs
+++ b/src/core/lib/Base58.mjs
--- a/src/core/lib/Base64.mjs
+++ b/src/core/lib/Base64.mjs
--- a/src/core/lib/CanvasComponents.mjs
+++ b/src/core/lib/CanvasComponents.mjs
--- a/src/core/lib/FileSignatures.mjs
+++ b/src/core/lib/FileSignatures.mjs
--- a/src/core/lib/FileType.mjs
+++ b/src/core/lib/FileType.mjs
@ -0,0 +1,263 @@
+/**
+ * File type functions
+ *
+ * @author n1474335 [n1474335@gmail.com]
+ * @copyright Crown Copyright 2018
+ * @license Apache-2.0
+ *
+ */
+import {FILE_SIGNATURES} from "./FileSignatures";
+import {sendStatusMessage} from "../Utils";
+
+
+/**
+ * Checks whether a signature matches a buffer.
+ *
+ * @param {Object|Object[]} sig - A dictionary of offsets with values assigned to them.
+ *   These values can be numbers for static checks, arrays of potential valid matches,
+ *   or bespoke functions to check the validity of the buffer value at that offset.
+ * @param {Uint8Array} buf
+ * @param {number} [offset=0] Where in the buffer to start searching from
+ * @returns {boolean}
+ */
+function signatureMatches(sig, buf, offset=0) {
+    // Using a length check seems to be more performant than `sig instanceof Array`
+    if (sig.length) {
+        // sig is an Array - return true if any of them match
+        // The following `reduce` method is nice, but performance matters here, so we
+        // opt for a faster, if less elegant, for loop.
+        // return sig.reduce((acc, s) => acc || bytesMatch(s, buf, offset), false);
+        for (let i = 0; i < sig.length; i++) {
+            if (bytesMatch(sig[i], buf, offset)) return true;
+        }
+        return false;
+    } else {
+        return bytesMatch(sig, buf, offset);
+    }
+}
+
+
+/**
+ * Checks whether a set of bytes match the given buffer.
+ *
+ * @param {Object} sig - A dictionary of offsets with values assigned to them.
+ *   These values can be numbers for static checks, arrays of potential valid matches,
+ *   or bespoke functions to check the validity of the buffer value at that offset.
+ * @param {Uint8Array} buf
+ * @param {number} [offset=0] Where in the buffer to start searching from
+ * @returns {boolean}
+ */
+function bytesMatch(sig, buf, offset=0) {
+    for (const sigoffset in sig) {
+        const pos = parseInt(sigoffset, 10) + offset;
+        switch (typeof sig[sigoffset]) {
+            case "number": // Static check
+                if (buf[pos] !== sig[sigoffset])
+                    return false;
+                break;
+            case "object": // Array of options
+                if (sig[sigoffset].indexOf(buf[pos]) < 0)
+                    return false;
+                break;
+            case "function": // More complex calculation
+                if (!sig[sigoffset](buf[pos]))
+                    return false;
+                break;
+            default:
+                throw new Error(`Unrecognised signature type at offset ${sigoffset}`);
+        }
+    }
+    return true;
+}
+
+
+/**
+ * Given a buffer, detects magic byte sequences at specific positions and returns the
+ * extension and mime type.
+ *
+ * @param {Uint8Array} buf
+ * @param {string[]} [categories=All] - Which categories of file to look for
+ * @returns {Object[]} types
+ * @returns {string} type.name - Name of file type
+ * @returns {string} type.ext - File extension
+ * @returns {string} type.mime - Mime type
+ * @returns {string} [type.desc] - Description
+ */
+export function detectFileType(buf, categories=Object.keys(FILE_SIGNATURES)) {
+    if (!(buf && buf.length > 1)) {
+        return [];
+    }
+
+    const matchingFiles = [];
+    const signatures = {};
+
+    for (const cat in FILE_SIGNATURES) {
+        if (categories.includes(cat)) {
+            signatures[cat] = FILE_SIGNATURES[cat];
+        }
+    }
+
+    for (const cat in signatures) {
+        const category = signatures[cat];
+
+        category.forEach(filetype => {
+            if (signatureMatches(filetype.signature, buf)) {
+                matchingFiles.push(filetype);
+            }
+        });
+    }
+    return matchingFiles;
+}
+
+
+/**
+ * Given a buffer, searches for magic byte sequences at all possible positions and returns
+ * the extensions and mime types.
+ *
+ * @param {Uint8Array} buf
+ * @param {string[]} [categories=All] - Which categories of file to look for
+ * @returns {Object[]} foundFiles
+ * @returns {number} foundFiles.offset - The position in the buffer at which this file was found
+ * @returns {Object} foundFiles.fileDetails
+ * @returns {string} foundFiles.fileDetails.name - Name of file type
+ * @returns {string} foundFiles.fileDetails.ext - File extension
+ * @returns {string} foundFiles.fileDetails.mime - Mime type
+ * @returns {string} [foundFiles.fileDetails.desc] - Description
+ */
+export function scanForFileTypes(buf, categories=Object.keys(FILE_SIGNATURES)) {
+    if (!(buf && buf.length > 1)) {
+        return [];
+    }
+
+    const foundFiles = [];
+    const signatures = {};
+
+    for (const cat in FILE_SIGNATURES) {
+        if (categories.includes(cat)) {
+            signatures[cat] = FILE_SIGNATURES[cat];
+        }
+    }
+
+    for (const cat in signatures) {
+        const category = signatures[cat];
+
+        for (let i = 0; i < category.length; i++) {
+            const filetype = category[i];
+            const sigs = filetype.signature.length ? filetype.signature : [filetype.signature];
+
+            sigs.forEach(sig => {
+                let pos = 0;
+                while ((pos = locatePotentialSig(buf, sig, pos)) >= 0) {
+                    if (bytesMatch(sig, buf, pos)) {
+                        sendStatusMessage(`Found potential signature for ${filetype.name} at pos ${pos}`);
+                        foundFiles.push({
+                            offset: pos,
+                            fileDetails: filetype
+                        });
+                    }
+                    pos++;
+                }
+            });
+        }
+    }
+
+    // Return found files in order of increasing offset
+    return foundFiles.sort((a, b) => {
+        return a.offset - b.offset;
+    });
+}
+
+
+/**
+ * Fastcheck function to quickly scan the buffer for the first byte in a signature.
+ *
+ * @param {Uint8Array} buf - The buffer to search
+ * @param {Object} sig - A single signature object (Not an array of signatures)
+ * @param {number} offset - Where to start search from
+ * @returs {number} The position of the match or -1 if one cannot be found.
+ */
+function locatePotentialSig(buf, sig, offset) {
+    // Find values for first key and value in sig
+    const k = parseInt(Object.keys(sig)[0], 10);
+    const v = Object.values(sig)[0];
+    switch (typeof v) {
+        case "number":
+            return buf.indexOf(v, offset + k) - k;
+        case "object":
+            for (let i = offset + k; i < buf.length; i++) {
+                if (v.indexOf(buf[i]) >= 0) return i - k;
+            }
+            return -1;
+        case "function":
+            for (let i = offset + k; i < buf.length; i++) {
+                if (v(buf[i])) return i - k;
+            }
+            return -1;
+        default:
+            throw new Error("Unrecognised signature type");
+    }
+}
+
+
+/**
+ * Detects whether the given buffer is a file of the type specified.
+ *
+ * @param {string|RegExp} type
+ * @param {Uint8Array} buf
+ * @returns {string|false} The mime type or false if the type does not match
+ */
+export function isType(type, buf) {
+    const types = detectFileType(buf);
+
+    if (!(types && types.length)) return false;
+
+    if (typeof type === "string") {
+        return types.reduce((acc, t) => {
+            const mime = t.mime.startsWith(type) ? t.mime : false;
+            return acc || mime;
+        }, false);
+    } else if (type instanceof RegExp) {
+        return types.reduce((acc, t) => {
+            const mime = type.test(t.mime) ? t.mime : false;
+            return acc || mime;
+        }, false);
+    } else {
+        throw new Error("Invalid type input.");
+    }
+}
+
+
+/**
+ * Detects whether the given buffer contains an image file.
+ *
+ * @param {Uint8Array} buf
+ * @returns {string|false} The mime type or false if the type does not match
+ */
+export function isImage(buf) {
+    return isType("image", buf);
+}
+
+
+/**
+ * Attempts to extract a file from a data stream given its offset and extractor function.
+ *
+ * @param {Uint8Array} bytes
+ * @param {Object} fileDetail
+ * @param {string} fileDetail.mime
+ * @param {string} fileDetail.extension
+ * @param {Function} fileDetail.extractor
+ * @param {number} offset
+ * @returns {File}
+ */
+export function extractFile(bytes, fileDetail, offset) {
+    if (fileDetail.extractor) {
+        sendStatusMessage(`Attempting to extract ${fileDetail.name} at pos ${offset}...`);
+        const fileData = fileDetail.extractor(bytes, offset);
+        const ext = fileDetail.extension.split(",")[0];
+        return new File([fileData], `extracted_at_0x${offset.toString(16)}.${ext}`, {
+            type: fileDetail.mime
+        });
+    }
+
+    throw new Error(`No extraction algorithm available for "${fileDetail.mime}" files`);
+}
--- a/src/core/lib/Magic.mjs
+++ b/src/core/lib/Magic.mjs
@ -2,6 +2,7 @@ import OperationConfig from "../config/OperationConfig.json";
 import Utils from "../Utils";
 import Recipe from "../Recipe";
 import Dish from "../Dish";
+import {detectFileType} from "./FileType";
 import chiSquared from "chi-squared";

 /**
@ -92,7 +93,14 @@ class Magic {
     * @returns {string} [type.desc] - Description
     */
    detectFileType() {
-        return Magic.magicFileType(this.inputBuffer);
+        const fileType = detectFileType(this.inputBuffer);
+
+        if (!fileType.length) return null;
+        return {
+            ext: fileType[0].extension,
+            mime: fileType[0].mime,
+            desc: fileType[0].description
+        };
    }

    /**
@ -785,452 +793,9 @@ class Magic {
        }[code];
    }

-
-    /**
-     * Given a buffer, detects magic byte sequences at specific positions and returns the
-     * extension and mime type.
-     *
-     * @param {Uint8Array} buf
-     * @returns {Object} type
-     * @returns {string} type.ext - File extension
-     * @returns {string} type.mime - Mime type
-     * @returns {string} [type.desc] - Description
-     */
-    static magicFileType(buf) {
-        if (!(buf && buf.length > 1)) {
-            return null;
-        }
-
-        if (buf[0] === 0xFF && buf[1] === 0xD8 && buf[2] === 0xFF) {
-            return {
-                ext: "jpg",
-                mime: "image/jpeg"
-            };
-        }
-
-        if (buf[0] === 0x89 && buf[1] === 0x50 && buf[2] === 0x4E && buf[3] === 0x47) {
-            return {
-                ext: "png",
-                mime: "image/png"
-            };
-        }
-
-        if (buf[0] === 0x47 && buf[1] === 0x49 && buf[2] === 0x46) {
-            return {
-                ext: "gif",
-                mime: "image/gif"
-            };
-        }
-
-        if (buf[8] === 0x57 && buf[9] === 0x45 && buf[10] === 0x42 && buf[11] === 0x50) {
-            return {
-                ext: "webp",
-                mime: "image/webp"
-            };
-        }
-
-        // needs to be before `tif` check
-        if (((buf[0] === 0x49 && buf[1] === 0x49 && buf[2] === 0x2A && buf[3] === 0x0) || (buf[0] === 0x4D && buf[1] === 0x4D && buf[2] === 0x0 && buf[3] === 0x2A)) && buf[8] === 0x43 && buf[9] === 0x52) {
-            return {
-                ext: "cr2",
-                mime: "image/x-canon-cr2"
-            };
-        }
-
-        if ((buf[0] === 0x49 && buf[1] === 0x49 && buf[2] === 0x2A && buf[3] === 0x0) || (buf[0] === 0x4D && buf[1] === 0x4D && buf[2] === 0x0 && buf[3] === 0x2A)) {
-            return {
-                ext: "tif",
-                mime: "image/tiff"
-            };
-        }
-
-        if (buf[0] === 0x42 && buf[1] === 0x4D) {
-            return {
-                ext: "bmp",
-                mime: "image/bmp"
-            };
-        }
-
-        if (buf[0] === 0x49 && buf[1] === 0x49 && buf[2] === 0xBC) {
-            return {
-                ext: "jxr",
-                mime: "image/vnd.ms-photo"
-            };
-        }
-
-        if (buf[0] === 0x38 && buf[1] === 0x42 && buf[2] === 0x50 && buf[3] === 0x53) {
-            return {
-                ext: "psd",
-                mime: "image/vnd.adobe.photoshop"
-            };
-        }
-
-        // needs to be before `zip` check
-        if (buf[0] === 0x50 && buf[1] === 0x4B && buf[2] === 0x3 && buf[3] === 0x4 && buf[30] === 0x6D && buf[31] === 0x69 && buf[32] === 0x6D && buf[33] === 0x65 && buf[34] === 0x74 && buf[35] === 0x79 && buf[36] === 0x70 && buf[37] === 0x65 && buf[38] === 0x61 && buf[39] === 0x70 && buf[40] === 0x70 && buf[41] === 0x6C && buf[42] === 0x69 && buf[43] === 0x63 && buf[44] === 0x61 && buf[45] === 0x74 && buf[46] === 0x69 && buf[47] === 0x6F && buf[48] === 0x6E && buf[49] === 0x2F && buf[50] === 0x65 && buf[51] === 0x70 && buf[52] === 0x75 && buf[53] === 0x62 && buf[54] === 0x2B && buf[55] === 0x7A && buf[56] === 0x69 && buf[57] === 0x70) {
-            return {
-                ext: "epub",
-                mime: "application/epub+zip"
-            };
-        }
-
-        if (buf[0] === 0x50 && buf[1] === 0x4B && (buf[2] === 0x3 || buf[2] === 0x5 || buf[2] === 0x7) && (buf[3] === 0x4 || buf[3] === 0x6 || buf[3] === 0x8)) {
-            return {
-                ext: "zip",
-                mime: "application/zip"
-            };
-        }
-
-        if (buf[257] === 0x75 && buf[258] === 0x73 && buf[259] === 0x74 && buf[260] === 0x61 && buf[261] === 0x72) {
-            return {
-                ext: "tar",
-                mime: "application/x-tar"
-            };
-        }
-
-        if (buf[0] === 0x52 && buf[1] === 0x61 && buf[2] === 0x72 && buf[3] === 0x21 && buf[4] === 0x1A && buf[5] === 0x7 && (buf[6] === 0x0 || buf[6] === 0x1)) {
-            return {
-                ext: "rar",
-                mime: "application/x-rar-compressed"
-            };
-        }
-
-        if (buf[0] === 0x1F && buf[1] === 0x8B && buf[2] === 0x8) {
-            return {
-                ext: "gz",
-                mime: "application/gzip"
-            };
-        }
-
-        if (buf[0] === 0x42 && buf[1] === 0x5A && buf[2] === 0x68) {
-            return {
-                ext: "bz2",
-                mime: "application/x-bzip2"
-            };
-        }
-
-        if (buf[0] === 0x37 && buf[1] === 0x7A && buf[2] === 0xBC && buf[3] === 0xAF && buf[4] === 0x27 && buf[5] === 0x1C) {
-            return {
-                ext: "7z",
-                mime: "application/x-7z-compressed"
-            };
-        }
-
-        if (buf[0] === 0x78 && buf[1] === 0x01) {
-            return {
-                ext: "dmg, zlib",
-                mime: "application/x-apple-diskimage, application/x-deflate"
-            };
-        }
-
-        if ((buf[0] === 0x0 && buf[1] === 0x0 && buf[2] === 0x0 && (buf[3] === 0x18 || buf[3] === 0x20) && buf[4] === 0x66 && buf[5] === 0x74 && buf[6] === 0x79 && buf[7] === 0x70) || (buf[0] === 0x33 && buf[1] === 0x67 && buf[2] === 0x70 && buf[3] === 0x35) || (buf[0] === 0x0 && buf[1] === 0x0 && buf[2] === 0x0 && buf[3] === 0x1C && buf[4] === 0x66 && buf[5] === 0x74 && buf[6] === 0x79 && buf[7] === 0x70 && buf[8] === 0x6D && buf[9] === 0x70 && buf[10] === 0x34 && buf[11] === 0x32 && buf[16] === 0x6D && buf[17] === 0x70 && buf[18] === 0x34 && buf[19] === 0x31 && buf[20] === 0x6D && buf[21] === 0x70 && buf[22] === 0x34 && buf[23] === 0x32 && buf[24] === 0x69 && buf[25] === 0x73 && buf[26] === 0x6F && buf[27] === 0x6D)) {
-            return {
-                ext: "mp4",
-                mime: "video/mp4"
-            };
-        }
-
-        if ((buf[0] === 0x0 && buf[1] === 0x0 && buf[2] === 0x0 && buf[3] === 0x1C && buf[4] === 0x66 && buf[5] === 0x74 && buf[6] === 0x79 && buf[7] === 0x70 && buf[8] === 0x4D && buf[9] === 0x34 && buf[10] === 0x56)) {
-            return {
-                ext: "m4v",
-                mime: "video/x-m4v"
-            };
-        }
-
-        if (buf[0] === 0x4D && buf[1] === 0x54 && buf[2] === 0x68 && buf[3] === 0x64) {
-            return {
-                ext: "mid",
-                mime: "audio/midi"
-            };
-        }
-
-        // needs to be before the `webm` check
-        if (buf[31] === 0x6D && buf[32] === 0x61 && buf[33] === 0x74 && buf[34] === 0x72 && buf[35] === 0x6f && buf[36] === 0x73 && buf[37] === 0x6B && buf[38] === 0x61) {
-            return {
-                ext: "mkv",
-                mime: "video/x-matroska"
-            };
-        }
-
-        if (buf[0] === 0x1A && buf[1] === 0x45 && buf[2] === 0xDF && buf[3] === 0xA3) {
-            return {
-                ext: "webm",
-                mime: "video/webm"
-            };
-        }
-
-        if (buf[0] === 0x0 && buf[1] === 0x0 && buf[2] === 0x0 && buf[3] === 0x14 && buf[4] === 0x66 && buf[5] === 0x74 && buf[6] === 0x79 && buf[7] === 0x70) {
-            return {
-                ext: "mov",
-                mime: "video/quicktime"
-            };
-        }
-
-        if (buf[0] === 0x52 && buf[1] === 0x49 && buf[2] === 0x46 && buf[3] === 0x46 && buf[8] === 0x41 && buf[9] === 0x56 && buf[10] === 0x49) {
-            return {
-                ext: "avi",
-                mime: "video/x-msvideo"
-            };
-        }
-
-        if (buf[0] === 0x30 && buf[1] === 0x26 && buf[2] === 0xB2 && buf[3] === 0x75 && buf[4] === 0x8E && buf[5] === 0x66 && buf[6] === 0xCF && buf[7] === 0x11 && buf[8] === 0xA6 && buf[9] === 0xD9) {
-            return {
-                ext: "wmv",
-                mime: "video/x-ms-wmv"
-            };
-        }
-
-        if (buf[0] === 0x0 && buf[1] === 0x0 && buf[2] === 0x1 && buf[3].toString(16)[0] === "b") {
-            return {
-                ext: "mpg",
-                mime: "video/mpeg"
-            };
-        }
-
-        if ((buf[0] === 0x49 && buf[1] === 0x44 && buf[2] === 0x33) || (buf[0] === 0xFF && buf[1] === 0xfb)) {
-            return {
-                ext: "mp3",
-                mime: "audio/mpeg"
-            };
-        }
-
-        if ((buf[4] === 0x66 && buf[5] === 0x74 && buf[6] === 0x79 && buf[7] === 0x70 && buf[8] === 0x4D && buf[9] === 0x34 && buf[10] === 0x41) || (buf[0] === 0x4D && buf[1] === 0x34 && buf[2] === 0x41 && buf[3] === 0x20)) {
-            return {
-                ext: "m4a",
-                mime: "audio/m4a"
-            };
-        }
-
-        if (buf[0] === 0x4F && buf[1] === 0x67 && buf[2] === 0x67 && buf[3] === 0x53) {
-            return {
-                ext: "ogg",
-                mime: "audio/ogg"
-            };
-        }
-
-        if (buf[0] === 0x66 && buf[1] === 0x4C && buf[2] === 0x61 && buf[3] === 0x43) {
-            return {
-                ext: "flac",
-                mime: "audio/x-flac"
-            };
-        }
-
-        if (buf[0] === 0x52 && buf[1] === 0x49 && buf[2] === 0x46 && buf[3] === 0x46 && buf[8] === 0x57 && buf[9] === 0x41 && buf[10] === 0x56 && buf[11] === 0x45) {
-            return {
-                ext: "wav",
-                mime: "audio/x-wav"
-            };
-        }
-
-        if (buf[0] === 0x23 && buf[1] === 0x21 && buf[2] === 0x41 && buf[3] === 0x4D && buf[4] === 0x52 && buf[5] === 0x0A) {
-            return {
-                ext: "amr",
-                mime: "audio/amr"
-            };
-        }
-
-        if (buf[0] === 0x25 && buf[1] === 0x50 && buf[2] === 0x44 && buf[3] === 0x46) {
-            return {
-                ext: "pdf",
-                mime: "application/pdf"
-            };
-        }
-
-        if (buf[0] === 0x4D && buf[1] === 0x5A) {
-            return {
-                ext: "exe",
-                mime: "application/x-msdownload"
-            };
-        }
-
-        if ((buf[0] === 0x43 || buf[0] === 0x46) && buf[1] === 0x57 && buf[2] === 0x53) {
-            return {
-                ext: "swf",
-                mime: "application/x-shockwave-flash"
-            };
-        }
-
-        if (buf[0] === 0x7B && buf[1] === 0x5C && buf[2] === 0x72 && buf[3] === 0x74 && buf[4] === 0x66) {
-            return {
-                ext: "rtf",
-                mime: "application/rtf"
-            };
-        }
-
-        if (buf[0] === 0x77 && buf[1] === 0x4F && buf[2] === 0x46 && buf[3] === 0x46 && buf[4] === 0x00 && buf[5] === 0x01 && buf[6] === 0x00 && buf[7] === 0x00) {
-            return {
-                ext: "woff",
-                mime: "application/font-woff"
-            };
-        }
-
-        if (buf[0] === 0x77 && buf[1] === 0x4F && buf[2] === 0x46 && buf[3] === 0x32 && buf[4] === 0x00 && buf[5] === 0x01 && buf[6] === 0x00 && buf[7] === 0x00) {
-            return {
-                ext: "woff2",
-                mime: "application/font-woff"
-            };
-        }
-
-        if (buf[34] === 0x4C && buf[35] === 0x50 && ((buf[8] === 0x02 && buf[9] === 0x00 && buf[10] === 0x01) || (buf[8] === 0x01 && buf[9] === 0x00 && buf[10] === 0x00) || (buf[8] === 0x02 && buf[9] === 0x00 && buf[10] === 0x02))) {
-            return {
-                ext: "eot",
-                mime: "application/octet-stream"
-            };
-        }
-
-        if (buf[0] === 0x00 && buf[1] === 0x01 && buf[2] === 0x00 && buf[3] === 0x00 && buf[4] === 0x00) {
-            return {
-                ext: "ttf",
-                mime: "application/font-sfnt"
-            };
-        }
-
-        if (buf[0] === 0x4F && buf[1] === 0x54 && buf[2] === 0x54 && buf[3] === 0x4F && buf[4] === 0x00) {
-            return {
-                ext: "otf",
-                mime: "application/font-sfnt"
-            };
-        }
-
-        if (buf[0] === 0x00 && buf[1] === 0x00 && buf[2] === 0x01 && buf[3] === 0x00) {
-            return {
-                ext: "ico",
-                mime: "image/x-icon"
-            };
-        }
-
-        if (buf[0] === 0x46 && buf[1] === 0x4C && buf[2] === 0x56 && buf[3] === 0x01) {
-            return {
-                ext: "flv",
-                mime: "video/x-flv"
-            };
-        }
-
-        if (buf[0] === 0x25 && buf[1] === 0x21) {
-            return {
-                ext: "ps",
-                mime: "application/postscript"
-            };
-        }
-
-        if (buf[0] === 0xFD && buf[1] === 0x37 && buf[2] === 0x7A && buf[3] === 0x58 && buf[4] === 0x5A && buf[5] === 0x00) {
-            return {
-                ext: "xz",
-                mime: "application/x-xz"
-            };
-        }
-
-        if (buf[0] === 0x53 && buf[1] === 0x51 && buf[2] === 0x4C && buf[3] === 0x69) {
-            return {
-                ext: "sqlite",
-                mime: "application/x-sqlite3"
-            };
-        }
-
-        /**
-         *
-         * Added by n1474335 [n1474335@gmail.com] from here on
-         *
-         */
-        if ((buf[0] === 0x1F && buf[1] === 0x9D) || (buf[0] === 0x1F && buf[1] === 0xA0)) {
-            return {
-                ext: "z, tar.z",
-                mime: "application/x-gtar"
-            };
-        }
-
-        if (buf[0] === 0x7F && buf[1] === 0x45 && buf[2] === 0x4C && buf[3] === 0x46) {
-            return {
-                ext: "none, axf, bin, elf, o, prx, puff, so",
-                mime: "application/x-executable",
-                desc: "Executable and Linkable Format file. No standard file extension."
-            };
-        }
-
-        if (buf[0] === 0xCA && buf[1] === 0xFE && buf[2] === 0xBA && buf[3] === 0xBE) {
-            return {
-                ext: "class",
-                mime: "application/java-vm"
-            };
-        }
-
-        if (buf[0] === 0xEF && buf[1] === 0xBB && buf[2] === 0xBF) {
-            return {
-                ext: "txt",
-                mime: "text/plain",
-                desc: "UTF-8 encoded Unicode byte order mark detected, commonly but not exclusively seen in text files."
-            };
-        }
-
-        // Must be before Little-endian UTF-16 BOM
-        if (buf[0] === 0xFF && buf[1] === 0xFE && buf[2] === 0x00 && buf[3] === 0x00) {
-            return {
-                ext: "UTF32LE",
-                mime: "charset/utf32le",
-                desc: "Little-endian UTF-32 encoded Unicode byte order mark detected."
-            };
-        }
-
-        if (buf[0] === 0xFF && buf[1] === 0xFE) {
-            return {
-                ext: "UTF16LE",
-                mime: "charset/utf16le",
-                desc: "Little-endian UTF-16 encoded Unicode byte order mark detected."
-            };
-        }
-
-        if ((buf[0x8001] === 0x43 && buf[0x8002] === 0x44 && buf[0x8003] === 0x30 && buf[0x8004] === 0x30 && buf[0x8005] === 0x31) ||
-            (buf[0x8801] === 0x43 && buf[0x8802] === 0x44 && buf[0x8803] === 0x30 && buf[0x8804] === 0x30 && buf[0x8805] === 0x31) ||
-            (buf[0x9001] === 0x43 && buf[0x9002] === 0x44 && buf[0x9003] === 0x30 && buf[0x9004] === 0x30 && buf[0x9005] === 0x31)) {
-            return {
-                ext: "iso",
-                mime: "application/octet-stream",
-                desc: "ISO 9660 CD/DVD image file"
-            };
-        }
-
-        if (buf[0] === 0xD0 && buf[1] === 0xCF && buf[2] === 0x11 && buf[3] === 0xE0 && buf[4] === 0xA1 && buf[5] === 0xB1 && buf[6] === 0x1A && buf[7] === 0xE1) {
-            return {
-                ext: "doc, xls, ppt",
-                mime: "application/msword, application/vnd.ms-excel, application/vnd.ms-powerpoint",
-                desc: "Microsoft Office documents"
-            };
-        }
-
-        if (buf[0] === 0x64 && buf[1] === 0x65 && buf[2] === 0x78 && buf[3] === 0x0A && buf[4] === 0x30 && buf[5] === 0x33 && buf[6] === 0x35 && buf[7] === 0x00) {
-            return {
-                ext: "dex",
-                mime: "application/octet-stream",
-                desc: "Dalvik Executable (Android)"
-            };
-        }
-
-        if (buf[0] === 0x4B && buf[1] === 0x44 && buf[2] === 0x4D) {
-            return {
-                ext: "vmdk",
-                mime: "application/vmdk, application/x-virtualbox-vmdk"
-            };
-        }
-
-        if (buf[0] === 0x43 && buf[1] === 0x72 && buf[2] === 0x32 && buf[3] === 0x34) {
-            return {
-                ext: "crx",
-                mime: "application/crx",
-                desc: "Google Chrome extension or packaged app"
-            };
-        }
-
-        if (buf[0] === 0x78 && (buf[1] === 0x01 || buf[1] === 0x9C || buf[1] === 0xDA || buf[1] === 0x5e)) {
-            return {
-                ext: "zlib",
-                mime: "application/x-deflate"
-            };
-        }
-
-        return null;
-    }
-
 }

+
 /**
 * Byte frequencies of various languages generated from Wikipedia dumps taken in late 2017 and early 2018.
 * The Chi-Squared test cannot accept expected values of 0, so 0.0001 has been used to account for bytes
--- a/src/core/lib/Stream.mjs
+++ b/src/core/lib/Stream.mjs
@ -0,0 +1,263 @@
+/**
+ * Stream class for parsing binary protocols.
+ *
+ * @author n1474335 [n1474335@gmail.com]
+ * @author tlwr [toby@toby.codes]
+ * @copyright Crown Copyright 2018
+ * @license Apache-2.0
+ *
+ */
+
+/**
+ * A Stream can be used to traverse a binary blob, interpreting sections of it
+ * as various data types.
+ */
+export default class Stream {
+
+    /**
+     * Stream constructor.
+     *
+     * @param {Uint8Array} input
+     */
+    constructor(input) {
+        this.bytes = input;
+        this.length = this.bytes.length;
+        this.position = 0;
+        this.bitPos = 0;
+    }
+
+    /**
+     * Get a number of bytes from the current position.
+     *
+     * @param {number} numBytes
+     * @returns {Uint8Array}
+     */
+    getBytes(numBytes) {
+        if (this.position > this.length) return undefined;
+
+        const newPosition = this.position + numBytes;
+        const bytes = this.bytes.slice(this.position, newPosition);
+        this.position = newPosition;
+        this.bitPos = 0;
+        return bytes;
+    }
+
+    /**
+     * Interpret the following bytes as a string, stopping at the next null byte or
+     * the supplied limit.
+     *
+     * @param {number} numBytes
+     * @returns {string}
+     */
+    readString(numBytes) {
+        if (this.position > this.length) return undefined;
+
+        let result = "";
+        for (let i = this.position; i < this.position + numBytes; i++) {
+            const currentByte = this.bytes[i];
+            if (currentByte === 0) break;
+            result += String.fromCharCode(currentByte);
+        }
+        this.position += numBytes;
+        this.bitPos = 0;
+        return result;
+    }
+
+    /**
+     * Interpret the following bytes as an integer in big or little endian.
+     *
+     * @param {number} numBytes
+     * @param {string} [endianness="be"]
+     * @returns {number}
+     */
+    readInt(numBytes, endianness="be") {
+        if (this.position > this.length) return undefined;
+
+        let val = 0;
+        if (endianness === "be") {
+            for (let i = this.position; i < this.position + numBytes; i++) {
+                val = val << 8;
+                val |= this.bytes[i];
+            }
+        } else {
+            for (let i = this.position + numBytes - 1; i >= this.position; i--) {
+                val = val << 8;
+                val |= this.bytes[i];
+            }
+        }
+        this.position += numBytes;
+        this.bitPos = 0;
+        return val;
+    }
+
+    /**
+     * Reads a number of bits from the buffer.
+     *
+     * @TODO Add endianness
+     *
+     * @param {number} numBits
+     * @returns {number}
+     */
+    readBits(numBits) {
+        if (this.position > this.length) return undefined;
+
+        let bitBuf = 0,
+            bitBufLen = 0;
+
+        // Add remaining bits from current byte
+        bitBuf = (this.bytes[this.position++] & bitMask(this.bitPos)) >>> this.bitPos;
+        bitBufLen = 8 - this.bitPos;
+        this.bitPos = 0;
+
+        // Not enough bits yet
+        while (bitBufLen < numBits) {
+            bitBuf |= this.bytes[this.position++] << bitBufLen;
+            bitBufLen += 8;
+        }
+
+        // Reverse back to numBits
+        if (bitBufLen > numBits) {
+            const excess = bitBufLen - numBits;
+            bitBuf &= (1 << numBits) - 1;
+            bitBufLen -= excess;
+            this.position--;
+            this.bitPos = 8 - excess;
+        }
+
+        return bitBuf;
+
+        /**
+         * Calculates the bit mask based on the current bit position.
+         *
+         * @param {number} bitPos
+         * @returns {number} The bit mask
+         */
+        function bitMask(bitPos) {
+            return 256 - (1 << bitPos);
+        }
+    }
+
+    /**
+     * Consume the stream until we reach the specified byte or sequence of bytes.
+     *
+     * @param {number|List<number>} val
+     */
+    continueUntil(val) {
+        if (this.position > this.length) return;
+
+        this.bitPos = 0;
+
+        if (typeof val === "number") {
+            while (++this.position < this.length && this.bytes[this.position] !== val) {
+                continue;
+            }
+            return;
+        }
+
+        // val is an array
+        let found = false;
+        while (!found && this.position < this.length) {
+            while (++this.position < this.length && this.bytes[this.position] !== val[0]) {
+                continue;
+            }
+            found = true;
+            for (let i = 1; i < val.length; i++) {
+                if (this.position + i > this.length || this.bytes[this.position + i] !== val[i])
+                    found = false;
+            }
+        }
+    }
+
+    /**
+     * Consume the next byte if it matches the supplied value.
+     *
+     * @param {number} val
+     */
+    consumeIf(val) {
+        if (this.bytes[this.position] === val) {
+            this.position++;
+            this.bitPos = 0;
+        }
+    }
+
+    /**
+     * Move forwards through the stream by the specified number of bytes.
+     *
+     * @param {number} numBytes
+     */
+    moveForwardsBy(numBytes) {
+        const pos = this.position + numBytes;
+        if (pos < 0 || pos > this.length)
+            throw new Error("Cannot move to position " + pos + " in stream. Out of bounds.");
+        this.position = pos;
+        this.bitPos = 0;
+    }
+
+    /**
+     * Move backwards through the stream by the specified number of bytes.
+     *
+     * @param {number} numBytes
+     */
+    moveBackwardsBy(numBytes) {
+        const pos = this.position - numBytes;
+        if (pos < 0 || pos > this.length)
+            throw new Error("Cannot move to position " + pos + " in stream. Out of bounds.");
+        this.position = pos;
+        this.bitPos = 0;
+    }
+
+    /**
+     * Move backwards through the strem by the specified number of bits.
+     *
+     * @param {number} numBits
+     */
+    moveBackwardsByBits(numBits) {
+        if (numBits <= this.bitPos) {
+            this.bitPos -= numBits;
+        } else {
+            if (this.bitPos > 0) {
+                numBits -= this.bitPos;
+                this.bitPos = 0;
+            }
+
+            while (numBits > 0) {
+                this.moveBackwardsBy(1);
+                this.bitPos = 8;
+                this.moveBackwardsByBits(numBits);
+                numBits -= 8;
+            }
+        }
+    }
+
+    /**
+     * Move to a specified position in the stream.
+     *
+     * @param {number} pos
+     */
+    moveTo(pos) {
+        if (pos < 0 || pos > this.length)
+            throw new Error("Cannot move to position " + pos + " in stream. Out of bounds.");
+        this.position = pos;
+        this.bitPos = 0;
+    }
+
+    /**
+     * Returns true if there are more bytes left in the stream.
+     *
+     * @returns {boolean}
+     */
+    hasMore() {
+        return this.position < this.length;
+    }
+
+    /**
+     * Returns a slice of the stream up to the current position.
+     *
+     * @returns {Uint8Array}
+     */
+    carve() {
+        if (this.bitPos > 0) this.position++;
+        return this.bytes.slice(0, this.position);
+    }
+
+}