Added 'Extract Files' operation and 'Forensics' category.

This commit is contained in:
n1474335 2018-12-14 16:43:03 +00:00
parent 15fbe5a459
commit 6aa9d2b492
5 changed files with 572 additions and 74 deletions

View file

@ -245,7 +245,8 @@
"XPath expression",
"JPath expression",
"CSS selector",
"Extract EXIF"
"Extract EXIF",
"Extract Files"
]
},
{
@ -336,14 +337,23 @@
"From MessagePack"
]
},
{
"name": "Forensics",
"ops": [
"Detect File Type",
"Scan for Embedded Files",
"Extract Files",
"Remove EXIF",
"Extract EXIF",
"Render Image"
]
},
{
"name": "Other",
"ops": [
"Entropy",
"Frequency distribution",
"Chi Square",
"Detect File Type",
"Scan for Embedded Files",
"Disassemble x86",
"Pseudo-Random Number Generator",
"Generate UUID",
@ -351,8 +361,6 @@
"Generate HOTP",
"Haversine distance",
"Render Image",
"Remove EXIF",
"Extract EXIF",
"Numberwang",
"XKCD Random Number"
]

View file

@ -0,0 +1,231 @@
/**
* File extraction functions
*
* @author n1474335 [n1474335@gmail.com]
* @copyright Crown Copyright 2018
* @license Apache-2.0
*
*/
import Stream from "./Stream";
/**
 * Carves a single embedded file out of a byte buffer by handing the data to
 * the format-specific extractor that matches the detected mime type.
 *
 * @param {Uint8Array} bytes - Buffer that contains the embedded file
 * @param {Object} fileDetail - Detection result describing the embedded file
 * @param {string} fileDetail.mime - Mime type, used to select the extractor
 * @param {string} fileDetail.ext - Extension given to the resulting file
 * @param {number} fileDetail.offset - Index in bytes at which the file starts
 * @returns {File} File named after the hexadecimal offset and the extension
 * @throws {Error} If no extractor exists for the mime type
 */
export function extractFile(bytes, fileDetail) {
    const {mime, ext, offset} = fileDetail;
    let extractor;

    if (mime === "image/jpeg") {
        extractor = extractJPEG;
    } else if (mime === "application/x-msdownload") {
        extractor = extractMZPE;
    } else if (mime === "application/pdf") {
        extractor = extractPDF;
    } else if (mime === "application/zip") {
        extractor = extractZIP;
    } else {
        throw new Error(`No extraction algorithm available for "${mime}" files`);
    }

    const fileData = extractor(bytes, offset);
    return new File([fileData], `extracted_at_0x${offset.toString(16)}.${ext}`);
}
/**
 * JPEG extractor.
 *
 * Walks the marker structure of a JPEG that starts at the given offset and
 * returns everything from the start of the image up to and including the
 * End Of Image marker (0xffd9).
 *
 * @param {Uint8Array} bytes - Buffer that contains the JPEG
 * @param {number} offset - Index in bytes at which the JPEG starts
 * @returns {Uint8Array} The carved JPEG data
 * @throws {Error} If a marker does not start with 0xff, a segment length is
 *     smaller than the length field itself, or no End Of Image marker is found
 */
export function extractJPEG(bytes, offset) {
    const stream = new Stream(bytes.slice(offset));

    // Skips a segment that starts with a two-byte big-endian length. The
    // length counts the length field itself, so anything below 2 is corrupt.
    const skipLengthPrefixedSegment = () => {
        const segmentSize = stream.readInt(2, "be");
        if (segmentSize < 2) throw new Error("Invalid JPEG segment length: " + segmentSize);
        stream.position += segmentSize - 2;
    };

    while (stream.hasMore()) {
        const marker = stream.getBytes(2);
        if (marker[0] !== 0xff) throw new Error("Invalid JPEG marker: " + marker);

        switch (marker[1]) {
            // No length
            case 0xd8: // Start of Image
            case 0x01: // For temporary use in arithmetic coding
                break;
            case 0xd9: // End found
                return stream.carve();
            // Length-prefixed segments
            case 0xc0: // Start of frame (Baseline DCT)
            case 0xc1: // Start of frame (Extended sequential DCT)
            case 0xc2: // Start of frame (Progressive DCT)
            case 0xc3: // Start of frame (Lossless sequential)
            case 0xc4: // Define Huffman Table
            case 0xc5: // Start of frame (Differential sequential DCT)
            case 0xc6: // Start of frame (Differential progressive DCT)
            case 0xc7: // Start of frame (Differential lossless)
            case 0xc8: // Reserved for JPEG extensions
            case 0xc9: // Start of frame (Extended sequential DCT)
            case 0xca: // Start of frame (Progressive DCT)
            case 0xcb: // Start of frame (Lossless sequential)
            case 0xcc: // Define arithmetic conditioning table
            case 0xcd: // Start of frame (Differential sequential DCT)
            case 0xce: // Start of frame (Differential progressive DCT)
            case 0xcf: // Start of frame (Differential lossless)
            case 0xdb: // Define Quantization Table
            // DNL, DRI and EXP carry a length field like every other segment.
            // Skipping only their payload size would leave the parser in the
            // middle of the segment and break on the next marker read.
            case 0xdc: // Define number of lines
            case 0xdd: // Define restart interval
            case 0xde: // Define hierarchical progression
            case 0xdf: // Expand reference image
            case 0xe0: // Application-specific
            case 0xe1: // Application-specific
            case 0xe2: // Application-specific
            case 0xe3: // Application-specific
            case 0xe4: // Application-specific
            case 0xe5: // Application-specific
            case 0xe6: // Application-specific
            case 0xe7: // Application-specific
            case 0xe8: // Application-specific
            case 0xe9: // Application-specific
            case 0xea: // Application-specific
            case 0xeb: // Application-specific
            case 0xec: // Application-specific
            case 0xed: // Application-specific
            case 0xee: // Application-specific
            case 0xef: // Application-specific
            case 0xfe: // Comment
                skipLengthPrefixedSegment();
                break;
            // Start of scan: a header segment followed by entropy-coded data
            case 0xda:
                skipLengthPrefixedSegment();
                stream.continueUntil(0xff);
                break;
            // Byte stuffing (0x00), restart markers (0xd0-0xd7) and anything
            // unrecognised: keep going through the encoded data until the
            // next 0xff byte.
            default:
                stream.continueUntil(0xff);
                break;
        }
    }

    throw new Error("Unable to parse JPEG successfully");
}
/**
 * Portable executable extractor.
 * Assumes that the offset refers to an MZ header.
 *
 * Follows the PE header pointer stored at 0x3c, reads the section count and
 * optional header size, then uses the raw data size and address recorded in
 * the final section header to decide where the executable ends.
 *
 * @param {Uint8Array} bytes - Buffer that contains the executable
 * @param {number} offset - Index in bytes of the MZ header
 * @returns {Uint8Array} Data from the MZ header up to the end of the final section
 */
export function extractMZPE(bytes, offset) {
    const SECTION_HEADER_SIZE = 0x28;
    const stream = new Stream(bytes.slice(offset));

    // The location of the PE header is stored at 0x3c
    stream.moveTo(0x3c);
    const peHeaderPos = stream.readInt(4, "le");
    stream.moveTo(peHeaderPos);

    // The section count sits 6 bytes into the PE header
    stream.moveForwardsBy(6);
    const sectionCount = stream.readInt(2, "le");

    // The optional header size follows 12 bytes later
    stream.moveForwardsBy(12);
    const optionalHeaderLen = stream.readInt(2, "le");

    // Jump over the rest of the header, the optional header and every section
    // header but the last, then 16 bytes into the final section header where
    // its raw data size and address are stored
    const bytesToRawDataInfo =
        2 + optionalHeaderLen +
        (sectionCount - 1) * SECTION_HEADER_SIZE +
        16;
    stream.moveForwardsBy(bytesToRawDataInfo);

    const lastSectionSize = stream.readInt(4, "le");
    const lastSectionStart = stream.readInt(4, "le");

    // The executable ends where the final section's raw data ends
    stream.moveTo(lastSectionStart + lastSectionSize);
    return stream.carve();
}
/**
 * PDF extractor.
 *
 * Carves from the given offset up to the first end-of-file marker (%%EOF)
 * found after the first byte, including one trailing carriage return and/or
 * line feed if present.
 *
 * @param {Uint8Array} bytes - Buffer that contains the PDF
 * @param {number} offset - Index in bytes at which the PDF starts
 * @returns {Uint8Array} The carved PDF data
 */
export function extractPDF(bytes, offset) {
    // "%%EOF"
    const EOF_MARKER = [0x25, 0x25, 0x45, 0x4f, 0x46];
    const CARRIAGE_RETURN = 0x0d;
    const LINE_FEED = 0x0a;

    const stream = new Stream(bytes.slice(offset));

    // Stop at the start of the marker, then step over it
    stream.continueUntil(EOF_MARKER);
    stream.moveForwardsBy(EOF_MARKER.length);

    // Take the optional line ending that follows the marker
    [CARRIAGE_RETURN, LINE_FEED].forEach(terminator => stream.consumeIf(terminator));

    return stream.carve();
}
/**
 * ZIP extractor.
 *
 * Carves from the given offset up to the end of the first End of central
 * directory record found, including its trailing comment.
 *
 * @param {Uint8Array} bytes - Buffer that contains the ZIP archive
 * @param {number} offset - Index in bytes at which the archive starts
 * @returns {Uint8Array} The carved ZIP data
 */
export function extractZIP(bytes, offset) {
    // Signature of the End of central directory record
    const EOCD_SIGNATURE = [0x50, 0x4b, 0x05, 0x06];
    // Bytes between the start of the record and its comment length field
    const COMMENT_LENGTH_POS = 20;

    const stream = new Stream(bytes.slice(offset));
    stream.continueUntil(EOCD_SIGNATURE);
    stream.moveForwardsBy(COMMENT_LENGTH_POS);

    // The record finishes with a little-endian comment length and the comment
    const commentSize = stream.readInt(2, "le");
    stream.moveForwardsBy(commentSize);

    return stream.carve();
}

164
src/core/lib/Stream.mjs Normal file
View file

@ -0,0 +1,164 @@
/**
* Stream class for parsing binary protocols.
*
* @author n1474335 [n1474335@gmail.com]
* @author tlwr [toby@toby.codes]
* @copyright Crown Copyright 2018
* @license Apache-2.0
*
*/
/**
 * A Stream can be used to traverse a binary blob, interpreting sections of it
 * as various data types.
 *
 * The stream keeps a single cursor (`position`) into the wrapped bytes. Read
 * methods advance the cursor past whatever they consume.
 */
export default class Stream {

    /**
     * Stream constructor.
     *
     * @param {Uint8Array} input - Bytes to traverse
     */
    constructor(input) {
        this.bytes = input;
        this.position = 0;
    }

    /**
     * Get a number of bytes from the current position and move past them.
     *
     * @param {number} numBytes
     * @returns {Uint8Array} Copy of the bytes read; shorter than numBytes if
     *     the end of the stream is reached
     */
    getBytes(numBytes) {
        const newPosition = this.position + numBytes;
        const bytes = this.bytes.slice(this.position, newPosition);
        this.position = newPosition;
        return bytes;
    }

    /**
     * Interpret the following bytes as a string, stopping at the next null byte or
     * the supplied limit. The position always advances by the full limit.
     *
     * @param {number} numBytes
     * @returns {string}
     */
    readString(numBytes) {
        let result = "";
        for (let i = this.position; i < this.position + numBytes; i++) {
            const currentByte = this.bytes[i];
            if (currentByte === 0) break;
            result += String.fromCharCode(currentByte);
        }
        this.position += numBytes;
        return result;
    }

    /**
     * Interpret the following bytes as an unsigned integer in big or little endian.
     * Bytes beyond the end of the stream are read as zero.
     *
     * @param {number} numBytes
     * @param {string} [endianness="be"]
     * @returns {number}
     */
    readInt(numBytes, endianness="be") {
        // Arithmetic is used instead of << and | because bitwise operators work
        // on signed 32-bit integers: a 4 byte value with its top bit set would
        // come back negative.
        let val = 0;
        if (endianness === "be") {
            for (let i = this.position; i < this.position + numBytes; i++) {
                val = val * 256 + (this.bytes[i] || 0);
            }
        } else {
            for (let i = this.position + numBytes - 1; i >= this.position; i--) {
                val = val * 256 + (this.bytes[i] || 0);
            }
        }
        this.position += numBytes;
        return val;
    }

    /**
     * Consume the stream until we reach the specified byte or sequence of bytes.
     *
     * The search starts at the byte after the current position. On success the
     * position is left on the (first byte of the) match; if there is no match
     * the position is left at or beyond the end of the stream.
     *
     * @param {number|List<number>} val
     */
    continueUntil(val) {
        const length = this.bytes.length;

        if (typeof val === "number") {
            while (++this.position < length && this.bytes[this.position] !== val) {
                continue;
            }
            return;
        }

        // val is an array
        while (this.position < length) {
            // Advance to the next occurrence of the first byte of the sequence
            while (++this.position < length && this.bytes[this.position] !== val[0]) {
                continue;
            }
            if (this.position >= length) return;

            // The whole sequence has to fit inside the remaining bytes
            let matched = this.position + val.length <= length;
            for (let i = 1; matched && i < val.length; i++) {
                matched = this.bytes[this.position + i] === val[i];
            }
            if (matched) return;
        }
    }

    /**
     * Consume the next byte if it matches the supplied value.
     *
     * @param {number} val
     */
    consumeIf(val) {
        if (this.bytes[this.position] === val)
            this.position++;
    }

    /**
     * Move forwards through the stream by the specified number of bytes.
     * No bounds checking is performed.
     *
     * @param {number} numBytes
     */
    moveForwardsBy(numBytes) {
        this.position += numBytes;
    }

    /**
     * Move to a specified position in the stream.
     *
     * The position one past the final byte is allowed: it is where the cursor
     * rests once everything has been consumed, and carving from there returns
     * the whole stream.
     *
     * @param {number} pos
     * @throws {Error} If pos is negative or beyond the end of the stream
     */
    moveTo(pos) {
        if (pos < 0 || pos > this.bytes.length)
            throw new Error("Cannot move to position " + pos + " in stream. Out of bounds.");
        this.position = pos;
    }

    /**
     * Returns true if there are more bytes left in the stream.
     *
     * @returns {boolean}
     */
    hasMore() {
        return this.position < this.bytes.length;
    }

    /**
     * Returns a slice of the stream up to the current position.
     *
     * @returns {Uint8Array}
     */
    carve() {
        return this.bytes.slice(0, this.position);
    }

}

View file

@ -0,0 +1,91 @@
/**
* @author n1474335 [n1474335@gmail.com]
* @copyright Crown Copyright 2018
* @license Apache-2.0
*/
import Operation from "../Operation";
// import OperationError from "../errors/OperationError";
import Magic from "../lib/Magic";
import Utils from "../Utils";
import {extractFile} from "../lib/FileExtraction";
/**
 * Extract Files operation
 */
class ExtractFiles extends Operation {

    /**
     * ExtractFiles constructor
     */
    constructor() {
        super();

        this.name = "Extract Files";
        this.module = "Default";
        this.description = "TODO";
        this.infoURL = "https://forensicswiki.org/wiki/File_Carving";
        this.inputType = "ArrayBuffer";
        this.outputType = "List<File>";
        this.presentType = "html";
        this.args = [];
    }

    /**
     * Scans the input for embedded files and carves out every one that an
     * extractor is able to handle.
     *
     * @param {ArrayBuffer} input
     * @param {Object[]} args
     * @returns {List<File>}
     */
    run(input, args) {
        const bytes = new Uint8Array(input);
        const extracted = [];

        for (const candidate of scanForEmbeddedFiles(bytes)) {
            try {
                extracted.push(extractFile(bytes, candidate));
            } catch (err) {
                // Best effort: a candidate that cannot be extracted is left out
            }
        }

        return extracted;
    }

    /**
     * Displays the files in HTML for web apps.
     *
     * @param {File[]} files
     * @returns {html}
     */
    async present(files) {
        const html = await Utils.displayFilesAsHTML(files);
        return html;
    }

}
/**
 * Checks the data at every offset for a file type recognised by
 * Magic.magicFileType and records each hit.
 *
 * @param {Uint8Array} data - Bytes to scan
 * @returns {Object[]} One entry per detection, in ascending offset order,
 *     each with the offset plus the ext, mime and desc of the detected type
 */
function scanForEmbeddedFiles(data) {
    const types = [];

    for (let i = 0; i < data.length; i++) {
        // subarray() gives a view onto the same memory. slice() would copy the
        // whole remainder of the data at every single offset.
        const type = Magic.magicFileType(data.subarray(i));
        if (type) {
            types.push({
                offset: i,
                ext: type.ext,
                mime: type.mime,
                desc: type.desc
            });
        }
    }

    return types;
}
export default ExtractFiles;