mirror of
https://github.com/gchq/CyberChef.git
synced 2025-04-23 08:16:17 -04:00
Added 'Extract Files' operation and 'Forensics' category.
This commit is contained in:
parent
15fbe5a459
commit
6aa9d2b492
5 changed files with 572 additions and 74 deletions
|
@ -245,7 +245,8 @@
|
|||
"XPath expression",
|
||||
"JPath expression",
|
||||
"CSS selector",
|
||||
"Extract EXIF"
|
||||
"Extract EXIF",
|
||||
"Extract Files"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -336,14 +337,23 @@
|
|||
"From MessagePack"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "Forensics",
|
||||
"ops": [
|
||||
"Detect File Type",
|
||||
"Scan for Embedded Files",
|
||||
"Extract Files",
|
||||
"Remove EXIF",
|
||||
"Extract EXIF",
|
||||
"Render Image"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "Other",
|
||||
"ops": [
|
||||
"Entropy",
|
||||
"Frequency distribution",
|
||||
"Chi Square",
|
||||
"Detect File Type",
|
||||
"Scan for Embedded Files",
|
||||
"Disassemble x86",
|
||||
"Pseudo-Random Number Generator",
|
||||
"Generate UUID",
|
||||
|
@ -351,8 +361,6 @@
|
|||
"Generate HOTP",
|
||||
"Haversine distance",
|
||||
"Render Image",
|
||||
"Remove EXIF",
|
||||
"Extract EXIF",
|
||||
"Numberwang",
|
||||
"XKCD Random Number"
|
||||
]
|
||||
|
|
231
src/core/lib/FileExtraction.mjs
Normal file
231
src/core/lib/FileExtraction.mjs
Normal file
|
@ -0,0 +1,231 @@
|
|||
/**
|
||||
* File extraction functions
|
||||
*
|
||||
* @author n1474335 [n1474335@gmail.com]
|
||||
* @copyright Crown Copyright 2018
|
||||
* @license Apache-2.0
|
||||
*
|
||||
*/
|
||||
import Stream from "./Stream";
|
||||
|
||||
/**
 * Carves a single embedded file out of a byte array, picking the extraction
 * routine that matches the detected mime type.
 *
 * @param {Uint8Array} bytes - Data that contains the embedded file
 * @param {Object} fileDetail
 * @param {string} fileDetail.mime - Mime type used to select the extractor
 * @param {string} fileDetail.ext - Extension used in the name of the returned file
 * @param {number} fileDetail.offset - Index in bytes at which the embedded file starts
 * @returns {File} The carved data, named after the (hexadecimal) offset it was found at
 * @throws {Error} If no extractor exists for the mime type
 */
export function extractFile(bytes, fileDetail) {
    const mimeType = fileDetail.mime;
    let carved;

    if (mimeType === "image/jpeg") {
        carved = extractJPEG(bytes, fileDetail.offset);
    } else if (mimeType === "application/x-msdownload") {
        carved = extractMZPE(bytes, fileDetail.offset);
    } else if (mimeType === "application/pdf") {
        carved = extractPDF(bytes, fileDetail.offset);
    } else if (mimeType === "application/zip") {
        carved = extractZIP(bytes, fileDetail.offset);
    } else {
        throw new Error(`No extraction algorithm available for "${mimeType}" files`);
    }

    const hexOffset = fileDetail.offset.toString(16);
    const fileName = `extracted_at_0x${hexOffset}.${fileDetail.ext}`;

    return new File([carved], fileName);
}
|
||||
|
||||
|
||||
/**
 * JPEG extractor.
 *
 * Walks the JPEG marker structure starting at the given offset and returns
 * everything from the start of the image up to and including the End of Image
 * marker (0xffd9).
 *
 * @param {Uint8Array} bytes - Data that contains the embedded JPEG
 * @param {number} offset - Index at which the JPEG starts
 * @returns {Uint8Array} The carved JPEG
 * @throws {Error} If a marker does not start with 0xff, if a segment declares
 *     an impossible length, or if the data ends before an End of Image marker
 */
export function extractJPEG(bytes, offset) {
    const stream = new Stream(bytes.slice(offset));

    // Markers that are followed by a two byte big endian length field. The
    // length counts the length field itself but not the marker.
    const hasLengthField = code =>
        (code >= 0xc0 && code <= 0xcf) || // Start of frame variants, Huffman and arithmetic tables
        (code >= 0xdb && code <= 0xdf) || // Quantization table, number of lines, restart interval,
                                          // hierarchical progression, expand reference image
        (code >= 0xe0 && code <= 0xef) || // Application-specific
        code === 0xda ||                  // Start of scan
        code === 0xfe;                    // Comment

    while (stream.hasMore()) {
        const marker = stream.getBytes(2);
        if (marker[0] !== 0xff) throw new Error("Invalid JPEG marker: " + marker);

        const code = marker[1];

        // End of image: everything consumed so far is the file
        if (code === 0xd9) {
            return stream.carve();
        }

        // Start of image and TEM (temporary use in arithmetic coding) carry no data
        if (code === 0xd8 || code === 0x01) {
            continue;
        }

        if (hasLengthField(code)) {
            // DNL (0xdc), DRI (0xdd) and EXP (0xdf) are included here: they also
            // start with a length field, so skipping a fixed 2/2/1 bytes would
            // leave the stream in the middle of the segment.
            const segmentSize = stream.readInt(2, "be");
            if (segmentSize < 2) throw new Error("Invalid JPEG segment length: " + segmentSize);
            stream.position += segmentSize - 2;

            // The scan header is followed by entropy-coded data of unknown
            // length, so look for the next 0xff byte.
            if (code === 0xda) stream.continueUntil(0xff);
            continue;
        }

        // Byte stuffing (0x00), restart markers (0xd0-0xd7) and anything
        // unrecognised: keep going through the encoded data until the next 0xff.
        stream.continueUntil(0xff);
    }

    throw new Error("Unable to parse JPEG successfully");
}
|
||||
|
||||
|
||||
/**
 * Portable executable extractor.
 * Assumes that the offset refers to an MZ header.
 *
 * Follows the PE header pointer, reads the section count and optional header
 * size, then uses the raw data address and size of the final section header
 * to decide where the executable ends.
 *
 * @param {Uint8Array} bytes - Data that contains the embedded executable
 * @param {number} offset - Index of the MZ header
 * @returns {Uint8Array} The bytes from the MZ header to the end of the final section
 */
export function extractMZPE(bytes, offset) {
    const PE_POINTER_POSITION = 0x3c;
    const SECTION_HEADER_SIZE = 0x28;
    const RAW_DATA_FIELDS_OFFSET = 16;

    const stream = new Stream(bytes.slice(offset));

    // The MZ header stores the location of the PE header at 0x3c
    stream.moveTo(PE_POINTER_POSITION);
    const peHeaderStart = stream.readInt(4, "le");
    stream.moveTo(peHeaderStart);

    // Number of sections sits 6 bytes into the PE header
    stream.moveForwardsBy(6);
    const sectionCount = stream.readInt(2, "le");

    // Optional header size follows 12 bytes later
    stream.moveForwardsBy(12);
    const optionalHeaderLength = stream.readInt(2, "le");

    // Skip the remaining 2 bytes of the file header and the optional header,
    // which leaves the stream at the first section header
    stream.moveForwardsBy(2 + optionalHeaderLength);

    // Jump to the raw data fields of the last section header
    stream.moveForwardsBy((sectionCount - 1) * SECTION_HEADER_SIZE + RAW_DATA_FIELDS_OFFSET);
    const lastSectionSize = stream.readInt(4, "le");
    const lastSectionStart = stream.readInt(4, "le");

    // The file is taken to end where the final section's raw data ends
    stream.moveTo(lastSectionStart + lastSectionSize);

    return stream.carve();
}
|
||||
|
||||
|
||||
/**
 * PDF extractor.
 *
 * Carves from the given offset up to the end-of-file marker (%%EOF) found by
 * the stream search, including one carriage return and/or line feed that
 * directly follows the marker.
 *
 * @param {Uint8Array} bytes - Data that contains the embedded PDF
 * @param {number} offset - Index at which the PDF starts
 * @returns {Uint8Array} The carved PDF
 */
export function extractPDF(bytes, offset) {
    // "%%EOF"
    const EOF_MARKER = [0x25, 0x25, 0x45, 0x4f, 0x46];
    const TRAILING_EOL = [0x0d, 0x0a];

    const stream = new Stream(bytes.slice(offset));

    // Stop at the marker, then step over it
    stream.continueUntil(EOF_MARKER);
    stream.moveForwardsBy(EOF_MARKER.length);

    // Take an optional CR and then an optional LF
    TRAILING_EOL.forEach(eolByte => stream.consumeIf(eolByte));

    return stream.carve();
}
|
||||
|
||||
|
||||
/**
 * ZIP extractor.
 *
 * Carves from the given offset up to the end of the End of central directory
 * record found by the stream search, including its trailing comment.
 *
 * @param {Uint8Array} bytes - Data that contains the embedded ZIP
 * @param {number} offset - Index at which the ZIP starts
 * @returns {Uint8Array} The carved ZIP
 */
export function extractZIP(bytes, offset) {
    // Signature that opens the End of central directory record ("PK\x05\x06")
    const EOCD_SIGNATURE = [0x50, 0x4b, 0x05, 0x06];
    // Distance from the start of the signature to the comment length field
    const COMMENT_LENGTH_POSITION = 20;

    const stream = new Stream(bytes.slice(offset));

    stream.continueUntil(EOCD_SIGNATURE);

    // The record ends with a two byte comment length followed by the comment
    stream.moveForwardsBy(COMMENT_LENGTH_POSITION);
    const trailingCommentSize = stream.readInt(2, "le");
    stream.moveForwardsBy(trailingCommentSize);

    return stream.carve();
}
|
164
src/core/lib/Stream.mjs
Normal file
164
src/core/lib/Stream.mjs
Normal file
|
@ -0,0 +1,164 @@
|
|||
/**
|
||||
* Stream class for parsing binary protocols.
|
||||
*
|
||||
* @author n1474335 [n1474335@gmail.com]
|
||||
* @author tlwr [toby@toby.codes]
|
||||
* @copyright Crown Copyright 2018
|
||||
* @license Apache-2.0
|
||||
*
|
||||
*/
|
||||
|
||||
/**
 * A Stream can be used to traverse a binary blob, interpreting sections of it
 * as various data types.
 *
 * The stream keeps a cursor (`position`) into the underlying bytes. Most
 * methods advance the cursor without checking it against the end of the data,
 * so it may end up beyond the last byte; `hasMore()` reports whether it is
 * still inside the data.
 */
export default class Stream {

    /**
     * Stream constructor.
     *
     * @param {Uint8Array} input
     */
    constructor(input) {
        this.bytes = input;
        this.position = 0;
    }

    /**
     * Get a number of bytes from the current position.
     *
     * The position always advances by numBytes, even if fewer bytes remain, in
     * which case the returned array is shorter than requested.
     *
     * @param {number} numBytes
     * @returns {Uint8Array}
     */
    getBytes(numBytes) {
        const newPosition = this.position + numBytes;
        const bytes = this.bytes.slice(this.position, newPosition);
        this.position = newPosition;
        return bytes;
    }

    /**
     * Interpret the following bytes as a string, stopping at the next null byte or
     * the supplied limit.
     *
     * The position always advances by numBytes, regardless of where the string
     * ended.
     *
     * @param {number} numBytes
     * @returns {string}
     */
    readString(numBytes) {
        let result = "";
        for (let i = this.position; i < this.position + numBytes; i++) {
            const currentByte = this.bytes[i];
            if (currentByte === 0) break;
            result += String.fromCharCode(currentByte);
        }
        this.position += numBytes;
        return result;
    }

    /**
     * Interpret the following bytes as an unsigned integer in big or little endian.
     *
     * Bytes beyond the end of the stream are read as 0.
     *
     * @param {number} numBytes
     * @param {string} [endianness="be"] - "be" for big endian, anything else is
     *     treated as little endian
     * @returns {number}
     */
    readInt(numBytes, endianness="be") {
        const start = this.position;
        let val = 0;

        for (let i = 0; i < numBytes; i++) {
            // Always visit the most significant byte first
            const index = endianness === "be" ? start + i : start + numBytes - 1 - i;
            // Multiply rather than shift: bitwise operators work on signed 32-bit
            // values, which turns four byte values >= 0x80000000 negative.
            val = (val * 256) + (this.bytes[index] || 0);
        }

        this.position += numBytes;
        return val;
    }

    /**
     * Consume the stream until we reach the specified byte or sequence of bytes.
     *
     * The search starts at the byte AFTER the current position, so a match
     * located exactly at the current position is not seen. On a match the
     * position is left at the first byte of the match; otherwise it is left at
     * the end of the stream.
     *
     * @param {number|List<number>} val
     */
    continueUntil(val) {
        if (typeof val === "number") {
            while (++this.position < this.bytes.length && this.bytes[this.position] !== val) {
                continue;
            }
            return;
        }

        // val is an array
        let found = false;
        while (!found && this.position < this.bytes.length) {
            // Advance to the next candidate for the first byte of the sequence
            while (++this.position < this.bytes.length && this.bytes[this.position] !== val[0]) {
                continue;
            }

            // Check the rest of the sequence, giving up on the first mismatch
            found = true;
            for (let i = 1; i < val.length; i++) {
                if (this.position + i >= this.bytes.length || this.bytes[this.position + i] !== val[i]) {
                    found = false;
                    break;
                }
            }
        }
    }

    /**
     * Consume the next byte if it matches the supplied value.
     *
     * @param {number} val
     */
    consumeIf(val) {
        if (this.bytes[this.position] === val)
            this.position++;
    }

    /**
     * Move forwards through the stream by the specified number of bytes.
     * No bounds checking is performed.
     *
     * @param {number} numBytes
     */
    moveForwardsBy(numBytes) {
        this.position += numBytes;
    }

    /**
     * Move to a specified position in the stream.
     *
     * The position one past the final byte is allowed: it means "end of
     * stream" and is what carve() needs in order to return all of the data.
     *
     * @param {number} pos
     * @throws {Error} If pos is negative or beyond the end of the stream
     */
    moveTo(pos) {
        if (pos < 0 || pos > this.bytes.length)
            throw new Error("Cannot move to position " + pos + " in stream. Out of bounds.");
        this.position = pos;
    }

    /**
     * Returns true if there are more bytes left in the stream.
     *
     * @returns {boolean}
     */
    hasMore() {
        return this.position < this.bytes.length;
    }

    /**
     * Returns a slice of the stream up to the current position.
     *
     * @returns {Uint8Array}
     */
    carve() {
        return this.bytes.slice(0, this.position);
    }

}
|
91
src/core/operations/ExtractFiles.mjs
Normal file
91
src/core/operations/ExtractFiles.mjs
Normal file
|
@ -0,0 +1,91 @@
|
|||
/**
|
||||
* @author n1474335 [n1474335@gmail.com]
|
||||
* @copyright Crown Copyright 2018
|
||||
* @license Apache-2.0
|
||||
*/
|
||||
|
||||
import Operation from "../Operation";
|
||||
// import OperationError from "../errors/OperationError";
|
||||
import Magic from "../lib/Magic";
|
||||
import Utils from "../Utils";
|
||||
import {extractFile} from "../lib/FileExtraction";
|
||||
|
||||
/**
 * Extract Files operation
 *
 * Scans the input for recognisable file signatures and carves out every
 * embedded file for which an extraction routine is available.
 */
class ExtractFiles extends Operation {

    /**
     * ExtractFiles constructor
     */
    constructor() {
        super();

        this.name = "Extract Files";
        this.module = "Default";
        this.description = "Performs file carving to attempt to extract embedded files from the input. Currently supports JPEG images, Windows executables (MZ/PE), PDF documents and ZIP archives.";
        this.infoURL = "https://forensicswiki.org/wiki/File_Carving";
        this.inputType = "ArrayBuffer";
        this.outputType = "List<File>";
        this.presentType = "html";
        this.args = [];
    }

    /**
     * @param {ArrayBuffer} input
     * @param {Object[]} args - Unused, this operation takes no arguments
     * @returns {List<File>} One file per embedded file that could be extracted
     */
    run(input, args) {
        const bytes = new Uint8Array(input);

        // Scan for embedded files
        const fileDetails = scanForEmbeddedFiles(bytes);

        // Extract each file that we support
        const files = [];
        for (const fileDetail of fileDetails) {
            try {
                files.push(extractFile(bytes, fileDetail));
            } catch (err) {
                // Deliberately ignored: extractFile throws for file types without
                // an extraction routine and for data that cannot be parsed. Such
                // candidates are skipped so the remaining files are still returned.
            }
        }

        return files;
    }

    /**
     * Displays the files in HTML for web apps.
     *
     * @param {File[]} files
     * @returns {html}
     */
    async present(files) {
        return await Utils.displayFilesAsHTML(files);
    }

}
|
||||
|
||||
/**
 * Runs file type detection at every offset of the data and records each offset
 * at which a file type is recognised.
 *
 * TODO refactor
 *
 * @param {Uint8Array} data
 * @returns {Object[]} One entry per hit, holding the `offset` of the hit and
 *     the `ext`, `mime` and `desc` reported by the detection routine
 */
function scanForEmbeddedFiles(data) {
    const types = [];

    for (let i = 0; i < data.length; i++) {
        // subarray() creates a view on the same memory. slice() would copy the
        // whole remainder of the data at every single offset.
        // NOTE(review): assumes Magic.magicFileType only reads from the buffer it is given - confirm.
        const type = Magic.magicFileType(data.subarray(i));
        if (type) {
            types.push({
                offset: i,
                ext: type.ext,
                mime: type.mime,
                desc: type.desc
            });
        }
    }

    return types;
}
|
||||
|
||||
export default ExtractFiles;
|
Loading…
Add table
Add a link
Reference in a new issue