From 28244ac6050c1980bb76535e7bec0ce7885428e0 Mon Sep 17 00:00:00 2001 From: n1073645 Date: Tue, 7 Jan 2020 12:11:18 +0000 Subject: [PATCH] Entropy Library and making existing operations use it --- src/core/Utils.mjs | 38 --------------- src/core/lib/Magic.mjs | 45 +++--------------- src/core/operations/Entropy.mjs | 59 ++---------------------- src/core/operations/ExtractEntropies.mjs | 57 +---------------------- src/web/waiters/InputWaiter.mjs | 3 +- 5 files changed, 15 insertions(+), 187 deletions(-) diff --git a/src/core/Utils.mjs b/src/core/Utils.mjs index c99eccc9..0f10cb8a 100755 --- a/src/core/Utils.mjs +++ b/src/core/Utils.mjs @@ -591,44 +591,6 @@ class Utils { return utf8 ? Utils.byteArrayToUtf8(arr) : Utils.byteArrayToChars(arr); } - /** - * Calculates the Shannon entropy for a given set of data. - * - * @param {Uint8Array|ArrayBuffer} input - * @returns {number} - */ - static calculateShannonEntropy(data) { - if (data instanceof ArrayBuffer) { - data = new Uint8Array(data); - } - const prob = [], - occurrences = new Array(256).fill(0); - - // Count occurrences of each byte in the input - let i; - for (i = 0; i < data.length; i++) { - occurrences[data[i]]++; - } - - // Store probability list - for (i = 0; i < occurrences.length; i++) { - if (occurrences[i] > 0) { - prob.push(occurrences[i] / data.length); - } - } - - // Calculate Shannon entropy - let entropy = 0, - p; - - for (i = 0; i < prob.length; i++) { - p = prob[i]; - entropy += p * Math.log(p) / Math.log(2); - } - - return -entropy; - } - /** * Parses CSV data and returns it as a two dimensional array or strings. diff --git a/src/core/lib/Magic.mjs b/src/core/lib/Magic.mjs index 79c64452..30f64a88 100644 --- a/src/core/lib/Magic.mjs +++ b/src/core/lib/Magic.mjs @@ -4,6 +4,7 @@ import Recipe from "../Recipe.mjs"; import Dish from "../Dish.mjs"; import {detectFileType} from "./FileType.mjs"; import chiSquared from "chi-squared"; +import { freqDist, calculateShannonEntropyFromProb } from "./Entropy.mjs"; /** * A class for detecting encodings, file types and byte frequencies and @@ -63,7 +64,7 @@ class Magic { probability: Math.MIN_VALUE }]; - const inputFreq = this._freqDist(); + const inputFreq = freqDist(this.inputBuffer); const langFreqs = extLang ? EXTENSIVE_LANG_FREQS : COMMON_LANG_FREQS; const chiSqrs = []; @@ -186,16 +187,12 @@ class Magic { * @returns {number} */ calcEntropy() { - const prob = this._freqDist(); - let entropy = 0, - p; + if (!(this.freqDist)) + this.freqDist = freqDist(this.inputBuffer); - for (let i = 0; i < prob.length; i++) { - p = prob[i] / 100; - if (p === 0) continue; - entropy += p * Math.log(p) / Math.log(2); - } - return -entropy; + if (!(this.entropy)) + this.entropy = calculateShannonEntropyFromProb(this.freqDist); + return this.entropy; } /** @@ -413,34 +410,6 @@ class Magic { } } - /** - * Calculates the number of times each byte appears in the input as a percentage - * - * @private - * @returns {number[]} - */ - _freqDist() { - if (this.freqDist) return this.freqDist; - - const len = this.inputBuffer.length; - let i = len; - const counts = new Array(256).fill(0); - - if (!len) { - this.freqDist = counts; - return this.freqDist; - } - - while (i--) { - counts[this.inputBuffer[i]]++; - } - - this.freqDist = counts.map(c => { - return c / len * 100; - }); - return this.freqDist; - } - /** * Generates a list of all patterns that operations claim to be able to decode. * diff --git a/src/core/operations/Entropy.mjs b/src/core/operations/Entropy.mjs index 3e93cc57..7e120aa8 100644 --- a/src/core/operations/Entropy.mjs +++ b/src/core/operations/Entropy.mjs @@ -9,6 +9,8 @@ import * as nodomtemp from "nodom"; import Operation from "../Operation.mjs"; +import {calculateScanningEntropy, calculateShannonEntropy} from "../lib/Entropy.mjs"; + const d3 = d3temp.default ? d3temp.default : d3temp; const nodom = nodomtemp.default ? nodomtemp.default: nodomtemp; @@ -39,59 +41,6 @@ class Entropy extends Operation { ]; } - /** - * Calculates the frequency of bytes in the input. - * - * @param {Uint8Array} input - * @returns {number} - */ - calculateShannonEntropy(input) { - const prob = [], - occurrences = new Array(256).fill(0); - - // Count occurrences of each byte in the input - let i; - for (i = 0; i < input.length; i++) { - occurrences[input[i]]++; - } - - // Store probability list - for (i = 0; i < occurrences.length; i++) { - if (occurrences[i] > 0) { - prob.push(occurrences[i] / input.length); - } - } - - // Calculate Shannon entropy - let entropy = 0, - p; - - for (i = 0; i < prob.length; i++) { - p = prob[i]; - entropy += p * Math.log(p) / Math.log(2); - } - - return -entropy; - } - - /** - * Calculates the scanning entropy of the input - * - * @param {Uint8Array} inputBytes - * @returns {Object} - */ - calculateScanningEntropy(inputBytes) { - const entropyData = []; - const binWidth = inputBytes.length < 256 ? 8 : 256; - - for (let bytePos = 0; bytePos < inputBytes.length; bytePos += binWidth) { - const block = inputBytes.slice(bytePos, bytePos+binWidth); - entropyData.push(this.calculateShannonEntropy(block)); - } - - return { entropyData, binWidth }; - } - /** * Calculates the frequency of bytes in the input. * @@ -394,10 +343,10 @@ class Entropy extends Operation { return this.calculateByteFrequency(input); case "Curve": case "Image": - return this.calculateScanningEntropy(input).entropyData; + return calculateScanningEntropy(input, input.byteLength < 256 ? 8 : 256).entropyData; case "Shannon scale": default: - return this.calculateShannonEntropy(input); + return calculateShannonEntropy(input); } } diff --git a/src/core/operations/ExtractEntropies.mjs b/src/core/operations/ExtractEntropies.mjs index 555faece..2abca158 100644 --- a/src/core/operations/ExtractEntropies.mjs +++ b/src/core/operations/ExtractEntropies.mjs @@ -7,6 +7,7 @@ import Operation from "../Operation.mjs"; import OperationError from "../errors/OperationError.mjs"; import Utils from "../Utils.mjs"; +import {calculateScanningEntropy} from "../lib/Entropy.mjs"; /** * Extract Entropies operation @@ -81,60 +82,6 @@ class ExtractEntropies extends Operation { ]; } - /** - * Calculates the frequency of bytes in the input. - * - * @param {Uint8Array} input - * @returns {number} - */ - calculateShannonEntropy(input) { - const prob = [], - occurrences = new Array(256).fill(0); - - // Count occurrences of each byte in the input - let i; - for (i = 0; i < input.length; i++) { - occurrences[input[i]]++; - } - - // Store probability list - for (i = 0; i < occurrences.length; i++) { - if (occurrences[i] > 0) { - prob.push(occurrences[i] / input.length); - } - } - - // Calculate Shannon entropy - let entropy = 0, - p; - - for (i = 0; i < prob.length; i++) { - p = prob[i]; - entropy += p * Math.log(p) / Math.log(2); - } - - return -entropy; - } - - /** - * Calculates the scanning entropy of the input. - * - * @param {Uint8Array} inputBytes - * @param {number} binWidth - * @returns {Object} - */ - calculateScanningEntropy(inputBytes, binWidth) { - const entropyData = []; - // const binWidth = inputBytes.length < 256 ? 8 : 256; - - for (let bytePos = 0; bytePos < inputBytes.length; bytePos += binWidth) { - const block = inputBytes.slice(bytePos, bytePos+binWidth); - entropyData.push(this.calculateShannonEntropy(block)); - } - - return { entropyData, binWidth }; - } - /** * Calculates the average of a list of entropies. * @@ -459,7 +406,7 @@ class ExtractEntropies extends Operation { throw new OperationError("Cannot have a negative block size"); let result = []; - const entropies = this.calculateScanningEntropy(new Uint8Array(input), args[1]); + const entropies = calculateScanningEntropy(new Uint8Array(input), args[1]); switch (args[0]) { case "English Text": result = this.getRange(entropies.entropyData, 3.5, 5, input, args[5], args[1]); diff --git a/src/web/waiters/InputWaiter.mjs b/src/web/waiters/InputWaiter.mjs index fa05f22b..e73b48e9 100644 --- a/src/web/waiters/InputWaiter.mjs +++ b/src/web/waiters/InputWaiter.mjs @@ -10,6 +10,7 @@ import InputWorker from "worker-loader?inline&fallback=false!../workers/InputWor import Utils, { debounce } from "../../core/Utils.mjs"; import { toBase64 } from "../../core/lib/Base64.mjs"; import { isImage } from "../../core/lib/FileType.mjs"; +import { calculateShannonEntropy } from "../../core/lib/Entropy.mjs"; /** @@ -874,7 +875,7 @@ class InputWaiter { // Only preserve for high-entropy inputs const data = Utils.strToArrayBuffer(input); - const entropy = Utils.calculateShannonEntropy(data); + const entropy = calculateShannonEntropy(data); if (entropy > 6) { this.app.alert(preserveStr, 6000);