Add an Entropy library and make existing operations use it

n1073645 2020-01-07 12:11:18 +00:00
parent 9484941f66
commit 28244ac605
5 changed files with 15 additions and 187 deletions

View file: src/core/Utils.mjs

@@ -591,44 +591,6 @@ class Utils {
return utf8 ? Utils.byteArrayToUtf8(arr) : Utils.byteArrayToChars(arr);
}
- /**
- * Calculates the Shannon entropy for a given set of data.
- *
- * @param {Uint8Array|ArrayBuffer} input
- * @returns {number}
- */
- static calculateShannonEntropy(data) {
- if (data instanceof ArrayBuffer) {
- data = new Uint8Array(data);
- }
- const prob = [],
- occurrences = new Array(256).fill(0);
- // Count occurrences of each byte in the input
- let i;
- for (i = 0; i < data.length; i++) {
- occurrences[data[i]]++;
- }
- // Store probability list
- for (i = 0; i < occurrences.length; i++) {
- if (occurrences[i] > 0) {
- prob.push(occurrences[i] / data.length);
- }
- }
- // Calculate Shannon entropy
- let entropy = 0,
- p;
- for (i = 0; i < prob.length; i++) {
- p = prob[i];
- entropy += p * Math.log(p) / Math.log(2);
- }
- return -entropy;
- }
/**
* Parses CSV data and returns it as a two dimensional array of strings.

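For reference, below is a minimal sketch of what the new lib/Entropy.mjs presumably exports. The library file itself is not shown in this commit view; the export names come from the import lines in the hunks below, and the bodies are reconstructed from the helpers this commit deletes (Utils.calculateShannonEntropy above and Magic._freqDist further down), so treat them as assumptions rather than the library's actual source.

// Sketch of lib/Entropy.mjs (assumed; not shown in this commit view).

/**
 * Frequency distribution of byte values, expressed as percentages to match the
 * behaviour of the removed Magic._freqDist (whose output also feeds language detection).
 *
 * @param {Uint8Array|ArrayBuffer} data
 * @returns {number[]} 256-entry array of percentages
 */
export function freqDist(data) {
    if (data instanceof ArrayBuffer) data = new Uint8Array(data);
    const counts = new Array(256).fill(0);
    for (let i = 0; i < data.length; i++) counts[data[i]]++;
    return data.length ? counts.map(c => c / data.length * 100) : counts;
}

/**
 * Shannon entropy (bits per byte) from a percentage distribution.
 *
 * @param {number[]} prob
 * @returns {number}
 */
export function calculateShannonEntropyFromProb(prob) {
    let entropy = 0;
    for (const pc of prob) {
        const p = pc / 100;              // percentage back to probability, as the old calcEntropy did
        if (p > 0) entropy -= p * Math.log2(p);
    }
    return entropy;
}

/**
 * Shannon entropy of raw bytes.
 *
 * @param {Uint8Array|ArrayBuffer} data
 * @returns {number}
 */
export function calculateShannonEntropy(data) {
    return calculateShannonEntropyFromProb(freqDist(data));
}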
View file: src/core/lib/Magic.mjs

@@ -4,6 +4,7 @@ import Recipe from "../Recipe.mjs";
import Dish from "../Dish.mjs";
import {detectFileType} from "./FileType.mjs";
import chiSquared from "chi-squared";
+ import { freqDist, calculateShannonEntropyFromProb } from "./Entropy.mjs";
/**
* A class for detecting encodings, file types and byte frequencies and
@@ -63,7 +64,7 @@ class Magic {
probability: Number.MIN_VALUE
}];
- const inputFreq = this._freqDist();
+ const inputFreq = freqDist(this.inputBuffer);
const langFreqs = extLang ? EXTENSIVE_LANG_FREQS : COMMON_LANG_FREQS;
const chiSqrs = [];
@@ -186,16 +187,12 @@
* @returns {number}
*/
calcEntropy() {
- const prob = this._freqDist();
- let entropy = 0,
- p;
+ if (!(this.freqDist))
+ this.freqDist = freqDist(this.inputBuffer);
- for (let i = 0; i < prob.length; i++) {
- p = prob[i] / 100;
- if (p === 0) continue;
- entropy += p * Math.log(p) / Math.log(2);
- }
- return -entropy;
+ if (!(this.entropy))
+ this.entropy = calculateShannonEntropyFromProb(this.freqDist);
+ return this.entropy;
}
/**
@@ -413,34 +410,6 @@
}
}
- /**
- * Calculates the number of times each byte appears in the input as a percentage
- *
- * @private
- * @returns {number[]}
- */
- _freqDist() {
- if (this.freqDist) return this.freqDist;
- const len = this.inputBuffer.length;
- let i = len;
- const counts = new Array(256).fill(0);
- if (!len) {
- this.freqDist = counts;
- return this.freqDist;
- }
- while (i--) {
- counts[this.inputBuffer[i]]++;
- }
- this.freqDist = counts.map(c => {
- return c / len * 100;
- });
- return this.freqDist;
- }
/**
* Generates a list of all patterns that operations claim to be able to decode.
*

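A hedged usage sketch of the memoisation introduced in calcEntropy above: the frequency distribution and the entropy are each computed once and cached on the instance. Only calcEntropy() and the cached fields appear in the diff; the constructor call below is an assumption.

import Magic from "./Magic.mjs";

// Hypothetical input buffer; the diff only shows this.inputBuffer being read as bytes.
const magic = new Magic(new Uint8Array([0x41, 0x42, 0x42, 0x43]).buffer);

const first = magic.calcEntropy();   // computes and caches this.freqDist and this.entropy
const second = magic.calcEntropy();  // served from this.entropy, nothing recomputed

console.assert(first === second);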
View file: src/core/operations/Entropy.mjs

@@ -9,6 +9,8 @@ import * as nodomtemp from "nodom";
import Operation from "../Operation.mjs";
+ import {calculateScanningEntropy, calculateShannonEntropy} from "../lib/Entropy.mjs";
const d3 = d3temp.default ? d3temp.default : d3temp;
const nodom = nodomtemp.default ? nodomtemp.default: nodomtemp;
@@ -39,59 +41,6 @@ class Entropy extends Operation {
];
}
- /**
- * Calculates the frequency of bytes in the input.
- *
- * @param {Uint8Array} input
- * @returns {number}
- */
- calculateShannonEntropy(input) {
- const prob = [],
- occurrences = new Array(256).fill(0);
- // Count occurrences of each byte in the input
- let i;
- for (i = 0; i < input.length; i++) {
- occurrences[input[i]]++;
- }
- // Store probability list
- for (i = 0; i < occurrences.length; i++) {
- if (occurrences[i] > 0) {
- prob.push(occurrences[i] / input.length);
- }
- }
- // Calculate Shannon entropy
- let entropy = 0,
- p;
- for (i = 0; i < prob.length; i++) {
- p = prob[i];
- entropy += p * Math.log(p) / Math.log(2);
- }
- return -entropy;
- }
- /**
- * Calculates the scanning entropy of the input
- *
- * @param {Uint8Array} inputBytes
- * @returns {Object}
- */
- calculateScanningEntropy(inputBytes) {
- const entropyData = [];
- const binWidth = inputBytes.length < 256 ? 8 : 256;
- for (let bytePos = 0; bytePos < inputBytes.length; bytePos += binWidth) {
- const block = inputBytes.slice(bytePos, bytePos+binWidth);
- entropyData.push(this.calculateShannonEntropy(block));
- }
- return { entropyData, binWidth };
- }
/**
* Calculates the frequency of bytes in the input.
*
@@ -394,10 +343,10 @@ class Entropy extends Operation {
return this.calculateByteFrequency(input);
case "Curve":
case "Image":
- return this.calculateScanningEntropy(input).entropyData;
+ return calculateScanningEntropy(input, input.byteLength < 256 ? 8 : 256).entropyData;
case "Shannon scale":
default:
- return this.calculateShannonEntropy(input);
+ return calculateShannonEntropy(input);
}
}

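Continuing the lib/Entropy.mjs sketch from above: the scanning helper is presumably the method removed in this file with binWidth promoted to an explicit parameter, which is what both new call sites pass. Again, this is an assumption, not the library's actual source.

/**
 * Block-wise (scanning) entropy of the input.
 *
 * @param {Uint8Array} inputBytes
 * @param {number} binWidth - block size in bytes, now chosen by the caller
 * @returns {{entropyData: number[], binWidth: number}}
 */
export function calculateScanningEntropy(inputBytes, binWidth) {
    const entropyData = [];
    // Walk the input in fixed-width blocks and record each block's entropy.
    for (let bytePos = 0; bytePos < inputBytes.length; bytePos += binWidth) {
        const block = inputBytes.slice(bytePos, bytePos + binWidth);
        entropyData.push(calculateShannonEntropy(block));
    }
    return { entropyData, binWidth };
}

Promoting binWidth to a parameter lets this operation keep its old heuristic inline (input.byteLength < 256 ? 8 : 256) while the ExtractEntropies operation below passes its user-supplied block size instead.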
View file: src/core/operations/ExtractEntropies.mjs

@@ -7,6 +7,7 @@
import Operation from "../Operation.mjs";
import OperationError from "../errors/OperationError.mjs";
import Utils from "../Utils.mjs";
+ import {calculateScanningEntropy} from "../lib/Entropy.mjs";
/**
* Extract Entropies operation
@@ -81,60 +82,6 @@ class ExtractEntropies extends Operation {
];
}
- /**
- * Calculates the frequency of bytes in the input.
- *
- * @param {Uint8Array} input
- * @returns {number}
- */
- calculateShannonEntropy(input) {
- const prob = [],
- occurrences = new Array(256).fill(0);
- // Count occurrences of each byte in the input
- let i;
- for (i = 0; i < input.length; i++) {
- occurrences[input[i]]++;
- }
- // Store probability list
- for (i = 0; i < occurrences.length; i++) {
- if (occurrences[i] > 0) {
- prob.push(occurrences[i] / input.length);
- }
- }
- // Calculate Shannon entropy
- let entropy = 0,
- p;
- for (i = 0; i < prob.length; i++) {
- p = prob[i];
- entropy += p * Math.log(p) / Math.log(2);
- }
- return -entropy;
- }
- /**
- * Calculates the scanning entropy of the input.
- *
- * @param {Uint8Array} inputBytes
- * @param {number} binWidth
- * @returns {Object}
- */
- calculateScanningEntropy(inputBytes, binWidth) {
- const entropyData = [];
- // const binWidth = inputBytes.length < 256 ? 8 : 256;
- for (let bytePos = 0; bytePos < inputBytes.length; bytePos += binWidth) {
- const block = inputBytes.slice(bytePos, bytePos+binWidth);
- entropyData.push(this.calculateShannonEntropy(block));
- }
- return { entropyData, binWidth };
- }
/**
* Calculates the average of a list of entropies.
*
@@ -459,7 +406,7 @@
throw new OperationError("Cannot have a negative block size");
let result = [];
- const entropies = this.calculateScanningEntropy(new Uint8Array(input), args[1]);
+ const entropies = calculateScanningEntropy(new Uint8Array(input), args[1]);
switch (args[0]) {
case "English Text":
result = this.getRange(entropies.entropyData, 3.5, 5, input, args[5], args[1]);

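A hedged usage sketch of the shared helper as ExtractEntropies now calls it; the block size below is a stand-in for args[1], which the surrounding code rejects when negative.

import { calculateScanningEntropy } from "../lib/Entropy.mjs";

// Stand-in input and block size (args[1] in the operation).
const input = new Uint8Array(1024).map(() => Math.floor(Math.random() * 256));
const blockSize = 256;

const { entropyData, binWidth } = calculateScanningEntropy(input, blockSize);
// entropyData[i] is the entropy (bits/byte) of bytes [i * binWidth, (i + 1) * binWidth),
// which getRange then matches against ranges such as 3.5-5 for English text.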
View file: src/web/waiters/InputWaiter.mjs

@@ -10,6 +10,7 @@ import InputWorker from "worker-loader?inline&fallback=false!../workers/InputWor
import Utils, { debounce } from "../../core/Utils.mjs";
import { toBase64 } from "../../core/lib/Base64.mjs";
import { isImage } from "../../core/lib/FileType.mjs";
+ import { calculateShannonEntropy } from "../../core/lib/Entropy.mjs";
/**
@@ -874,7 +875,7 @@ class InputWaiter
// Only preserve for high-entropy inputs
const data = Utils.strToArrayBuffer(input);
- const entropy = Utils.calculateShannonEntropy(data);
+ const entropy = calculateShannonEntropy(data);
if (entropy > 6) {
this.app.alert(preserveStr, 6000);