mirror of
https://github.com/gchq/CyberChef.git
synced 2025-04-20 06:55:08 -04:00
Merge pull request #630 from MShwed/feature/mime-rfc2047
Feature: MIME RFC2047 Decoding
This commit is contained in:
commit
7906f9d560
4 changed files with 263 additions and 1 deletions
|
@ -76,7 +76,8 @@
|
|||
"Rison Encode",
|
||||
"Rison Decode",
|
||||
"To Modhex",
|
||||
"From Modhex"
|
||||
"From Modhex",
|
||||
"MIME Decoding"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
171
src/core/operations/MIMEDecoding.mjs
Normal file
171
src/core/operations/MIMEDecoding.mjs
Normal file
|
@ -0,0 +1,171 @@
|
|||
/**
|
||||
* @author mshwed [m@ttshwed.com]
|
||||
* @copyright Crown Copyright 2019
|
||||
* @license Apache-2.0
|
||||
*/
|
||||
|
||||
import Operation from "../Operation.mjs";
|
||||
import OperationError from "../errors/OperationError.mjs";
|
||||
import Utils from "../Utils.mjs";
|
||||
import { fromHex } from "../lib/Hex.mjs";
|
||||
import { fromBase64 } from "../lib/Base64.mjs";
|
||||
import cptable from "codepage";
|
||||
|
||||
/**
|
||||
* MIME Decoding operation
|
||||
*/
|
||||
class MIMEDecoding extends Operation {

    /**
     * MIMEDecoding constructor
     */
    constructor() {
        super();

        this.name = "MIME Decoding";
        this.module = "Default";
        this.description = "Enables the decoding of MIME message header extensions for non-ASCII text";
        this.infoURL = "https://tools.ietf.org/html/rfc2047";
        this.inputType = "byteArray";
        this.outputType = "string";
        this.args = [];
    }

    /**
     * Decodes every RFC 2047 encoded word found in the input.
     *
     * @param {byteArray} input
     * @param {Object[]} args
     * @returns {string}
     */
    run(input, args) {
        const mimeEncodedText = Utils.byteArrayToUtf8(input);

        // Normalise CRLF to LF so that folded header lines are scanned uniformly.
        const encodedHeaders = mimeEncodedText.replace(/\r\n/g, "\n");

        return this.decodeHeaders(encodedHeaders);
    }

    /**
     * Decode MIME header strings.
     *
     * Scans the text for encoded words of the form "=?charset?encoding?text?="
     * and replaces each one with its decoded text. Text that is not part of an
     * encoded word is copied through unchanged, except that whitespace-only
     * runs separating two successfully decoded words are dropped. Scanning
     * stops at the first structurally incomplete encoded word and the rest of
     * the input is appended as-is.
     *
     * @param {string} headerString
     * @returns {string}
     * @throws {OperationError} if a Q encoded word is malformed or the charset
     *     is not supported
     */
    decodeHeaders(headerString) {
        const WORD_START = "=?";
        const WORD_END = "?=";

        // No encoded words detected
        const firstWord = headerString.indexOf(WORD_START);
        if (firstWord === -1) return headerString;

        let decodedHeaders = headerString.slice(0, firstWord);
        let header = headerString.slice(firstWord);

        // True when the text consumed immediately before `header` was a decoded word.
        let isBetweenWords = false;

        while (header.length > 0) {
            const start = header.indexOf(WORD_START);
            if (start === -1) break;
            let cur = start + WORD_START.length;

            // Charset runs up to the next "?"
            const charsetEnd = header.indexOf("?", cur);
            if (charsetEnd === -1) break;
            const charset = header.slice(cur, charsetEnd);
            cur = charsetEnd + "?".length;

            // There must be room left for at least an encoding letter and "??="
            if (header.length < cur + "Q??=".length) break;

            const encoding = header[cur].toLowerCase();
            cur += 1;

            if (header[cur] !== "?") break;
            cur += 1;

            const textEnd = header.indexOf(WORD_END, cur);
            if (textEnd === -1) break;

            const encodedText = header.slice(cur, textEnd);
            const end = textEnd + WORD_END.length;

            let text;
            if (encoding === "b") {
                // NOTE(review): relies on fromBase64's default return type being
                // acceptable to cptable.utils.decode - confirm against lib/Base64.
                text = fromBase64(encodedText);
            } else if (encoding === "q") {
                text = this.parseQEncodedWord(encodedText);
            } else {
                // Unsupported encoding: this "=?" is literal text. Emit everything
                // up to and including it, then rescan the remainder. Without the
                // `continue` the code below would run against the already-sliced
                // header using a stale `start`/`end` and corrupt the output.
                isBetweenWords = false;
                decodedHeaders += header.slice(0, start + WORD_START.length);
                header = header.slice(start + WORD_START.length);
                continue;
            }

            // Keep the text before this word unless it is only whitespace
            // separating two adjacent encoded words.
            if (start > 0) {
                const gap = header.slice(0, start);
                if (!isBetweenWords || /\S/.test(gap)) {
                    decodedHeaders += gap;
                }
            }

            decodedHeaders += this.convertFromCharset(charset, text);

            header = header.slice(end);
            isBetweenWords = true;
        }

        // Whatever is left contains no complete encoded word
        if (header.length > 0) {
            decodedHeaders += header;
        }

        return decodedHeaders;
    }

    /**
     * Converts decoded text for supported charsets.
     * Supports UTF-8, US-ASCII, ISO-8859-*
     *
     * @param {string} charset - charset label taken from the encoded word (case-insensitive)
     * @param {string} encodedText - transfer-decoded text still in the given charset
     * @returns {string}
     * @throws {OperationError} if the charset is not one of the supported ones
     */
    convertFromCharset(charset, encodedText) {
        const label = charset.toLowerCase();

        if (label === "utf-8") {
            return cptable.utils.decode(65001, encodedText);
        }

        if (label === "us-ascii") {
            return cptable.utils.decode(20127, encodedText);
        }

        const parts = label.split("-");
        if (parts.length === 3 && parts[0] === "iso" && parts[1] === "8859") {
            const isoPart = parseInt(parts[2], 10);
            // The code page handed to cptable is 28590 plus the ISO-8859 part number.
            if (isoPart >= 1 && isoPart <= 16) {
                return cptable.utils.decode(28590 + isoPart, encodedText);
            }
        }

        throw new OperationError("Unhandled Charset");
    }

    /**
     * Parses a Q encoded word.
     *
     * "_" becomes a space, "=XX" becomes the character with hex code XX, and
     * printable ASCII plus tab, CR and LF are copied through unchanged.
     *
     * @param {string} encodedWord
     * @returns {string}
     * @throws {OperationError} if an "=" is not followed by two characters or
     *     the word contains any other character
     */
    parseQEncodedWord(encodedWord) {
        const SPACE = " ".charCodeAt(0);
        const TILDE = "~".charCodeAt(0);

        let decodedWord = "";

        for (let i = 0; i < encodedWord.length; i++) {
            const ch = encodedWord[i];
            const code = encodedWord.charCodeAt(i);

            if (ch === "_") {
                decodedWord += " ";
            } else if (ch === "=") {
                // Parse hex encoding: two more characters must follow the "="
                if ((i + 2) >= encodedWord.length) throw new OperationError("Incorrectly Encoded Word");

                decodedWord += Utils.byteArrayToChars(fromHex(encodedWord.substring(i + 1, i + 3)));
                i += 2;
            } else if ((code >= SPACE && code <= TILDE) || ch === "\n" || ch === "\r" || ch === "\t") {
                decodedWord += ch;
            } else {
                throw new OperationError("Incorrectly Encoded Word");
            }
        }

        return decodedWord;
    }

}
|
||||
|
||||
export default MIMEDecoding;
|
|
@ -104,6 +104,7 @@ import "./tests/LZNT1Decompress.mjs";
|
|||
import "./tests/LZString.mjs";
|
||||
import "./tests/Magic.mjs";
|
||||
import "./tests/Media.mjs";
|
||||
import "./tests/MIMEDecoding.mjs";
|
||||
import "./tests/Modhex.mjs";
|
||||
import "./tests/MorseCode.mjs";
|
||||
import "./tests/MS.mjs";
|
||||
|
|
89
tests/operations/tests/MIMEDecoding.mjs
Normal file
89
tests/operations/tests/MIMEDecoding.mjs
Normal file
|
@ -0,0 +1,89 @@
|
|||
/**
|
||||
* MIME Header Decoding tests
|
||||
*
|
||||
* @author mshwed [m@ttshwed.com]
|
||||
* @copyright Crown Copyright 2019
|
||||
* @license Apache-2.0
|
||||
*/
|
||||
|
||||
import TestRegister from "../../lib/TestRegister.mjs";
|
||||
|
||||
/**
 * Builds the single-operation recipe shared by every case below.
 * A fresh array is returned on each call so that no test shares state with another.
 *
 * @returns {Object[]}
 */
const mimeDecodingRecipe = () => [
    {
        "op": "MIME Decoding",
        "args": []
    }
];

TestRegister.addTests([
    {
        // An encoded word embedded in surrounding literal text
        name: "Encoded comments",
        input: "(=?ISO-8859-1?Q?a?=)",
        expectedOutput: "(a)",
        recipeConfig: mimeDecodingRecipe()
    },
    {
        // Whitespace between an encoded word and plain text is kept
        name: "Encoded adjacent comments whitespace",
        input: "(=?ISO-8859-1?Q?a?= b)",
        expectedOutput: "(a b)",
        recipeConfig: mimeDecodingRecipe()
    },
    {
        // A single space between two encoded words is dropped
        name: "Encoded adjacent single whitespace ignored",
        input: "(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)",
        expectedOutput: "(ab)",
        recipeConfig: mimeDecodingRecipe()
    },
    {
        // Two spaces between the encoded words, so this case differs from
        // the single-whitespace case above
        name: "Encoded adjacent double whitespace ignored",
        input: "(=?ISO-8859-1?Q?a?=  =?ISO-8859-1?Q?b?=)",
        expectedOutput: "(ab)",
        recipeConfig: mimeDecodingRecipe()
    },
    {
        // A folded line (CRLF followed by a space) between two encoded words is dropped
        name: "Encoded adjacent CRLF whitespace ignored",
        input: "(=?ISO-8859-1?Q?a?=\r\n =?ISO-8859-1?Q?b?=)",
        expectedOutput: "(ab)",
        recipeConfig: mimeDecodingRecipe()
    },
    {
        // Several Q encoded UTF-8 words separated by non-whitespace text
        name: "UTF-8 Encodings Multiple Headers",
        input: "=?utf-8?q?=C3=89ric?= <eric@example.org>, =?utf-8?q?Ana=C3=AFs?= <anais@example.org>",
        expectedOutput: "Éric <eric@example.org>, Anaïs <anais@example.org>",
        recipeConfig: mimeDecodingRecipe()
    },
    {
        // Mix of US-ASCII and ISO-8859-* charsets using both Q and B encodings
        name: "ISO Decoding",
        input: "From: =?US-ASCII?Q?Keith_Moore?= <moore@cs.utk.edu>\nTo: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= <keld@dkuug.dk>\nCC: =?ISO-8859-1?Q?Andr=E9?= Pirard <PIRARD@vm1.ulg.ac.be>\nSubject: =?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=\n=?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=",
        expectedOutput: "From: Keith Moore <moore@cs.utk.edu>\nTo: Keld Jørn Simonsen <keld@dkuug.dk>\nCC: André Pirard <PIRARD@vm1.ulg.ac.be>\nSubject: If you can read this you understand the example.",
        recipeConfig: mimeDecodingRecipe()
    }
]);
|
Loading…
Add table
Add a link
Reference in a new issue