From 9a982f05ac7065a2253f8f2d963f6e636d79783a Mon Sep 17 00:00:00 2001 From: n1474335 Date: Thu, 14 Apr 2022 18:08:16 +0100 Subject: [PATCH] Extract operations now offer built-in Sort and Unique options. Unique operation offers option to count occurrences. Closes #1334. --- src/core/lib/Extract.mjs | 27 +++-- src/core/lib/Sort.mjs | 105 +++++++++++++++++ src/core/operations/ExtractDates.mjs | 8 +- src/core/operations/ExtractDomains.mjs | 34 +++++- src/core/operations/ExtractEmailAddresses.mjs | 34 +++++- src/core/operations/ExtractFilePaths.mjs | 52 ++++++--- src/core/operations/ExtractIPAddresses.mjs | 71 ++++++----- src/core/operations/ExtractMACAddresses.mjs | 34 +++++- src/core/operations/ExtractURLs.mjs | 33 +++++- src/core/operations/Sort.mjs | 110 +----------------- src/core/operations/Strings.mjs | 50 +++++--- src/core/operations/Unique.mjs | 30 ++++- tests/node/tests/operations.mjs | 7 +- .../tests/ExtractEmailAddresses.mjs | 8 +- 14 files changed, 397 insertions(+), 206 deletions(-) create mode 100644 src/core/lib/Sort.mjs diff --git a/src/core/lib/Extract.mjs b/src/core/lib/Extract.mjs index 8b9f957e..18fec28c 100644 --- a/src/core/lib/Extract.mjs +++ b/src/core/lib/Extract.mjs @@ -12,15 +12,15 @@ * * @param {string} input * @param {RegExp} searchRegex - * @param {RegExp} removeRegex - A regular expression defining results to remove from the + * @param {RegExp} [removeRegex=null] - A regular expression defining results to remove from the * final list - * @param {boolean} includeTotal - Whether or not to include the total number of results + * @param {Function} [sortBy=null] - The sorting comparison function to apply + * @param {boolean} [unique=false] - Whether to unique the results * @returns {string} */ -export function search (input, searchRegex, removeRegex, includeTotal) { - let output = "", - total = 0, - match; +export function search(input, searchRegex, removeRegex=null, sortBy=null, unique=false) { + let results = []; + let match; while ((match = 
searchRegex.exec(input))) { // Moves pointer when an empty string is matched (prevents infinite loop) @@ -30,14 +30,19 @@ export function search (input, searchRegex, removeRegex, includeTotal) { if (removeRegex && removeRegex.test(match[0])) continue; - total++; - output += match[0] + "\n"; + + results.push(match[0]); } - if (includeTotal) - output = "Total found: " + total + "\n\n" + output; + if (sortBy) { + results = results.sort(sortBy); + } - return output; + if (unique) { + results = results.unique(); + } + + return results; } diff --git a/src/core/lib/Sort.mjs b/src/core/lib/Sort.mjs new file mode 100644 index 00000000..46bbebd9 --- /dev/null +++ b/src/core/lib/Sort.mjs @@ -0,0 +1,105 @@ +/** + * Sorting functions + * + * @author n1474335 [n1474335@gmail.com] + * @copyright Crown Copyright 2022 + * @license Apache-2.0 + * + */ + +/** + * Comparison operation for sorting of strings ignoring case. + * + * @param {string} a + * @param {string} b + * @returns {number} + */ +export function caseInsensitiveSort(a, b) { + return a.toLowerCase().localeCompare(b.toLowerCase()); +} + + +/** + * Comparison operation for sorting of IPv4 addresses. + * + * @param {string} a + * @param {string} b + * @returns {number} + */ +export function ipSort(a, b) { + let a_ = a.split("."), + b_ = b.split("."); + + a_ = a_[0] * 0x1000000 + a_[1] * 0x10000 + a_[2] * 0x100 + a_[3] * 1; + b_ = b_[0] * 0x1000000 + b_[1] * 0x10000 + b_[2] * 0x100 + b_[3] * 1; + + if (isNaN(a_) && !isNaN(b_)) return 1; + if (!isNaN(a_) && isNaN(b_)) return -1; + if (isNaN(a_) && isNaN(b_)) return a.localeCompare(b); + + return a_ - b_; +} + +/** + * Comparison operation for sorting of numeric values. 
+ * + * @author Chris van Marle + * @param {string} a + * @param {string} b + * @returns {number} + */ +export function numericSort(a, b) { + const a_ = a.split(/([^\d]+)/), + b_ = b.split(/([^\d]+)/); + + for (let i = 0; i < a_.length && i < b.length; ++i) { + if (isNaN(a_[i]) && !isNaN(b_[i])) return 1; // Numbers after non-numbers + if (!isNaN(a_[i]) && isNaN(b_[i])) return -1; + if (isNaN(a_[i]) && isNaN(b_[i])) { + const ret = a_[i].localeCompare(b_[i]); // Compare strings + if (ret !== 0) return ret; + } + if (!isNaN(a_[i]) && !isNaN(b_[i])) { // Compare numbers + if (a_[i] - b_[i] !== 0) return a_[i] - b_[i]; + } + } + + return a.localeCompare(b); +} + +/** + * Comparison operation for sorting of hexadecimal values. + * + * @author Chris van Marle + * @param {string} a + * @param {string} b + * @returns {number} + */ +export function hexadecimalSort(a, b) { + let a_ = a.split(/([^\da-f]+)/i), + b_ = b.split(/([^\da-f]+)/i); + + a_ = a_.map(v => { + const t = parseInt(v, 16); + return isNaN(t) ? v : t; + }); + + b_ = b_.map(v => { + const t = parseInt(v, 16); + return isNaN(t) ? 
v : t; + }); + + for (let i = 0; i < a_.length && i < b.length; ++i) { + if (isNaN(a_[i]) && !isNaN(b_[i])) return 1; // Numbers after non-numbers + if (!isNaN(a_[i]) && isNaN(b_[i])) return -1; + if (isNaN(a_[i]) && isNaN(b_[i])) { + const ret = a_[i].localeCompare(b_[i]); // Compare strings + if (ret !== 0) return ret; + } + if (!isNaN(a_[i]) && !isNaN(b_[i])) { // Compare numbers + if (a_[i] - b_[i] !== 0) return a_[i] - b_[i]; + } + } + + return a.localeCompare(b); +} diff --git a/src/core/operations/ExtractDates.mjs b/src/core/operations/ExtractDates.mjs index dfe93c88..74c177d3 100644 --- a/src/core/operations/ExtractDates.mjs +++ b/src/core/operations/ExtractDates.mjs @@ -44,7 +44,13 @@ class ExtractDates extends Operation { date3 = "(?:0[1-9]|1[012])[- /.](?:0[1-9]|[12][0-9]|3[01])[- /.](?:19|20)\\d\\d", // mm/dd/yyyy regex = new RegExp(date1 + "|" + date2 + "|" + date3, "ig"); - return search(input, regex, null, displayTotal); + const results = search(input, regex); + + if (displayTotal) { + return `Total found: ${results.length}\n\n${results.join("\n")}`; + } else { + return results.join("\n"); + } } } diff --git a/src/core/operations/ExtractDomains.mjs b/src/core/operations/ExtractDomains.mjs index ea9aa3af..c28efbb5 100644 --- a/src/core/operations/ExtractDomains.mjs +++ b/src/core/operations/ExtractDomains.mjs @@ -6,6 +6,7 @@ import Operation from "../Operation.mjs"; import { search, DOMAIN_REGEX } from "../lib/Extract.mjs"; +import { caseInsensitiveSort } from "../lib/Sort.mjs"; /** * Extract domains operation @@ -25,9 +26,19 @@ class ExtractDomains extends Operation { this.outputType = "string"; this.args = [ { - "name": "Display total", - "type": "boolean", - "value": true + name: "Display total", + type: "boolean", + value: false + }, + { + name: "Sort", + type: "boolean", + value: false + }, + { + name: "Unique", + type: "boolean", + value: false } ]; } @@ -38,8 +49,21 @@ class ExtractDomains extends Operation { * @returns {string} */ run(input, 
args) { - const displayTotal = args[0]; - return search(input, DOMAIN_REGEX, null, displayTotal); + const [displayTotal, sort, unique] = args; + + const results = search( + input, + DOMAIN_REGEX, + null, + sort ? caseInsensitiveSort : null, + unique + ); + + if (displayTotal) { + return `Total found: ${results.length}\n\n${results.join("\n")}`; + } else { + return results.join("\n"); + } } } diff --git a/src/core/operations/ExtractEmailAddresses.mjs b/src/core/operations/ExtractEmailAddresses.mjs index 43bf3b64..f50e1aaf 100644 --- a/src/core/operations/ExtractEmailAddresses.mjs +++ b/src/core/operations/ExtractEmailAddresses.mjs @@ -6,6 +6,7 @@ import Operation from "../Operation.mjs"; import { search } from "../lib/Extract.mjs"; +import { caseInsensitiveSort } from "../lib/Sort.mjs"; /** * Extract email addresses operation @@ -25,9 +26,19 @@ class ExtractEmailAddresses extends Operation { this.outputType = "string"; this.args = [ { - "name": "Display total", - "type": "boolean", - "value": false + name: "Display total", + type: "boolean", + value: false + }, + { + name: "Sort", + type: "boolean", + value: false + }, + { + name: "Unique", + type: "boolean", + value: false } ]; } @@ -38,10 +49,23 @@ class ExtractEmailAddresses extends Operation { * @returns {string} */ run(input, args) { - const displayTotal = args[0], + const [displayTotal, sort, unique] = args, // email regex from: https://www.regextester.com/98066 regex = /(?:[\u00A0-\uD7FF\uE000-\uFFFFa-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[\u00A0-\uD7FF\uE000-\uFFFFa-z0-9!#$%&'*+/=?^_`{|}~-]+)*|"(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21\x23-\x5b\x5d-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])*")@(?:(?:[\u00A0-\uD7FF\uE000-\uFFFFa-z0-9](?:[\u00A0-\uD7FF\uE000-\uFFFFa-z0-9-]*[\u00A0-\uD7FF\uE000-\uFFFFa-z0-9])?\.)+[\u00A0-\uD7FF\uE000-\uFFFFa-z0-9](?:[\u00A0-\uD7FF\uE000-\uFFFFa-z0-9-]*[\u00A0-\uD7FF\uE000-\uFFFFa-z0-9])?|\[(?:(?:(2(5[0-5]|[0-4][0-9])|1[0-9][0-9]|[1-9]?[0-9]))\.){3}\])/ig; - return search(input, regex, null, 
displayTotal); + + const results = search( + input, + regex, + null, + sort ? caseInsensitiveSort : null, + unique + ); + + if (displayTotal) { + return `Total found: ${results.length}\n\n${results.join("\n")}`; + } else { + return results.join("\n"); + } } } diff --git a/src/core/operations/ExtractFilePaths.mjs b/src/core/operations/ExtractFilePaths.mjs index 04d3f73e..5de76fe5 100644 --- a/src/core/operations/ExtractFilePaths.mjs +++ b/src/core/operations/ExtractFilePaths.mjs @@ -6,6 +6,7 @@ import Operation from "../Operation.mjs"; import { search } from "../lib/Extract.mjs"; +import { caseInsensitiveSort } from "../lib/Sort.mjs"; /** * Extract file paths operation @@ -25,19 +26,29 @@ class ExtractFilePaths extends Operation { this.outputType = "string"; this.args = [ { - "name": "Windows", - "type": "boolean", - "value": true + name: "Windows", + type: "boolean", + value: true }, { - "name": "UNIX", - "type": "boolean", - "value": true + name: "UNIX", + type: "boolean", + value: true }, { - "name": "Display total", - "type": "boolean", - "value": false + name: "Display total", + type: "boolean", + value: false + }, + { + name: "Sort", + type: "boolean", + value: false + }, + { + name: "Unique", + type: "boolean", + value: false } ]; } @@ -48,7 +59,7 @@ class ExtractFilePaths extends Operation { * @returns {string} */ run(input, args) { - const [includeWinPath, includeUnixPath, displayTotal] = args, + const [includeWinPath, includeUnixPath, displayTotal, sort, unique] = args, winDrive = "[A-Z]:\\\\", winName = "[A-Z\\d][A-Z\\d\\- '_\\(\\)~]{0,61}", winExt = "[A-Z\\d]{1,6}", @@ -65,12 +76,25 @@ class ExtractFilePaths extends Operation { filePaths = unixPath; } - if (filePaths) { - const regex = new RegExp(filePaths, "ig"); - return search(input, regex, null, displayTotal); - } else { + if (!filePaths) { return ""; } + + const regex = new RegExp(filePaths, "ig"); + const results = search( + input, + regex, + null, + sort ? 
caseInsensitiveSort : null, + unique + ); + + if (displayTotal) { + return `Total found: ${results.length}\n\n${results.join("\n")}`; + } else { + return results.join("\n"); + } + } } diff --git a/src/core/operations/ExtractIPAddresses.mjs b/src/core/operations/ExtractIPAddresses.mjs index 8d36a783..95e0a50f 100644 --- a/src/core/operations/ExtractIPAddresses.mjs +++ b/src/core/operations/ExtractIPAddresses.mjs @@ -6,6 +6,7 @@ import Operation from "../Operation.mjs"; import { search } from "../lib/Extract.mjs"; +import { ipSort } from "../lib/Sort.mjs"; /** * Extract IP addresses operation @@ -25,24 +26,34 @@ class ExtractIPAddresses extends Operation { this.outputType = "string"; this.args = [ { - "name": "IPv4", - "type": "boolean", - "value": true + name: "IPv4", + type: "boolean", + value: true }, { - "name": "IPv6", - "type": "boolean", - "value": false + name: "IPv6", + type: "boolean", + value: false }, { - "name": "Remove local IPv4 addresses", - "type": "boolean", - "value": false + name: "Remove local IPv4 addresses", + type: "boolean", + value: false }, { - "name": "Display total", - "type": "boolean", - "value": false + name: "Display total", + type: "boolean", + value: false + }, + { + name: "Sort", + type: "boolean", + value: false + }, + { + name: "Unique", + type: "boolean", + value: false } ]; } @@ -53,7 +64,7 @@ class ExtractIPAddresses extends Operation { * @returns {string} */ run(input, args) { - const [includeIpv4, includeIpv6, removeLocal, displayTotal] = args, + const [includeIpv4, includeIpv6, removeLocal, displayTotal, sort, unique] = args, ipv4 = "(?:(?:\\d|[01]?\\d\\d|2[0-4]\\d|25[0-5])\\.){3}(?:25[0-5]|2[0-4]\\d|[01]?\\d\\d|\\d)(?:\\/\\d{1,2})?", ipv6 = "((?=.*::)(?!.*::.+::)(::)?([\\dA-F]{1,4}:(:|\\b)|){5}|([\\dA-F]{1,4}:){6})((([\\dA-F]{1,4}((?!\\3)::|:\\b|(?![\\dA-F])))|(?!\\2\\3)){2}|(((2[0-4]|1\\d|[1-9])?\\d|25[0-5])\\.?\\b){4})"; let ips = ""; @@ -66,23 +77,29 @@ class ExtractIPAddresses extends Operation { ips = ipv6; } - if 
(ips) { - const regex = new RegExp(ips, "ig"); + if (!ips) return ""; - if (removeLocal) { - const ten = "10\\..+", - oneninetwo = "192\\.168\\..+", - oneseventwo = "172\\.(?:1[6-9]|2\\d|3[01])\\..+", - onetwoseven = "127\\..+", - removeRegex = new RegExp("^(?:" + ten + "|" + oneninetwo + - "|" + oneseventwo + "|" + onetwoseven + ")"); + const regex = new RegExp(ips, "ig"); - return search(input, regex, removeRegex, displayTotal); - } else { - return search(input, regex, null, displayTotal); - } + const ten = "10\\..+", + oneninetwo = "192\\.168\\..+", + oneseventwo = "172\\.(?:1[6-9]|2\\d|3[01])\\..+", + onetwoseven = "127\\..+", + removeRegex = new RegExp("^(?:" + ten + "|" + oneninetwo + + "|" + oneseventwo + "|" + onetwoseven + ")"); + + const results = search( + input, + regex, + removeLocal ? removeRegex : null, + sort ? ipSort : null, + unique + ); + + if (displayTotal) { + return `Total found: ${results.length}\n\n${results.join("\n")}`; } else { - return ""; + return results.join("\n"); } } diff --git a/src/core/operations/ExtractMACAddresses.mjs b/src/core/operations/ExtractMACAddresses.mjs index d75b1a69..1689d18f 100644 --- a/src/core/operations/ExtractMACAddresses.mjs +++ b/src/core/operations/ExtractMACAddresses.mjs @@ -6,6 +6,7 @@ import Operation from "../Operation.mjs"; import { search } from "../lib/Extract.mjs"; +import { hexadecimalSort } from "../lib/Sort.mjs"; /** * Extract MAC addresses operation @@ -25,9 +26,19 @@ class ExtractMACAddresses extends Operation { this.outputType = "string"; this.args = [ { - "name": "Display total", - "type": "boolean", - "value": false + name: "Display total", + type: "boolean", + value: false + }, + { + name: "Sort", + type: "boolean", + value: false + }, + { + name: "Unique", + type: "boolean", + value: false } ]; } @@ -38,10 +49,21 @@ class ExtractMACAddresses extends Operation { * @returns {string} */ run(input, args) { - const displayTotal = args[0], - regex = /[A-F\d]{2}(?:[:-][A-F\d]{2}){5}/ig; + const 
[displayTotal, sort, unique] = args, + regex = /[A-F\d]{2}(?:[:-][A-F\d]{2}){5}/ig, + results = search( + input, + regex, + null, + sort ? hexadecimalSort : null, + unique + ); - return search(input, regex, null, displayTotal); + if (displayTotal) { + return `Total found: ${results.length}\n\n${results.join("\n")}`; + } else { + return results.join("\n"); + } } } diff --git a/src/core/operations/ExtractURLs.mjs b/src/core/operations/ExtractURLs.mjs index a5b26515..32cdb3a7 100644 --- a/src/core/operations/ExtractURLs.mjs +++ b/src/core/operations/ExtractURLs.mjs @@ -6,6 +6,7 @@ import Operation from "../Operation.mjs"; import { search, URL_REGEX } from "../lib/Extract.mjs"; +import { caseInsensitiveSort } from "../lib/Sort.mjs"; /** * Extract URLs operation @@ -25,9 +26,19 @@ class ExtractURLs extends Operation { this.outputType = "string"; this.args = [ { - "name": "Display total", - "type": "boolean", - "value": false + name: "Display total", + type: "boolean", + value: false + }, + { + name: "Sort", + type: "boolean", + value: false + }, + { + name: "Unique", + type: "boolean", + value: false } ]; } @@ -38,8 +49,20 @@ class ExtractURLs extends Operation { * @returns {string} */ run(input, args) { - const displayTotal = args[0]; - return search(input, URL_REGEX, null, displayTotal); + const [displayTotal, sort, unique] = args; + const results = search( + input, + URL_REGEX, + null, + sort ? 
caseInsensitiveSort : null, + unique + ); + + if (displayTotal) { + return `Total found: ${results.length}\n\n${results.join("\n")}`; + } else { + return results.join("\n"); + } } } diff --git a/src/core/operations/Sort.mjs b/src/core/operations/Sort.mjs index a1148f7c..19e4cbb2 100644 --- a/src/core/operations/Sort.mjs +++ b/src/core/operations/Sort.mjs @@ -7,6 +7,7 @@ import Operation from "../Operation.mjs"; import Utils from "../Utils.mjs"; import {INPUT_DELIM_OPTIONS} from "../lib/Delim.mjs"; +import {caseInsensitiveSort, ipSort, numericSort, hexadecimalSort} from "../lib/Sort.mjs"; /** * Sort operation @@ -57,120 +58,19 @@ class Sort extends Operation { if (order === "Alphabetical (case sensitive)") { sorted = sorted.sort(); } else if (order === "Alphabetical (case insensitive)") { - sorted = sorted.sort(Sort._caseInsensitiveSort); + sorted = sorted.sort(caseInsensitiveSort); } else if (order === "IP address") { - sorted = sorted.sort(Sort._ipSort); + sorted = sorted.sort(ipSort); } else if (order === "Numeric") { - sorted = sorted.sort(Sort._numericSort); + sorted = sorted.sort(numericSort); } else if (order === "Numeric (hexadecimal)") { - sorted = sorted.sort(Sort._hexadecimalSort); + sorted = sorted.sort(hexadecimalSort); } if (sortReverse) sorted.reverse(); return sorted.join(delim); } - /** - * Comparison operation for sorting of strings ignoring case. - * - * @private - * @param {string} a - * @param {string} b - * @returns {number} - */ - static _caseInsensitiveSort(a, b) { - return a.toLowerCase().localeCompare(b.toLowerCase()); - } - - - /** - * Comparison operation for sorting of IPv4 addresses. 
- * - * @private - * @param {string} a - * @param {string} b - * @returns {number} - */ - static _ipSort(a, b) { - let a_ = a.split("."), - b_ = b.split("."); - - a_ = a_[0] * 0x1000000 + a_[1] * 0x10000 + a_[2] * 0x100 + a_[3] * 1; - b_ = b_[0] * 0x1000000 + b_[1] * 0x10000 + b_[2] * 0x100 + b_[3] * 1; - - if (isNaN(a_) && !isNaN(b_)) return 1; - if (!isNaN(a_) && isNaN(b_)) return -1; - if (isNaN(a_) && isNaN(b_)) return a.localeCompare(b); - - return a_ - b_; - } - - /** - * Comparison operation for sorting of numeric values. - * - * @author Chris van Marle - * @private - * @param {string} a - * @param {string} b - * @returns {number} - */ - static _numericSort(a, b) { - const a_ = a.split(/([^\d]+)/), - b_ = b.split(/([^\d]+)/); - - for (let i = 0; i < a_.length && i < b.length; ++i) { - if (isNaN(a_[i]) && !isNaN(b_[i])) return 1; // Numbers after non-numbers - if (!isNaN(a_[i]) && isNaN(b_[i])) return -1; - if (isNaN(a_[i]) && isNaN(b_[i])) { - const ret = a_[i].localeCompare(b_[i]); // Compare strings - if (ret !== 0) return ret; - } - if (!isNaN(a_[i]) && !isNaN(b_[i])) { // Compare numbers - if (a_[i] - b_[i] !== 0) return a_[i] - b_[i]; - } - } - - return a.localeCompare(b); - } - - /** - * Comparison operation for sorting of hexadecimal values. - * - * @author Chris van Marle - * @private - * @param {string} a - * @param {string} b - * @returns {number} - */ - static _hexadecimalSort(a, b) { - let a_ = a.split(/([^\da-f]+)/i), - b_ = b.split(/([^\da-f]+)/i); - - a_ = a_.map(v => { - const t = parseInt(v, 16); - return isNaN(t) ? v : t; - }); - - b_ = b_.map(v => { - const t = parseInt(v, 16); - return isNaN(t) ? 
v : t; - }); - - for (let i = 0; i < a_.length && i < b.length; ++i) { - if (isNaN(a_[i]) && !isNaN(b_[i])) return 1; // Numbers after non-numbers - if (!isNaN(a_[i]) && isNaN(b_[i])) return -1; - if (isNaN(a_[i]) && isNaN(b_[i])) { - const ret = a_[i].localeCompare(b_[i]); // Compare strings - if (ret !== 0) return ret; - } - if (!isNaN(a_[i]) && !isNaN(b_[i])) { // Compare numbers - if (a_[i] - b_[i] !== 0) return a_[i] - b_[i]; - } - } - - return a.localeCompare(b); - } - } export default Sort; diff --git a/src/core/operations/Strings.mjs b/src/core/operations/Strings.mjs index ddf1d49d..dca2433a 100644 --- a/src/core/operations/Strings.mjs +++ b/src/core/operations/Strings.mjs @@ -7,6 +7,7 @@ import Operation from "../Operation.mjs"; import XRegExp from "xregexp"; import { search } from "../lib/Extract.mjs"; +import { caseInsensitiveSort } from "../lib/Sort.mjs"; /** * Strings operation @@ -27,27 +28,37 @@ class Strings extends Operation { this.outputType = "string"; this.args = [ { - "name": "Encoding", - "type": "option", - "value": ["Single byte", "16-bit littleendian", "16-bit bigendian", "All"] + name: "Encoding", + type: "option", + value: ["Single byte", "16-bit littleendian", "16-bit bigendian", "All"] }, { - "name": "Minimum length", - "type": "number", - "value": 4 + name: "Minimum length", + type: "number", + value: 4 }, { - "name": "Match", - "type": "option", - "value": [ + name: "Match", + type: "option", + value: [ "[ASCII]", "Alphanumeric + punctuation (A)", "All printable chars (A)", "Null-terminated strings (A)", "[Unicode]", "Alphanumeric + punctuation (U)", "All printable chars (U)", "Null-terminated strings (U)" ] }, { - "name": "Display total", - "type": "boolean", - "value": false + name: "Display total", + type: "boolean", + value: false + }, + { + name: "Sort", + type: "boolean", + value: false + }, + { + name: "Unique", + type: "boolean", + value: false } ]; } @@ -58,7 +69,7 @@ class Strings extends Operation { * @returns {string} */ 
run(input, args) { - const [encoding, minLen, matchType, displayTotal] = args, + const [encoding, minLen, matchType, displayTotal, sort, unique] = args, alphanumeric = "A-Z\\d", punctuation = "/\\-:.,_$%'\"()<>= !\\[\\]{}@", printable = "\x20-\x7e", @@ -108,8 +119,19 @@ class Strings extends Operation { } const regex = new XRegExp(strings, "ig"); + const results = search( + input, + regex, + null, + sort ? caseInsensitiveSort : null, + unique + ); - return search(input, regex, null, displayTotal); + if (displayTotal) { + return `Total found: ${results.length}\n\n${results.join("\n")}`; + } else { + return results.join("\n"); + } } } diff --git a/src/core/operations/Unique.mjs b/src/core/operations/Unique.mjs index 89de74c2..7ca2db66 100644 --- a/src/core/operations/Unique.mjs +++ b/src/core/operations/Unique.mjs @@ -26,9 +26,14 @@ class Unique extends Operation { this.outputType = "string"; this.args = [ { - "name": "Delimiter", - "type": "option", - "value": INPUT_DELIM_OPTIONS + name: "Delimiter", + type: "option", + value: INPUT_DELIM_OPTIONS + }, + { + name: "Display count", + type: "boolean", + value: false } ]; } @@ -39,8 +44,23 @@ class Unique extends Operation { * @returns {string} */ run(input, args) { - const delim = Utils.charRep(args[0]); - return input.split(delim).unique().join(delim); + const delim = Utils.charRep(args[0]), + count = args[1]; + + if (count) { + const valMap = input.split(delim).reduce((acc, curr) => { + if (Object.prototype.hasOwnProperty.call(acc, curr)) { + acc[curr]++; + } else { + acc[curr] = 1; + } + return acc; + }, {}); + + return Object.keys(valMap).map(val => `${valMap[val]} ${val}`).join(delim); + } else { + return input.split(delim).unique().join(delim); + } } } diff --git a/tests/node/tests/operations.mjs b/tests/node/tests/operations.mjs index 305debfb..783bd00a 100644 --- a/tests/node/tests/operations.mjs +++ b/tests/node/tests/operations.mjs @@ -471,7 +471,7 @@ color: white; }), it("Extract dates", () => { - 
assert.strictEqual(chef.extractDates("Don't Look a Gift Horse In The Mouth 01/02/1992").toString(), "01/02/1992\n"); + assert.strictEqual(chef.extractDates("Don't Look a Gift Horse In The Mouth 01/02/1992").toString(), "01/02/1992"); }), it("Filter", () => { @@ -859,7 +859,7 @@ pCGTErs= }), it("SQL Beautify", () => { - const result = chef.SQLBeautify(`SELECT MONTH, ID, RAIN_I, TEMP_F + const result = chef.SQLBeautify(`SELECT MONTH, ID, RAIN_I, TEMP_F FROM STATS;`); const expected = `SELECT MONTH, ID, @@ -879,8 +879,7 @@ FROM STATS;`; const result = chef.strings("smothering ampersand abreast", {displayTotal: true}); const expected = `Total found: 1 -smothering ampersand abreast -`; +smothering ampersand abreast`; assert.strictEqual(result.toString(), expected); }), diff --git a/tests/operations/tests/ExtractEmailAddresses.mjs b/tests/operations/tests/ExtractEmailAddresses.mjs index a0a01f67..658484cf 100644 --- a/tests/operations/tests/ExtractEmailAddresses.mjs +++ b/tests/operations/tests/ExtractEmailAddresses.mjs @@ -11,7 +11,7 @@ TestRegister.addTests([ { name: "Extract email address", input: "email@example.com\nfirstname.lastname@example.com\nemail@subdomain.example.com\nfirstname+lastname@example.com\n1234567890@example.com\nemail@example-one.com\n_______@example.com email@example.name\nemail@example.museum email@example.co.jp firstname-lastname@example.com", - expectedOutput: "email@example.com\nfirstname.lastname@example.com\nemail@subdomain.example.com\nfirstname+lastname@example.com\n1234567890@example.com\nemail@example-one.com\n_______@example.com\nemail@example.name\nemail@example.museum\nemail@example.co.jp\nfirstname-lastname@example.com\n", + expectedOutput: "email@example.com\nfirstname.lastname@example.com\nemail@subdomain.example.com\nfirstname+lastname@example.com\n1234567890@example.com\nemail@example-one.com\n_______@example.com\nemail@example.name\nemail@example.museum\nemail@example.co.jp\nfirstname-lastname@example.com", recipeConfig: [ { 
"op": "Extract email addresses", @@ -22,7 +22,7 @@ TestRegister.addTests([ { name: "Extract email address - Display total", input: "email@example.com\nfirstname.lastname@example.com\nemail@subdomain.example.com\nfirstname+lastname@example.com\n1234567890@example.com\nemail@example-one.com\n_______@example.com email@example.name\nemail@example.museum email@example.co.jp firstname-lastname@example.com", - expectedOutput: "Total found: 11\n\nemail@example.com\nfirstname.lastname@example.com\nemail@subdomain.example.com\nfirstname+lastname@example.com\n1234567890@example.com\nemail@example-one.com\n_______@example.com\nemail@example.name\nemail@example.museum\nemail@example.co.jp\nfirstname-lastname@example.com\n", + expectedOutput: "Total found: 11\n\nemail@example.com\nfirstname.lastname@example.com\nemail@subdomain.example.com\nfirstname+lastname@example.com\n1234567890@example.com\nemail@example-one.com\n_______@example.com\nemail@example.name\nemail@example.museum\nemail@example.co.jp\nfirstname-lastname@example.com", recipeConfig: [ { "op": "Extract email addresses", @@ -33,7 +33,7 @@ TestRegister.addTests([ { name: "Extract email address (Internationalized)", input: "\u4f0a\u662d\u5091@\u90f5\u4ef6.\u5546\u52d9 \u093e\u092e@\u092e\u094b\u0939\u0928.\u0908\u0928\u094d\u092b\u094b\n\u044e\u0437\u0435\u0440@\u0435\u043a\u0437\u0430\u043c\u043f\u043b.\u043a\u043e\u043c \u03b8\u03c3\u03b5\u03c1@\u03b5\u03c7\u03b1\u03bc\u03c0\u03bb\u03b5.\u03c8\u03bf\u03bc Jos\u1ec5Silv\u1ec5@googl\u1ec5.com\nJos\u1ec5Silv\u1ec5@google.com and Jos\u1ec5Silva@google.com\nFoO@BaR.CoM, john@192.168.10.100\ng\xf3mez@junk.br and 
Abc.123@example.com.\nuser+mailbox/department=shipping@example.com\n\u7528\u6237@\u4f8b\u5b50.\u5e7f\u544a\n\u0909\u092a\u092f\u094b\u0917\u0915\u0930\u094d\u0924\u093e@\u0909\u0926\u093e\u0939\u0930\u0923.\u0915\u0949\u092e\n\u044e\u0437\u0435\u0440@\u0435\u043a\u0437\u0430\u043c\u043f\u043b.\u043a\u043e\u043c\n\u03b8\u03c3\u03b5\u03c1@\u03b5\u03c7\u03b1\u03bc\u03c0\u03bb\u03b5.\u03c8\u03bf\u03bc\nD\xf6rte@S\xf6rensen.example.com\n\u0430\u0434\u0436\u0430\u0439@\u044d\u043a\u0437\u0430\u043c\u043f\u043b.\u0440\u0443\u0441\ntest@xn--bcher-kva.com", - expectedOutput: "\u4f0a\u662d\u5091@\u90f5\u4ef6.\u5546\u52d9\n\u093e\u092e@\u092e\u094b\u0939\u0928.\u0908\u0928\u094d\u092b\u094b\n\u044e\u0437\u0435\u0440@\u0435\u043a\u0437\u0430\u043c\u043f\u043b.\u043a\u043e\u043c\n\u03b8\u03c3\u03b5\u03c1@\u03b5\u03c7\u03b1\u03bc\u03c0\u03bb\u03b5.\u03c8\u03bf\u03bc\nJos\u1ec5Silv\u1ec5@googl\u1ec5.com\nJos\u1ec5Silv\u1ec5@google.com\nJos\u1ec5Silva@google.com\nFoO@BaR.CoM\njohn@192.168.10.100\ng\xf3mez@junk.br\nAbc.123@example.com\nuser+mailbox/department=shipping@example.com\n\u7528\u6237@\u4f8b\u5b50.\u5e7f\u544a\n\u0909\u092a\u092f\u094b\u0917\u0915\u0930\u094d\u0924\u093e@\u0909\u0926\u093e\u0939\u0930\u0923.\u0915\u0949\u092e\n\u044e\u0437\u0435\u0440@\u0435\u043a\u0437\u0430\u043c\u043f\u043b.\u043a\u043e\u043c\n\u03b8\u03c3\u03b5\u03c1@\u03b5\u03c7\u03b1\u03bc\u03c0\u03bb\u03b5.\u03c8\u03bf\u03bc\nD\xf6rte@S\xf6rensen.example.com\n\u0430\u0434\u0436\u0430\u0439@\u044d\u043a\u0437\u0430\u043c\u043f\u043b.\u0440\u0443\u0441\ntest@xn--bcher-kva.com\n", + expectedOutput: 
"\u4f0a\u662d\u5091@\u90f5\u4ef6.\u5546\u52d9\n\u093e\u092e@\u092e\u094b\u0939\u0928.\u0908\u0928\u094d\u092b\u094b\n\u044e\u0437\u0435\u0440@\u0435\u043a\u0437\u0430\u043c\u043f\u043b.\u043a\u043e\u043c\n\u03b8\u03c3\u03b5\u03c1@\u03b5\u03c7\u03b1\u03bc\u03c0\u03bb\u03b5.\u03c8\u03bf\u03bc\nJos\u1ec5Silv\u1ec5@googl\u1ec5.com\nJos\u1ec5Silv\u1ec5@google.com\nJos\u1ec5Silva@google.com\nFoO@BaR.CoM\njohn@192.168.10.100\ng\xf3mez@junk.br\nAbc.123@example.com\nuser+mailbox/department=shipping@example.com\n\u7528\u6237@\u4f8b\u5b50.\u5e7f\u544a\n\u0909\u092a\u092f\u094b\u0917\u0915\u0930\u094d\u0924\u093e@\u0909\u0926\u093e\u0939\u0930\u0923.\u0915\u0949\u092e\n\u044e\u0437\u0435\u0440@\u0435\u043a\u0437\u0430\u043c\u043f\u043b.\u043a\u043e\u043c\n\u03b8\u03c3\u03b5\u03c1@\u03b5\u03c7\u03b1\u03bc\u03c0\u03bb\u03b5.\u03c8\u03bf\u03bc\nD\xf6rte@S\xf6rensen.example.com\n\u0430\u0434\u0436\u0430\u0439@\u044d\u043a\u0437\u0430\u043c\u043f\u043b.\u0440\u0443\u0441\ntest@xn--bcher-kva.com", recipeConfig: [ { "op": "Extract email addresses", @@ -44,7 +44,7 @@ TestRegister.addTests([ { name: "Extract email address - Display total (Internationalized)", input: "\u4f0a\u662d\u5091@\u90f5\u4ef6.\u5546\u52d9 \u093e\u092e@\u092e\u094b\u0939\u0928.\u0908\u0928\u094d\u092b\u094b\n\u044e\u0437\u0435\u0440@\u0435\u043a\u0437\u0430\u043c\u043f\u043b.\u043a\u043e\u043c \u03b8\u03c3\u03b5\u03c1@\u03b5\u03c7\u03b1\u03bc\u03c0\u03bb\u03b5.\u03c8\u03bf\u03bc Jos\u1ec5Silv\u1ec5@googl\u1ec5.com\nJos\u1ec5Silv\u1ec5@google.com and Jos\u1ec5Silva@google.com\nFoO@BaR.CoM, john@192.168.10.100\ng\xf3mez@junk.br and 
Abc.123@example.com.\nuser+mailbox/department=shipping@example.com\n\u7528\u6237@\u4f8b\u5b50.\u5e7f\u544a\n\u0909\u092a\u092f\u094b\u0917\u0915\u0930\u094d\u0924\u093e@\u0909\u0926\u093e\u0939\u0930\u0923.\u0915\u0949\u092e\n\u044e\u0437\u0435\u0440@\u0435\u043a\u0437\u0430\u043c\u043f\u043b.\u043a\u043e\u043c\n\u03b8\u03c3\u03b5\u03c1@\u03b5\u03c7\u03b1\u03bc\u03c0\u03bb\u03b5.\u03c8\u03bf\u03bc\nD\xf6rte@S\xf6rensen.example.com\n\u0430\u0434\u0436\u0430\u0439@\u044d\u043a\u0437\u0430\u043c\u043f\u043b.\u0440\u0443\u0441\ntest@xn--bcher-kva.com", - expectedOutput: "Total found: 19\n\n\u4f0a\u662d\u5091@\u90f5\u4ef6.\u5546\u52d9\n\u093e\u092e@\u092e\u094b\u0939\u0928.\u0908\u0928\u094d\u092b\u094b\n\u044e\u0437\u0435\u0440@\u0435\u043a\u0437\u0430\u043c\u043f\u043b.\u043a\u043e\u043c\n\u03b8\u03c3\u03b5\u03c1@\u03b5\u03c7\u03b1\u03bc\u03c0\u03bb\u03b5.\u03c8\u03bf\u03bc\nJos\u1ec5Silv\u1ec5@googl\u1ec5.com\nJos\u1ec5Silv\u1ec5@google.com\nJos\u1ec5Silva@google.com\nFoO@BaR.CoM\njohn@192.168.10.100\ng\xf3mez@junk.br\nAbc.123@example.com\nuser+mailbox/department=shipping@example.com\n\u7528\u6237@\u4f8b\u5b50.\u5e7f\u544a\n\u0909\u092a\u092f\u094b\u0917\u0915\u0930\u094d\u0924\u093e@\u0909\u0926\u093e\u0939\u0930\u0923.\u0915\u0949\u092e\n\u044e\u0437\u0435\u0440@\u0435\u043a\u0437\u0430\u043c\u043f\u043b.\u043a\u043e\u043c\n\u03b8\u03c3\u03b5\u03c1@\u03b5\u03c7\u03b1\u03bc\u03c0\u03bb\u03b5.\u03c8\u03bf\u03bc\nD\xf6rte@S\xf6rensen.example.com\n\u0430\u0434\u0436\u0430\u0439@\u044d\u043a\u0437\u0430\u043c\u043f\u043b.\u0440\u0443\u0441\ntest@xn--bcher-kva.com\n", + expectedOutput: "Total found: 
19\n\n\u4f0a\u662d\u5091@\u90f5\u4ef6.\u5546\u52d9\n\u093e\u092e@\u092e\u094b\u0939\u0928.\u0908\u0928\u094d\u092b\u094b\n\u044e\u0437\u0435\u0440@\u0435\u043a\u0437\u0430\u043c\u043f\u043b.\u043a\u043e\u043c\n\u03b8\u03c3\u03b5\u03c1@\u03b5\u03c7\u03b1\u03bc\u03c0\u03bb\u03b5.\u03c8\u03bf\u03bc\nJos\u1ec5Silv\u1ec5@googl\u1ec5.com\nJos\u1ec5Silv\u1ec5@google.com\nJos\u1ec5Silva@google.com\nFoO@BaR.CoM\njohn@192.168.10.100\ng\xf3mez@junk.br\nAbc.123@example.com\nuser+mailbox/department=shipping@example.com\n\u7528\u6237@\u4f8b\u5b50.\u5e7f\u544a\n\u0909\u092a\u092f\u094b\u0917\u0915\u0930\u094d\u0924\u093e@\u0909\u0926\u093e\u0939\u0930\u0923.\u0915\u0949\u092e\n\u044e\u0437\u0435\u0440@\u0435\u043a\u0437\u0430\u043c\u043f\u043b.\u043a\u043e\u043c\n\u03b8\u03c3\u03b5\u03c1@\u03b5\u03c7\u03b1\u03bc\u03c0\u03bb\u03b5.\u03c8\u03bf\u03bc\nD\xf6rte@S\xf6rensen.example.com\n\u0430\u0434\u0436\u0430\u0439@\u044d\u043a\u0437\u0430\u043c\u043f\u043b.\u0440\u0443\u0441\ntest@xn--bcher-kva.com", recipeConfig: [ { "op": "Extract email addresses",