mirror of
https://github.com/gchq/CyberChef.git
synced 2025-04-20 14:56:19 -04:00
ESM: Port Extract.js module
This commit is contained in:
parent
3c214ce17c
commit
b3ee251ee3
10 changed files with 586 additions and 333 deletions
55
src/core/operations/ExtractURLs.mjs
Normal file
55
src/core/operations/ExtractURLs.mjs
Normal file
|
@ -0,0 +1,55 @@
|
|||
/**
|
||||
* @author n1474335 [n1474335@gmail.com]
|
||||
* @copyright Crown Copyright 2016
|
||||
* @license Apache-2.0
|
||||
*/
|
||||
|
||||
import Operation from "../Operation";
|
||||
import { search } from "../lib/Extract";
|
||||
|
||||
/**
|
||||
* Extract URLs operation
|
||||
*/
|
||||
class ExtractURLs extends Operation {
|
||||
|
||||
/**
|
||||
* ExtractURLs constructor
|
||||
*/
|
||||
constructor() {
|
||||
super();
|
||||
|
||||
this.name = "Extract URLs";
|
||||
this.module = "Regex";
|
||||
this.description = "Extracts Uniform Resource Locators (URLs) from the input. The protocol (http, ftp etc.) is required otherwise there will be far too many false positives.";
|
||||
this.inputType = "string";
|
||||
this.outputType = "string";
|
||||
this.args = [
|
||||
{
|
||||
"name": "Display total",
|
||||
"type": "boolean",
|
||||
"value": false
|
||||
}
|
||||
];
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {string} input
|
||||
* @param {Object[]} args
|
||||
* @returns {string}
|
||||
*/
|
||||
run(input, args) {
|
||||
const displayTotal = args[0],
|
||||
protocol = "[A-Z]+://",
|
||||
hostname = "[-\\w]+(?:\\.\\w[-\\w]*)+",
|
||||
port = ":\\d+";
|
||||
let path = "/[^.!,?\"<>\\[\\]{}\\s\\x7F-\\xFF]*";
|
||||
|
||||
path += "(?:[.!,?]+[^.!,?\"<>\\[\\]{}\\s\\x7F-\\xFF]+)*";
|
||||
const regex = new RegExp(protocol + hostname + "(?:" + port +
|
||||
")?(?:" + path + ")?", "ig");
|
||||
return search(input, regex, null, displayTotal);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
export default ExtractURLs;
|
Loading…
Add table
Add a link
Reference in a new issue