From 159b80e8533ade6c3774762dd09e10e8b98f9a5e Mon Sep 17 00:00:00 2001 From: gchqdev364 <40990156+gchqdev364@users.noreply.github.com> Date: Mon, 12 May 2025 13:02:29 +0000 Subject: [PATCH 1/5] Added explicit tests for IPv4 addresses in decimal or octal. --- src/core/operations/ExtractIPAddresses.mjs | 3 +- tests/operations/index.mjs | 1 + tests/operations/tests/ExtractIPAddresses.mjs | 78 +++++++++++++++++++ 3 files changed, 81 insertions(+), 1 deletion(-) create mode 100644 tests/operations/tests/ExtractIPAddresses.mjs diff --git a/src/core/operations/ExtractIPAddresses.mjs b/src/core/operations/ExtractIPAddresses.mjs index 97b52478..1fe9c96d 100644 --- a/src/core/operations/ExtractIPAddresses.mjs +++ b/src/core/operations/ExtractIPAddresses.mjs @@ -65,7 +65,8 @@ class ExtractIPAddresses extends Operation { */ run(input, args) { const [includeIpv4, includeIpv6, removeLocal, displayTotal, sort, unique] = args, - ipv4 = "(?:(?:\\d|[01]?\\d\\d|2[0-4]\\d|25[0-5])\\.){3}(?:25[0-5]|2[0-4]\\d|[01]?\\d\\d|\\d)(?:\\/\\d{1,2})?", + // This regex has two major options; decimal values 0-255 or octal values prefixed with 0 up to 377 + ipv4 = "(?:(?:(?:25[0-5]|2[0-4]\\d|1?[1-9]\\d|\\d)\\.){3}(?:(?:25[0-5]|2[0-4]\\d|1?[1-9]\\d|\\d)))|(?:(?:(?:0[1-3]?[0-7]{0,2})\\.){3}(?:0[1-3]?[0-7]{0,2}))", ipv6 = "((?=.*::)(?!.*::.+::)(::)?([\\dA-F]{1,4}:(:|\\b)|){5}|([\\dA-F]{1,4}:){6})(([\\dA-F]{1,4}((?!\\3)::|:\\b|(?![\\dA-F])))|(?!\\2\\3)){2}"; let ips = ""; diff --git a/tests/operations/index.mjs b/tests/operations/index.mjs index ab1ceb8f..7e49104d 100644 --- a/tests/operations/index.mjs +++ b/tests/operations/index.mjs @@ -65,6 +65,7 @@ import "./tests/ELFInfo.mjs"; import "./tests/Enigma.mjs"; import "./tests/ExtractEmailAddresses.mjs"; import "./tests/ExtractHashes.mjs"; +import "./tests/ExtractIPAddresses.mjs"; import "./tests/Float.mjs"; import "./tests/FileTree.mjs"; import "./tests/FletcherChecksum.mjs"; diff --git a/tests/operations/tests/ExtractIPAddresses.mjs 
b/tests/operations/tests/ExtractIPAddresses.mjs new file mode 100644 index 00000000..d3eee407 --- /dev/null +++ b/tests/operations/tests/ExtractIPAddresses.mjs @@ -0,0 +1,78 @@ +/** + * ExtractIPAddresses tests. + * + * @author gchqdev365 [gchqdev365@outlook.com] + * @copyright Crown Copyright 2025 + * @license Apache-2.0 + */ +import TestRegister from "../../lib/TestRegister.mjs"; + +TestRegister.addTests([ + { + name: "ExtractIPAddress All Zeros", + input: "0.0.0.0", + expectedOutput: "0.0.0.0", + recipeConfig: [ + { + "op": "Extract IP addresses", + "args": [true, true, false, false, false, false] + }, + ], + }, + { + name: "ExtractIPAddress 255s", + input: "255.255.255.255", + expectedOutput: "255.255.255.255", + recipeConfig: [ + { + "op": "Extract IP addresses", + "args": [true, true, false, false, false, false] + }, + ], + }, + { + name: "ExtractIPAddress double digits", + input: "10.10.10.10 25.25.25.25 99.99.99.99", + expectedOutput: "10.10.10.10\n25.25.25.25\n99.99.99.99", + recipeConfig: [ + { + "op": "Extract IP addresses", + "args": [true, true, false, false, false, false] + }, + ], + }, + { + name: "ExtractIPAddress 256 in middle", + input: "255.256.255.255 255.255.256.255", + expectedOutput: "", + recipeConfig: [ + { + "op": "Extract IP addresses", + "args": [true, true, false, false, false, false] + }, + ], + }, + { + name: "ExtractIPAddress octal valid", + input: "01.01.01.01 0123.0123.0123.0123 0377.0377.0377.0377", + expectedOutput: "01.01.01.01\n0123.0123.0123.0123\n0377.0377.0377.0377", + recipeConfig: [ + { + "op": "Extract IP addresses", + "args": [true, true, false, false, false, false] + }, + ], + }, + { + name: "ExtractIPAddress octal invalid", + input: "0378.01.01.01 03.0377.2.3", + expectedOutput: "", + recipeConfig: [ + { + "op": "Extract IP addresses", + "args": [true, true, false, false, false, false] + }, + ], + }, +]); + From bfec582aee5fa6ff2d920a197bd5fd12747b1b5b Mon Sep 17 00:00:00 2001 From: gchqdev364 
<40990156+gchqdev364@users.noreply.github.com> Date: Mon, 12 May 2025 13:35:39 +0000 Subject: [PATCH 2/5] Using code to generate regex string procedurally to improve readability. --- src/core/operations/ExtractIPAddresses.mjs | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/core/operations/ExtractIPAddresses.mjs b/src/core/operations/ExtractIPAddresses.mjs index 1fe9c96d..9eac8c89 100644 --- a/src/core/operations/ExtractIPAddresses.mjs +++ b/src/core/operations/ExtractIPAddresses.mjs @@ -65,8 +65,17 @@ class ExtractIPAddresses extends Operation { */ run(input, args) { const [includeIpv4, includeIpv6, removeLocal, displayTotal, sort, unique] = args, - // This regex has two major options; decimal values 0-255 or octal values prefixed with 0 up to 377 - ipv4 = "(?:(?:(?:25[0-5]|2[0-4]\\d|1?[1-9]\\d|\\d)\\.){3}(?:(?:25[0-5]|2[0-4]\\d|1?[1-9]\\d|\\d)))|(?:(?:(?:0[1-3]?[0-7]{0,2})\\.){3}(?:0[1-3]?[0-7]{0,2}))", + + // IPv4 decimal groups can have values 0 to 255. To construct a regex the following sub-regex is reused: + ipv4DecimalByte = "(?:25[0-5]|2[0-4]\\d|1?[1-9]\\d|\\d)", + ipv4OctalByte = "(?:0[1-3]?[0-7]{1,2})", + + // Each variant requires exactly 4 groups with literal . between + ipv4Decimal = "(?:" + ipv4DecimalByte + "\\.){3}" + "(?:" + ipv4DecimalByte + ")", + ipv4Octal = "(?:" + ipv4OctalByte + "\\.){3}" + "(?:" + ipv4OctalByte + ")", + + // Then we allow IPv4 addresses to be expressed either entirely in decimal or entirely in Octal + ipv4 = "(?:" + ipv4Decimal + "|" + ipv4Octal + ")", ipv6 = "((?=.*::)(?!.*::.+::)(::)?([\\dA-F]{1,4}:(:|\\b)|){5}|([\\dA-F]{1,4}:){6})(([\\dA-F]{1,4}((?!\\3)::|:\\b|(?![\\dA-F])))|(?!\\2\\3)){2}"; let ips = ""; From 0c01c6a7c38d0e42324d601887e88cd13642c23b Mon Sep 17 00:00:00 2001 From: gchqdev364 <40990156+gchqdev364@users.noreply.github.com> Date: Mon, 12 May 2025 14:07:21 +0000 Subject: [PATCH 3/5] Added important tests that need to pass before merge. 
--- tests/operations/tests/ExtractIPAddresses.mjs | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/tests/operations/tests/ExtractIPAddresses.mjs b/tests/operations/tests/ExtractIPAddresses.mjs index d3eee407..baa6056a 100644 --- a/tests/operations/tests/ExtractIPAddresses.mjs +++ b/tests/operations/tests/ExtractIPAddresses.mjs @@ -19,6 +19,28 @@ TestRegister.addTests([ }, ], }, + { + name: "ExtractIPAddress All 10s", + input: "10.10.10.10", + expectedOutput: "10.10.10.10", + recipeConfig: [ + { + "op": "Extract IP addresses", + "args": [true, true, false, false, false, false] + }, + ], + }, + { + name: "ExtractIPAddress All 100s", + input: "100.100.100.100", + expectedOutput: "100.100.100.100", + recipeConfig: [ + { + "op": "Extract IP addresses", + "args": [true, true, false, false, false, false] + }, + ], + }, { name: "ExtractIPAddress 255s", input: "255.255.255.255", @@ -52,6 +74,17 @@ TestRegister.addTests([ }, ], }, + { + name: "ExtractIPAddress 256 at each end", + input: "256.255.255.255 255.255.255.256", + expectedOutput: "", + recipeConfig: [ + { + "op": "Extract IP addresses", + "args": [true, true, false, false, false, false] + }, + ], + }, { name: "ExtractIPAddress octal valid", input: "01.01.01.01 0123.0123.0123.0123 0377.0377.0377.0377", From e9b182d33b7e437494e5f7045a25bdbac17fcc24 Mon Sep 17 00:00:00 2001 From: gchqdev364 <40990156+gchqdev364@users.noreply.github.com> Date: Mon, 12 May 2025 14:18:01 +0000 Subject: [PATCH 4/5] Added look ahead and look behind to avoid over capture. 
--- src/core/operations/ExtractIPAddresses.mjs | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/core/operations/ExtractIPAddresses.mjs b/src/core/operations/ExtractIPAddresses.mjs index 9eac8c89..d337e80e 100644 --- a/src/core/operations/ExtractIPAddresses.mjs +++ b/src/core/operations/ExtractIPAddresses.mjs @@ -67,12 +67,16 @@ class ExtractIPAddresses extends Operation { const [includeIpv4, includeIpv6, removeLocal, displayTotal, sort, unique] = args, // IPv4 decimal groups can have values 0 to 255. To construct a regex the following sub-regex is reused: - ipv4DecimalByte = "(?:25[0-5]|2[0-4]\\d|1?[1-9]\\d|\\d)", + ipv4DecimalByte = "(?:25[0-5]|2[0-4]\\d|1?[0-9]\\d|\\d)", ipv4OctalByte = "(?:0[1-3]?[0-7]{1,2})", - // Each variant requires exactly 4 groups with literal . between - ipv4Decimal = "(?:" + ipv4DecimalByte + "\\.){3}" + "(?:" + ipv4DecimalByte + ")", - ipv4Octal = "(?:" + ipv4OctalByte + "\\.){3}" + "(?:" + ipv4OctalByte + ")", + // Look behind and ahead will be used to exclude matches with additional decimal digits left and right of IP address + lookBehind = "(? Date: Mon, 12 May 2025 14:24:07 +0000 Subject: [PATCH 5/5] Updated warning now that original issue has been resolved. --- src/core/operations/ExtractIPAddresses.mjs | 2 +- tests/operations/tests/ExtractIPAddresses.mjs | 22 +++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/src/core/operations/ExtractIPAddresses.mjs b/src/core/operations/ExtractIPAddresses.mjs index d337e80e..b74ec8fe 100644 --- a/src/core/operations/ExtractIPAddresses.mjs +++ b/src/core/operations/ExtractIPAddresses.mjs @@ -21,7 +21,7 @@ class ExtractIPAddresses extends Operation { this.name = "Extract IP addresses"; this.module = "Regex"; - this.description = "Extracts all IPv4 and IPv6 addresses.

Warning: Given a string 710.65.0.456, this will match 10.65.0.45 so always check the original input!"; + this.description = "Extracts all IPv4 and IPv6 addresses.

Warning: Given a string 1.2.3.4.5.6.7.8, this will match 1.2.3.4 and 5.6.7.8 so always check the original input!"; this.inputType = "string"; this.outputType = "string"; this.args = [ diff --git a/tests/operations/tests/ExtractIPAddresses.mjs b/tests/operations/tests/ExtractIPAddresses.mjs index baa6056a..13922e64 100644 --- a/tests/operations/tests/ExtractIPAddresses.mjs +++ b/tests/operations/tests/ExtractIPAddresses.mjs @@ -85,6 +85,28 @@ TestRegister.addTests([ }, ], }, + { + name: "ExtractIPAddress silly example", + input: "710.65.0.456", + expectedOutput: "", + recipeConfig: [ + { + "op": "Extract IP addresses", + "args": [true, true, false, false, false, false] + }, + ], + }, + { + name: "ExtractIPAddress longer dotted decimal", + input: "1.2.3.4.5.6.7.8", + expectedOutput: "1.2.3.4\n5.6.7.8", + recipeConfig: [ + { + "op": "Extract IP addresses", + "args": [true, true, false, false, false, false] + }, + ], + }, { name: "ExtractIPAddress octal valid", input: "01.01.01.01 0123.0123.0123.0123 0377.0377.0377.0377",