Updated URL regexes to match more unescaped special characters

This commit is contained in:
n1474335 2017-08-15 16:44:45 +00:00
parent cf1ba60a10
commit a19b02aa8c
2 changed files with 3 additions and 3 deletions

View file

@ -170,9 +170,9 @@ const Extract = {
protocol = "[A-Z]+://",
hostname = "[-\\w]+(?:\\.\\w[-\\w]*)+",
port = ":\\d+",
path = "/[^.!,?;\"'<>()\\[\\]{}\\s\\x7F-\\xFF]*";
path = "/[^.!,?\"<>\\[\\]{}\\s\\x7F-\\xFF]*";
path += "(?:[.!,?]+[^.!,?;\"'<>()\\[\\]{}\\s\\x7F-\\xFF]+)*";
path += "(?:[.!,?]+[^.!,?\"<>\\[\\]{}\\s\\x7F-\\xFF]+)*";
const regex = new RegExp(protocol + hostname + "(?:" + port +
")?(?:" + path + ")?", "ig");
return Extract._search(input, regex, null, displayTotal);