Factor out common URL regular expression code

This also eliminates the differences between the regular expressions.
This commit is contained in:
Richard Hansen 2020-12-11 15:55:13 -05:00 committed by John McLear
parent 66d0eb9a1f
commit 7e8de5540f
4 changed files with 74 additions and 73 deletions

View file

@ -22,6 +22,7 @@ const hooks = require('ep_etherpad-lite/static/js/pluginfw/hooks');
const eejs = require('ep_etherpad-lite/node/eejs');
const _analyzeLine = require('./ExportHelper')._analyzeLine;
const _encodeWhitespace = require('./ExportHelper')._encodeWhitespace;
const padutils = require('../../static/js/pad_utils').padutils;
async function getPadHTML(pad, revNum) {
let atext = pad.atext;
@ -191,7 +192,7 @@ async function getHTMLFromAtext(pad, atext, authorColors) {
}
}
const urls = _findURLs(text);
const urls = padutils.findURLs(text);
let idx = 0;
@ -459,30 +460,6 @@ exports.getPadHTMLDocument = async function (padId, revNum) {
});
};
// URL-detection patterns (copied from ACE).

// A single "word" character: ASCII digits and letters plus a set of non-ASCII ranges.
const _REGEX_WORDCHAR = /[\u0030-\u0039\u0041-\u005A\u0061-\u007A\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF\u0100-\u1FFF\u3040-\u9FFF\uF900-\uFDFF\uFE70-\uFEFE\uFF10-\uFF19\uFF21-\uFF3A\uFF41-\uFF5A\uFF66-\uFFDC]/;

// A single whitespace character.
const _REGEX_SPACE = /\s/;

// A single character allowed inside a URL: either URL punctuation/ASCII or a word character.
const _REGEX_URLCHAR = new RegExp([
  '(',
  /[-:@a-zA-Z0-9_.,~%+\/\\?=&#;()$]/.source,
  '|',
  _REGEX_WORDCHAR.source,
  ')',
].join(''));

// A whole URL: a scheme prefix, any number of URL characters, and a final URL character that is
// not one of `:`, `.`, `,` or `;` (so trailing sentence punctuation is left out of the match).
// The `g` flag makes this regex stateful (`lastIndex`) when used with `exec()`.
const _REGEX_URL = new RegExp([
  /(?:(?:https?|s?ftp|ftps|file|smb|afp|nfs|(x-)?man|gopher|txmt):\/\/|mailto:)/.source,
  _REGEX_URLCHAR.source,
  '*(?![:.,;])',
  _REGEX_URLCHAR.source,
].join(''), 'g');
/**
 * Collects every match of the module-level `_REGEX_URL` pattern in `text`.
 *
 * @param {string} text - Text to scan.
 * @returns {?Array<Array>} `[startIndex, url]` pairs in match order, or `null` when nothing
 *     matches.
 */
function _findURLs(text) {
  // `_REGEX_URL` is shared and stateful; rewind it so the scan always starts at offset 0.
  _REGEX_URL.lastIndex = 0;
  const found = [];
  for (let match = _REGEX_URL.exec(text); match !== null; match = _REGEX_URL.exec(text)) {
    found.push([match.index, match[0]]);
  }
  // Callers expect null (not an empty array) when no URL was found.
  return found.length > 0 ? found : null;
}
// copied from ACE
function _processSpaces(s) {
const doesWrap = true;