mirror of
https://github.com/ether/etherpad-lite.git
synced 2025-04-20 15:36:16 -04:00
Factor out common URL regular expression code
This also eliminates the differences between the regular expressions.
This commit is contained in:
parent
66d0eb9a1f
commit
7e8de5540f
4 changed files with 74 additions and 73 deletions
|
@ -22,6 +22,7 @@ const hooks = require('ep_etherpad-lite/static/js/pluginfw/hooks');
|
||||||
const eejs = require('ep_etherpad-lite/node/eejs');
|
const eejs = require('ep_etherpad-lite/node/eejs');
|
||||||
const _analyzeLine = require('./ExportHelper')._analyzeLine;
|
const _analyzeLine = require('./ExportHelper')._analyzeLine;
|
||||||
const _encodeWhitespace = require('./ExportHelper')._encodeWhitespace;
|
const _encodeWhitespace = require('./ExportHelper')._encodeWhitespace;
|
||||||
|
const padutils = require('../../static/js/pad_utils').padutils;
|
||||||
|
|
||||||
async function getPadHTML(pad, revNum) {
|
async function getPadHTML(pad, revNum) {
|
||||||
let atext = pad.atext;
|
let atext = pad.atext;
|
||||||
|
@ -191,7 +192,7 @@ async function getHTMLFromAtext(pad, atext, authorColors) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const urls = _findURLs(text);
|
const urls = padutils.findURLs(text);
|
||||||
|
|
||||||
let idx = 0;
|
let idx = 0;
|
||||||
|
|
||||||
|
@ -459,30 +460,6 @@ exports.getPadHTMLDocument = async function (padId, revNum) {
|
||||||
});
|
});
|
||||||
};
|
};
|
||||||
|
|
||||||
// copied from ACE
|
|
||||||
const _REGEX_WORDCHAR = /[\u0030-\u0039\u0041-\u005A\u0061-\u007A\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF\u0100-\u1FFF\u3040-\u9FFF\uF900-\uFDFF\uFE70-\uFEFE\uFF10-\uFF19\uFF21-\uFF3A\uFF41-\uFF5A\uFF66-\uFFDC]/;
|
|
||||||
const _REGEX_SPACE = /\s/;
|
|
||||||
const _REGEX_URLCHAR = new RegExp(`(${/[-:@a-zA-Z0-9_.,~%+\/\\?=&#;()$]/.source}|${_REGEX_WORDCHAR.source})`);
|
|
||||||
const _REGEX_URL = new RegExp(`${/(?:(?:https?|s?ftp|ftps|file|smb|afp|nfs|(x-)?man|gopher|txmt):\/\/|mailto:)/.source + _REGEX_URLCHAR.source}*(?![:.,;])${_REGEX_URLCHAR.source}`, 'g');
|
|
||||||
|
|
||||||
// returns null if no URLs, or [[startIndex1, url1], [startIndex2, url2], ...]
|
|
||||||
|
|
||||||
|
|
||||||
function _findURLs(text) {
|
|
||||||
_REGEX_URL.lastIndex = 0;
|
|
||||||
let urls = null;
|
|
||||||
let execResult;
|
|
||||||
while ((execResult = _REGEX_URL.exec(text))) {
|
|
||||||
urls = (urls || []);
|
|
||||||
const startIndex = execResult.index;
|
|
||||||
const url = execResult[0];
|
|
||||||
urls.push([startIndex, url]);
|
|
||||||
}
|
|
||||||
|
|
||||||
return urls;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// copied from ACE
|
// copied from ACE
|
||||||
function _processSpaces(s) {
|
function _processSpaces(s) {
|
||||||
const doesWrap = true;
|
const doesWrap = true;
|
||||||
|
|
|
@ -19,6 +19,9 @@
|
||||||
* See the License for the specific language governing permissions and
|
* See the License for the specific language governing permissions and
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
const padutils = require('./pad_utils').padutils;
|
||||||
|
|
||||||
let _, $, jQuery, plugins, Ace2Common;
|
let _, $, jQuery, plugins, Ace2Common;
|
||||||
const browser = require('./browser');
|
const browser = require('./browser');
|
||||||
if (browser.msie) {
|
if (browser.msie) {
|
||||||
|
@ -2806,13 +2809,9 @@ function Ace2Inner() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// set of "letter or digit" chars is based on section 20.5.16 of the original Java Language Spec
|
|
||||||
const REGEX_WORDCHAR = /[\u0030-\u0039\u0041-\u005A\u0061-\u007A\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF\u0100-\u1FFF\u3040-\u9FFF\uF900-\uFDFF\uFE70-\uFEFE\uFF10-\uFF19\uFF21-\uFF3A\uFF41-\uFF5A\uFF66-\uFFDC]/;
|
|
||||||
const REGEX_SPACE = /\s/;
|
const REGEX_SPACE = /\s/;
|
||||||
|
|
||||||
function isWordChar(c) {
|
const isWordChar = (c) => padutils.wordCharRegex.test(c);
|
||||||
return !!REGEX_WORDCHAR.exec(c);
|
|
||||||
}
|
|
||||||
editorInfo.ace_isWordChar = isWordChar;
|
editorInfo.ace_isWordChar = isWordChar;
|
||||||
|
|
||||||
function isSpaceChar(c) {
|
function isSpaceChar(c) {
|
||||||
|
|
|
@ -33,6 +33,7 @@ const hooks = require('./pluginfw/hooks');
|
||||||
const linestylefilter = {};
|
const linestylefilter = {};
|
||||||
const _ = require('./underscore');
|
const _ = require('./underscore');
|
||||||
const AttributeManager = require('./AttributeManager');
|
const AttributeManager = require('./AttributeManager');
|
||||||
|
const padutils = require('./pad_utils').padutils;
|
||||||
|
|
||||||
linestylefilter.ATTRIB_CLASSES = {
|
linestylefilter.ATTRIB_CLASSES = {
|
||||||
bold: 'tag:b',
|
bold: 'tag:b',
|
||||||
|
@ -224,11 +225,7 @@ linestylefilter.getRegexpFilter = function (regExp, tag) {
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
linestylefilter.REGEX_WORDCHAR = /[\u0030-\u0039\u0041-\u005A\u0061-\u007A\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF\u0100-\u1FFF\u3040-\u9FFF\uF900-\uFDFF\uFE70-\uFEFE\uFF10-\uFF19\uFF21-\uFF3A\uFF41-\uFF5A\uFF66-\uFFDC]/;
|
linestylefilter.getURLFilter = linestylefilter.getRegexpFilter(padutils.urlRegex, 'url');
|
||||||
linestylefilter.REGEX_URLCHAR = new RegExp(`(${/[-:@a-zA-Z0-9_.,~%+\/\\?=&#!;()$]/.source}|${linestylefilter.REGEX_WORDCHAR.source})`);
|
|
||||||
linestylefilter.REGEX_URL = new RegExp(`${/(?:(?:https?|s?ftp|ftps|file|nfs):\/\/|(about|geo|mailto|tel):|www\.)/.source + linestylefilter.REGEX_URLCHAR.source}*(?![:.,;])${linestylefilter.REGEX_URLCHAR.source}`, 'g');
|
|
||||||
linestylefilter.getURLFilter = linestylefilter.getRegexpFilter(
|
|
||||||
linestylefilter.REGEX_URL, 'url');
|
|
||||||
|
|
||||||
linestylefilter.textAndClassFuncSplitter = function (func, splitPointsOpt) {
|
linestylefilter.textAndClassFuncSplitter = function (func, splitPointsOpt) {
|
||||||
let nextPointIndex = 0;
|
let nextPointIndex = 0;
|
||||||
|
|
|
@ -39,6 +39,55 @@ const randomString = (len) => {
|
||||||
return randomstring;
|
return randomstring;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * Matches a single "letter or digit" character.
 *
 * The set of ranges is based on section 20.5.16 of the original Java Language Spec.
 *
 * The expression is built without flags, so it is not global and `.test()` / `.exec()` on it do
 * not carry `lastIndex` state between calls.
 *
 * NOTE: urlRegex splices the body of this character class (`.source` without the enclosing
 * brackets) into its own character class, so this must remain one bracketed class.
 */
const wordCharRegex = new RegExp(`[${[
  '\u0030-\u0039',
  '\u0041-\u005A',
  '\u0061-\u007A',
  '\u00C0-\u00D6',
  '\u00D8-\u00F6',
  '\u00F8-\u00FF',
  '\u0100-\u1FFF',
  '\u3040-\u9FFF',
  '\uF900-\uFDFF',
  '\uFE70-\uFEFE',
  '\uFF10-\uFF19',
  '\uFF21-\uFF3A',
  '\uFF41-\uFF5A',
  '\uFF66-\uFFDC',
].join('')}]`);
|
||||||
|
|
||||||
|
/**
 * Global (`g`) regular expression that matches URL-like substrings.
 *
 * A match starts with one of:
 *   - a scheme from `withAuth` followed by `://`,
 *   - a scheme from `withoutAuth` followed by `:`, or
 *   - the literal prefix `www.`,
 * and continues with any run of `urlChar` characters, except that the final matched character
 * may not be one of the `postUrlPunct` characters.
 *
 * Because the expression carries the `g` flag, `.exec()` and `.test()` mutate its `lastIndex`.
 * Code that calls those methods should work on a copy rather than on this shared object.
 */
const urlRegex = (() => {
  // TODO: wordCharRegex matches many characters that are not permitted in URIs. Are they included
  // here as an attempt to support IRIs? (See https://tools.ietf.org/html/rfc3987.)
  // The slice strips the enclosing `[` and `]` from wordCharRegex's source so that its ranges can
  // be merged into this character class.
  const urlChar = `[-:@_.,~%+/?=&#!;()$${wordCharRegex.source.slice(1, -1)}]`;
  // Matches a single character that should not be considered part of the URL if it is the last
  // character that matches urlChar.
  const postUrlPunct = '[:.,;]';
  // Schemes that must be followed by ://
  const withAuth = `(?:${[
    '(?:x-)?man',
    'afp',
    'file',
    'ftps?',
    'gopher',
    'https?',
    'nfs',
    'sftp',
    'smb',
    'txmt',
  ].join('|')})://`;
  // Schemes that do not need to be followed by ://
  const withoutAuth = `(?:${[
    'about',
    'geo',
    'mailto',
    'tel',
  ].join('|')}):`;
  // `urlChar*` is greedy; the negative lookahead before the final `urlChar` makes the engine back
  // off until the last character of the match is not trailing punctuation.
  return new RegExp(
      `(?:${withAuth}|${withoutAuth}|www\\.)${urlChar}*(?!${postUrlPunct})${urlChar}`, 'g');
})();
|
||||||
|
|
||||||
const padutils = {
|
const padutils = {
|
||||||
escapeHtml: (x) => Security.escapeHTML(String(x)),
|
escapeHtml: (x) => Security.escapeHTML(String(x)),
|
||||||
uniqueId: () => {
|
uniqueId: () => {
|
||||||
|
@ -75,45 +124,24 @@ const padutils = {
|
||||||
const hourmin = `${d.getHours()}:${(`0${d.getMinutes()}`).slice(-2)}`;
|
const hourmin = `${d.getHours()}:${(`0${d.getMinutes()}`).slice(-2)}`;
|
||||||
return `${dayOfWeek} ${month} ${dayOfMonth} ${year} ${hourmin}`;
|
return `${dayOfWeek} ${month} ${dayOfMonth} ${year} ${hourmin}`;
|
||||||
},
|
},
|
||||||
|
wordCharRegex,
|
||||||
|
urlRegex,
|
||||||
|
// returns null if no URLs, or [[startIndex1, url1], [startIndex2, url2], ...]
|
||||||
findURLs: (text) => {
|
findURLs: (text) => {
|
||||||
// copied from ACE
|
// Copy padutils.urlRegex so that the use of .exec() below (which mutates the RegExp object)
|
||||||
const _REGEX_WORDCHAR = new RegExp(`[${[
|
// does not break other concurrent uses of padutils.urlRegex.
|
||||||
'\u0030-\u0039',
|
const urlRegex = new RegExp(padutils.urlRegex, 'g');
|
||||||
'\u0041-\u005A',
|
urlRegex.lastIndex = 0;
|
||||||
'\u0061-\u007A',
|
let urls = null;
|
||||||
'\u00C0-\u00D6',
|
let execResult;
|
||||||
'\u00D8-\u00F6',
|
// TODO: Switch to String.prototype.matchAll() after support for Node.js < 12.0.0 is dropped.
|
||||||
'\u00F8-\u00FF',
|
while ((execResult = urlRegex.exec(text))) {
|
||||||
'\u0100-\u1FFF',
|
urls = (urls || []);
|
||||||
'\u3040-\u9FFF',
|
const startIndex = execResult.index;
|
||||||
'\uF900-\uFDFF',
|
const url = execResult[0];
|
||||||
'\uFE70-\uFEFE',
|
urls.push([startIndex, url]);
|
||||||
'\uFF10-\uFF19',
|
}
|
||||||
'\uFF21-\uFF3A',
|
return urls;
|
||||||
'\uFF41-\uFF5A',
|
|
||||||
'\uFF66-\uFFDC',
|
|
||||||
].join('')}]`);
|
|
||||||
const _REGEX_URLCHAR = new RegExp(`([-:@a-zA-Z0-9_.,~%+/?=&#;()$]|${_REGEX_WORDCHAR.source})`);
|
|
||||||
const _REGEX_URL = new RegExp(
|
|
||||||
'(?:(?:https?|s?ftp|ftps|file|nfs)://|(about|geo|mailto|tel):)' +
|
|
||||||
`${_REGEX_URLCHAR.source}*(?![:.,;])${_REGEX_URLCHAR.source}`, 'g');
|
|
||||||
|
|
||||||
// returns null if no URLs, or [[startIndex1, url1], [startIndex2, url2], ...]
|
|
||||||
const _findURLs = (text) => {
|
|
||||||
_REGEX_URL.lastIndex = 0;
|
|
||||||
let urls = null;
|
|
||||||
let execResult;
|
|
||||||
while ((execResult = _REGEX_URL.exec(text))) {
|
|
||||||
urls = (urls || []);
|
|
||||||
const startIndex = execResult.index;
|
|
||||||
const url = execResult[0];
|
|
||||||
urls.push([startIndex, url]);
|
|
||||||
}
|
|
||||||
|
|
||||||
return urls;
|
|
||||||
};
|
|
||||||
|
|
||||||
return _findURLs(text);
|
|
||||||
},
|
},
|
||||||
escapeHtmlWithClickableLinks: (text, target) => {
|
escapeHtmlWithClickableLinks: (text, target) => {
|
||||||
let idx = 0;
|
let idx = 0;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue