Merge branch 'develop' of github.com:ether/etherpad-lite into mochawesome

This commit is contained in:
John McLear 2021-01-27 20:11:53 +00:00
commit 7696ec0a02
7 changed files with 333 additions and 356 deletions

View file

@ -421,7 +421,20 @@ Things in context:
4. text - the text for that line
This hook allows you to validate/manipulate the text before it's sent to the
server side. The return value should be the validated/manipulated text.
server side. To change the text, either:
* Set the `text` context property to the desired value and return `undefined`.
* (Deprecated) Return a string. If a hook function changes the `text` context
property, the return value is ignored. If no hook function changes `text` but
multiple hook functions return a string, the first one wins.
Example:
```
exports.collectContentLineText = (hookName, context) => {
context.text = tweakText(context.text);
};
```
## collectContentLineBreak

View file

@ -391,9 +391,9 @@ const defaultResponseRefs = {
// convert to a dictionary of operation objects
const operations = {};
for (const resource in resources) {
for (const action in resources[resource]) {
const {operationId, responseSchema, ...operation} = resources[resource][action];
for (const [resource, actions] of Object.entries(resources)) {
for (const [action, spec] of Object.entries(actions)) {
const {operationId, responseSchema, ...operation} = spec;
// add response objects
const responses = {...defaultResponseRefs};
@ -623,7 +623,7 @@ exports.expressCreateServer = (hookName, args, cb) => {
} else {
// an unknown error happened
// log it and throw internal error
apiLogger.error(err);
apiLogger.error(err.stack || err.toString());
throw new createHTTPError.InternalError('internal error');
}
});

View file

@ -1139,7 +1139,7 @@ function Ace2Inner() {
lastDirtyNode = (lastDirtyNode && isNodeDirty(lastDirtyNode) && lastDirtyNode);
if (firstDirtyNode && lastDirtyNode) {
const cc = makeContentCollector(isStyled, browser, rep.apool, null, className2Author);
const cc = makeContentCollector(isStyled, browser, rep.apool, className2Author);
cc.notifySelection(selection);
const dirtyNodes = [];
for (let n = firstDirtyNode; n &&

View file

@ -32,30 +32,31 @@ const hooks = require('./pluginfw/hooks');
const sanitizeUnicode = (s) => UNorm.nfc(s);
const makeContentCollector = (collectStyles, abrowser, apool, domInterface, className2Author) => {
const dom = domInterface || {
isNodeText: (n) => n.nodeType === 3,
nodeTagName: (n) => n.tagName,
nodeValue: (n) => n.nodeValue,
nodeNumChildren: (n) => {
if (n.childNodes == null) return 0;
return n.childNodes.length;
},
nodeChild: (n, i) => {
if (n.childNodes.item == null) {
return n.childNodes[i];
}
return n.childNodes.item(i);
},
nodeProp: (n, p) => n[p],
nodeAttr: (n, a) => {
if (n.getAttribute != null) return n.getAttribute(a);
if (n.attribs != null) return n.attribs[a];
return null;
},
optNodeInnerHTML: (n) => n.innerHTML,
};
// This file is used both in browsers and with cheerio in Node.js (for importing HTML). Cheerio's
// Node-like objects are not 100% API compatible with the DOM specification; the following functions
// abstract away the differences.
// .nodeType works with DOM and cheerio 0.22.0, but cheerio 0.22.0 does not provide the Node.*_NODE
// constants so they cannot be used here.
const isElementNode = (n) => n.nodeType === 1; // Node.ELEMENT_NODE
const isTextNode = (n) => n.nodeType === 3; // Node.TEXT_NODE
// .tagName works with DOM and cheerio 0.22.0, but:
// * With DOM, .tagName is an uppercase string.
// * With cheerio 0.22.0, .tagName is a lowercase string.
// For consistency, this function always returns a lowercase string.
const tagName = (n) => n.tagName && n.tagName.toLowerCase();
// .childNodes works with DOM and cheerio 0.22.0, except in cheerio the .childNodes property does
// not exist on text nodes (and maybe other non-element nodes).
const childNodes = (n) => n.childNodes || [];
const getAttribute = (n, a) => {
// .getAttribute() works with DOM but not with cheerio 0.22.0.
if (n.getAttribute != null) return n.getAttribute(a);
// .attribs[] works with cheerio 0.22.0 but not with DOM.
if (n.attribs != null) return n.attribs[a];
return null;
};
const makeContentCollector = (collectStyles, abrowser, apool, className2Author) => {
const _blockElems = {
div: 1,
p: 1,
@ -67,7 +68,7 @@ const makeContentCollector = (collectStyles, abrowser, apool, domInterface, clas
_blockElems[element] = 1;
});
const isBlockElement = (n) => !!_blockElems[(dom.nodeTagName(n) || '').toLowerCase()];
const isBlockElement = (n) => !!_blockElems[tagName(n) || ''];
const textify = (str) => sanitizeUnicode(
str.replace(/(\n | \n)/g, ' ')
@ -75,7 +76,7 @@ const makeContentCollector = (collectStyles, abrowser, apool, domInterface, clas
.replace(/\xa0/g, ' ')
.replace(/\t/g, ' '));
const getAssoc = (node, name) => dom.nodeProp(node, `_magicdom_${name}`);
const getAssoc = (node, name) => node[`_magicdom_${name}`];
const lines = (() => {
const textArray = [];
@ -123,13 +124,17 @@ const makeContentCollector = (collectStyles, abrowser, apool, domInterface, clas
let selEnd = [-1, -1];
const _isEmpty = (node, state) => {
// consider clean blank lines pasted in IE to be empty
if (dom.nodeNumChildren(node) === 0) return true;
if (dom.nodeNumChildren(node) === 1 &&
if (childNodes(node).length === 0) return true;
if (childNodes(node).length === 1 &&
getAssoc(node, 'shouldBeEmpty') &&
dom.optNodeInnerHTML(node) === ' ' &&
// Note: The .innerHTML property exists on DOM Element objects but not on cheerio's
// Element-like objects (cheerio v0.22.0) so this equality check will always be false.
// Cheerio's Element-like objects have no equivalent to .innerHTML. (Cheerio objects have an
// .html() method, but that isn't accessible here.)
node.innerHTML === ' ' &&
!getAssoc(node, 'unpasted')) {
if (state) {
const child = dom.nodeChild(node, 0);
const child = childNodes(node)[0];
_reachPoint(child, 0, state);
_reachPoint(child, 1, state);
}
@ -149,7 +154,7 @@ const makeContentCollector = (collectStyles, abrowser, apool, domInterface, clas
};
const _reachBlockPoint = (nd, idx, state) => {
if (!dom.isNodeText(nd)) _reachPoint(nd, idx, state);
if (!isTextNode(nd)) _reachPoint(nd, idx, state);
};
const _reachPoint = (nd, idx, state) => {
@ -228,25 +233,24 @@ const makeContentCollector = (collectStyles, abrowser, apool, domInterface, clas
const _recalcAttribString = (state) => {
const lst = [];
for (const a in state.attribs) {
if (state.attribs[a]) {
// The following splitting of the attribute name is a workaround
// to enable the content collector to store key-value attributes
// see https://github.com/ether/etherpad-lite/issues/2567 for more information
// in long term the contentcollector should be refactored to get rid of this workaround
const ATTRIBUTE_SPLIT_STRING = '::';
for (const [a, count] of Object.entries(state.attribs)) {
if (!count) continue;
// The following splitting of the attribute name is a workaround
// to enable the content collector to store key-value attributes
// see https://github.com/ether/etherpad-lite/issues/2567 for more information
// in long term the contentcollector should be refactored to get rid of this workaround
const ATTRIBUTE_SPLIT_STRING = '::';
// see if attributeString is splittable
const attributeSplits = a.split(ATTRIBUTE_SPLIT_STRING);
if (attributeSplits.length > 1) {
// the attribute name follows the convention key::value
// so save it as a key value attribute
lst.push([attributeSplits[0], attributeSplits[1]]);
} else {
// the "normal" case, the attribute is just a switch
// so set it true
lst.push([a, 'true']);
}
// see if attributeString is splittable
const attributeSplits = a.split(ATTRIBUTE_SPLIT_STRING);
if (attributeSplits.length > 1) {
// the attribute name follows the convention key::value
// so save it as a key value attribute
lst.push([attributeSplits[0], attributeSplits[1]]);
} else {
// the "normal" case, the attribute is just a switch
// so set it true
lst.push([a, 'true']);
}
}
if (state.authorLevel > 0) {
@ -316,25 +320,15 @@ const makeContentCollector = (collectStyles, abrowser, apool, domInterface, clas
const startLine = lines.length() - 1;
_reachBlockPoint(node, 0, state);
if (dom.isNodeText(node)) {
let txt = dom.nodeValue(node);
const tname = dom.nodeAttr(node.parentNode, 'name');
const txtFromHook = hooks.callAll('collectContentLineText', {
cc: this,
state,
tname,
node,
text: txt,
styl: null,
cls: null,
});
if (typeof (txtFromHook) === 'object') {
txt = dom.nodeValue(node);
} else if (txtFromHook) {
txt = txtFromHook;
}
if (isTextNode(node)) {
const tname = getAttribute(node.parentNode, 'name');
const context = {cc: this, state, tname, node, text: node.nodeValue};
// Hook functions may either return a string (deprecated) or modify context.text. If any hook
// function modifies context.text then all returned strings are ignored. If no hook functions
// modify context.text, the first hook function to return a string wins.
const [hookTxt] =
hooks.callAll('collectContentLineText', context).filter((s) => typeof s === 'string');
let txt = context.text === node.nodeValue && hookTxt != null ? hookTxt : context.text;
let rest = '';
let x = 0; // offset into original text
@ -384,8 +378,8 @@ const makeContentCollector = (collectStyles, abrowser, apool, domInterface, clas
cc.startNewLine(state);
}
}
} else {
const tname = (dom.nodeTagName(node) || '').toLowerCase();
} else if (isElementNode(node)) {
const tname = tagName(node) || '';
if (tname === 'img') {
hooks.callAll('collectContentImage', {
@ -403,8 +397,8 @@ const makeContentCollector = (collectStyles, abrowser, apool, domInterface, clas
if (tname === 'br') {
this.breakLine = true;
const tvalue = dom.nodeAttr(node, 'value');
const induceLineBreak = hooks.callAll('collectContentLineBreak', {
const tvalue = getAttribute(node, 'value');
const [startNewLine = true] = hooks.callAll('collectContentLineBreak', {
cc: this,
state,
tname,
@ -412,17 +406,14 @@ const makeContentCollector = (collectStyles, abrowser, apool, domInterface, clas
styl: null,
cls: null,
});
const startNewLine = (
typeof (induceLineBreak) === 'object' &&
induceLineBreak.length === 0) ? true : induceLineBreak[0];
if (startNewLine) {
cc.startNewLine(state);
}
} else if (tname === 'script' || tname === 'style') {
// ignore
} else if (!isEmpty) {
let styl = dom.nodeAttr(node, 'style');
let cls = dom.nodeAttr(node, 'class');
let styl = getAttribute(node, 'style');
let cls = getAttribute(node, 'class');
let isPre = (tname === 'pre');
if ((!isPre) && abrowser && abrowser.safari) {
isPre = (styl && /\bwhite-space:\s*pre\b/i.exec(styl));
@ -469,26 +460,23 @@ const makeContentCollector = (collectStyles, abrowser, apool, domInterface, clas
cc.doAttrib(state, 'strikethrough');
}
if (tname === 'ul' || tname === 'ol') {
let type = node.attribs ? node.attribs.class : null;
let type = getAttribute(node, 'class');
const rr = cls && /(?:^| )list-([a-z]+[0-9]+)\b/.exec(cls);
// lists do not need to have a type, so before we make a wrong guess
// check if we find a better hint within the node's children
if (!rr && !type) {
for (const i in node.children) {
if (node.children[i] && node.children[i].name === 'ul') {
type = node.children[i].attribs.class;
if (type) {
break;
}
}
for (const child of childNodes(node)) {
if (tagName(child) !== 'ul') continue;
type = getAttribute(child, 'class');
if (type) break;
}
}
if (rr && rr[1]) {
type = rr[1];
} else {
if (tname === 'ul') {
if ((type && type.match('indent')) ||
(node.attribs && node.attribs.class && node.attribs.class.match('indent'))) {
const cls = getAttribute(node, 'class');
if ((type && type.match('indent')) || (cls && cls.match('indent'))) {
type = 'indent';
} else {
type = 'bullet';
@ -503,7 +491,7 @@ const makeContentCollector = (collectStyles, abrowser, apool, domInterface, clas
// This has undesirable behavior in Chrome but is right in other browsers.
// See https://github.com/ether/etherpad-lite/issues/2412 for reasoning
if (!abrowser.chrome) oldListTypeOrNull = (_enterList(state, undefined) || 'none');
} else if ((tname === 'li')) {
} else if (tname === 'li') {
state.lineAttributes.start = state.start || 0;
_recalcAttribString(state);
if (state.lineAttributes.list.indexOf('number') !== -1) {
@ -513,7 +501,7 @@ const makeContentCollector = (collectStyles, abrowser, apool, domInterface, clas
Note how the <ol> item has to be inside a <li>
Because of this we don't increment the start number
*/
if (node.parent && node.parent.name !== 'ol') {
if (node.parentNode && tagName(node.parentNode) !== 'ol') {
/*
TODO: start number has to increment based on indentLevel(numberX)
This means we have to build an object IE
@ -530,7 +518,7 @@ const makeContentCollector = (collectStyles, abrowser, apool, domInterface, clas
}
}
// UL list items never modify the start value.
if (node.parent && node.parent.name === 'ul') {
if (node.parentNode && tagName(node.parentNode) === 'ul') {
state.start++;
// TODO, this is hacky.
// Because if the first item is an UL it will increment a list no?
@ -559,9 +547,7 @@ const makeContentCollector = (collectStyles, abrowser, apool, domInterface, clas
}
}
const nc = dom.nodeNumChildren(node);
for (let i = 0; i < nc; i++) {
const c = dom.nodeChild(node, i);
for (const c of childNodes(node)) {
cc.collectContent(c, state);
}

View file

@ -1,3 +1,5 @@
'use strict';
/*
* This file is copied & modified from <basedir>/tests/backend/specs/api/pad.js
*

View file

@ -6,151 +6,148 @@
* TODO: unify those two files, and merge in a single one.
*/
/* eslint-disable max-len */
const common = require('../../common');
const supertest = require(`${__dirname}/../../../../src/node_modules/supertest`);
const settings = require(`${__dirname}/../../../../tests/container/loadSettings.js`).loadSettings();
const api = supertest(`http://${settings.ip}:${settings.port}`);
const settings = require('../../../container/loadSettings.js').loadSettings();
const supertest = require('ep_etherpad-lite/node_modules/supertest');
const api = supertest(`http://${settings.ip}:${settings.port}`);
const apiKey = common.apiKey;
const apiVersion = 1;
const testImports = {
'malformed': {
input: '<html><body><li>wtf</ul></body></html>',
expectedHTML: '<!DOCTYPE HTML><html><body>wtf<br><br></body></html>',
expectedText: 'wtf\n\n',
wantHTML: '<!DOCTYPE HTML><html><body>wtf<br><br></body></html>',
wantText: 'wtf\n\n',
disabled: true,
},
'nonelistiteminlist #3620': {
input: '<html><body><ul>test<li>FOO</li></ul></body></html>',
expectedHTML: '<!DOCTYPE HTML><html><body><ul class="bullet">test<li>FOO</ul><br></body></html>',
expectedText: '\ttest\n\t* FOO\n\n',
wantHTML: '<!DOCTYPE HTML><html><body><ul class="bullet">test<li>FOO</ul><br></body></html>',
wantText: '\ttest\n\t* FOO\n\n',
disabled: true,
},
'whitespaceinlist #3620': {
input: '<html><body><ul> <li>FOO</li></ul></body></html>',
expectedHTML: '<!DOCTYPE HTML><html><body><ul class="bullet"><li>FOO</ul><br></body></html>',
expectedText: '\t* FOO\n\n',
disabled: true,
wantHTML: '<!DOCTYPE HTML><html><body><ul class="bullet"><li>FOO</ul><br></body></html>',
wantText: '\t* FOO\n\n',
},
'prefixcorrectlinenumber': {
input: '<html><body><ol><li>should be 1</li><li>should be 2</li></ol></body></html>',
expectedHTML: '<!DOCTYPE HTML><html><body><ol start="1" class="number"><li>should be 1</li><li>should be 2</ol><br></body></html>',
expectedText: '\t1. should be 1\n\t2. should be 2\n\n',
wantHTML: '<!DOCTYPE HTML><html><body><ol start="1" class="number"><li>should be 1</li><li>should be 2</ol><br></body></html>',
wantText: '\t1. should be 1\n\t2. should be 2\n\n',
},
'prefixcorrectlinenumbernested': {
input: '<html><body><ol><li>should be 1</li><ol><li>foo</li></ol><li>should be 2</li></ol></body></html>',
expectedHTML: '<!DOCTYPE HTML><html><body><ol start="1" class="number"><li>should be 1<ol start="2" class="number"><li>foo</ol><li>should be 2</ol><br></body></html>',
expectedText: '\t1. should be 1\n\t\t1.1. foo\n\t2. should be 2\n\n',
wantHTML: '<!DOCTYPE HTML><html><body><ol start="1" class="number"><li>should be 1<ol start="2" class="number"><li>foo</ol><li>should be 2</ol><br></body></html>',
wantText: '\t1. should be 1\n\t\t1.1. foo\n\t2. should be 2\n\n',
},
/*
"prefixcorrectlinenumber when introduced none list item - currently not supported see #3450":{
"prefixcorrectlinenumber when introduced none list item - currently not supported see #3450": {
input: '<html><body><ol><li>should be 1</li>test<li>should be 2</li></ol></body></html>',
expectedHTML: '<!DOCTYPE HTML><html><body><ol start="1" class="number"><li>should be 1</li>test<li>should be 2</li></ol><br></body></html>',
expectedText: '\t1. should be 1\n\ttest\n\t2. should be 2\n\n'
wantHTML: '<!DOCTYPE HTML><html><body><ol start="1" class="number"><li>should be 1</li>test<li>should be 2</li></ol><br></body></html>',
wantText: '\t1. should be 1\n\ttest\n\t2. should be 2\n\n',
}
,
"newlinesshouldntresetlinenumber #2194":{
"newlinesshouldntresetlinenumber #2194": {
input: '<html><body><ol><li>should be 1</li>test<li>should be 2</li></ol></body></html>',
expectedHTML: '<!DOCTYPE HTML><html><body><ol class="number"><li>should be 1</li>test<li>should be 2</li></ol><br></body></html>',
expectedText: '\t1. should be 1\n\ttest\n\t2. should be 2\n\n'
wantHTML: '<!DOCTYPE HTML><html><body><ol class="number"><li>should be 1</li>test<li>should be 2</li></ol><br></body></html>',
wantText: '\t1. should be 1\n\ttest\n\t2. should be 2\n\n',
}
*/
'ignoreAnyTagsOutsideBody': {
description: 'Content outside body should be ignored',
input: '<html><head><title>title</title><style></style></head><body>empty<br></body></html>',
expectedHTML: '<!DOCTYPE HTML><html><body>empty<br><br></body></html>',
expectedText: 'empty\n\n',
wantHTML: '<!DOCTYPE HTML><html><body>empty<br><br></body></html>',
wantText: 'empty\n\n',
},
'indentedListsAreNotBullets': {
description: 'Indented lists are represented with tabs and without bullets',
input: '<html><body><ul class="indent"><li>indent</li><li>indent</ul></body></html>',
expectedHTML: '<!DOCTYPE HTML><html><body><ul class="indent"><li>indent</li><li>indent</ul><br></body></html>',
expectedText: '\tindent\n\tindent\n\n'
wantHTML: '<!DOCTYPE HTML><html><body><ul class="indent"><li>indent</li><li>indent</ul><br></body></html>',
wantText: '\tindent\n\tindent\n\n',
},
lineWithMultipleSpaces: {
'lineWithMultipleSpaces': {
description: 'Multiple spaces should be collapsed',
input: '<html><body>Text with more than one space.<br></body></html>',
expectedHTML: '<!DOCTYPE HTML><html><body>Text with more than one space.<br><br></body></html>',
expectedText: 'Text with more than one space.\n\n'
wantHTML: '<!DOCTYPE HTML><html><body>Text with more than one space.<br><br></body></html>',
wantText: 'Text with more than one space.\n\n',
},
lineWithMultipleNonBreakingAndNormalSpaces: {
'lineWithMultipleNonBreakingAndNormalSpaces': {
// XXX the HTML between "than" and "one" looks strange
description: 'non-breaking space should be preserved, but can be replaced when it',
input: '<html><body>Text&nbsp;with&nbsp; more&nbsp;&nbsp;&nbsp;than &nbsp;one space.<br></body></html>',
expectedHTML: '<!DOCTYPE HTML><html><body>Text with&nbsp; more&nbsp;&nbsp; than&nbsp; one space.<br><br></body></html>',
expectedText: 'Text with more than one space.\n\n'
wantHTML: '<!DOCTYPE HTML><html><body>Text with&nbsp; more&nbsp;&nbsp; than&nbsp; one space.<br><br></body></html>',
wantText: 'Text with more than one space.\n\n',
},
multiplenbsp: {
'multiplenbsp': {
description: 'Multiple non-breaking space should be preserved',
input: '<html><body>&nbsp;&nbsp;<br></body></html>',
expectedHTML: '<!DOCTYPE HTML><html><body>&nbsp;&nbsp;<br><br></body></html>',
expectedText: ' \n\n'
wantHTML: '<!DOCTYPE HTML><html><body>&nbsp;&nbsp;<br><br></body></html>',
wantText: ' \n\n',
},
multipleNonBreakingSpaceBetweenWords: {
'multipleNonBreakingSpaceBetweenWords': {
description: 'A normal space is always inserted before a word',
input: '<html><body>&nbsp;&nbsp;word1&nbsp;&nbsp;word2&nbsp;&nbsp;&nbsp;word3<br></body></html>',
expectedHTML: '<!DOCTYPE HTML><html><body>&nbsp; word1&nbsp; word2&nbsp;&nbsp; word3<br><br></body></html>',
expectedText: ' word1 word2 word3\n\n'
wantHTML: '<!DOCTYPE HTML><html><body>&nbsp; word1&nbsp; word2&nbsp;&nbsp; word3<br><br></body></html>',
wantText: ' word1 word2 word3\n\n',
},
nonBreakingSpacePreceededBySpaceBetweenWords: {
'nonBreakingSpacePreceededBySpaceBetweenWords': {
description: 'A non-breaking space preceeded by a normal space',
input: '<html><body> &nbsp;word1 &nbsp;word2 &nbsp;word3<br></body></html>',
expectedHTML: '<!DOCTYPE HTML><html><body>&nbsp;word1&nbsp; word2&nbsp; word3<br><br></body></html>',
expectedText: ' word1 word2 word3\n\n'
wantHTML: '<!DOCTYPE HTML><html><body>&nbsp;word1&nbsp; word2&nbsp; word3<br><br></body></html>',
wantText: ' word1 word2 word3\n\n',
},
nonBreakingSpaceFollowededBySpaceBetweenWords: {
'nonBreakingSpaceFollowededBySpaceBetweenWords': {
description: 'A non-breaking space followed by a normal space',
input: '<html><body>&nbsp; word1&nbsp; word2&nbsp; word3<br></body></html>',
expectedHTML: '<!DOCTYPE HTML><html><body>&nbsp; word1&nbsp; word2&nbsp; word3<br><br></body></html>',
expectedText: ' word1 word2 word3\n\n'
wantHTML: '<!DOCTYPE HTML><html><body>&nbsp; word1&nbsp; word2&nbsp; word3<br><br></body></html>',
wantText: ' word1 word2 word3\n\n',
},
spacesAfterNewline: {
'spacesAfterNewline': {
description: 'Collapse spaces that follow a newline',
input:'<!doctype html><html><body>something<br> something<br></body></html>',
expectedHTML: '<!DOCTYPE HTML><html><body>something<br>something<br><br></body></html>',
expectedText: 'something\nsomething\n\n'
input: '<!doctype html><html><body>something<br> something<br></body></html>',
wantHTML: '<!DOCTYPE HTML><html><body>something<br>something<br><br></body></html>',
wantText: 'something\nsomething\n\n',
},
spacesAfterNewlineP: {
'spacesAfterNewlineP': {
description: 'Collapse spaces that follow a paragraph',
input:'<!doctype html><html><body>something<p></p> something<br></body></html>',
expectedHTML: '<!DOCTYPE HTML><html><body>something<br><br>something<br><br></body></html>',
expectedText: 'something\n\nsomething\n\n'
input: '<!doctype html><html><body>something<p></p> something<br></body></html>',
wantHTML: '<!DOCTYPE HTML><html><body>something<br><br>something<br><br></body></html>',
wantText: 'something\n\nsomething\n\n',
},
spacesAtEndOfLine: {
'spacesAtEndOfLine': {
description: 'Collapse spaces that preceed/follow a newline',
input:'<html><body>something <br> something<br></body></html>',
expectedHTML: '<!DOCTYPE HTML><html><body>something<br>something<br><br></body></html>',
expectedText: 'something\nsomething\n\n'
input: '<html><body>something <br> something<br></body></html>',
wantHTML: '<!DOCTYPE HTML><html><body>something<br>something<br><br></body></html>',
wantText: 'something\nsomething\n\n',
},
spacesAtEndOfLineP: {
'spacesAtEndOfLineP': {
description: 'Collapse spaces that preceed/follow a paragraph',
input:'<html><body>something <p></p> something<br></body></html>',
expectedHTML: '<!DOCTYPE HTML><html><body>something<br><br>something<br><br></body></html>',
expectedText: 'something\n\nsomething\n\n'
input: '<html><body>something <p></p> something<br></body></html>',
wantHTML: '<!DOCTYPE HTML><html><body>something<br><br>something<br><br></body></html>',
wantText: 'something\n\nsomething\n\n',
},
nonBreakingSpacesAfterNewlines: {
'nonBreakingSpacesAfterNewlines': {
description: 'Don\'t collapse non-breaking spaces that follow a newline',
input:'<html><body>something<br>&nbsp;&nbsp;&nbsp;something<br></body></html>',
expectedHTML: '<!DOCTYPE HTML><html><body>something<br>&nbsp;&nbsp; something<br><br></body></html>',
expectedText: 'something\n something\n\n'
input: '<html><body>something<br>&nbsp;&nbsp;&nbsp;something<br></body></html>',
wantHTML: '<!DOCTYPE HTML><html><body>something<br>&nbsp;&nbsp; something<br><br></body></html>',
wantText: 'something\n something\n\n',
},
nonBreakingSpacesAfterNewlinesP: {
'nonBreakingSpacesAfterNewlinesP': {
description: 'Don\'t collapse non-breaking spaces that follow a paragraph',
input:'<html><body>something<p></p>&nbsp;&nbsp;&nbsp;something<br></body></html>',
expectedHTML: '<!DOCTYPE HTML><html><body>something<br><br>&nbsp;&nbsp; something<br><br></body></html>',
expectedText: 'something\n\n something\n\n'
input: '<html><body>something<p></p>&nbsp;&nbsp;&nbsp;something<br></body></html>',
wantHTML: '<!DOCTYPE HTML><html><body>something<br><br>&nbsp;&nbsp; something<br><br></body></html>',
wantText: 'something\n\n something\n\n',
},
collapseSpacesInsideElements: {
'collapseSpacesInsideElements': {
description: 'Preserve only one space when multiple are present',
input: '<html><body>Need <span> more </span> space<i> s </i> !<br></body></html>',
expectedHTML: '<!DOCTYPE HTML><html><body>Need more space<em> s </em>!<br><br></body></html>',
expectedText: 'Need more space s !\n\n'
wantHTML: '<!DOCTYPE HTML><html><body>Need more space<em> s </em>!<br><br></body></html>',
wantText: 'Need more space s !\n\n',
},
collapseSpacesAcrossNewlines: {
'collapseSpacesAcrossNewlines': {
description: 'Newlines and multiple spaces across newlines should be collapsed',
input: `
<html><body>Need
@ -158,30 +155,30 @@ const testImports = {
space
<i> s </i>
!<br></body></html>`,
expectedHTML: '<!DOCTYPE HTML><html><body>Need more space <em>s </em>!<br><br></body></html>',
expectedText: 'Need more space s !\n\n'
wantHTML: '<!DOCTYPE HTML><html><body>Need more space <em>s </em>!<br><br></body></html>',
wantText: 'Need more space s !\n\n',
},
multipleNewLinesAtBeginning: {
'multipleNewLinesAtBeginning': {
description: 'Multiple new lines and paragraphs at the beginning should be preserved',
input: '<html><body><br><br><p></p><p></p>first line<br><br>second line<br></body></html>',
expectedHTML: '<!DOCTYPE HTML><html><body><br><br><br><br>first line<br><br>second line<br><br></body></html>',
expectedText: '\n\n\n\nfirst line\n\nsecond line\n\n'
wantHTML: '<!DOCTYPE HTML><html><body><br><br><br><br>first line<br><br>second line<br><br></body></html>',
wantText: '\n\n\n\nfirst line\n\nsecond line\n\n',
},
multiLineParagraph:{
description: "A paragraph with multiple lines should not loose spaces when lines are combined",
input:`<html><body>
'multiLineParagraph': {
description: 'A paragraph with multiple lines should not loose spaces when lines are combined',
input: `<html><body>
<p>
а б в г ґ д е є ж з и і ї й к л м н о
п р с т у ф х ц ч ш щ ю я ь
</p>
</body></html>`,
expectedHTML: '<!DOCTYPE HTML><html><body>&#1072; &#1073; &#1074; &#1075; &#1169; &#1076; &#1077; &#1108; &#1078; &#1079; &#1080; &#1110; &#1111; &#1081; &#1082; &#1083; &#1084; &#1085; &#1086; &#1087; &#1088; &#1089; &#1090; &#1091; &#1092; &#1093; &#1094; &#1095; &#1096; &#1097; &#1102; &#1103; &#1100;<br><br></body></html>',
expectedText: 'а б в г ґ д е є ж з и і ї й к л м н о п р с т у ф х ц ч ш щ ю я ь\n\n'
wantHTML: '<!DOCTYPE HTML><html><body>&#1072; &#1073; &#1074; &#1075; &#1169; &#1076; &#1077; &#1108; &#1078; &#1079; &#1080; &#1110; &#1111; &#1081; &#1082; &#1083; &#1084; &#1085; &#1086; &#1087; &#1088; &#1089; &#1090; &#1091; &#1092; &#1093; &#1094; &#1095; &#1096; &#1097; &#1102; &#1103; &#1100;<br><br></body></html>',
wantText: 'а б в г ґ д е є ж з и і ї й к л м н о п р с т у ф х ц ч ш щ ю я ь\n\n',
},
multiLineParagraphWithPre:{
//XXX why is there &nbsp; before "in"?
description: "lines in preformatted text should be kept intact",
input:`<html><body>
'multiLineParagraphWithPre': {
// XXX why is there &nbsp; before "in"?
description: 'lines in preformatted text should be kept intact',
input: `<html><body>
<p>
а б в г ґ д е є ж з и і ї й к л м н о<pre>multiple
lines
@ -190,55 +187,55 @@ const testImports = {
</pre></p><p>п р с т у ф х ц ч ш щ ю я
ь</p>
</body></html>`,
expectedHTML: '<!DOCTYPE HTML><html><body>&#1072; &#1073; &#1074; &#1075; &#1169; &#1076; &#1077; &#1108; &#1078; &#1079; &#1080; &#1110; &#1111; &#1081; &#1082; &#1083; &#1084; &#1085; &#1086;<br>multiple<br>&nbsp;&nbsp; lines<br>&nbsp;in<br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; pre<br><br>&#1087; &#1088; &#1089; &#1090; &#1091; &#1092; &#1093; &#1094; &#1095; &#1096; &#1097; &#1102; &#1103; &#1100;<br><br></body></html>',
expectedText: 'а б в г ґ д е є ж з и і ї й к л м н о\nmultiple\n lines\n in\n pre\n\nп р с т у ф х ц ч ш щ ю я ь\n\n'
wantHTML: '<!DOCTYPE HTML><html><body>&#1072; &#1073; &#1074; &#1075; &#1169; &#1076; &#1077; &#1108; &#1078; &#1079; &#1080; &#1110; &#1111; &#1081; &#1082; &#1083; &#1084; &#1085; &#1086;<br>multiple<br>&nbsp;&nbsp; lines<br>&nbsp;in<br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; pre<br><br>&#1087; &#1088; &#1089; &#1090; &#1091; &#1092; &#1093; &#1094; &#1095; &#1096; &#1097; &#1102; &#1103; &#1100;<br><br></body></html>',
wantText: 'а б в г ґ д е є ж з и і ї й к л м н о\nmultiple\n lines\n in\n pre\n\nп р с т у ф х ц ч ш щ ю я ь\n\n',
},
preIntroducesASpace: {
description: "pre should be on a new line not preceeded by a space",
input:`<html><body><p>
'preIntroducesASpace': {
description: 'pre should be on a new line not preceeded by a space',
input: `<html><body><p>
1
<pre>preline
</pre></p></body></html>`,
expectedHTML: '<!DOCTYPE HTML><html><body>1<br>preline<br><br><br></body></html>',
expectedText: '1\npreline\n\n\n'
wantHTML: '<!DOCTYPE HTML><html><body>1<br>preline<br><br><br></body></html>',
wantText: '1\npreline\n\n\n',
},
dontDeleteSpaceInsideElements: {
'dontDeleteSpaceInsideElements': {
description: 'Preserve spaces inside elements',
input: '<html><body>Need<span> more </span>space<i> s </i>!<br></body></html>',
expectedHTML: '<!DOCTYPE HTML><html><body>Need more space<em> s </em>!<br><br></body></html>',
expectedText: 'Need more space s !\n\n'
wantHTML: '<!DOCTYPE HTML><html><body>Need more space<em> s </em>!<br><br></body></html>',
wantText: 'Need more space s !\n\n',
},
dontDeleteSpaceOutsideElements: {
'dontDeleteSpaceOutsideElements': {
description: 'Preserve spaces outside elements',
input: '<html><body>Need <span>more</span> space <i>s</i> !<br></body></html>',
expectedHTML: '<!DOCTYPE HTML><html><body>Need more space <em>s</em> !<br><br></body></html>',
expectedText: 'Need more space s !\n\n'
wantHTML: '<!DOCTYPE HTML><html><body>Need more space <em>s</em> !<br><br></body></html>',
wantText: 'Need more space s !\n\n',
},
dontDeleteSpaceAtEndOfElement: {
'dontDeleteSpaceAtEndOfElement': {
description: 'Preserve spaces at the end of an element',
input: '<html><body>Need <span>more </span>space <i>s </i>!<br></body></html>',
expectedHTML: '<!DOCTYPE HTML><html><body>Need more space <em>s </em>!<br><br></body></html>',
expectedText: 'Need more space s !\n\n'
wantHTML: '<!DOCTYPE HTML><html><body>Need more space <em>s </em>!<br><br></body></html>',
wantText: 'Need more space s !\n\n',
},
dontDeleteSpaceAtBeginOfElements: {
'dontDeleteSpaceAtBeginOfElements': {
description: 'Preserve spaces at the start of an element',
input: '<html><body>Need<span> more</span> space<i> s</i> !<br></body></html>',
expectedHTML: '<!DOCTYPE HTML><html><body>Need more space<em> s</em> !<br><br></body></html>',
expectedText: 'Need more space s !\n\n'
wantHTML: '<!DOCTYPE HTML><html><body>Need more space<em> s</em> !<br><br></body></html>',
wantText: 'Need more space s !\n\n',
},
};
describe(__filename, function () {
Object.keys(testImports).forEach((testName) => {
const testPadId = makeid();
const test = testImports[testName];
if (test.disabled) {
return xit(`DISABLED: ${testName}`, function (done) {
done();
});
}
describe(`createPad ${testName}`, function () {
it('creates a new Pad', function (done) {
describe(testName, function () {
const testPadId = makeid();
const test = testImports[testName];
if (test.disabled) {
return xit(`DISABLED: ${testName}`, function (done) {
done();
});
}
it('createPad', function (done) {
api.get(`${endPoint('createPad')}&padID=${testPadId}`)
.expect((res) => {
if (res.body.code !== 0) throw new Error('Unable to create new Pad');
@ -246,10 +243,8 @@ describe(__filename, function () {
.expect('Content-Type', /json/)
.expect(200, done);
});
});
describe(`setHTML ${testName}`, function () {
it('Sets the HTML', function (done) {
it('setHTML', function (done) {
api.get(`${endPoint('setHTML')}&padID=${testPadId}&html=${encodeURIComponent(test.input)}`)
.expect((res) => {
if (res.body.code !== 0) throw new Error(`Error:${testName}`);
@ -257,23 +252,21 @@ describe(__filename, function () {
.expect('Content-Type', /json/)
.expect(200, done);
});
});
describe(`getHTML ${testName}`, function () {
it('Gets back the HTML of a Pad', function (done) {
it('getHTML', function (done) {
api.get(`${endPoint('getHTML')}&padID=${testPadId}`)
.expect((res) => {
const receivedHtml = res.body.data.html;
if (receivedHtml !== test.expectedHTML) {
const gotHtml = res.body.data.html;
if (gotHtml !== test.wantHTML) {
throw new Error(`HTML received from export is not the one we were expecting.
Test Name:
${testName}
Received:
${JSON.stringify(receivedHtml)}
Got:
${JSON.stringify(gotHtml)}
Expected:
${JSON.stringify(test.expectedHTML)}
Want:
${JSON.stringify(test.wantHTML)}
Which is a different version of the originally imported one:
${test.input}`);
@ -282,23 +275,21 @@ describe(__filename, function () {
.expect('Content-Type', /json/)
.expect(200, done);
});
});
describe(`getText ${testName}`, function () {
it('Gets back the Text of a Pad', function (done) {
it('getText', function (done) {
api.get(`${endPoint('getText')}&padID=${testPadId}`)
.expect((res) => {
const receivedText = res.body.data.text;
if (receivedText !== test.expectedText) {
const gotText = res.body.data.text;
if (gotText !== test.wantText) {
throw new Error(`Text received from export is not the one we were expecting.
Test Name:
${testName}
Received:
${JSON.stringify(receivedText)}
Got:
${JSON.stringify(gotText)}
Expected:
${JSON.stringify(test.expectedText)}
Want:
${JSON.stringify(test.wantText)}
Which is a different version of the originally imported one:
${test.input}`);
@ -315,7 +306,7 @@ describe(__filename, function () {
function endPoint(point, version) {
version = version || apiVersion;
return `/api/${version}/${point}?apikey=${apiKey}`;
};
}
function makeid() {
let text = '';

View file

@ -1,34 +1,34 @@
'use strict';
/* eslint-disable max-len */
/*
* While importexport tests target the `setHTML` API endpoint, which is nearly identical to what happens
* when a user manually imports a document via the UI, the contentcollector tests here don't use rehype to process
* the document. Rehype removes spaces and newĺines were applicable, so the expected results here can
* differ from importexport.js.
* While importexport tests target the `setHTML` API endpoint, which is nearly identical to what
* happens when a user manually imports a document via the UI, the contentcollector tests here don't
* use rehype to process the document. Rehype removes spaces and newĺines were applicable, so the
* expected results here can differ from importexport.js.
*
* If you add tests here, please also add them to importexport.js
*/
const contentcollector = require('../../../src/static/js/contentcollector');
const AttributePool = require('../../../src/static/js/AttributePool');
const cheerio = require('../../../src/node_modules/cheerio');
const AttributePool = require('ep_etherpad-lite/static/js/AttributePool');
const assert = require('assert').strict;
const cheerio = require('ep_etherpad-lite/node_modules/cheerio');
const contentcollector = require('ep_etherpad-lite/static/js/contentcollector');
const tests = {
nestedLi: {
description: 'Complex nested Li',
html: '<!doctype html><html><body><ol><li>one</li><li><ol><li>1.1</li></ol></li><li>two</li></ol></body></html>',
expectedLineAttribs: [
wantLineAttribs: [
'*0*1*2*3+1+3', '*0*4*2*5+1+3', '*0*1*2*5+1+3',
],
expectedText: [
wantText: [
'*one', '*1.1', '*two',
],
},
complexNest: {
description: 'Complex list of different types',
html: '<!doctype html><html><body><ul class="bullet"><li>one</li><li>two</li><li>0</li><li>1</li><li>2<ul class="bullet"><li>3</li><li>4</li></ul></li></ul><ol class="number"><li>item<ol class="number"><li>item1</li><li>item2</li></ol></li></ol></body></html>',
expectedLineAttribs: [
wantLineAttribs: [
'*0*1*2+1+3',
'*0*1*2+1+3',
'*0*1*2+1+1',
@ -40,7 +40,7 @@ const tests = {
'*0*6*2*7+1+5',
'*0*6*2*7+1+5',
],
expectedText: [
wantText: [
'*one',
'*two',
'*0',
@ -56,148 +56,142 @@ const tests = {
ul: {
description: 'Tests if uls properly get attributes',
html: '<html><body><ul><li>a</li><li>b</li></ul><div>div</div><p>foo</p></body></html>',
expectedLineAttribs: ['*0*1*2+1+1', '*0*1*2+1+1', '+3', '+3'],
expectedText: ['*a', '*b', 'div', 'foo'],
wantLineAttribs: ['*0*1*2+1+1', '*0*1*2+1+1', '+3', '+3'],
wantText: ['*a', '*b', 'div', 'foo'],
},
ulIndented: {
description: 'Tests if indented uls properly get attributes',
html: '<html><body><ul><li>a</li><ul><li>b</li></ul><li>a</li></ul><p>foo</p></body></html>',
expectedLineAttribs: ['*0*1*2+1+1', '*0*3*2+1+1', '*0*1*2+1+1', '+3'],
expectedText: ['*a', '*b', '*a', 'foo'],
wantLineAttribs: ['*0*1*2+1+1', '*0*3*2+1+1', '*0*1*2+1+1', '+3'],
wantText: ['*a', '*b', '*a', 'foo'],
},
ol: {
description: 'Tests if ols properly get line numbers when in a normal OL',
html: '<html><body><ol><li>a</li><li>b</li><li>c</li></ol><p>test</p></body></html>',
expectedLineAttribs: ['*0*1*2*3+1+1', '*0*1*2*3+1+1', '*0*1*2*3+1+1', '+4'],
expectedText: ['*a', '*b', '*c', 'test'],
wantLineAttribs: ['*0*1*2*3+1+1', '*0*1*2*3+1+1', '*0*1*2*3+1+1', '+4'],
wantText: ['*a', '*b', '*c', 'test'],
noteToSelf: 'Ensure empty P does not induce line attribute marker, wont this break the editor?',
},
lineDoBreakInOl: {
description: 'A single completely empty line break within an ol should reset count if OL is closed off..',
html: '<html><body><ol><li>should be 1</li></ol><p>hello</p><ol><li>should be 1</li><li>should be 2</li></ol><p></p></body></html>',
expectedLineAttribs: ['*0*1*2*3+1+b', '+5', '*0*1*2*4+1+b', '*0*1*2*4+1+b', ''],
expectedText: ['*should be 1', 'hello', '*should be 1', '*should be 2', ''],
wantLineAttribs: ['*0*1*2*3+1+b', '+5', '*0*1*2*4+1+b', '*0*1*2*4+1+b', ''],
wantText: ['*should be 1', 'hello', '*should be 1', '*should be 2', ''],
noteToSelf: "Shouldn't include attribute marker in the <p> line",
},
bulletListInOL: {
description: 'A bullet within an OL should not change numbering..',
html: '<html><body><ol><li>should be 1</li><ul><li>should be a bullet</li></ul><li>should be 2</li></ol><p></p></body></html>',
expectedLineAttribs: ['*0*1*2*3+1+b', '*0*4*2*3+1+i', '*0*1*2*5+1+b', ''],
expectedText: ['*should be 1', '*should be a bullet', '*should be 2', ''],
},
testP: {
description: 'A single <p></p> should create a new line',
html: '<html><body><p></p><p></p></body></html>',
expectedLineAttribs: ['', ''],
expectedText: ['', ''],
wantLineAttribs: ['', ''],
wantText: ['', ''],
noteToSelf: '<p></p>should create a line break but not break numbering',
},
nestedOl: {
description: 'Tests if ols properly get line numbers when in a normal OL',
html: '<html><body>a<ol><li>b<ol><li>c</li></ol></ol>notlist<p>foo</p></body></html>',
expectedLineAttribs: ['+1', '*0*1*2*3+1+1', '*0*4*2*5+1+1', '+7', '+3'],
expectedText: ['a', '*b', '*c', 'notlist', 'foo'],
wantLineAttribs: ['+1', '*0*1*2*3+1+1', '*0*4*2*5+1+1', '+7', '+3'],
wantText: ['a', '*b', '*c', 'notlist', 'foo'],
noteToSelf: 'Ensure empty P does not induce line attribute marker, wont this break the editor?',
},
nestedOl2: {
description: 'First item being an UL then subsequent being OL will fail',
html: '<html><body><ul><li>a<ol><li>b</li><li>c</li></ol></li></ul></body></html>',
expectedLineAttribs: ['+1', '*0*1*2*3+1+1', '*0*4*2*5+1+1'],
expectedText: ['a', '*b', '*c'],
wantLineAttribs: ['+1', '*0*1*2*3+1+1', '*0*4*2*5+1+1'],
wantText: ['a', '*b', '*c'],
noteToSelf: 'Ensure empty P does not induce line attribute marker, wont this break the editor?',
disabled: true,
},
lineDontBreakOL: {
description: 'A single completely empty line break within an ol should NOT reset count',
html: '<html><body><ol><li>should be 1</li><p></p><li>should be 2</li><li>should be 3</li></ol><p></p></body></html>',
expectedLineAttribs: [],
expectedText: ['*should be 1', '*should be 2', '*should be 3'],
wantLineAttribs: [],
wantText: ['*should be 1', '*should be 2', '*should be 3'],
noteToSelf: "<p></p>should create a line break but not break numbering -- This is what I can't get working!",
disabled: true,
},
ignoreAnyTagsOutsideBody: {
description: 'Content outside body should be ignored',
html: '<html><head><title>title</title><style></style></head><body>empty<br></body></html>',
expectedLineAttribs: ['+5'],
expectedText: ['empty'],
wantLineAttribs: ['+5'],
wantText: ['empty'],
},
lineWithMultipleSpaces: {
description: 'Multiple spaces should be preserved',
html: '<html><body>Text with more than one space.<br></body></html>',
expectedLineAttribs: [ '+10' ],
expectedText: ['Text with more than one space.']
wantLineAttribs: ['+10'],
wantText: ['Text with more than one space.'],
},
lineWithMultipleNonBreakingAndNormalSpaces: {
description: 'non-breaking and normal space should be preserved',
html: '<html><body>Text&nbsp;with&nbsp; more&nbsp;&nbsp;&nbsp;than &nbsp;one space.<br></body></html>',
expectedLineAttribs: [ '+10' ],
expectedText: ['Text with more than one space.']
wantLineAttribs: ['+10'],
wantText: ['Text with more than one space.'],
},
multiplenbsp: {
description: 'Multiple nbsp should be preserved',
html: '<html><body>&nbsp;&nbsp;<br></body></html>',
expectedLineAttribs: [ '+2' ],
expectedText: [' ']
wantLineAttribs: ['+2'],
wantText: [' '],
},
multipleNonBreakingSpaceBetweenWords: {
description: 'Multiple nbsp between words ',
html: '<html><body>&nbsp;&nbsp;word1&nbsp;&nbsp;word2&nbsp;&nbsp;&nbsp;word3<br></body></html>',
expectedLineAttribs: [ '+m' ],
expectedText: [' word1 word2 word3']
wantLineAttribs: ['+m'],
wantText: [' word1 word2 word3'],
},
nonBreakingSpacePreceededBySpaceBetweenWords: {
description: 'A non-breaking space preceeded by a normal space',
html: '<html><body> &nbsp;word1 &nbsp;word2 &nbsp;word3<br></body></html>',
expectedLineAttribs: [ '+l' ],
expectedText: [' word1 word2 word3']
wantLineAttribs: ['+l'],
wantText: [' word1 word2 word3'],
},
nonBreakingSpaceFollowededBySpaceBetweenWords: {
description: 'A non-breaking space followed by a normal space',
html: '<html><body>&nbsp; word1&nbsp; word2&nbsp; word3<br></body></html>',
expectedLineAttribs: [ '+l' ],
expectedText: [' word1 word2 word3']
wantLineAttribs: ['+l'],
wantText: [' word1 word2 word3'],
},
spacesAfterNewline: {
description: 'Don\'t collapse spaces that follow a newline',
html:'<!doctype html><html><body>something<br> something<br></body></html>',
expectedLineAttribs: ['+9', '+m'],
expectedText: ['something', ' something']
html: '<!doctype html><html><body>something<br> something<br></body></html>',
wantLineAttribs: ['+9', '+m'],
wantText: ['something', ' something'],
},
spacesAfterNewlineP: {
description: 'Don\'t collapse spaces that follow a empty paragraph',
html:'<!doctype html><html><body>something<p></p> something<br></body></html>',
expectedLineAttribs: ['+9', '', '+m'],
expectedText: ['something', '', ' something']
html: '<!doctype html><html><body>something<p></p> something<br></body></html>',
wantLineAttribs: ['+9', '', '+m'],
wantText: ['something', '', ' something'],
},
spacesAtEndOfLine: {
description: 'Don\'t collapse spaces that preceed/follow a newline',
html:'<html><body>something <br> something<br></body></html>',
expectedLineAttribs: ['+l', '+m'],
expectedText: ['something ', ' something']
html: '<html><body>something <br> something<br></body></html>',
wantLineAttribs: ['+l', '+m'],
wantText: ['something ', ' something'],
},
spacesAtEndOfLineP: {
description: 'Don\'t collapse spaces that preceed/follow a empty paragraph',
html:'<html><body>something <p></p> something<br></body></html>',
expectedLineAttribs: ['+l', '', '+m'],
expectedText: ['something ', '', ' something']
html: '<html><body>something <p></p> something<br></body></html>',
wantLineAttribs: ['+l', '', '+m'],
wantText: ['something ', '', ' something'],
},
nonBreakingSpacesAfterNewlines: {
description: 'Don\'t collapse non-breaking spaces that follow a newline',
html:'<html><body>something<br>&nbsp;&nbsp;&nbsp;something<br></body></html>',
expectedLineAttribs: ['+9', '+c'],
expectedText: ['something', ' something']
html: '<html><body>something<br>&nbsp;&nbsp;&nbsp;something<br></body></html>',
wantLineAttribs: ['+9', '+c'],
wantText: ['something', ' something'],
},
nonBreakingSpacesAfterNewlinesP: {
description: 'Don\'t collapse non-breaking spaces that follow a paragraph',
html:'<html><body>something<p></p>&nbsp;&nbsp;&nbsp;something<br></body></html>',
expectedLineAttribs: ['+9', '', '+c'],
expectedText: ['something', '', ' something']
html: '<html><body>something<p></p>&nbsp;&nbsp;&nbsp;something<br></body></html>',
wantLineAttribs: ['+9', '', '+c'],
wantText: ['something', '', ' something'],
},
preserveSpacesInsideElements: {
description: 'Preserve all spaces when multiple are present',
html: '<html><body>Need <span> more </span> space<i> s </i> !<br></body></html>',
expectedLineAttribs: ['+h*0+4+2'],
expectedText: ['Need more space s !'],
wantLineAttribs: ['+h*0+4+2'],
wantText: ['Need more space s !'],
},
preserveSpacesAcrossNewlines: {
description: 'Newlines and multiple spaces across newlines should be preserved',
@ -207,69 +201,76 @@ const tests = {
space
<i> s </i>
!<br></body></html>`,
expectedLineAttribs: [ '+19*0+4+b' ],
expectedText: [ 'Need more space s !' ]
wantLineAttribs: ['+19*0+4+b'],
wantText: ['Need more space s !'],
},
multipleNewLinesAtBeginning: {
description: 'Multiple new lines at the beginning should be preserved',
html: '<html><body><br><br><p></p><p></p>first line<br><br>second line<br></body></html>',
expectedLineAttribs: ['', '', '', '', '+a', '', '+b'],
expectedText: [ '', '', '', '', 'first line', '', 'second line']
wantLineAttribs: ['', '', '', '', '+a', '', '+b'],
wantText: ['', '', '', '', 'first line', '', 'second line'],
},
multiLineParagraph:{
description: "A paragraph with multiple lines should not loose spaces when lines are combined",
html:`<html><body><p>
multiLineParagraph: {
description: 'A paragraph with multiple lines should not loose spaces when lines are combined',
html: `<html><body><p>
а б в г ґ д е є ж з и і ї й к л м н о
п р с т у ф х ц ч ш щ ю я ь</p>
</body></html>`,
expectedLineAttribs: [ '+1t' ],
expectedText: ["а б в г ґ д е є ж з и і ї й к л м н о п р с т у ф х ц ч ш щ ю я ь"]
wantLineAttribs: ['+1t'],
wantText: ['а б в г ґ д е є ж з и і ї й к л м н о п р с т у ф х ц ч ш щ ю я ь'],
},
multiLineParagraphWithPre:{
description: "lines in preformatted text should be kept intact",
html:`<html><body><p>
а б в г ґ д е є ж з и і ї й к л м н о<pre>multiple
multiLineParagraphWithPre: {
description: 'lines in preformatted text should be kept intact',
html: `<html><body><p>
а б в г ґ д е є ж з и і ї й к л м н о</p><pre>multiple
lines
in
pre
</pre></p><p>п р с т у ф х ц ч ш щ ю я
</pre><p>п р с т у ф х ц ч ш щ ю я
ь</p>
</body></html>`,
expectedLineAttribs: [ '+11', '+8', '+5', '+2', '+3', '+r' ],
expectedText: ['а б в г ґ д е є ж з и і ї й к л м н о', 'multiple', 'lines', 'in', 'pre', 'п р с т у ф х ц ч ш щ ю я ь']
wantLineAttribs: ['+11', '+8', '+5', '+2', '+3', '+r'],
wantText: [
'а б в г ґ д е є ж з и і ї й к л м н о',
'multiple',
'lines',
'in',
'pre',
'п р с т у ф х ц ч ш щ ю я ь',
],
},
preIntroducesASpace: {
description: "pre should be on a new line not preceeded by a space",
html:`<html><body><p>
description: 'pre should be on a new line not preceeded by a space',
html: `<html><body><p>
1
<pre>preline
</pre></p></body></html>`,
expectedLineAttribs: [ '+6', '+7' ],
expectedText: [' 1 ', 'preline']
</p><pre>preline
</pre></body></html>`,
wantLineAttribs: ['+6', '+7'],
wantText: [' 1 ', 'preline'],
},
dontDeleteSpaceInsideElements: {
description: 'Preserve spaces on the beginning and end of a element',
html: '<html><body>Need<span> more </span>space<i> s </i>!<br></body></html>',
expectedLineAttribs: ['+f*0+3+1'],
expectedText: ['Need more space s !']
wantLineAttribs: ['+f*0+3+1'],
wantText: ['Need more space s !'],
},
dontDeleteSpaceOutsideElements: {
description: 'Preserve spaces outside elements',
html: '<html><body>Need <span>more</span> space <i>s</i> !<br></body></html>',
expectedLineAttribs: ['+g*0+1+2'],
expectedText: ['Need more space s !']
wantLineAttribs: ['+g*0+1+2'],
wantText: ['Need more space s !'],
},
dontDeleteSpaceAtEndOfElement: {
description: 'Preserve spaces at the end of an element',
html: '<html><body>Need <span>more </span>space <i>s </i>!<br></body></html>',
expectedLineAttribs: ['+g*0+2+1'],
expectedText: ['Need more space s !']
wantLineAttribs: ['+g*0+2+1'],
wantText: ['Need more space s !'],
},
dontDeleteSpaceAtBeginOfElements: {
description: 'Preserve spaces at the start of an element',
html: '<html><body>Need<span> more</span> space<i> s</i> !<br></body></html>',
expectedLineAttribs: ['+f*0+2+2'],
expectedText: ['Need more space s !']
wantLineAttribs: ['+f*0+2+2'],
wantText: ['Need more space s !'],
},
};
@ -283,7 +284,7 @@ describe(__filename, function () {
});
}
it(testObj.description, function (done) {
it(testObj.description, async function () {
const $ = cheerio.load(testObj.html); // Load HTML into Cheerio
const doc = $('body')[0]; // Creates a dom-like representation of HTML
// Create an empty attribute pool
@ -293,29 +294,13 @@ describe(__filename, function () {
const cc = contentcollector.makeContentCollector(true, null, apool);
cc.collectContent(doc);
const result = cc.finish();
const recievedAttributes = result.lineAttribs;
const expectedAttributes = testObj.expectedLineAttribs;
const recievedText = new Array(result.lines);
const expectedText = testObj.expectedText;
const gotAttributes = result.lineAttribs;
const wantAttributes = testObj.wantLineAttribs;
const gotText = new Array(result.lines);
const wantText = testObj.wantText;
// Check recieved text matches the expected text
if (arraysEqual(recievedText[0], expectedText)) {
// console.log("PASS: Recieved Text did match Expected Text\nRecieved:", recievedText[0], "\nExpected:", testObj.expectedText)
} else {
console.error('FAIL: Recieved Text did not match Expected Text\nRecieved:', recievedText[0], '\nExpected:', testObj.expectedText);
throw new Error();
}
// Check recieved attributes matches the expected attributes
if (arraysEqual(recievedAttributes, expectedAttributes)) {
// console.log("PASS: Recieved Attributes matched Expected Attributes");
done();
} else {
console.error('FAIL', test, testObj.description);
console.error('FAIL: Recieved Attributes did not match Expected Attributes\nRecieved: ', recievedAttributes, '\nExpected: ', expectedAttributes);
console.error('FAILING HTML', testObj.html);
throw new Error();
}
assert.deepEqual(gotText[0], wantText);
assert.deepEqual(gotAttributes, wantAttributes);
});
});
}
@ -325,7 +310,7 @@ describe(__filename, function () {
function arraysEqual(a, b) {
if (a === b) return true;
if (a == null || b == null) return false;
if (a.length != b.length) return false;
if (a.length !== b.length) return false;
// If you don't care about the order of the elements inside
// the array, you should sort both arrays here.