etherpad-lite/src/tests/backend/specs/contentcollector.js

391 lines
13 KiB
JavaScript
Raw Normal View History

2020-12-18 06:40:33 +01:00
'use strict';
2020-12-20 07:18:19 +01:00
/*
* While importexport tests target the `setHTML` API endpoint, which is nearly identical to what
* happens when a user manually imports a document via the UI, the contentcollector tests here don't
* use rehype to process the document. Rehype removes spaces and newlines where applicable, so the
* expected results here can differ from importexport.js.
2020-12-20 07:18:19 +01:00
*
* If you add tests here, please also add them to importexport.js
*/
2020-12-18 06:40:33 +01:00
2021-02-03 12:08:43 +00:00
const AttributePool = require('../../../static/js/AttributePool');
2021-11-19 05:16:11 -05:00
const Changeset = require('../../../static/js/Changeset');
const assert = require('assert').strict;
const attributes = require('../../../static/js/attributes');
2021-02-03 12:08:43 +00:00
const contentcollector = require('../../../static/js/contentcollector');
const jsdom = require('jsdom');
2021-11-19 05:16:11 -05:00
// Attribute table shared by every test case. Each test seeds a fresh attribute pool with these
// entries, in this order, before collecting content, and every `wantAlines` value may reference
// only entries listed here. Keeping the order fixed keeps the attribute numbers in the expected
// alines stable even if the pool insertion order of the code under test changes.
//
// Append new entries at the end; reordering or removing entries shifts the positions below.
const knownAttribs = [
  ['insertorder', 'first'], // position 0
  ['italic', 'true'], // position 1
  ['list', 'bullet1'], // position 2
  ['list', 'bullet2'], // position 3
  ['list', 'number1'], // position 4
  ['list', 'number2'], // position 5
  ['lmkr', '1'], // position 6
  ['start', '1'], // position 7
  ['start', '2'], // position 8
];
// Table of content-collector fixtures. The suite at the bottom of this file creates one nested
// `describe` per entry and reads these fields:
//   - description: name of the nested `describe`.
//   - html:        document source parsed with jsdom; its `<body>` is fed to the collector.
//   - wantAlines:  expected `lineAttribs` of the collector result (one entry per line). Attribute
//                  numbers used here must refer to positions in `knownAttribs`.
//   - wantText:    expected `lines` of the collector result.
//   - disabled:    when truthy, the whole case is skipped.
//   - noteToSelf:  free-form author note; not read by the test runner.
//
// NOTE(review): stray VCS timestamp lines that had been interleaved with this literal (including
// inside three multi-line `html` template literals) were removed; they made the file unparseable.
// NOTE(review): runs of whitespace inside some literals look collapsed in this copy. For example
// a `wantAlines` of '+10' (presumably a base-36 length, i.e. 36) is paired with a 30-character
// `wantText`. Verify the whitespace-sensitive fixtures against the upstream file before relying
// on them.
const testCases = [
  {
    description: 'Simple',
    html: '<html><body><p>foo</p></body></html>',
    wantAlines: ['+3'],
    wantText: ['foo'],
  },
  {
    description: 'Line starts with asterisk',
    html: '<html><body><p>*foo</p></body></html>',
    wantAlines: ['+4'],
    wantText: ['*foo'],
  },
  {
    description: 'Complex nested Li',
    html: '<!doctype html><html><body><ol><li>one</li><li><ol><li>1.1</li></ol></li><li>two</li></ol></body></html>',
    wantAlines: [
      '*0*4*6*7+1+3',
      '*0*5*6*8+1+3',
      '*0*4*6*8+1+3',
    ],
    wantText: [
      '*one', '*1.1', '*two',
    ],
  },
  {
    description: 'Complex list of different types',
    html: '<!doctype html><html><body><ul class="bullet"><li>one</li><li>two</li><li>0</li><li>1</li><li>2<ul class="bullet"><li>3</li><li>4</li></ul></li></ul><ol class="number"><li>item<ol class="number"><li>item1</li><li>item2</li></ol></li></ol></body></html>',
    wantAlines: [
      '*0*2*6+1+3',
      '*0*2*6+1+3',
      '*0*2*6+1+1',
      '*0*2*6+1+1',
      '*0*2*6+1+1',
      '*0*3*6+1+1',
      '*0*3*6+1+1',
      '*0*4*6*7+1+4',
      '*0*5*6*8+1+5',
      '*0*5*6*8+1+5',
    ],
    wantText: [
      '*one',
      '*two',
      '*0',
      '*1',
      '*2',
      '*3',
      '*4',
      '*item',
      '*item1',
      '*item2',
    ],
  },
  {
    description: 'Tests if uls properly get attributes',
    html: '<html><body><ul><li>a</li><li>b</li></ul><div>div</div><p>foo</p></body></html>',
    wantAlines: [
      '*0*2*6+1+1',
      '*0*2*6+1+1',
      '+3',
      '+3',
    ],
    wantText: ['*a', '*b', 'div', 'foo'],
  },
  {
    description: 'Tests if indented uls properly get attributes',
    html: '<html><body><ul><li>a</li><ul><li>b</li></ul><li>a</li></ul><p>foo</p></body></html>',
    wantAlines: [
      '*0*2*6+1+1',
      '*0*3*6+1+1',
      '*0*2*6+1+1',
      '+3',
    ],
    wantText: ['*a', '*b', '*a', 'foo'],
  },
  {
    description: 'Tests if ols properly get line numbers when in a normal OL',
    html: '<html><body><ol><li>a</li><li>b</li><li>c</li></ol><p>test</p></body></html>',
    wantAlines: [
      '*0*4*6*7+1+1',
      '*0*4*6*7+1+1',
      '*0*4*6*7+1+1',
      '+4',
    ],
    wantText: ['*a', '*b', '*c', 'test'],
    noteToSelf: 'Ensure empty P does not induce line attribute marker, wont this break the editor?',
  },
  {
    description: 'A single completely empty line break within an ol should reset count if OL is closed off..',
    html: '<html><body><ol><li>should be 1</li></ol><p>hello</p><ol><li>should be 1</li><li>should be 2</li></ol><p></p></body></html>',
    wantAlines: [
      '*0*4*6*7+1+b',
      '+5',
      '*0*4*6*8+1+b',
      '*0*4*6*8+1+b',
      '',
    ],
    wantText: ['*should be 1', 'hello', '*should be 1', '*should be 2', ''],
    noteToSelf: "Shouldn't include attribute marker in the <p> line",
  },
  {
    description: 'A single <p></p> should create a new line',
    html: '<html><body><p></p><p></p></body></html>',
    wantAlines: ['', ''],
    wantText: ['', ''],
    noteToSelf: '<p></p>should create a line break but not break numbering',
  },
  {
    description: 'Tests if ols properly get line numbers when in a normal OL #2',
    html: '<html><body>a<ol><li>b<ol><li>c</li></ol></ol>notlist<p>foo</p></body></html>',
    wantAlines: [
      '+1',
      '*0*4*6*7+1+1',
      '*0*5*6*8+1+1',
      '+7',
      '+3',
    ],
    wantText: ['a', '*b', '*c', 'notlist', 'foo'],
    noteToSelf: 'Ensure empty P does not induce line attribute marker, wont this break the editor?',
  },
  {
    description: 'First item being an UL then subsequent being OL will fail',
    html: '<html><body><ul><li>a<ol><li>b</li><li>c</li></ol></li></ul></body></html>',
    wantAlines: ['+1', '*0*1*2*3+1+1', '*0*4*2*5+1+1'],
    wantText: ['a', '*b', '*c'],
    noteToSelf: 'Ensure empty P does not induce line attribute marker, wont this break the editor?',
    disabled: true,
  },
  {
    description: 'A single completely empty line break within an ol should NOT reset count',
    html: '<html><body><ol><li>should be 1</li><p></p><li>should be 2</li><li>should be 3</li></ol><p></p></body></html>',
    wantAlines: [],
    wantText: ['*should be 1', '*should be 2', '*should be 3'],
    noteToSelf: "<p></p>should create a line break but not break numbering -- This is what I can't get working!",
    disabled: true,
  },
  {
    description: 'Content outside body should be ignored',
    html: '<html><head><title>title</title><style></style></head><body>empty<br></body></html>',
    wantAlines: ['+5'],
    wantText: ['empty'],
  },
  {
    description: 'Multiple spaces should be preserved',
    html: '<html><body>Text with more than one space.<br></body></html>',
    wantAlines: ['+10'],
    wantText: ['Text with more than one space.'],
  },
  {
    description: 'non-breaking and normal space should be preserved',
    html: '<html><body>Text&nbsp;with&nbsp; more&nbsp;&nbsp;&nbsp;than &nbsp;one space.<br></body></html>',
    wantAlines: ['+10'],
    wantText: ['Text with more than one space.'],
  },
  {
    description: 'Multiple nbsp should be preserved',
    html: '<html><body>&nbsp;&nbsp;<br></body></html>',
    wantAlines: ['+2'],
    wantText: [' '],
  },
  {
    description: 'Multiple nbsp between words ',
    html: '<html><body>&nbsp;&nbsp;word1&nbsp;&nbsp;word2&nbsp;&nbsp;&nbsp;word3<br></body></html>',
    wantAlines: ['+m'],
    wantText: [' word1 word2 word3'],
  },
  {
    description: 'A non-breaking space preceded by a normal space',
    html: '<html><body> &nbsp;word1 &nbsp;word2 &nbsp;word3<br></body></html>',
    wantAlines: ['+l'],
    wantText: [' word1 word2 word3'],
  },
  {
    description: 'A non-breaking space followed by a normal space',
    html: '<html><body>&nbsp; word1&nbsp; word2&nbsp; word3<br></body></html>',
    wantAlines: ['+l'],
    wantText: [' word1 word2 word3'],
  },
  {
    description: 'Don\'t collapse spaces that follow a newline',
    html: '<!doctype html><html><body>something<br> something<br></body></html>',
    wantAlines: ['+9', '+m'],
    wantText: ['something', ' something'],
  },
  {
    description: 'Don\'t collapse spaces that follow a empty paragraph',
    html: '<!doctype html><html><body>something<p></p> something<br></body></html>',
    wantAlines: ['+9', '', '+m'],
    wantText: ['something', '', ' something'],
  },
  {
    description: 'Don\'t collapse spaces that preceed/follow a newline',
    html: '<html><body>something <br> something<br></body></html>',
    wantAlines: ['+l', '+m'],
    wantText: ['something ', ' something'],
  },
  {
    description: 'Don\'t collapse spaces that preceed/follow a empty paragraph',
    html: '<html><body>something <p></p> something<br></body></html>',
    wantAlines: ['+l', '', '+m'],
    wantText: ['something ', '', ' something'],
  },
  {
    description: 'Don\'t collapse non-breaking spaces that follow a newline',
    html: '<html><body>something<br>&nbsp;&nbsp;&nbsp;something<br></body></html>',
    wantAlines: ['+9', '+c'],
    wantText: ['something', ' something'],
  },
  {
    description: 'Don\'t collapse non-breaking spaces that follow a paragraph',
    html: '<html><body>something<p></p>&nbsp;&nbsp;&nbsp;something<br></body></html>',
    wantAlines: ['+9', '', '+c'],
    wantText: ['something', '', ' something'],
  },
  {
    description: 'Preserve all spaces when multiple are present',
    html: '<html><body>Need <span> more </span> space<i> s </i> !<br></body></html>',
    wantAlines: ['+h*1+4+2'],
    wantText: ['Need more space s !'],
  },
  {
    description: 'Newlines and multiple spaces across newlines should be preserved',
    // The template literal is intentionally not indented: its line breaks are part of the input.
    html: `
<html><body>Need
<span> more </span>
space
<i> s </i>
!<br></body></html>`,
    wantAlines: ['+19*1+4+b'],
    wantText: ['Need more space s !'],
  },
  {
    description: 'Multiple new lines at the beginning should be preserved',
    html: '<html><body><br><br><p></p><p></p>first line<br><br>second line<br></body></html>',
    wantAlines: ['', '', '', '', '+a', '', '+b'],
    wantText: ['', '', '', '', 'first line', '', 'second line'],
  },
  {
    description: 'A paragraph with multiple lines should not loose spaces when lines are combined',
    html: `<html><body><p>
а б в г ґ д е є ж з и і ї й к л м н о
п р с т у ф х ц ч ш щ ю я ь</p>
</body></html>`,
    wantAlines: ['+1t'],
    wantText: ['а б в г ґ д е є ж з и і ї й к л м н о п р с т у ф х ц ч ш щ ю я ь'],
  },
  {
    description: 'lines in preformatted text should be kept intact',
    html: `<html><body><p>
а б в г ґ д е є ж з и і ї й к л м н о</p><pre>multiple
lines
in
pre
</pre><p>п р с т у ф х ц ч ш щ ю я
ь</p>
</body></html>`,
    wantAlines: ['+11', '+8', '+5', '+2', '+3', '+r'],
    wantText: [
      'а б в г ґ д е є ж з и і ї й к л м н о',
      'multiple',
      'lines',
      'in',
      'pre',
      'п р с т у ф х ц ч ш щ ю я ь',
    ],
  },
  {
    description: 'pre should be on a new line not preceded by a space',
    html: `<html><body><p>
1
</p><pre>preline
</pre></body></html>`,
    wantAlines: ['+6', '+7'],
    wantText: [' 1 ', 'preline'],
  },
  {
    description: 'Preserve spaces on the beginning and end of a element',
    html: '<html><body>Need<span> more </span>space<i> s </i>!<br></body></html>',
    wantAlines: ['+f*1+3+1'],
    wantText: ['Need more space s !'],
  },
  {
    description: 'Preserve spaces outside elements',
    html: '<html><body>Need <span>more</span> space <i>s</i> !<br></body></html>',
    wantAlines: ['+g*1+1+2'],
    wantText: ['Need more space s !'],
  },
  {
    description: 'Preserve spaces at the end of an element',
    html: '<html><body>Need <span>more </span>space <i>s </i>!<br></body></html>',
    wantAlines: ['+g*1+2+1'],
    wantText: ['Need more space s !'],
  },
  {
    description: 'Preserve spaces at the start of an element',
    html: '<html><body>Need<span> more</span> space<i> s</i> !<br></body></html>',
    wantAlines: ['+f*1+2+2'],
    wantText: ['Need more space s !'],
  },
];
// Test suite: one nested `describe` per entry of `testCases`.
//
// For each case the `before` hook parses `tc.html` with jsdom, seeds a fresh attribute pool with
// `knownAttribs`, runs the content collector over `document.body`, and stores the collector's
// result. Three tests then check the result: collected text, collected line attributes, and the
// ordering of the attributes inside every op.
//
// NOTE(review): stray VCS timestamp lines that had been interleaved with this block were removed;
// they are not valid JavaScript and prevented the file from being parsed.
describe(__filename, function () {
  for (const tc of testCases) {
    describe(tc.description, function () {
      let apool;
      let result;

      before(async function () {
        // Skipping in the hook also skips every test of this nested suite.
        if (tc.disabled) return this.skip();
        const {window: {document}} = new jsdom.JSDOM(tc.html);
        apool = new AttributePool();
        // To reduce test fragility, the attribute pool is seeded with `knownAttribs`, and all
        // attributes in `tc.wantAlines` must be in `knownAttribs`. (This guarantees that attribute
        // numbers do not change if the attribute processing code changes.)
        for (const attrib of knownAttribs) apool.putAttrib(attrib);
        for (const aline of tc.wantAlines) {
          const opIter = Changeset.opIterator(aline);
          while (opIter.hasNext()) {
            const op = opIter.next();
            for (const n of attributes.decodeAttribString(op.attribs)) {
              // Fail with enough context to locate the offending fixture.
              assert(
                  n < knownAttribs.length,
                  `wantAlines entry ${JSON.stringify(aline)} of test case ` +
                  `${JSON.stringify(tc.description)} refers to attribute number ${n}, ` +
                  `which is not covered by knownAttribs (${knownAttribs.length} entries)`);
            }
          }
        }
        const cc = contentcollector.makeContentCollector(true, null, apool);
        cc.collectContent(document.body);
        result = cc.finish();
      });

      it('text matches', async function () {
        assert.deepEqual(result.lines, tc.wantText);
      });

      it('alines match', async function () {
        assert.deepEqual(result.lineAttribs, tc.wantAlines);
      });

      it('attributes are sorted in canonical order', async function () {
        // For every op of every collected aline, record the attributes as emitted (`got`) next to
        // a sorted copy of the same attributes (`want`). The two structures are deep-equal only
        // if every op already lists its attributes in the order `attributes.sort()` produces.
        const gotAttribs = [];
        const wantAttribs = [];
        for (const aline of result.lineAttribs) {
          const gotAlineAttribs = [];
          gotAttribs.push(gotAlineAttribs);
          const wantAlineAttribs = [];
          wantAttribs.push(wantAlineAttribs);
          const opIter = Changeset.opIterator(aline);
          while (opIter.hasNext()) {
            const op = opIter.next();
            const gotOpAttribs = [...attributes.attribsFromString(op.attribs, apool)];
            gotAlineAttribs.push(gotOpAttribs);
            // Sort a copy so the emitted order in `gotOpAttribs` is left untouched.
            wantAlineAttribs.push(attributes.sort([...gotOpAttribs]));
          }
        }
        assert.deepEqual(gotAttribs, wantAttribs);
      });
    });
  }
});