etherpad-lite/src/tests/backend/specs/contentcollector.js
John McLear 2ea8ea1275 restructure: move bin/ and tests/ to src/
Also add symlinks from the old `bin/` and `tests/` locations to avoid
breaking scripts and other tools.

Motivations:

  * Scripts and tests no longer have to do dubious things like:

        require('ep_etherpad-lite/node_modules/foo')

    to access packages installed as dependencies in
    `src/package.json`.

  * Plugins can access the backend test helper library in a non-hacky
    way:

        require('ep_etherpad-lite/tests/backend/common')

  * We can delete the top-level `package.json` without breaking our
    ability to lint the files in `bin/` and `tests/`.

    Deleting the top-level `package.json` has downsides: It will cause
    `npm` to print warnings whenever plugins are installed, npm will
    no longer be able to enforce a plugin's peer dependency on
    ep_etherpad-lite, and npm will keep deleting the
    `node_modules/ep_etherpad-lite` symlink that points to `../src`.

    But there are significant upsides to deleting the top-level
    `package.json`: It will drastically speed up plugin installation
    because `npm` doesn't have to recursively walk the dependencies in
    `src/package.json`. Also, deleting the top-level `package.json`
    avoids npm's horrible dependency hoisting behavior (where it moves
    stuff from `src/node_modules/` to the top-level `node_modules/`
    directory). Dependency hoisting causes numerous mysterious
    problems such as silent failures in `npm outdated` and `npm
    update`. Dependency hoisting also breaks plugins that do:

        require('ep_etherpad-lite/node_modules/foo')
2021-02-04 17:15:08 -05:00

324 lines
13 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

'use strict';
/*
* While importexport tests target the `setHTML` API endpoint, which is nearly identical to what
* happens when a user manually imports a document via the UI, the contentcollector tests here don't
* use rehype to process the document. Rehype removes spaces and newĺines were applicable, so the
* expected results here can differ from importexport.js.
*
* If you add tests here, please also add them to importexport.js
*/
const AttributePool = require('../../../static/js/AttributePool');
const assert = require('assert').strict;
const cheerio = require('cheerio');
const contentcollector = require('../../../static/js/contentcollector');
const tests = {
nestedLi: {
description: 'Complex nested Li',
html: '<!doctype html><html><body><ol><li>one</li><li><ol><li>1.1</li></ol></li><li>two</li></ol></body></html>',
wantLineAttribs: [
'*0*1*2*3+1+3', '*0*4*2*5+1+3', '*0*1*2*5+1+3',
],
wantText: [
'*one', '*1.1', '*two',
],
},
complexNest: {
description: 'Complex list of different types',
html: '<!doctype html><html><body><ul class="bullet"><li>one</li><li>two</li><li>0</li><li>1</li><li>2<ul class="bullet"><li>3</li><li>4</li></ul></li></ul><ol class="number"><li>item<ol class="number"><li>item1</li><li>item2</li></ol></li></ol></body></html>',
wantLineAttribs: [
'*0*1*2+1+3',
'*0*1*2+1+3',
'*0*1*2+1+1',
'*0*1*2+1+1',
'*0*1*2+1+1',
'*0*3*2+1+1',
'*0*3*2+1+1',
'*0*4*2*5+1+4',
'*0*6*2*7+1+5',
'*0*6*2*7+1+5',
],
wantText: [
'*one',
'*two',
'*0',
'*1',
'*2',
'*3',
'*4',
'*item',
'*item1',
'*item2',
],
},
ul: {
description: 'Tests if uls properly get attributes',
html: '<html><body><ul><li>a</li><li>b</li></ul><div>div</div><p>foo</p></body></html>',
wantLineAttribs: ['*0*1*2+1+1', '*0*1*2+1+1', '+3', '+3'],
wantText: ['*a', '*b', 'div', 'foo'],
},
ulIndented: {
description: 'Tests if indented uls properly get attributes',
html: '<html><body><ul><li>a</li><ul><li>b</li></ul><li>a</li></ul><p>foo</p></body></html>',
wantLineAttribs: ['*0*1*2+1+1', '*0*3*2+1+1', '*0*1*2+1+1', '+3'],
wantText: ['*a', '*b', '*a', 'foo'],
},
ol: {
description: 'Tests if ols properly get line numbers when in a normal OL',
html: '<html><body><ol><li>a</li><li>b</li><li>c</li></ol><p>test</p></body></html>',
wantLineAttribs: ['*0*1*2*3+1+1', '*0*1*2*3+1+1', '*0*1*2*3+1+1', '+4'],
wantText: ['*a', '*b', '*c', 'test'],
noteToSelf: 'Ensure empty P does not induce line attribute marker, wont this break the editor?',
},
lineDoBreakInOl: {
description: 'A single completely empty line break within an ol should reset count if OL is closed off..',
html: '<html><body><ol><li>should be 1</li></ol><p>hello</p><ol><li>should be 1</li><li>should be 2</li></ol><p></p></body></html>',
wantLineAttribs: ['*0*1*2*3+1+b', '+5', '*0*1*2*4+1+b', '*0*1*2*4+1+b', ''],
wantText: ['*should be 1', 'hello', '*should be 1', '*should be 2', ''],
noteToSelf: "Shouldn't include attribute marker in the <p> line",
},
testP: {
description: 'A single <p></p> should create a new line',
html: '<html><body><p></p><p></p></body></html>',
wantLineAttribs: ['', ''],
wantText: ['', ''],
noteToSelf: '<p></p>should create a line break but not break numbering',
},
nestedOl: {
description: 'Tests if ols properly get line numbers when in a normal OL',
html: '<html><body>a<ol><li>b<ol><li>c</li></ol></ol>notlist<p>foo</p></body></html>',
wantLineAttribs: ['+1', '*0*1*2*3+1+1', '*0*4*2*5+1+1', '+7', '+3'],
wantText: ['a', '*b', '*c', 'notlist', 'foo'],
noteToSelf: 'Ensure empty P does not induce line attribute marker, wont this break the editor?',
},
nestedOl2: {
description: 'First item being an UL then subsequent being OL will fail',
html: '<html><body><ul><li>a<ol><li>b</li><li>c</li></ol></li></ul></body></html>',
wantLineAttribs: ['+1', '*0*1*2*3+1+1', '*0*4*2*5+1+1'],
wantText: ['a', '*b', '*c'],
noteToSelf: 'Ensure empty P does not induce line attribute marker, wont this break the editor?',
disabled: true,
},
lineDontBreakOL: {
description: 'A single completely empty line break within an ol should NOT reset count',
html: '<html><body><ol><li>should be 1</li><p></p><li>should be 2</li><li>should be 3</li></ol><p></p></body></html>',
wantLineAttribs: [],
wantText: ['*should be 1', '*should be 2', '*should be 3'],
noteToSelf: "<p></p>should create a line break but not break numbering -- This is what I can't get working!",
disabled: true,
},
ignoreAnyTagsOutsideBody: {
description: 'Content outside body should be ignored',
html: '<html><head><title>title</title><style></style></head><body>empty<br></body></html>',
wantLineAttribs: ['+5'],
wantText: ['empty'],
},
lineWithMultipleSpaces: {
description: 'Multiple spaces should be preserved',
html: '<html><body>Text with more than one space.<br></body></html>',
wantLineAttribs: ['+10'],
wantText: ['Text with more than one space.'],
},
lineWithMultipleNonBreakingAndNormalSpaces: {
description: 'non-breaking and normal space should be preserved',
html: '<html><body>Text&nbsp;with&nbsp; more&nbsp;&nbsp;&nbsp;than &nbsp;one space.<br></body></html>',
wantLineAttribs: ['+10'],
wantText: ['Text with more than one space.'],
},
multiplenbsp: {
description: 'Multiple nbsp should be preserved',
html: '<html><body>&nbsp;&nbsp;<br></body></html>',
wantLineAttribs: ['+2'],
wantText: [' '],
},
multipleNonBreakingSpaceBetweenWords: {
description: 'Multiple nbsp between words ',
html: '<html><body>&nbsp;&nbsp;word1&nbsp;&nbsp;word2&nbsp;&nbsp;&nbsp;word3<br></body></html>',
wantLineAttribs: ['+m'],
wantText: [' word1 word2 word3'],
},
nonBreakingSpacePreceededBySpaceBetweenWords: {
description: 'A non-breaking space preceded by a normal space',
html: '<html><body> &nbsp;word1 &nbsp;word2 &nbsp;word3<br></body></html>',
wantLineAttribs: ['+l'],
wantText: [' word1 word2 word3'],
},
nonBreakingSpaceFollowededBySpaceBetweenWords: {
description: 'A non-breaking space followed by a normal space',
html: '<html><body>&nbsp; word1&nbsp; word2&nbsp; word3<br></body></html>',
wantLineAttribs: ['+l'],
wantText: [' word1 word2 word3'],
},
spacesAfterNewline: {
description: 'Don\'t collapse spaces that follow a newline',
html: '<!doctype html><html><body>something<br> something<br></body></html>',
wantLineAttribs: ['+9', '+m'],
wantText: ['something', ' something'],
},
spacesAfterNewlineP: {
description: 'Don\'t collapse spaces that follow a empty paragraph',
html: '<!doctype html><html><body>something<p></p> something<br></body></html>',
wantLineAttribs: ['+9', '', '+m'],
wantText: ['something', '', ' something'],
},
spacesAtEndOfLine: {
description: 'Don\'t collapse spaces that preceed/follow a newline',
html: '<html><body>something <br> something<br></body></html>',
wantLineAttribs: ['+l', '+m'],
wantText: ['something ', ' something'],
},
spacesAtEndOfLineP: {
description: 'Don\'t collapse spaces that preceed/follow a empty paragraph',
html: '<html><body>something <p></p> something<br></body></html>',
wantLineAttribs: ['+l', '', '+m'],
wantText: ['something ', '', ' something'],
},
nonBreakingSpacesAfterNewlines: {
description: 'Don\'t collapse non-breaking spaces that follow a newline',
html: '<html><body>something<br>&nbsp;&nbsp;&nbsp;something<br></body></html>',
wantLineAttribs: ['+9', '+c'],
wantText: ['something', ' something'],
},
nonBreakingSpacesAfterNewlinesP: {
description: 'Don\'t collapse non-breaking spaces that follow a paragraph',
html: '<html><body>something<p></p>&nbsp;&nbsp;&nbsp;something<br></body></html>',
wantLineAttribs: ['+9', '', '+c'],
wantText: ['something', '', ' something'],
},
preserveSpacesInsideElements: {
description: 'Preserve all spaces when multiple are present',
html: '<html><body>Need <span> more </span> space<i> s </i> !<br></body></html>',
wantLineAttribs: ['+h*0+4+2'],
wantText: ['Need more space s !'],
},
preserveSpacesAcrossNewlines: {
description: 'Newlines and multiple spaces across newlines should be preserved',
html: `
<html><body>Need
<span> more </span>
space
<i> s </i>
!<br></body></html>`,
wantLineAttribs: ['+19*0+4+b'],
wantText: ['Need more space s !'],
},
multipleNewLinesAtBeginning: {
description: 'Multiple new lines at the beginning should be preserved',
html: '<html><body><br><br><p></p><p></p>first line<br><br>second line<br></body></html>',
wantLineAttribs: ['', '', '', '', '+a', '', '+b'],
wantText: ['', '', '', '', 'first line', '', 'second line'],
},
multiLineParagraph: {
description: 'A paragraph with multiple lines should not loose spaces when lines are combined',
html: `<html><body><p>
а б в г ґ д е є ж з и і ї й к л м н о
п р с т у ф х ц ч ш щ ю я ь</p>
</body></html>`,
wantLineAttribs: ['+1t'],
wantText: ['а б в г ґ д е є ж з и і ї й к л м н о п р с т у ф х ц ч ш щ ю я ь'],
},
multiLineParagraphWithPre: {
description: 'lines in preformatted text should be kept intact',
html: `<html><body><p>
а б в г ґ д е є ж з и і ї й к л м н о</p><pre>multiple
lines
in
pre
</pre><p>п р с т у ф х ц ч ш щ ю я
ь</p>
</body></html>`,
wantLineAttribs: ['+11', '+8', '+5', '+2', '+3', '+r'],
wantText: [
'а б в г ґ д е є ж з и і ї й к л м н о',
'multiple',
'lines',
'in',
'pre',
'п р с т у ф х ц ч ш щ ю я ь',
],
},
preIntroducesASpace: {
description: 'pre should be on a new line not preceded by a space',
html: `<html><body><p>
1
</p><pre>preline
</pre></body></html>`,
wantLineAttribs: ['+6', '+7'],
wantText: [' 1 ', 'preline'],
},
dontDeleteSpaceInsideElements: {
description: 'Preserve spaces on the beginning and end of a element',
html: '<html><body>Need<span> more </span>space<i> s </i>!<br></body></html>',
wantLineAttribs: ['+f*0+3+1'],
wantText: ['Need more space s !'],
},
dontDeleteSpaceOutsideElements: {
description: 'Preserve spaces outside elements',
html: '<html><body>Need <span>more</span> space <i>s</i> !<br></body></html>',
wantLineAttribs: ['+g*0+1+2'],
wantText: ['Need more space s !'],
},
dontDeleteSpaceAtEndOfElement: {
description: 'Preserve spaces at the end of an element',
html: '<html><body>Need <span>more </span>space <i>s </i>!<br></body></html>',
wantLineAttribs: ['+g*0+2+1'],
wantText: ['Need more space s !'],
},
dontDeleteSpaceAtBeginOfElements: {
description: 'Preserve spaces at the start of an element',
html: '<html><body>Need<span> more</span> space<i> s</i> !<br></body></html>',
wantLineAttribs: ['+f*0+2+2'],
wantText: ['Need more space s !'],
},
};
describe(__filename, function () {
for (const test of Object.keys(tests)) {
const testObj = tests[test];
describe(test, function () {
if (testObj.disabled) {
return xit('DISABLED:', test, function (done) {
done();
});
}
it(testObj.description, async function () {
const $ = cheerio.load(testObj.html); // Load HTML into Cheerio
const doc = $('body')[0]; // Creates a dom-like representation of HTML
// Create an empty attribute pool
const apool = new AttributePool();
// Convert a dom tree into a list of lines and attribute liens
// using the content collector object
const cc = contentcollector.makeContentCollector(true, null, apool);
cc.collectContent(doc);
const result = cc.finish();
const gotAttributes = result.lineAttribs;
const wantAttributes = testObj.wantLineAttribs;
const gotText = new Array(result.lines);
const wantText = testObj.wantText;
assert.deepEqual(gotText[0], wantText);
assert.deepEqual(gotAttributes, wantAttributes);
});
});
}
});
function arraysEqual(a, b) {
if (a === b) return true;
if (a == null || b == null) return false;
if (a.length !== b.length) return false;
// If you don't care about the order of the elements inside
// the array, you should sort both arrays here.
// Please note that calling sort on an array will modify that array.
// you might want to clone your array first.
for (let i = 0; i < a.length; ++i) {
if (a[i] !== b[i]) return false;
}
return true;
}