From 54a3dbb9a0c0f39c425c22201f7851992033c81b Mon Sep 17 00:00:00 2001 From: Richard Hansen Date: Sun, 24 Jan 2021 00:56:11 -0500 Subject: [PATCH 01/26] lint: Fix some straightforward ESLint errors --- src/node/hooks/express/openapi.js | 6 +- tests/backend/specs/api/characterEncoding.js | 2 + tests/backend/specs/api/importexport.js | 134 +++++++++---------- tests/backend/specs/contentcollector.js | 114 ++++++++-------- 4 files changed, 131 insertions(+), 125 deletions(-) diff --git a/src/node/hooks/express/openapi.js b/src/node/hooks/express/openapi.js index 344913389..d7059896a 100644 --- a/src/node/hooks/express/openapi.js +++ b/src/node/hooks/express/openapi.js @@ -391,9 +391,9 @@ const defaultResponseRefs = { // convert to a dictionary of operation objects const operations = {}; -for (const resource in resources) { - for (const action in resources[resource]) { - const {operationId, responseSchema, ...operation} = resources[resource][action]; +for (const [resource, actions] of Object.entries(resources)) { + for (const [action, spec] of Object.entries(actions)) { + const {operationId, responseSchema, ...operation} = spec; // add response objects const responses = {...defaultResponseRefs}; diff --git a/tests/backend/specs/api/characterEncoding.js b/tests/backend/specs/api/characterEncoding.js index e17e90fe4..94c22307d 100644 --- a/tests/backend/specs/api/characterEncoding.js +++ b/tests/backend/specs/api/characterEncoding.js @@ -1,3 +1,5 @@ +'use strict'; + /* * This file is copied & modified from /tests/backend/specs/api/pad.js * diff --git a/tests/backend/specs/api/importexport.js b/tests/backend/specs/api/importexport.js index b7b45b261..fe61dfa12 100644 --- a/tests/backend/specs/api/importexport.js +++ b/tests/backend/specs/api/importexport.js @@ -6,13 +6,11 @@ * TODO: unify those two files, and merge in a single one. */ -/* eslint-disable max-len */ - const common = require('../../common'); -const supertest = require(`${__dirname}/../../../../src/node_modules/supertest`); -const settings = require(`${__dirname}/../../../../tests/container/loadSettings.js`).loadSettings(); -const api = supertest(`http://${settings.ip}:${settings.port}`); +const settings = require('../../../container/loadSettings.js').loadSettings(); +const supertest = require('ep_etherpad-lite/node_modules/supertest'); +const api = supertest(`http://${settings.ip}:${settings.port}`); const apiKey = common.apiKey; const apiVersion = 1; @@ -47,16 +45,16 @@ const testImports = { }, /* - "prefixcorrectlinenumber when introduced none list item - currently not supported see #3450":{ + "prefixcorrectlinenumber when introduced none list item - currently not supported see #3450": { input: '
  1. should be 1
  2. test
  3. should be 2
', expectedHTML: '
  1. should be 1
  2. test
  3. should be 2

', - expectedText: '\t1. should be 1\n\ttest\n\t2. should be 2\n\n' + expectedText: '\t1. should be 1\n\ttest\n\t2. should be 2\n\n', } , - "newlinesshouldntresetlinenumber #2194":{ + "newlinesshouldntresetlinenumber #2194": { input: '
  1. should be 1
  2. test
  3. should be 2
', expectedHTML: '
  1. should be 1
  2. test
  3. should be 2

', - expectedText: '\t1. should be 1\n\ttest\n\t2. should be 2\n\n' + expectedText: '\t1. should be 1\n\ttest\n\t2. should be 2\n\n', } */ 'ignoreAnyTagsOutsideBody': { @@ -69,88 +67,88 @@ const testImports = { description: 'Indented lists are represented with tabs and without bullets', input: '', expectedHTML: '
', - expectedText: '\tindent\n\tindent\n\n' + expectedText: '\tindent\n\tindent\n\n', }, - lineWithMultipleSpaces: { + 'lineWithMultipleSpaces': { description: 'Multiple spaces should be collapsed', input: 'Text with more than one space.
', expectedHTML: 'Text with more than one space.

', - expectedText: 'Text with more than one space.\n\n' + expectedText: 'Text with more than one space.\n\n', }, - lineWithMultipleNonBreakingAndNormalSpaces: { + 'lineWithMultipleNonBreakingAndNormalSpaces': { // XXX the HTML between "than" and "one" looks strange description: 'non-breaking space should be preserved, but can be replaced when it', input: 'Text with  more   than  one space.
', expectedHTML: 'Text with  more   than  one space.

', - expectedText: 'Text with more than one space.\n\n' + expectedText: 'Text with more than one space.\n\n', }, - multiplenbsp: { + 'multiplenbsp': { description: 'Multiple non-breaking space should be preserved', input: '  
', expectedHTML: '  

', - expectedText: ' \n\n' + expectedText: ' \n\n', }, - multipleNonBreakingSpaceBetweenWords: { + 'multipleNonBreakingSpaceBetweenWords': { description: 'A normal space is always inserted before a word', input: '  word1  word2   word3
', expectedHTML: '  word1  word2   word3

', - expectedText: ' word1 word2 word3\n\n' + expectedText: ' word1 word2 word3\n\n', }, - nonBreakingSpacePreceededBySpaceBetweenWords: { + 'nonBreakingSpacePreceededBySpaceBetweenWords': { description: 'A non-breaking space preceeded by a normal space', input: '  word1  word2  word3
', expectedHTML: ' word1  word2  word3

', - expectedText: ' word1 word2 word3\n\n' + expectedText: ' word1 word2 word3\n\n', }, - nonBreakingSpaceFollowededBySpaceBetweenWords: { + 'nonBreakingSpaceFollowededBySpaceBetweenWords': { description: 'A non-breaking space followed by a normal space', input: '  word1  word2  word3
', expectedHTML: '  word1  word2  word3

', - expectedText: ' word1 word2 word3\n\n' + expectedText: ' word1 word2 word3\n\n', }, - spacesAfterNewline: { + 'spacesAfterNewline': { description: 'Collapse spaces that follow a newline', - input:'something
something
', + input: 'something
something
', expectedHTML: 'something
something

', - expectedText: 'something\nsomething\n\n' + expectedText: 'something\nsomething\n\n', }, - spacesAfterNewlineP: { + 'spacesAfterNewlineP': { description: 'Collapse spaces that follow a paragraph', - input:'something

something
', + input: 'something

something
', expectedHTML: 'something

something

', - expectedText: 'something\n\nsomething\n\n' + expectedText: 'something\n\nsomething\n\n', }, - spacesAtEndOfLine: { + 'spacesAtEndOfLine': { description: 'Collapse spaces that preceed/follow a newline', - input:'something
something
', + input: 'something
something
', expectedHTML: 'something
something

', - expectedText: 'something\nsomething\n\n' + expectedText: 'something\nsomething\n\n', }, - spacesAtEndOfLineP: { + 'spacesAtEndOfLineP': { description: 'Collapse spaces that preceed/follow a paragraph', - input:'something

something
', + input: 'something

something
', expectedHTML: 'something

something

', - expectedText: 'something\n\nsomething\n\n' + expectedText: 'something\n\nsomething\n\n', }, - nonBreakingSpacesAfterNewlines: { + 'nonBreakingSpacesAfterNewlines': { description: 'Don\'t collapse non-breaking spaces that follow a newline', - input:'something
   something
', + input: 'something
   something
', expectedHTML: 'something
   something

', - expectedText: 'something\n something\n\n' + expectedText: 'something\n something\n\n', }, - nonBreakingSpacesAfterNewlinesP: { + 'nonBreakingSpacesAfterNewlinesP': { description: 'Don\'t collapse non-breaking spaces that follow a paragraph', - input:'something

   something
', + input: 'something

   something
', expectedHTML: 'something

   something

', - expectedText: 'something\n\n something\n\n' + expectedText: 'something\n\n something\n\n', }, - collapseSpacesInsideElements: { + 'collapseSpacesInsideElements': { description: 'Preserve only one space when multiple are present', input: 'Need more space s !
', expectedHTML: 'Need more space s !

', - expectedText: 'Need more space s !\n\n' + expectedText: 'Need more space s !\n\n', }, - collapseSpacesAcrossNewlines: { + 'collapseSpacesAcrossNewlines': { description: 'Newlines and multiple spaces across newlines should be collapsed', input: ` Need @@ -159,29 +157,29 @@ const testImports = { s !
`, expectedHTML: 'Need more space s !

', - expectedText: 'Need more space s !\n\n' + expectedText: 'Need more space s !\n\n', }, - multipleNewLinesAtBeginning: { + 'multipleNewLinesAtBeginning': { description: 'Multiple new lines and paragraphs at the beginning should be preserved', input: '

first line

second line
', expectedHTML: '



first line

second line

', - expectedText: '\n\n\n\nfirst line\n\nsecond line\n\n' + expectedText: '\n\n\n\nfirst line\n\nsecond line\n\n', }, - multiLineParagraph:{ - description: "A paragraph with multiple lines should not loose spaces when lines are combined", - input:` + 'multiLineParagraph': { + description: 'A paragraph with multiple lines should not loose spaces when lines are combined', + input: `

а б в г ґ д е є ж з и і ї й к л м н о п р с т у ф х ц ч ш щ ю я ь

`, expectedHTML: 'а б в г ґ д е є ж з и і ї й к л м н о п р с т у ф х ц ч ш щ ю я ь

', - expectedText: 'а б в г ґ д е є ж з и і ї й к л м н о п р с т у ф х ц ч ш щ ю я ь\n\n' + expectedText: 'а б в г ґ д е є ж з и і ї й к л м н о п р с т у ф х ц ч ш щ ю я ь\n\n', }, - multiLineParagraphWithPre:{ - //XXX why is there   before "in"? - description: "lines in preformatted text should be kept intact", - input:` + 'multiLineParagraphWithPre': { + // XXX why is there   before "in"? + description: 'lines in preformatted text should be kept intact', + input: `

а б в г ґ д е є ж з и і ї й к л м н о

multiple
    lines
@@ -191,40 +189,40 @@ const testImports = {
 ь

`, expectedHTML: 'а б в г ґ д е є ж з и і ї й к л м н о
multiple
   lines
 in
      pre

п р с т у ф х ц ч ш щ ю я ь

', - expectedText: 'а б в г ґ д е є ж з и і ї й к л м н о\nmultiple\n lines\n in\n pre\n\nп р с т у ф х ц ч ш щ ю я ь\n\n' + expectedText: 'а б в г ґ д е є ж з и і ї й к л м н о\nmultiple\n lines\n in\n pre\n\nп р с т у ф х ц ч ш щ ю я ь\n\n', }, - preIntroducesASpace: { - description: "pre should be on a new line not preceeded by a space", - input:`

+ 'preIntroducesASpace': { + description: 'pre should be on a new line not preceeded by a space', + input: `

1

preline
 

`, expectedHTML: '1
preline


', - expectedText: '1\npreline\n\n\n' + expectedText: '1\npreline\n\n\n', }, - dontDeleteSpaceInsideElements: { + 'dontDeleteSpaceInsideElements': { description: 'Preserve spaces inside elements', input: 'Need more space s !
', expectedHTML: 'Need more space s !

', - expectedText: 'Need more space s !\n\n' + expectedText: 'Need more space s !\n\n', }, - dontDeleteSpaceOutsideElements: { + 'dontDeleteSpaceOutsideElements': { description: 'Preserve spaces outside elements', input: 'Need more space s !
', expectedHTML: 'Need more space s !

', - expectedText: 'Need more space s !\n\n' + expectedText: 'Need more space s !\n\n', }, - dontDeleteSpaceAtEndOfElement: { + 'dontDeleteSpaceAtEndOfElement': { description: 'Preserve spaces at the end of an element', input: 'Need more space s !
', expectedHTML: 'Need more space s !

', - expectedText: 'Need more space s !\n\n' + expectedText: 'Need more space s !\n\n', }, - dontDeleteSpaceAtBeginOfElements: { + 'dontDeleteSpaceAtBeginOfElements': { description: 'Preserve spaces at the start of an element', input: 'Need more space s !
', expectedHTML: 'Need more space s !

', - expectedText: 'Need more space s !\n\n' + expectedText: 'Need more space s !\n\n', }, }; @@ -315,7 +313,7 @@ describe(__filename, function () { function endPoint(point, version) { version = version || apiVersion; return `/api/${version}/${point}?apikey=${apiKey}`; -}; +} function makeid() { let text = ''; diff --git a/tests/backend/specs/contentcollector.js b/tests/backend/specs/contentcollector.js index c85904917..83417966d 100644 --- a/tests/backend/specs/contentcollector.js +++ b/tests/backend/specs/contentcollector.js @@ -1,18 +1,17 @@ 'use strict'; -/* eslint-disable max-len */ /* - * While importexport tests target the `setHTML` API endpoint, which is nearly identical to what happens - * when a user manually imports a document via the UI, the contentcollector tests here don't use rehype to process - * the document. Rehype removes spaces and newĺines were applicable, so the expected results here can - * differ from importexport.js. + * While importexport tests target the `setHTML` API endpoint, which is nearly identical to what + * happens when a user manually imports a document via the UI, the contentcollector tests here don't + * use rehype to process the document. Rehype removes spaces and newĺines were applicable, so the + * expected results here can differ from importexport.js. * * If you add tests here, please also add them to importexport.js */ -const contentcollector = require('../../../src/static/js/contentcollector'); -const AttributePool = require('../../../src/static/js/AttributePool'); -const cheerio = require('../../../src/node_modules/cheerio'); +const AttributePool = require('ep_etherpad-lite/static/js/AttributePool'); +const cheerio = require('ep_etherpad-lite/node_modules/cheerio'); +const contentcollector = require('ep_etherpad-lite/static/js/contentcollector'); const tests = { nestedLi: { @@ -124,74 +123,74 @@ const tests = { lineWithMultipleSpaces: { description: 'Multiple spaces should be preserved', html: 'Text with more than one space.
', - expectedLineAttribs: [ '+10' ], - expectedText: ['Text with more than one space.'] + expectedLineAttribs: ['+10'], + expectedText: ['Text with more than one space.'], }, lineWithMultipleNonBreakingAndNormalSpaces: { description: 'non-breaking and normal space should be preserved', html: 'Text with  more   than  one space.
', - expectedLineAttribs: [ '+10' ], - expectedText: ['Text with more than one space.'] + expectedLineAttribs: ['+10'], + expectedText: ['Text with more than one space.'], }, multiplenbsp: { description: 'Multiple nbsp should be preserved', html: '  
', - expectedLineAttribs: [ '+2' ], - expectedText: [' '] + expectedLineAttribs: ['+2'], + expectedText: [' '], }, multipleNonBreakingSpaceBetweenWords: { description: 'Multiple nbsp between words ', html: '  word1  word2   word3
', - expectedLineAttribs: [ '+m' ], - expectedText: [' word1 word2 word3'] + expectedLineAttribs: ['+m'], + expectedText: [' word1 word2 word3'], }, nonBreakingSpacePreceededBySpaceBetweenWords: { description: 'A non-breaking space preceeded by a normal space', html: '  word1  word2  word3
', - expectedLineAttribs: [ '+l' ], - expectedText: [' word1 word2 word3'] + expectedLineAttribs: ['+l'], + expectedText: [' word1 word2 word3'], }, nonBreakingSpaceFollowededBySpaceBetweenWords: { description: 'A non-breaking space followed by a normal space', html: '  word1  word2  word3
', - expectedLineAttribs: [ '+l' ], - expectedText: [' word1 word2 word3'] + expectedLineAttribs: ['+l'], + expectedText: [' word1 word2 word3'], }, spacesAfterNewline: { description: 'Don\'t collapse spaces that follow a newline', - html:'something
something
', + html: 'something
something
', expectedLineAttribs: ['+9', '+m'], - expectedText: ['something', ' something'] + expectedText: ['something', ' something'], }, spacesAfterNewlineP: { description: 'Don\'t collapse spaces that follow a empty paragraph', - html:'something

something
', + html: 'something

something
', expectedLineAttribs: ['+9', '', '+m'], - expectedText: ['something', '', ' something'] + expectedText: ['something', '', ' something'], }, spacesAtEndOfLine: { description: 'Don\'t collapse spaces that preceed/follow a newline', - html:'something
something
', + html: 'something
something
', expectedLineAttribs: ['+l', '+m'], - expectedText: ['something ', ' something'] + expectedText: ['something ', ' something'], }, spacesAtEndOfLineP: { description: 'Don\'t collapse spaces that preceed/follow a empty paragraph', - html:'something

something
', + html: 'something

something
', expectedLineAttribs: ['+l', '', '+m'], - expectedText: ['something ', '', ' something'] + expectedText: ['something ', '', ' something'], }, nonBreakingSpacesAfterNewlines: { description: 'Don\'t collapse non-breaking spaces that follow a newline', - html:'something
   something
', + html: 'something
   something
', expectedLineAttribs: ['+9', '+c'], - expectedText: ['something', ' something'] + expectedText: ['something', ' something'], }, nonBreakingSpacesAfterNewlinesP: { description: 'Don\'t collapse non-breaking spaces that follow a paragraph', - html:'something

   something
', + html: 'something

   something
', expectedLineAttribs: ['+9', '', '+c'], - expectedText: ['something', '', ' something'] + expectedText: ['something', '', ' something'], }, preserveSpacesInsideElements: { description: 'Preserve all spaces when multiple are present', @@ -207,27 +206,27 @@ const tests = { space s !
`, - expectedLineAttribs: [ '+19*0+4+b' ], - expectedText: [ 'Need more space s !' ] + expectedLineAttribs: ['+19*0+4+b'], + expectedText: ['Need more space s !'], }, multipleNewLinesAtBeginning: { description: 'Multiple new lines at the beginning should be preserved', html: '

first line

second line
', expectedLineAttribs: ['', '', '', '', '+a', '', '+b'], - expectedText: [ '', '', '', '', 'first line', '', 'second line'] + expectedText: ['', '', '', '', 'first line', '', 'second line'], }, - multiLineParagraph:{ - description: "A paragraph with multiple lines should not loose spaces when lines are combined", - html:`

+ multiLineParagraph: { + description: 'A paragraph with multiple lines should not loose spaces when lines are combined', + html: `

а б в г ґ д е є ж з и і ї й к л м н о п р с т у ф х ц ч ш щ ю я ь

`, - expectedLineAttribs: [ '+1t' ], - expectedText: ["а б в г ґ д е є ж з и і ї й к л м н о п р с т у ф х ц ч ш щ ю я ь"] + expectedLineAttribs: ['+1t'], + expectedText: ['а б в г ґ д е є ж з и і ї й к л м н о п р с т у ф х ц ч ш щ ю я ь'], }, - multiLineParagraphWithPre:{ - description: "lines in preformatted text should be kept intact", - html:`

+ multiLineParagraphWithPre: { + description: 'lines in preformatted text should be kept intact', + html: `

а б в г ґ д е є ж з и і ї й к л м н о

multiple
 lines
 in
@@ -235,41 +234,48 @@ pre
 

п р с т у ф х ц ч ш щ ю я ь

`, - expectedLineAttribs: [ '+11', '+8', '+5', '+2', '+3', '+r' ], - expectedText: ['а б в г ґ д е є ж з и і ї й к л м н о', 'multiple', 'lines', 'in', 'pre', 'п р с т у ф х ц ч ш щ ю я ь'] + expectedLineAttribs: ['+11', '+8', '+5', '+2', '+3', '+r'], + expectedText: [ + 'а б в г ґ д е є ж з и і ї й к л м н о', + 'multiple', + 'lines', + 'in', + 'pre', + 'п р с т у ф х ц ч ш щ ю я ь', + ], }, preIntroducesASpace: { - description: "pre should be on a new line not preceeded by a space", - html:`

+ description: 'pre should be on a new line not preceeded by a space', + html: `

1

preline
 

`, - expectedLineAttribs: [ '+6', '+7' ], - expectedText: [' 1 ', 'preline'] + expectedLineAttribs: ['+6', '+7'], + expectedText: [' 1 ', 'preline'], }, dontDeleteSpaceInsideElements: { description: 'Preserve spaces on the beginning and end of a element', html: 'Need more space s !
', expectedLineAttribs: ['+f*0+3+1'], - expectedText: ['Need more space s !'] + expectedText: ['Need more space s !'], }, dontDeleteSpaceOutsideElements: { description: 'Preserve spaces outside elements', html: 'Need more space s !
', expectedLineAttribs: ['+g*0+1+2'], - expectedText: ['Need more space s !'] + expectedText: ['Need more space s !'], }, dontDeleteSpaceAtEndOfElement: { description: 'Preserve spaces at the end of an element', html: 'Need more space s !
', expectedLineAttribs: ['+g*0+2+1'], - expectedText: ['Need more space s !'] + expectedText: ['Need more space s !'], }, dontDeleteSpaceAtBeginOfElements: { description: 'Preserve spaces at the start of an element', html: 'Need more space s !
', expectedLineAttribs: ['+f*0+2+2'], - expectedText: ['Need more space s !'] + expectedText: ['Need more space s !'], }, }; @@ -325,7 +331,7 @@ describe(__filename, function () { function arraysEqual(a, b) { if (a === b) return true; if (a == null || b == null) return false; - if (a.length != b.length) return false; + if (a.length !== b.length) return false; // If you don't care about the order of the elements inside // the array, you should sort both arrays here. From 906b2624edb5e0729c1e42d7b851dc80fff640ca Mon Sep 17 00:00:00 2001 From: Richard Hansen Date: Sun, 24 Jan 2021 01:28:11 -0500 Subject: [PATCH 02/26] tests: Re-enable import/export test that is now working --- tests/backend/specs/api/importexport.js | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/backend/specs/api/importexport.js b/tests/backend/specs/api/importexport.js index fe61dfa12..870a715b2 100644 --- a/tests/backend/specs/api/importexport.js +++ b/tests/backend/specs/api/importexport.js @@ -31,7 +31,6 @@ const testImports = { input: '
  • FOO
', expectedHTML: '
  • FOO

', expectedText: '\t* FOO\n\n', - disabled: true, }, 'prefixcorrectlinenumber': { input: '
  1. should be 1
  2. should be 2
', From 53160f4a21dba880df94461f2def288bcb43ab76 Mon Sep 17 00:00:00 2001 From: Richard Hansen Date: Sun, 24 Jan 2021 03:30:43 -0500 Subject: [PATCH 03/26] tests: Delete invalid contentcollector test The HTML spec doesn't allow `
    ` to be a child of `
      ` (it must be a child of `
    1. ` instead). --- tests/backend/specs/contentcollector.js | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tests/backend/specs/contentcollector.js b/tests/backend/specs/contentcollector.js index 83417966d..04970d74d 100644 --- a/tests/backend/specs/contentcollector.js +++ b/tests/backend/specs/contentcollector.js @@ -78,12 +78,6 @@ const tests = { expectedText: ['*should be 1', 'hello', '*should be 1', '*should be 2', ''], noteToSelf: "Shouldn't include attribute marker in the

      line", }, - bulletListInOL: { - description: 'A bullet within an OL should not change numbering..', - html: '

      1. should be 1
        • should be a bullet
      2. should be 2

      ', - expectedLineAttribs: ['*0*1*2*3+1+b', '*0*4*2*3+1+i', '*0*1*2*5+1+b', ''], - expectedText: ['*should be 1', '*should be a bullet', '*should be 2', ''], - }, testP: { description: 'A single

      should create a new line', html: '

      ', From 32a0df4883d17ea9e8f3871f2785c2916416c3b3 Mon Sep 17 00:00:00 2001 From: Richard Hansen Date: Sun, 24 Jan 2021 03:31:45 -0500 Subject: [PATCH 04/26] tests: Fix invalid HTML in contentcollector tests The HTML spec does not permit `
      ` as a child of `

      `. --- tests/backend/specs/contentcollector.js | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/backend/specs/contentcollector.js b/tests/backend/specs/contentcollector.js index 04970d74d..81ac08ed6 100644 --- a/tests/backend/specs/contentcollector.js +++ b/tests/backend/specs/contentcollector.js @@ -221,11 +221,11 @@ const tests = { multiLineParagraphWithPre: { description: 'lines in preformatted text should be kept intact', html: `

      -а б в г ґ д е є ж з и і ї й к л м н о

      multiple
      +а б в г ґ д е є ж з и і ї й к л м н о

      multiple
       lines
       in
       pre
      -

      п р с т у ф х ц ч ш щ ю я +

      п р с т у ф х ц ч ш щ ю я ь

      `, expectedLineAttribs: ['+11', '+8', '+5', '+2', '+3', '+r'], @@ -242,8 +242,8 @@ pre description: 'pre should be on a new line not preceeded by a space', html: `

      1 -

      preline
      -

      `, +

      preline
      +
      `, expectedLineAttribs: ['+6', '+7'], expectedText: [' 1 ', 'preline'], }, From dd815892f296c785b3ac98b7bb70b0f4ac6ef474 Mon Sep 17 00:00:00 2001 From: Richard Hansen Date: Sun, 24 Jan 2021 01:29:24 -0500 Subject: [PATCH 05/26] tests: Delete erroneous `describe()` calls `describe()` is meant to be used by independent tests, but the tests in this file are not independent. Add a higher-level `describe()` call and delete all of the `describe()` calls that wrap a single test. --- tests/backend/specs/api/importexport.js | 30 ++++++++++--------------- 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/tests/backend/specs/api/importexport.js b/tests/backend/specs/api/importexport.js index 870a715b2..fefd146f3 100644 --- a/tests/backend/specs/api/importexport.js +++ b/tests/backend/specs/api/importexport.js @@ -227,15 +227,15 @@ const testImports = { describe(__filename, function () { Object.keys(testImports).forEach((testName) => { - const testPadId = makeid(); - const test = testImports[testName]; - if (test.disabled) { - return xit(`DISABLED: ${testName}`, function (done) { - done(); - }); - } - describe(`createPad ${testName}`, function () { - it('creates a new Pad', function (done) { + describe(testName, function () { + const testPadId = makeid(); + const test = testImports[testName]; + if (test.disabled) { + return xit(`DISABLED: ${testName}`, function (done) { + done(); + }); + } + it('createPad', function (done) { api.get(`${endPoint('createPad')}&padID=${testPadId}`) .expect((res) => { if (res.body.code !== 0) throw new Error('Unable to create new Pad'); @@ -243,10 +243,8 @@ describe(__filename, function () { .expect('Content-Type', /json/) .expect(200, done); }); - }); - describe(`setHTML ${testName}`, function () { - it('Sets the HTML', function (done) { + it('setHTML', function (done) { api.get(`${endPoint('setHTML')}&padID=${testPadId}&html=${encodeURIComponent(test.input)}`) .expect((res) => { if (res.body.code !== 0) throw new Error(`Error:${testName}`); @@ -254,10 +252,8 @@ describe(__filename, function () { .expect('Content-Type', /json/) .expect(200, done); }); - }); - describe(`getHTML ${testName}`, function () { - it('Gets back the HTML of a Pad', function (done) { + it('getHTML', function (done) { api.get(`${endPoint('getHTML')}&padID=${testPadId}`) .expect((res) => { const receivedHtml = res.body.data.html; @@ -279,10 +275,8 @@ describe(__filename, function () { .expect('Content-Type', /json/) .expect(200, done); }); - }); - describe(`getText ${testName}`, function () { - it('Gets back the Text of a Pad', function (done) { + it('getText', function (done) { api.get(`${endPoint('getText')}&padID=${testPadId}`) .expect((res) => { const receivedText = res.body.data.text; From fc69ae78aafa2765731fd8410b2535613b50902c Mon Sep 17 00:00:00 2001 From: Richard Hansen Date: Sun, 24 Jan 2021 01:34:28 -0500 Subject: [PATCH 06/26] tests: Use `assert.deepEqual()` to simplify equality checks --- tests/backend/specs/contentcollector.js | 21 +++------------------ 1 file changed, 3 insertions(+), 18 deletions(-) diff --git a/tests/backend/specs/contentcollector.js b/tests/backend/specs/contentcollector.js index 81ac08ed6..7014cac2a 100644 --- a/tests/backend/specs/contentcollector.js +++ b/tests/backend/specs/contentcollector.js @@ -10,6 +10,7 @@ */ const AttributePool = require('ep_etherpad-lite/static/js/AttributePool'); +const assert = require('assert').strict; const cheerio = require('ep_etherpad-lite/node_modules/cheerio'); const contentcollector = require('ep_etherpad-lite/static/js/contentcollector'); @@ -298,24 +299,8 @@ describe(__filename, function () { const recievedText = new Array(result.lines); const expectedText = testObj.expectedText; - // Check recieved text matches the expected text - if (arraysEqual(recievedText[0], expectedText)) { - // console.log("PASS: Recieved Text did match Expected Text\nRecieved:", recievedText[0], "\nExpected:", testObj.expectedText) - } else { - console.error('FAIL: Recieved Text did not match Expected Text\nRecieved:', recievedText[0], '\nExpected:', testObj.expectedText); - throw new Error(); - } - - // Check recieved attributes matches the expected attributes - if (arraysEqual(recievedAttributes, expectedAttributes)) { - // console.log("PASS: Recieved Attributes matched Expected Attributes"); - done(); - } else { - console.error('FAIL', test, testObj.description); - console.error('FAIL: Recieved Attributes did not match Expected Attributes\nRecieved: ', recievedAttributes, '\nExpected: ', expectedAttributes); - console.error('FAILING HTML', testObj.html); - throw new Error(); - } + assert.deepEqual(recievedText[0], expectedText); + assert.deepEqual(recievedAttributes, expectedAttributes); }); }); } From b164f9b4319fb06ea041feea51e1efc2fe272713 Mon Sep 17 00:00:00 2001 From: Richard Hansen Date: Sun, 24 Jan 2021 01:54:14 -0500 Subject: [PATCH 07/26] tests: Replace "expected" with "want", "received" with "got" "Got" and "want" are common terms for testing, plus this fixes a spelling mistake ("received" was misspelled as "recieved"). --- tests/backend/specs/api/importexport.js | 148 ++++++++++++------------ tests/backend/specs/contentcollector.js | 144 +++++++++++------------ 2 files changed, 146 insertions(+), 146 deletions(-) diff --git a/tests/backend/specs/api/importexport.js b/tests/backend/specs/api/importexport.js index fefd146f3..1a9e94332 100644 --- a/tests/backend/specs/api/importexport.js +++ b/tests/backend/specs/api/importexport.js @@ -17,135 +17,135 @@ const apiVersion = 1; const testImports = { 'malformed': { input: '
    2. wtf
', - expectedHTML: 'wtf

', - expectedText: 'wtf\n\n', + wantHTML: 'wtf

', + wantText: 'wtf\n\n', disabled: true, }, 'nonelistiteminlist #3620': { input: '
    test
  • FOO
', - expectedHTML: '
    test
  • FOO

', - expectedText: '\ttest\n\t* FOO\n\n', + wantHTML: '
    test
  • FOO

', + wantText: '\ttest\n\t* FOO\n\n', disabled: true, }, 'whitespaceinlist #3620': { input: '
  • FOO
', - expectedHTML: '
  • FOO

', - expectedText: '\t* FOO\n\n', + wantHTML: '
  • FOO

', + wantText: '\t* FOO\n\n', }, 'prefixcorrectlinenumber': { input: '
  1. should be 1
  2. should be 2
', - expectedHTML: '
  1. should be 1
  2. should be 2

', - expectedText: '\t1. should be 1\n\t2. should be 2\n\n', + wantHTML: '
  1. should be 1
  2. should be 2

', + wantText: '\t1. should be 1\n\t2. should be 2\n\n', }, 'prefixcorrectlinenumbernested': { input: '
  1. should be 1
    1. foo
  2. should be 2
', - expectedHTML: '
  1. should be 1
    1. foo
  2. should be 2

', - expectedText: '\t1. should be 1\n\t\t1.1. foo\n\t2. should be 2\n\n', + wantHTML: '
  1. should be 1
    1. foo
  2. should be 2

', + wantText: '\t1. should be 1\n\t\t1.1. foo\n\t2. should be 2\n\n', }, /* "prefixcorrectlinenumber when introduced none list item - currently not supported see #3450": { input: '
  1. should be 1
  2. test
  3. should be 2
', - expectedHTML: '
  1. should be 1
  2. test
  3. should be 2

', - expectedText: '\t1. should be 1\n\ttest\n\t2. should be 2\n\n', + wantHTML: '
  1. should be 1
  2. test
  3. should be 2

', + wantText: '\t1. should be 1\n\ttest\n\t2. should be 2\n\n', } , "newlinesshouldntresetlinenumber #2194": { input: '
  1. should be 1
  2. test
  3. should be 2
', - expectedHTML: '
  1. should be 1
  2. test
  3. should be 2

', - expectedText: '\t1. should be 1\n\ttest\n\t2. should be 2\n\n', + wantHTML: '
  1. should be 1
  2. test
  3. should be 2

', + wantText: '\t1. should be 1\n\ttest\n\t2. should be 2\n\n', } */ 'ignoreAnyTagsOutsideBody': { description: 'Content outside body should be ignored', input: 'titleempty
', - expectedHTML: 'empty

', - expectedText: 'empty\n\n', + wantHTML: 'empty

', + wantText: 'empty\n\n', }, 'indentedListsAreNotBullets': { description: 'Indented lists are represented with tabs and without bullets', input: '
  • indent
  • indent
', - expectedHTML: '
  • indent
  • indent

', - expectedText: '\tindent\n\tindent\n\n', + wantHTML: '
  • indent
  • indent

', + wantText: '\tindent\n\tindent\n\n', }, 'lineWithMultipleSpaces': { description: 'Multiple spaces should be collapsed', input: 'Text with more than one space.
', - expectedHTML: 'Text with more than one space.

', - expectedText: 'Text with more than one space.\n\n', + wantHTML: 'Text with more than one space.

', + wantText: 'Text with more than one space.\n\n', }, 'lineWithMultipleNonBreakingAndNormalSpaces': { // XXX the HTML between "than" and "one" looks strange description: 'non-breaking space should be preserved, but can be replaced when it', input: 'Text with  more   than  one space.
', - expectedHTML: 'Text with  more   than  one space.

', - expectedText: 'Text with more than one space.\n\n', + wantHTML: 'Text with  more   than  one space.

', + wantText: 'Text with more than one space.\n\n', }, 'multiplenbsp': { description: 'Multiple non-breaking space should be preserved', input: '  
', - expectedHTML: '  

', - expectedText: ' \n\n', + wantHTML: '  

', + wantText: ' \n\n', }, 'multipleNonBreakingSpaceBetweenWords': { description: 'A normal space is always inserted before a word', input: '  word1  word2   word3
', - expectedHTML: '  word1  word2   word3

', - expectedText: ' word1 word2 word3\n\n', + wantHTML: '  word1  word2   word3

', + wantText: ' word1 word2 word3\n\n', }, 'nonBreakingSpacePreceededBySpaceBetweenWords': { description: 'A non-breaking space preceeded by a normal space', input: '  word1  word2  word3
', - expectedHTML: ' word1  word2  word3

', - expectedText: ' word1 word2 word3\n\n', + wantHTML: ' word1  word2  word3

', + wantText: ' word1 word2 word3\n\n', }, 'nonBreakingSpaceFollowededBySpaceBetweenWords': { description: 'A non-breaking space followed by a normal space', input: '  word1  word2  word3
', - expectedHTML: '  word1  word2  word3

', - expectedText: ' word1 word2 word3\n\n', + wantHTML: '  word1  word2  word3

', + wantText: ' word1 word2 word3\n\n', }, 'spacesAfterNewline': { description: 'Collapse spaces that follow a newline', input: 'something
something
', - expectedHTML: 'something
something

', - expectedText: 'something\nsomething\n\n', + wantHTML: 'something
something

', + wantText: 'something\nsomething\n\n', }, 'spacesAfterNewlineP': { description: 'Collapse spaces that follow a paragraph', input: 'something

something
', - expectedHTML: 'something

something

', - expectedText: 'something\n\nsomething\n\n', + wantHTML: 'something

something

', + wantText: 'something\n\nsomething\n\n', }, 'spacesAtEndOfLine': { description: 'Collapse spaces that preceed/follow a newline', input: 'something
something
', - expectedHTML: 'something
something

', - expectedText: 'something\nsomething\n\n', + wantHTML: 'something
something

', + wantText: 'something\nsomething\n\n', }, 'spacesAtEndOfLineP': { description: 'Collapse spaces that preceed/follow a paragraph', input: 'something

something
', - expectedHTML: 'something

something

', - expectedText: 'something\n\nsomething\n\n', + wantHTML: 'something

something

', + wantText: 'something\n\nsomething\n\n', }, 'nonBreakingSpacesAfterNewlines': { description: 'Don\'t collapse non-breaking spaces that follow a newline', input: 'something
   something
', - expectedHTML: 'something
   something

', - expectedText: 'something\n something\n\n', + wantHTML: 'something
   something

', + wantText: 'something\n something\n\n', }, 'nonBreakingSpacesAfterNewlinesP': { description: 'Don\'t collapse non-breaking spaces that follow a paragraph', input: 'something

   something
', - expectedHTML: 'something

   something

', - expectedText: 'something\n\n something\n\n', + wantHTML: 'something

   something

', + wantText: 'something\n\n something\n\n', }, 'collapseSpacesInsideElements': { description: 'Preserve only one space when multiple are present', input: 'Need more space s !
', - expectedHTML: 'Need more space s !

', - expectedText: 'Need more space s !\n\n', + wantHTML: 'Need more space s !

', + wantText: 'Need more space s !\n\n', }, 'collapseSpacesAcrossNewlines': { description: 'Newlines and multiple spaces across newlines should be collapsed', @@ -155,14 +155,14 @@ const testImports = { space s !
`, - expectedHTML: 'Need more space s !

', - expectedText: 'Need more space s !\n\n', + wantHTML: 'Need more space s !

', + wantText: 'Need more space s !\n\n', }, 'multipleNewLinesAtBeginning': { description: 'Multiple new lines and paragraphs at the beginning should be preserved', input: '

first line

second line
', - expectedHTML: '



first line

second line

', - expectedText: '\n\n\n\nfirst line\n\nsecond line\n\n', + wantHTML: '



first line

second line

', + wantText: '\n\n\n\nfirst line\n\nsecond line\n\n', }, 'multiLineParagraph': { description: 'A paragraph with multiple lines should not loose spaces when lines are combined', @@ -172,8 +172,8 @@ const testImports = { п р с т у ф х ц ч ш щ ю я ь

`, - expectedHTML: 'а б в г ґ д е є ж з и і ї й к л м н о п р с т у ф х ц ч ш щ ю я ь

', - expectedText: 'а б в г ґ д е є ж з и і ї й к л м н о п р с т у ф х ц ч ш щ ю я ь\n\n', + wantHTML: 'а б в г ґ д е є ж з и і ї й к л м н о п р с т у ф х ц ч ш щ ю я ь

', + wantText: 'а б в г ґ д е є ж з и і ї й к л м н о п р с т у ф х ц ч ш щ ю я ь\n\n', }, 'multiLineParagraphWithPre': { // XXX why is there   before "in"? @@ -187,8 +187,8 @@ const testImports = {

п р с т у ф х ц ч ш щ ю я ь

`, - expectedHTML: 'а б в г ґ д е є ж з и і ї й к л м н о
multiple
   lines
 in
      pre

п р с т у ф х ц ч ш щ ю я ь

', - expectedText: 'а б в г ґ д е є ж з и і ї й к л м н о\nmultiple\n lines\n in\n pre\n\nп р с т у ф х ц ч ш щ ю я ь\n\n', + wantHTML: 'а б в г ґ д е є ж з и і ї й к л м н о
multiple
   lines
 in
      pre

п р с т у ф х ц ч ш щ ю я ь

', + wantText: 'а б в г ґ д е є ж з и і ї й к л м н о\nmultiple\n lines\n in\n pre\n\nп р с т у ф х ц ч ш щ ю я ь\n\n', }, 'preIntroducesASpace': { description: 'pre should be on a new line not preceeded by a space', @@ -196,32 +196,32 @@ const testImports = { 1
preline
 

`, - expectedHTML: '1
preline


', - expectedText: '1\npreline\n\n\n', + wantHTML: '1
preline


', + wantText: '1\npreline\n\n\n', }, 'dontDeleteSpaceInsideElements': { description: 'Preserve spaces inside elements', input: 'Need more space s !
', - expectedHTML: 'Need more space s !

', - expectedText: 'Need more space s !\n\n', + wantHTML: 'Need more space s !

', + wantText: 'Need more space s !\n\n', }, 'dontDeleteSpaceOutsideElements': { description: 'Preserve spaces outside elements', input: 'Need more space s !
', - expectedHTML: 'Need more space s !

', - expectedText: 'Need more space s !\n\n', + wantHTML: 'Need more space s !

', + wantText: 'Need more space s !\n\n', }, 'dontDeleteSpaceAtEndOfElement': { description: 'Preserve spaces at the end of an element', input: 'Need more space s !
', - expectedHTML: 'Need more space s !

', - expectedText: 'Need more space s !\n\n', + wantHTML: 'Need more space s !

', + wantText: 'Need more space s !\n\n', }, 'dontDeleteSpaceAtBeginOfElements': { description: 'Preserve spaces at the start of an element', input: 'Need more space s !
', - expectedHTML: 'Need more space s !

', - expectedText: 'Need more space s !\n\n', + wantHTML: 'Need more space s !

', + wantText: 'Need more space s !\n\n', }, }; @@ -256,17 +256,17 @@ describe(__filename, function () { it('getHTML', function (done) { api.get(`${endPoint('getHTML')}&padID=${testPadId}`) .expect((res) => { - const receivedHtml = res.body.data.html; - if (receivedHtml !== test.expectedHTML) { + const gotHtml = res.body.data.html; + if (gotHtml !== test.wantHTML) { throw new Error(`HTML received from export is not the one we were expecting. Test Name: ${testName} - Received: - ${JSON.stringify(receivedHtml)} + Got: + ${JSON.stringify(gotHtml)} - Expected: - ${JSON.stringify(test.expectedHTML)} + Want: + ${JSON.stringify(test.wantHTML)} Which is a different version of the originally imported one: ${test.input}`); @@ -279,17 +279,17 @@ describe(__filename, function () { it('getText', function (done) { api.get(`${endPoint('getText')}&padID=${testPadId}`) .expect((res) => { - const receivedText = res.body.data.text; - if (receivedText !== test.expectedText) { + const gotText = res.body.data.text; + if (gotText !== test.wantText) { throw new Error(`Text received from export is not the one we were expecting. Test Name: ${testName} - Received: - ${JSON.stringify(receivedText)} + Got: + ${JSON.stringify(gotText)} - Expected: - ${JSON.stringify(test.expectedText)} + Want: + ${JSON.stringify(test.wantText)} Which is a different version of the originally imported one: ${test.input}`); diff --git a/tests/backend/specs/contentcollector.js b/tests/backend/specs/contentcollector.js index 7014cac2a..c9ebb7482 100644 --- a/tests/backend/specs/contentcollector.js +++ b/tests/backend/specs/contentcollector.js @@ -18,17 +18,17 @@ const tests = { nestedLi: { description: 'Complex nested Li', html: '
  1. one
    1. 1.1
  2. two
', - expectedLineAttribs: [ + wantLineAttribs: [ '*0*1*2*3+1+3', '*0*4*2*5+1+3', '*0*1*2*5+1+3', ], - expectedText: [ + wantText: [ '*one', '*1.1', '*two', ], }, complexNest: { description: 'Complex list of different types', html: '
  1. item
    1. item1
    2. item2
', - expectedLineAttribs: [ + wantLineAttribs: [ '*0*1*2+1+3', '*0*1*2+1+3', '*0*1*2+1+1', @@ -40,7 +40,7 @@ const tests = { '*0*6*2*7+1+5', '*0*6*2*7+1+5', ], - expectedText: [ + wantText: [ '*one', '*two', '*0', @@ -56,142 +56,142 @@ const tests = { ul: { description: 'Tests if uls properly get attributes', html: '
div

foo

', - expectedLineAttribs: ['*0*1*2+1+1', '*0*1*2+1+1', '+3', '+3'], - expectedText: ['*a', '*b', 'div', 'foo'], + wantLineAttribs: ['*0*1*2+1+1', '*0*1*2+1+1', '+3', '+3'], + wantText: ['*a', '*b', 'div', 'foo'], }, ulIndented: { description: 'Tests if indented uls properly get attributes', html: '

foo

', - expectedLineAttribs: ['*0*1*2+1+1', '*0*3*2+1+1', '*0*1*2+1+1', '+3'], - expectedText: ['*a', '*b', '*a', 'foo'], + wantLineAttribs: ['*0*1*2+1+1', '*0*3*2+1+1', '*0*1*2+1+1', '+3'], + wantText: ['*a', '*b', '*a', 'foo'], }, ol: { description: 'Tests if ols properly get line numbers when in a normal OL', html: '
  1. a
  2. b
  3. c

test

', - expectedLineAttribs: ['*0*1*2*3+1+1', '*0*1*2*3+1+1', '*0*1*2*3+1+1', '+4'], - expectedText: ['*a', '*b', '*c', 'test'], + wantLineAttribs: ['*0*1*2*3+1+1', '*0*1*2*3+1+1', '*0*1*2*3+1+1', '+4'], + wantText: ['*a', '*b', '*c', 'test'], noteToSelf: 'Ensure empty P does not induce line attribute marker, wont this break the editor?', }, lineDoBreakInOl: { description: 'A single completely empty line break within an ol should reset count if OL is closed off..', html: '
  1. should be 1

hello

  1. should be 1
  2. should be 2

', - expectedLineAttribs: ['*0*1*2*3+1+b', '+5', '*0*1*2*4+1+b', '*0*1*2*4+1+b', ''], - expectedText: ['*should be 1', 'hello', '*should be 1', '*should be 2', ''], + wantLineAttribs: ['*0*1*2*3+1+b', '+5', '*0*1*2*4+1+b', '*0*1*2*4+1+b', ''], + wantText: ['*should be 1', 'hello', '*should be 1', '*should be 2', ''], noteToSelf: "Shouldn't include attribute marker in the

line", }, testP: { description: 'A single

should create a new line', html: '

', - expectedLineAttribs: ['', ''], - expectedText: ['', ''], + wantLineAttribs: ['', ''], + wantText: ['', ''], noteToSelf: '

should create a line break but not break numbering', }, nestedOl: { description: 'Tests if ols properly get line numbers when in a normal OL', html: 'a
  1. b
    1. c
notlist

foo

', - expectedLineAttribs: ['+1', '*0*1*2*3+1+1', '*0*4*2*5+1+1', '+7', '+3'], - expectedText: ['a', '*b', '*c', 'notlist', 'foo'], + wantLineAttribs: ['+1', '*0*1*2*3+1+1', '*0*4*2*5+1+1', '+7', '+3'], + wantText: ['a', '*b', '*c', 'notlist', 'foo'], noteToSelf: 'Ensure empty P does not induce line attribute marker, wont this break the editor?', }, nestedOl2: { description: 'First item being an UL then subsequent being OL will fail', html: '', - expectedLineAttribs: ['+1', '*0*1*2*3+1+1', '*0*4*2*5+1+1'], - expectedText: ['a', '*b', '*c'], + wantLineAttribs: ['+1', '*0*1*2*3+1+1', '*0*4*2*5+1+1'], + wantText: ['a', '*b', '*c'], noteToSelf: 'Ensure empty P does not induce line attribute marker, wont this break the editor?', disabled: true, }, lineDontBreakOL: { description: 'A single completely empty line break within an ol should NOT reset count', html: '
  1. should be 1
  2. should be 2
  3. should be 3

', - expectedLineAttribs: [], - expectedText: ['*should be 1', '*should be 2', '*should be 3'], + wantLineAttribs: [], + wantText: ['*should be 1', '*should be 2', '*should be 3'], noteToSelf: "

should create a line break but not break numbering -- This is what I can't get working!", disabled: true, }, ignoreAnyTagsOutsideBody: { description: 'Content outside body should be ignored', html: 'titleempty
', - expectedLineAttribs: ['+5'], - expectedText: ['empty'], + wantLineAttribs: ['+5'], + wantText: ['empty'], }, lineWithMultipleSpaces: { description: 'Multiple spaces should be preserved', html: 'Text with more than one space.
', - expectedLineAttribs: ['+10'], - expectedText: ['Text with more than one space.'], + wantLineAttribs: ['+10'], + wantText: ['Text with more than one space.'], }, lineWithMultipleNonBreakingAndNormalSpaces: { description: 'non-breaking and normal space should be preserved', html: 'Text with  more   than  one space.
', - expectedLineAttribs: ['+10'], - expectedText: ['Text with more than one space.'], + wantLineAttribs: ['+10'], + wantText: ['Text with more than one space.'], }, multiplenbsp: { description: 'Multiple nbsp should be preserved', html: '  
', - expectedLineAttribs: ['+2'], - expectedText: [' '], + wantLineAttribs: ['+2'], + wantText: [' '], }, multipleNonBreakingSpaceBetweenWords: { description: 'Multiple nbsp between words ', html: '  word1  word2   word3
', - expectedLineAttribs: ['+m'], - expectedText: [' word1 word2 word3'], + wantLineAttribs: ['+m'], + wantText: [' word1 word2 word3'], }, nonBreakingSpacePreceededBySpaceBetweenWords: { description: 'A non-breaking space preceeded by a normal space', html: '  word1  word2  word3
', - expectedLineAttribs: ['+l'], - expectedText: [' word1 word2 word3'], + wantLineAttribs: ['+l'], + wantText: [' word1 word2 word3'], }, nonBreakingSpaceFollowededBySpaceBetweenWords: { description: 'A non-breaking space followed by a normal space', html: '  word1  word2  word3
', - expectedLineAttribs: ['+l'], - expectedText: [' word1 word2 word3'], + wantLineAttribs: ['+l'], + wantText: [' word1 word2 word3'], }, spacesAfterNewline: { description: 'Don\'t collapse spaces that follow a newline', html: 'something
something
', - expectedLineAttribs: ['+9', '+m'], - expectedText: ['something', ' something'], + wantLineAttribs: ['+9', '+m'], + wantText: ['something', ' something'], }, spacesAfterNewlineP: { description: 'Don\'t collapse spaces that follow a empty paragraph', html: 'something

something
', - expectedLineAttribs: ['+9', '', '+m'], - expectedText: ['something', '', ' something'], + wantLineAttribs: ['+9', '', '+m'], + wantText: ['something', '', ' something'], }, spacesAtEndOfLine: { description: 'Don\'t collapse spaces that preceed/follow a newline', html: 'something
something
', - expectedLineAttribs: ['+l', '+m'], - expectedText: ['something ', ' something'], + wantLineAttribs: ['+l', '+m'], + wantText: ['something ', ' something'], }, spacesAtEndOfLineP: { description: 'Don\'t collapse spaces that preceed/follow a empty paragraph', html: 'something

something
', - expectedLineAttribs: ['+l', '', '+m'], - expectedText: ['something ', '', ' something'], + wantLineAttribs: ['+l', '', '+m'], + wantText: ['something ', '', ' something'], }, nonBreakingSpacesAfterNewlines: { description: 'Don\'t collapse non-breaking spaces that follow a newline', html: 'something
   something
', - expectedLineAttribs: ['+9', '+c'], - expectedText: ['something', ' something'], + wantLineAttribs: ['+9', '+c'], + wantText: ['something', ' something'], }, nonBreakingSpacesAfterNewlinesP: { description: 'Don\'t collapse non-breaking spaces that follow a paragraph', html: 'something

   something
', - expectedLineAttribs: ['+9', '', '+c'], - expectedText: ['something', '', ' something'], + wantLineAttribs: ['+9', '', '+c'], + wantText: ['something', '', ' something'], }, preserveSpacesInsideElements: { description: 'Preserve all spaces when multiple are present', html: 'Need more space s !
', - expectedLineAttribs: ['+h*0+4+2'], - expectedText: ['Need more space s !'], + wantLineAttribs: ['+h*0+4+2'], + wantText: ['Need more space s !'], }, preserveSpacesAcrossNewlines: { description: 'Newlines and multiple spaces across newlines should be preserved', @@ -201,14 +201,14 @@ const tests = { space s !
`, - expectedLineAttribs: ['+19*0+4+b'], - expectedText: ['Need more space s !'], + wantLineAttribs: ['+19*0+4+b'], + wantText: ['Need more space s !'], }, multipleNewLinesAtBeginning: { description: 'Multiple new lines at the beginning should be preserved', html: '

first line

second line
', - expectedLineAttribs: ['', '', '', '', '+a', '', '+b'], - expectedText: ['', '', '', '', 'first line', '', 'second line'], + wantLineAttribs: ['', '', '', '', '+a', '', '+b'], + wantText: ['', '', '', '', 'first line', '', 'second line'], }, multiLineParagraph: { description: 'A paragraph with multiple lines should not loose spaces when lines are combined', @@ -216,8 +216,8 @@ const tests = { а б в г ґ д е є ж з и і ї й к л м н о п р с т у ф х ц ч ш щ ю я ь

`, - expectedLineAttribs: ['+1t'], - expectedText: ['а б в г ґ д е є ж з и і ї й к л м н о п р с т у ф х ц ч ш щ ю я ь'], + wantLineAttribs: ['+1t'], + wantText: ['а б в г ґ д е є ж з и і ї й к л м н о п р с т у ф х ц ч ш щ ю я ь'], }, multiLineParagraphWithPre: { description: 'lines in preformatted text should be kept intact', @@ -229,8 +229,8 @@ pre

п р с т у ф х ц ч ш щ ю я ь

`, - expectedLineAttribs: ['+11', '+8', '+5', '+2', '+3', '+r'], - expectedText: [ + wantLineAttribs: ['+11', '+8', '+5', '+2', '+3', '+r'], + wantText: [ 'а б в г ґ д е є ж з и і ї й к л м н о', 'multiple', 'lines', @@ -245,32 +245,32 @@ pre 1

preline
 
`, - expectedLineAttribs: ['+6', '+7'], - expectedText: [' 1 ', 'preline'], + wantLineAttribs: ['+6', '+7'], + wantText: [' 1 ', 'preline'], }, dontDeleteSpaceInsideElements: { description: 'Preserve spaces on the beginning and end of a element', html: 'Need more space s !
', - expectedLineAttribs: ['+f*0+3+1'], - expectedText: ['Need more space s !'], + wantLineAttribs: ['+f*0+3+1'], + wantText: ['Need more space s !'], }, dontDeleteSpaceOutsideElements: { description: 'Preserve spaces outside elements', html: 'Need more space s !
', - expectedLineAttribs: ['+g*0+1+2'], - expectedText: ['Need more space s !'], + wantLineAttribs: ['+g*0+1+2'], + wantText: ['Need more space s !'], }, dontDeleteSpaceAtEndOfElement: { description: 'Preserve spaces at the end of an element', html: 'Need more space s !
', - expectedLineAttribs: ['+g*0+2+1'], - expectedText: ['Need more space s !'], + wantLineAttribs: ['+g*0+2+1'], + wantText: ['Need more space s !'], }, dontDeleteSpaceAtBeginOfElements: { description: 'Preserve spaces at the start of an element', html: 'Need more space s !
', - expectedLineAttribs: ['+f*0+2+2'], - expectedText: ['Need more space s !'], + wantLineAttribs: ['+f*0+2+2'], + wantText: ['Need more space s !'], }, }; @@ -294,13 +294,13 @@ describe(__filename, function () { const cc = contentcollector.makeContentCollector(true, null, apool); cc.collectContent(doc); const result = cc.finish(); - const recievedAttributes = result.lineAttribs; - const expectedAttributes = testObj.expectedLineAttribs; - const recievedText = new Array(result.lines); - const expectedText = testObj.expectedText; + const gotAttributes = result.lineAttribs; + const wantAttributes = testObj.wantLineAttribs; + const gotText = new Array(result.lines); + const wantText = testObj.wantText; - assert.deepEqual(recievedText[0], expectedText); - assert.deepEqual(recievedAttributes, expectedAttributes); + assert.deepEqual(gotText[0], wantText); + assert.deepEqual(gotAttributes, wantAttributes); }); }); } From 56f617060a18d5830e29e582879030e87c2274ce Mon Sep 17 00:00:00 2001 From: Richard Hansen Date: Sun, 24 Jan 2021 01:57:11 -0500 Subject: [PATCH 08/26] tests: Fix missing call to `done` callback --- tests/backend/specs/contentcollector.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/backend/specs/contentcollector.js b/tests/backend/specs/contentcollector.js index c9ebb7482..e6aff389a 100644 --- a/tests/backend/specs/contentcollector.js +++ b/tests/backend/specs/contentcollector.js @@ -284,7 +284,7 @@ describe(__filename, function () { }); } - it(testObj.description, function (done) { + it(testObj.description, async function () { const $ = cheerio.load(testObj.html); // Load HTML into Cheerio const doc = $('body')[0]; // Creates a dom-like representation of HTML // Create an empty attribute pool From 42c25b25363a97ec3ab7a1e5d1eafe345a87a14d Mon Sep 17 00:00:00 2001 From: Richard Hansen Date: Sun, 24 Jan 2021 00:52:24 -0500 Subject: [PATCH 09/26] openapi: Fix error logging --- src/node/hooks/express/openapi.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/node/hooks/express/openapi.js b/src/node/hooks/express/openapi.js index d7059896a..444077fda 100644 --- a/src/node/hooks/express/openapi.js +++ b/src/node/hooks/express/openapi.js @@ -623,7 +623,7 @@ exports.expressCreateServer = (hookName, args, cb) => { } else { // an unknown error happened // log it and throw internal error - apiLogger.error(err); + apiLogger.error(err.stack || err.toString()); throw new createHTTPError.InternalError('internal error'); } }); From 74bb2f76ccd1474543390bd159bb52cb6119d3af Mon Sep 17 00:00:00 2001 From: Richard Hansen Date: Thu, 21 Jan 2021 02:30:29 -0500 Subject: [PATCH 10/26] contentcollector: Delete unused `domInterface` parameter --- src/static/js/ace2_inner.js | 2 +- src/static/js/contentcollector.js | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/static/js/ace2_inner.js b/src/static/js/ace2_inner.js index 95db2c14f..60b029f3b 100644 --- a/src/static/js/ace2_inner.js +++ b/src/static/js/ace2_inner.js @@ -1139,7 +1139,7 @@ function Ace2Inner() { lastDirtyNode = (lastDirtyNode && isNodeDirty(lastDirtyNode) && lastDirtyNode); if (firstDirtyNode && lastDirtyNode) { - const cc = makeContentCollector(isStyled, browser, rep.apool, null, className2Author); + const cc = makeContentCollector(isStyled, browser, rep.apool, className2Author); cc.notifySelection(selection); const dirtyNodes = []; for (let n = firstDirtyNode; n && diff --git a/src/static/js/contentcollector.js b/src/static/js/contentcollector.js index ab5781725..a77c4e5f0 100644 --- a/src/static/js/contentcollector.js +++ b/src/static/js/contentcollector.js @@ -32,8 +32,8 @@ const hooks = require('./pluginfw/hooks'); const sanitizeUnicode = (s) => UNorm.nfc(s); -const makeContentCollector = (collectStyles, abrowser, apool, domInterface, className2Author) => { - const dom = domInterface || { +const makeContentCollector = (collectStyles, abrowser, apool, className2Author) => { + const dom = { isNodeText: (n) => n.nodeType === 3, nodeTagName: (n) => n.tagName, nodeValue: (n) => n.nodeValue, From dd7fb1babee440e8395aa0862e96e57446199001 Mon Sep 17 00:00:00 2001 From: Richard Hansen Date: Sun, 24 Jan 2021 18:03:13 -0500 Subject: [PATCH 11/26] contentcollector: Document the `dom` object --- src/static/js/contentcollector.js | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/src/static/js/contentcollector.js b/src/static/js/contentcollector.js index a77c4e5f0..a717df3a3 100644 --- a/src/static/js/contentcollector.js +++ b/src/static/js/contentcollector.js @@ -33,26 +33,47 @@ const hooks = require('./pluginfw/hooks'); const sanitizeUnicode = (s) => UNorm.nfc(s); const makeContentCollector = (collectStyles, abrowser, apool, className2Author) => { + // This file is used both in browsers and with cheerio in Node.js (for importing HTML). Cheerio's + // Node-like objects are not 100% API compatible with the DOM Node specification; this `dom` + // object abstracts away the differences. const dom = { - isNodeText: (n) => n.nodeType === 3, + // .nodeType works with DOM and cheerio 0.22.0. Note: Cheerio 0.22.0 does not provide the + // Node.*_NODE constants, so they cannot be used here. + isNodeText: (n) => n.nodeType === 3, // Node.TEXT_NODE + // .tagName works with DOM and cheerio 0.22.0, but: + // * With DOM, .tagName is an uppercase string. + // * With cheerio 0.22.0, .tagName is a lowercase string. nodeTagName: (n) => n.tagName, + // .nodeValue works with DOM and cheerio 0.22.0. nodeValue: (n) => n.nodeValue, + // Returns the number of Node children (n.childNodes.length), not the number of Element children + // (n.children.length in DOM). nodeNumChildren: (n) => { + // .childNodes.length works with DOM and cheerio 0.22.0, except in cheerio the .childNodes + // property does not exist on text nodes (and maybe other non-element nodes). if (n.childNodes == null) return 0; return n.childNodes.length; }, + // Returns the i'th Node child (n.childNodes[i]), not the i'th Element child (n.children[i] in + // DOM). nodeChild: (n, i) => { if (n.childNodes.item == null) { + // .childNodes[] works with DOM and cheerio 0.22.0. return n.childNodes[i]; } + // .childNodes.item() works with DOM but not with cheerio 0.22.0. return n.childNodes.item(i); }, nodeProp: (n, p) => n[p], nodeAttr: (n, a) => { + // .getAttribute() works with DOM but not with cheerio 0.22.0. if (n.getAttribute != null) return n.getAttribute(a); + // .attribs[] works with cheerio 0.22.0 but not with DOM. if (n.attribs != null) return n.attribs[a]; return null; }, + // .innerHTML works with DOM but not with cheerio 0.22.0. Cheerio's Element-like objects have no + // equivalent. (Cheerio objects have an .html() method, but that isn't accessible here.) optNodeInnerHTML: (n) => n.innerHTML, }; From 99625950c8ceb78e652d2ac52e5dc4e521ae6a39 Mon Sep 17 00:00:00 2001 From: Richard Hansen Date: Thu, 21 Jan 2021 02:27:15 -0500 Subject: [PATCH 12/26] contentcollector: Factor out call to `.toLowerCase()` --- src/static/js/contentcollector.js | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/static/js/contentcollector.js b/src/static/js/contentcollector.js index a717df3a3..9f60b9033 100644 --- a/src/static/js/contentcollector.js +++ b/src/static/js/contentcollector.js @@ -43,7 +43,8 @@ const makeContentCollector = (collectStyles, abrowser, apool, className2Author) // .tagName works with DOM and cheerio 0.22.0, but: // * With DOM, .tagName is an uppercase string. // * With cheerio 0.22.0, .tagName is a lowercase string. - nodeTagName: (n) => n.tagName, + // For consistency, this function always returns a lowercase string. + nodeTagName: (n) => n.tagName && n.tagName.toLowerCase(), // .nodeValue works with DOM and cheerio 0.22.0. nodeValue: (n) => n.nodeValue, // Returns the number of Node children (n.childNodes.length), not the number of Element children @@ -88,7 +89,7 @@ const makeContentCollector = (collectStyles, abrowser, apool, className2Author) _blockElems[element] = 1; }); - const isBlockElement = (n) => !!_blockElems[(dom.nodeTagName(n) || '').toLowerCase()]; + const isBlockElement = (n) => !!_blockElems[dom.nodeTagName(n) || '']; const textify = (str) => sanitizeUnicode( str.replace(/(\n | \n)/g, ' ') @@ -406,7 +407,7 @@ const makeContentCollector = (collectStyles, abrowser, apool, className2Author) } } } else { - const tname = (dom.nodeTagName(node) || '').toLowerCase(); + const tname = dom.nodeTagName(node) || ''; if (tname === 'img') { hooks.callAll('collectContentImage', { From 8763c3bb2965874d2497f22d6bb347bc92aad22b Mon Sep 17 00:00:00 2001 From: Richard Hansen Date: Thu, 21 Jan 2021 00:30:55 -0500 Subject: [PATCH 13/26] contentcollector: Fix Element attribute accesses The `attribs` property is only available on cheerio's Element-like objects; DOM Element objects do not have an `attribs` property. Switch to `dom.nodeAttr()` to fix the logic for browsers. --- src/static/js/contentcollector.js | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/static/js/contentcollector.js b/src/static/js/contentcollector.js index 9f60b9033..ecd84c6bb 100644 --- a/src/static/js/contentcollector.js +++ b/src/static/js/contentcollector.js @@ -491,14 +491,14 @@ const makeContentCollector = (collectStyles, abrowser, apool, className2Author) cc.doAttrib(state, 'strikethrough'); } if (tname === 'ul' || tname === 'ol') { - let type = node.attribs ? node.attribs.class : null; + let type = dom.nodeAttr(node, 'class'); const rr = cls && /(?:^| )list-([a-z]+[0-9]+)\b/.exec(cls); // lists do not need to have a type, so before we make a wrong guess // check if we find a better hint within the node's children if (!rr && !type) { for (const i in node.children) { if (node.children[i] && node.children[i].name === 'ul') { - type = node.children[i].attribs.class; + type = dom.nodeAttr(node.children[i], 'class'); if (type) { break; } @@ -509,8 +509,8 @@ const makeContentCollector = (collectStyles, abrowser, apool, className2Author) type = rr[1]; } else { if (tname === 'ul') { - if ((type && type.match('indent')) || - (node.attribs && node.attribs.class && node.attribs.class.match('indent'))) { + const cls = dom.nodeAttr(node, 'class'); + if ((type && type.match('indent')) || (cls && cls.match('indent'))) { type = 'indent'; } else { type = 'bullet'; From 3cfec58948f652f3306ec0cb68d3fbe57ae1805b Mon Sep 17 00:00:00 2001 From: Richard Hansen Date: Sun, 24 Jan 2021 18:20:19 -0500 Subject: [PATCH 14/26] contentcollector: Rename `dom` functions for consistency with DOM spec --- src/static/js/contentcollector.js | 46 +++++++++++++++---------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/src/static/js/contentcollector.js b/src/static/js/contentcollector.js index ecd84c6bb..884f4b693 100644 --- a/src/static/js/contentcollector.js +++ b/src/static/js/contentcollector.js @@ -39,17 +39,17 @@ const makeContentCollector = (collectStyles, abrowser, apool, className2Author) const dom = { // .nodeType works with DOM and cheerio 0.22.0. Note: Cheerio 0.22.0 does not provide the // Node.*_NODE constants, so they cannot be used here. - isNodeText: (n) => n.nodeType === 3, // Node.TEXT_NODE + isTextNode: (n) => n.nodeType === 3, // Node.TEXT_NODE // .tagName works with DOM and cheerio 0.22.0, but: // * With DOM, .tagName is an uppercase string. // * With cheerio 0.22.0, .tagName is a lowercase string. // For consistency, this function always returns a lowercase string. - nodeTagName: (n) => n.tagName && n.tagName.toLowerCase(), + tagName: (n) => n.tagName && n.tagName.toLowerCase(), // .nodeValue works with DOM and cheerio 0.22.0. nodeValue: (n) => n.nodeValue, // Returns the number of Node children (n.childNodes.length), not the number of Element children // (n.children.length in DOM). - nodeNumChildren: (n) => { + numChildNodes: (n) => { // .childNodes.length works with DOM and cheerio 0.22.0, except in cheerio the .childNodes // property does not exist on text nodes (and maybe other non-element nodes). if (n.childNodes == null) return 0; @@ -57,7 +57,7 @@ const makeContentCollector = (collectStyles, abrowser, apool, className2Author) }, // Returns the i'th Node child (n.childNodes[i]), not the i'th Element child (n.children[i] in // DOM). - nodeChild: (n, i) => { + childNode: (n, i) => { if (n.childNodes.item == null) { // .childNodes[] works with DOM and cheerio 0.22.0. return n.childNodes[i]; @@ -66,7 +66,7 @@ const makeContentCollector = (collectStyles, abrowser, apool, className2Author) return n.childNodes.item(i); }, nodeProp: (n, p) => n[p], - nodeAttr: (n, a) => { + getAttribute: (n, a) => { // .getAttribute() works with DOM but not with cheerio 0.22.0. if (n.getAttribute != null) return n.getAttribute(a); // .attribs[] works with cheerio 0.22.0 but not with DOM. @@ -75,7 +75,7 @@ const makeContentCollector = (collectStyles, abrowser, apool, className2Author) }, // .innerHTML works with DOM but not with cheerio 0.22.0. Cheerio's Element-like objects have no // equivalent. (Cheerio objects have an .html() method, but that isn't accessible here.) - optNodeInnerHTML: (n) => n.innerHTML, + innerHTML: (n) => n.innerHTML, }; const _blockElems = { @@ -89,7 +89,7 @@ const makeContentCollector = (collectStyles, abrowser, apool, className2Author) _blockElems[element] = 1; }); - const isBlockElement = (n) => !!_blockElems[dom.nodeTagName(n) || '']; + const isBlockElement = (n) => !!_blockElems[dom.tagName(n) || '']; const textify = (str) => sanitizeUnicode( str.replace(/(\n | \n)/g, ' ') @@ -145,13 +145,13 @@ const makeContentCollector = (collectStyles, abrowser, apool, className2Author) let selEnd = [-1, -1]; const _isEmpty = (node, state) => { // consider clean blank lines pasted in IE to be empty - if (dom.nodeNumChildren(node) === 0) return true; - if (dom.nodeNumChildren(node) === 1 && + if (dom.numChildNodes(node) === 0) return true; + if (dom.numChildNodes(node) === 1 && getAssoc(node, 'shouldBeEmpty') && - dom.optNodeInnerHTML(node) === ' ' && + dom.innerHTML(node) === ' ' && !getAssoc(node, 'unpasted')) { if (state) { - const child = dom.nodeChild(node, 0); + const child = dom.childNode(node, 0); _reachPoint(child, 0, state); _reachPoint(child, 1, state); } @@ -171,7 +171,7 @@ const makeContentCollector = (collectStyles, abrowser, apool, className2Author) }; const _reachBlockPoint = (nd, idx, state) => { - if (!dom.isNodeText(nd)) _reachPoint(nd, idx, state); + if (!dom.isTextNode(nd)) _reachPoint(nd, idx, state); }; const _reachPoint = (nd, idx, state) => { @@ -338,9 +338,9 @@ const makeContentCollector = (collectStyles, abrowser, apool, className2Author) const startLine = lines.length() - 1; _reachBlockPoint(node, 0, state); - if (dom.isNodeText(node)) { + if (dom.isTextNode(node)) { let txt = dom.nodeValue(node); - const tname = dom.nodeAttr(node.parentNode, 'name'); + const tname = dom.getAttribute(node.parentNode, 'name'); const txtFromHook = hooks.callAll('collectContentLineText', { cc: this, @@ -407,7 +407,7 @@ const makeContentCollector = (collectStyles, abrowser, apool, className2Author) } } } else { - const tname = dom.nodeTagName(node) || ''; + const tname = dom.tagName(node) || ''; if (tname === 'img') { hooks.callAll('collectContentImage', { @@ -425,7 +425,7 @@ const makeContentCollector = (collectStyles, abrowser, apool, className2Author) if (tname === 'br') { this.breakLine = true; - const tvalue = dom.nodeAttr(node, 'value'); + const tvalue = dom.getAttribute(node, 'value'); const induceLineBreak = hooks.callAll('collectContentLineBreak', { cc: this, state, @@ -443,8 +443,8 @@ const makeContentCollector = (collectStyles, abrowser, apool, className2Author) } else if (tname === 'script' || tname === 'style') { // ignore } else if (!isEmpty) { - let styl = dom.nodeAttr(node, 'style'); - let cls = dom.nodeAttr(node, 'class'); + let styl = dom.getAttribute(node, 'style'); + let cls = dom.getAttribute(node, 'class'); let isPre = (tname === 'pre'); if ((!isPre) && abrowser && abrowser.safari) { isPre = (styl && /\bwhite-space:\s*pre\b/i.exec(styl)); @@ -491,14 +491,14 @@ const makeContentCollector = (collectStyles, abrowser, apool, className2Author) cc.doAttrib(state, 'strikethrough'); } if (tname === 'ul' || tname === 'ol') { - let type = dom.nodeAttr(node, 'class'); + let type = dom.getAttribute(node, 'class'); const rr = cls && /(?:^| )list-([a-z]+[0-9]+)\b/.exec(cls); // lists do not need to have a type, so before we make a wrong guess // check if we find a better hint within the node's children if (!rr && !type) { for (const i in node.children) { if (node.children[i] && node.children[i].name === 'ul') { - type = dom.nodeAttr(node.children[i], 'class'); + type = dom.getAttribute(node.children[i], 'class'); if (type) { break; } @@ -509,7 +509,7 @@ const makeContentCollector = (collectStyles, abrowser, apool, className2Author) type = rr[1]; } else { if (tname === 'ul') { - const cls = dom.nodeAttr(node, 'class'); + const cls = dom.getAttribute(node, 'class'); if ((type && type.match('indent')) || (cls && cls.match('indent'))) { type = 'indent'; } else { @@ -581,9 +581,9 @@ const makeContentCollector = (collectStyles, abrowser, apool, className2Author) } } - const nc = dom.nodeNumChildren(node); + const nc = dom.numChildNodes(node); for (let i = 0; i < nc; i++) { - const c = dom.nodeChild(node, i); + const c = dom.childNode(node, i); cc.collectContent(c, state); } From d0bfb54c0ad695854e09e6a8f0bce3814f72dc99 Mon Sep 17 00:00:00 2001 From: Richard Hansen Date: Wed, 20 Jan 2021 23:50:33 -0500 Subject: [PATCH 15/26] contentcollector: Avoid `for..in` iteration of object properties `for..in` iterates over inherited properties, which is almost never desired. In most cases there aren't any inherited enumerable properties so it's not that big of a deal, but in the case of HTMLCollection it's very bad because it iterates over every entry twice (once by numerical index and once by name) plus it includes the `length` property in the iteration. --- src/static/js/contentcollector.js | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/src/static/js/contentcollector.js b/src/static/js/contentcollector.js index 884f4b693..86043baf8 100644 --- a/src/static/js/contentcollector.js +++ b/src/static/js/contentcollector.js @@ -250,8 +250,8 @@ const makeContentCollector = (collectStyles, abrowser, apool, className2Author) const _recalcAttribString = (state) => { const lst = []; - for (const a in state.attribs) { - if (state.attribs[a]) { + for (const [a, count] of Object.entries(state.attribs)) { + if (count) { // The following splitting of the attribute name is a workaround // to enable the content collector to store key-value attributes // see https://github.com/ether/etherpad-lite/issues/2567 for more information @@ -496,9 +496,18 @@ const makeContentCollector = (collectStyles, abrowser, apool, className2Author) // lists do not need to have a type, so before we make a wrong guess // check if we find a better hint within the node's children if (!rr && !type) { - for (const i in node.children) { - if (node.children[i] && node.children[i].name === 'ul') { - type = dom.getAttribute(node.children[i], 'class'); + // If `node` is from the DOM (not cheerio) then it implements the ParentNode interface + // and `node.children` is a HTMLCollection. The DOM + Web IDL specs guarantee that + // HTMLCollection implements the iterable protocol, so for..of iteration should always + // work. See: https://stackoverflow.com/a/41759532. Cheerio behaves the same with + // regard to iteration. + // + // TODO: The set of Nodes included in node.children differs between the DOM and + // cheerio 0.22.0: cheerio includes all child Nodes (including non-Element Nodes) + // whereas the DOM only includes Nodes that are Elements. + for (const child of node.children) { + if (child && child.name === 'ul') { + type = dom.getAttribute(child, 'class'); if (type) { break; } From fc2420c244b682ad514363fbdb7cac3d3ae574bf Mon Sep 17 00:00:00 2001 From: Richard Hansen Date: Sun, 24 Jan 2021 18:50:50 -0500 Subject: [PATCH 16/26] contentcollector: Fix iteration over child Nodes In the DOM, `.children` only includes children that are Element objects. In cheerio 0.22.0, `.children` includes all child Nodes, not just Elements. Use `dom.numChildNodes()` and `dom.childNode()` so that browsers behave the same as cheerio. --- src/static/js/contentcollector.js | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/src/static/js/contentcollector.js b/src/static/js/contentcollector.js index 86043baf8..f940f257b 100644 --- a/src/static/js/contentcollector.js +++ b/src/static/js/contentcollector.js @@ -496,16 +496,8 @@ const makeContentCollector = (collectStyles, abrowser, apool, className2Author) // lists do not need to have a type, so before we make a wrong guess // check if we find a better hint within the node's children if (!rr && !type) { - // If `node` is from the DOM (not cheerio) then it implements the ParentNode interface - // and `node.children` is a HTMLCollection. The DOM + Web IDL specs guarantee that - // HTMLCollection implements the iterable protocol, so for..of iteration should always - // work. See: https://stackoverflow.com/a/41759532. Cheerio behaves the same with - // regard to iteration. - // - // TODO: The set of Nodes included in node.children differs between the DOM and - // cheerio 0.22.0: cheerio includes all child Nodes (including non-Element Nodes) - // whereas the DOM only includes Nodes that are Elements. - for (const child of node.children) { + for (let i = 0; i < dom.numChildNodes(node); i++) { + const child = dom.childNode(node, i); if (child && child.name === 'ul') { type = dom.getAttribute(child, 'class'); if (type) { From b811030846b72d5449f58510e32c75e12995c987 Mon Sep 17 00:00:00 2001 From: Richard Hansen Date: Thu, 21 Jan 2021 02:34:50 -0500 Subject: [PATCH 17/26] contentcollector: Delete unnecessary truthiness check --- src/static/js/contentcollector.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/static/js/contentcollector.js b/src/static/js/contentcollector.js index f940f257b..f4fc015cf 100644 --- a/src/static/js/contentcollector.js +++ b/src/static/js/contentcollector.js @@ -498,7 +498,7 @@ const makeContentCollector = (collectStyles, abrowser, apool, className2Author) if (!rr && !type) { for (let i = 0; i < dom.numChildNodes(node); i++) { const child = dom.childNode(node, i); - if (child && child.name === 'ul') { + if (child.name === 'ul') { type = dom.getAttribute(child, 'class'); if (type) { break; From b547ce9a47eece44a1c23e22aa90e39a996d0710 Mon Sep 17 00:00:00 2001 From: Richard Hansen Date: Thu, 21 Jan 2021 02:35:44 -0500 Subject: [PATCH 18/26] contentcollector: Invert logic to improve readability --- src/static/js/contentcollector.js | 42 ++++++++++++++----------------- 1 file changed, 19 insertions(+), 23 deletions(-) diff --git a/src/static/js/contentcollector.js b/src/static/js/contentcollector.js index f4fc015cf..c03ab6d64 100644 --- a/src/static/js/contentcollector.js +++ b/src/static/js/contentcollector.js @@ -251,24 +251,23 @@ const makeContentCollector = (collectStyles, abrowser, apool, className2Author) const _recalcAttribString = (state) => { const lst = []; for (const [a, count] of Object.entries(state.attribs)) { - if (count) { - // The following splitting of the attribute name is a workaround - // to enable the content collector to store key-value attributes - // see https://github.com/ether/etherpad-lite/issues/2567 for more information - // in long term the contentcollector should be refactored to get rid of this workaround - const ATTRIBUTE_SPLIT_STRING = '::'; + if (!count) continue; + // The following splitting of the attribute name is a workaround + // to enable the content collector to store key-value attributes + // see https://github.com/ether/etherpad-lite/issues/2567 for more information + // in long term the contentcollector should be refactored to get rid of this workaround + const ATTRIBUTE_SPLIT_STRING = '::'; - // see if attributeString is splittable - const attributeSplits = a.split(ATTRIBUTE_SPLIT_STRING); - if (attributeSplits.length > 1) { - // the attribute name follows the convention key::value - // so save it as a key value attribute - lst.push([attributeSplits[0], attributeSplits[1]]); - } else { - // the "normal" case, the attribute is just a switch - // so set it true - lst.push([a, 'true']); - } + // see if attributeString is splittable + const attributeSplits = a.split(ATTRIBUTE_SPLIT_STRING); + if (attributeSplits.length > 1) { + // the attribute name follows the convention key::value + // so save it as a key value attribute + lst.push([attributeSplits[0], attributeSplits[1]]); + } else { + // the "normal" case, the attribute is just a switch + // so set it true + lst.push([a, 'true']); } } if (state.authorLevel > 0) { @@ -498,12 +497,9 @@ const makeContentCollector = (collectStyles, abrowser, apool, className2Author) if (!rr && !type) { for (let i = 0; i < dom.numChildNodes(node); i++) { const child = dom.childNode(node, i); - if (child.name === 'ul') { - type = dom.getAttribute(child, 'class'); - if (type) { - break; - } - } + if (child.name !== 'ul') continue; + type = dom.getAttribute(child, 'class'); + if (type) break; } } if (rr && rr[1]) { From 4e220538a15b0b6108251b18141dc43a0beccc11 Mon Sep 17 00:00:00 2001 From: Richard Hansen Date: Thu, 21 Jan 2021 00:06:43 -0500 Subject: [PATCH 19/26] contentcollector: Use destructuring to improve readability --- src/static/js/contentcollector.js | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/static/js/contentcollector.js b/src/static/js/contentcollector.js index c03ab6d64..b20c2583a 100644 --- a/src/static/js/contentcollector.js +++ b/src/static/js/contentcollector.js @@ -425,7 +425,7 @@ const makeContentCollector = (collectStyles, abrowser, apool, className2Author) if (tname === 'br') { this.breakLine = true; const tvalue = dom.getAttribute(node, 'value'); - const induceLineBreak = hooks.callAll('collectContentLineBreak', { + const [startNewLine = true] = hooks.callAll('collectContentLineBreak', { cc: this, state, tname, @@ -433,9 +433,6 @@ const makeContentCollector = (collectStyles, abrowser, apool, className2Author) styl: null, cls: null, }); - const startNewLine = ( - typeof (induceLineBreak) === 'object' && - induceLineBreak.length === 0) ? true : induceLineBreak[0]; if (startNewLine) { cc.startNewLine(state); } From e3a47e48f9ee2391fb4e60c97eb7137855e580af Mon Sep 17 00:00:00 2001 From: Richard Hansen Date: Fri, 22 Jan 2021 02:06:06 -0500 Subject: [PATCH 20/26] contentcollector: Fix collectContentLineText hook Before, the hook always ignored the return values provided by the hook functions. Now the hook functions can change the text by either returning a string or setting `context.text` to the desired value. Also drop the `styl` and `cls` context properties. They were never documented and they were always null. --- doc/api/hooks_client-side.md | 15 ++++++++++++++- src/static/js/contentcollector.js | 24 +++++++----------------- 2 files changed, 21 insertions(+), 18 deletions(-) diff --git a/doc/api/hooks_client-side.md b/doc/api/hooks_client-side.md index 478f5d0a5..e87536cfe 100755 --- a/doc/api/hooks_client-side.md +++ b/doc/api/hooks_client-side.md @@ -421,7 +421,20 @@ Things in context: 4. text - the text for that line This hook allows you to validate/manipulate the text before it's sent to the -server side. The return value should be the validated/manipulated text. +server side. To change the text, either: + +* Set the `text` context property to the desired value and return `undefined`. +* (Deprecated) Return a string. If a hook function changes the `text` context + property, the return value is ignored. If no hook function changes `text` but + multiple hook functions return a string, the first one wins. + +Example: + +``` +exports.collectContentLineText = (hookName, context) => { + context.text = tweakText(context.text); +}; +``` ## collectContentLineBreak diff --git a/src/static/js/contentcollector.js b/src/static/js/contentcollector.js index b20c2583a..357b50195 100644 --- a/src/static/js/contentcollector.js +++ b/src/static/js/contentcollector.js @@ -338,24 +338,14 @@ const makeContentCollector = (collectStyles, abrowser, apool, className2Author) _reachBlockPoint(node, 0, state); if (dom.isTextNode(node)) { - let txt = dom.nodeValue(node); const tname = dom.getAttribute(node.parentNode, 'name'); - - const txtFromHook = hooks.callAll('collectContentLineText', { - cc: this, - state, - tname, - node, - text: txt, - styl: null, - cls: null, - }); - - if (typeof (txtFromHook) === 'object') { - txt = dom.nodeValue(node); - } else if (txtFromHook) { - txt = txtFromHook; - } + const context = {cc: this, state, tname, node, text: dom.nodeValue(node)}; + // Hook functions may either return a string (deprecated) or modify context.text. If any hook + // function modifies context.text then all returned strings are ignored. If no hook functions + // modify context.text, the first hook function to return a string wins. + const [hookTxt] = + hooks.callAll('collectContentLineText', context).filter((s) => typeof s === 'string'); + let txt = context.text === dom.nodeValue(node) && hookTxt != null ? hookTxt : context.text; let rest = ''; let x = 0; // offset into original text From 1d36549152c383264877b0f9a395ab93a7408b13 Mon Sep 17 00:00:00 2001 From: Richard Hansen Date: Sun, 24 Jan 2021 19:27:00 -0500 Subject: [PATCH 21/26] contentcollector: Delete unnecessary parentheses --- src/static/js/contentcollector.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/static/js/contentcollector.js b/src/static/js/contentcollector.js index 357b50195..c7db55fa0 100644 --- a/src/static/js/contentcollector.js +++ b/src/static/js/contentcollector.js @@ -509,7 +509,7 @@ const makeContentCollector = (collectStyles, abrowser, apool, className2Author) // This has undesirable behavior in Chrome but is right in other browsers. // See https://github.com/ether/etherpad-lite/issues/2412 for reasoning if (!abrowser.chrome) oldListTypeOrNull = (_enterList(state, undefined) || 'none'); - } else if ((tname === 'li')) { + } else if (tname === 'li') { state.lineAttributes.start = state.start || 0; _recalcAttribString(state); if (state.lineAttributes.list.indexOf('number') !== -1) { From e3ec9d9a4c9afba34a3edb14f73c7a1fa1f4988f Mon Sep 17 00:00:00 2001 From: Richard Hansen Date: Sun, 24 Jan 2021 19:29:56 -0500 Subject: [PATCH 22/26] contentcollector: Fix parent node access The `parent` property is only available on cheerio's Node-like objects; DOM Node objects do not have a `parent` property. Switch to the `parentNode` property so that the code works in browsers as well as cheerio. --- src/static/js/contentcollector.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/static/js/contentcollector.js b/src/static/js/contentcollector.js index c7db55fa0..0b85b434b 100644 --- a/src/static/js/contentcollector.js +++ b/src/static/js/contentcollector.js @@ -519,7 +519,7 @@ const makeContentCollector = (collectStyles, abrowser, apool, className2Author) Note how the
    item has to be inside a
  1. Because of this we don't increment the start number */ - if (node.parent && node.parent.name !== 'ol') { + if (node.parentNode && node.parentNode.name !== 'ol') { /* TODO: start number has to increment based on indentLevel(numberX) This means we have to build an object IE @@ -536,7 +536,7 @@ const makeContentCollector = (collectStyles, abrowser, apool, className2Author) } } // UL list items never modify the start value. - if (node.parent && node.parent.name === 'ul') { + if (node.parentNode && node.parentNode.name === 'ul') { state.start++; // TODO, this is hacky. // Because if the first item is an UL it will increment a list no? From 075969aea08e5cf767db489f8feeadbc0ef0ae50 Mon Sep 17 00:00:00 2001 From: Richard Hansen Date: Thu, 21 Jan 2021 02:46:22 -0500 Subject: [PATCH 23/26] contentcollector: Fix Element tag name fetch The `name` property is only available on cheerio's Element-like objects; DOM Element objects do not have a `name` property. Switch to `dom.tagName()` to fix the logic for browsers. --- src/static/js/contentcollector.js | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/static/js/contentcollector.js b/src/static/js/contentcollector.js index 0b85b434b..89dfd8ff8 100644 --- a/src/static/js/contentcollector.js +++ b/src/static/js/contentcollector.js @@ -484,7 +484,7 @@ const makeContentCollector = (collectStyles, abrowser, apool, className2Author) if (!rr && !type) { for (let i = 0; i < dom.numChildNodes(node); i++) { const child = dom.childNode(node, i); - if (child.name !== 'ul') continue; + if (dom.tagName(child) !== 'ul') continue; type = dom.getAttribute(child, 'class'); if (type) break; } @@ -519,7 +519,7 @@ const makeContentCollector = (collectStyles, abrowser, apool, className2Author) Note how the
      item has to be inside a
    1. Because of this we don't increment the start number */ - if (node.parentNode && node.parentNode.name !== 'ol') { + if (node.parentNode && dom.tagName(node.parentNode) !== 'ol') { /* TODO: start number has to increment based on indentLevel(numberX) This means we have to build an object IE @@ -536,7 +536,7 @@ const makeContentCollector = (collectStyles, abrowser, apool, className2Author) } } // UL list items never modify the start value. - if (node.parentNode && node.parentNode.name === 'ul') { + if (node.parentNode && dom.tagName(node.parentNode) === 'ul') { state.start++; // TODO, this is hacky. // Because if the first item is an UL it will increment a list no? From 1cb5453aeb98bfd5521500bfbead23ad92f137c4 Mon Sep 17 00:00:00 2001 From: Richard Hansen Date: Sun, 24 Jan 2021 02:07:33 -0500 Subject: [PATCH 24/26] contentcollector: Skip over non-Text, non-Element Nodes --- src/static/js/contentcollector.js | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/static/js/contentcollector.js b/src/static/js/contentcollector.js index 89dfd8ff8..28ac8cb6e 100644 --- a/src/static/js/contentcollector.js +++ b/src/static/js/contentcollector.js @@ -39,6 +39,7 @@ const makeContentCollector = (collectStyles, abrowser, apool, className2Author) const dom = { // .nodeType works with DOM and cheerio 0.22.0. Note: Cheerio 0.22.0 does not provide the // Node.*_NODE constants, so they cannot be used here. + isElementNode: (n) => n.nodeType === 1, // Node.ELEMENT_NODE isTextNode: (n) => n.nodeType === 3, // Node.TEXT_NODE // .tagName works with DOM and cheerio 0.22.0, but: // * With DOM, .tagName is an uppercase string. @@ -395,7 +396,7 @@ const makeContentCollector = (collectStyles, abrowser, apool, className2Author) cc.startNewLine(state); } } - } else { + } else if (dom.isElementNode(node)) { const tname = dom.tagName(node) || ''; if (tname === 'img') { From 275f041fbb39c0c6518f7f26d73fd760045412d0 Mon Sep 17 00:00:00 2001 From: Richard Hansen Date: Mon, 25 Jan 2021 01:29:08 -0500 Subject: [PATCH 25/26] contentcollector: Simplify child node access --- src/static/js/contentcollector.js | 34 ++++++++----------------------- 1 file changed, 8 insertions(+), 26 deletions(-) diff --git a/src/static/js/contentcollector.js b/src/static/js/contentcollector.js index 28ac8cb6e..ff9b5d9e4 100644 --- a/src/static/js/contentcollector.js +++ b/src/static/js/contentcollector.js @@ -48,24 +48,9 @@ const makeContentCollector = (collectStyles, abrowser, apool, className2Author) tagName: (n) => n.tagName && n.tagName.toLowerCase(), // .nodeValue works with DOM and cheerio 0.22.0. nodeValue: (n) => n.nodeValue, - // Returns the number of Node children (n.childNodes.length), not the number of Element children - // (n.children.length in DOM). - numChildNodes: (n) => { - // .childNodes.length works with DOM and cheerio 0.22.0, except in cheerio the .childNodes - // property does not exist on text nodes (and maybe other non-element nodes). - if (n.childNodes == null) return 0; - return n.childNodes.length; - }, - // Returns the i'th Node child (n.childNodes[i]), not the i'th Element child (n.children[i] in - // DOM). - childNode: (n, i) => { - if (n.childNodes.item == null) { - // .childNodes[] works with DOM and cheerio 0.22.0. - return n.childNodes[i]; - } - // .childNodes.item() works with DOM but not with cheerio 0.22.0. - return n.childNodes.item(i); - }, + // .childNodes works with DOM and cheerio 0.22.0, except in cheerio the .childNodes property + // does not exist on text nodes (and maybe other non-element nodes). + childNodes: (n) => n.childNodes || [], nodeProp: (n, p) => n[p], getAttribute: (n, a) => { // .getAttribute() works with DOM but not with cheerio 0.22.0. @@ -146,13 +131,13 @@ const makeContentCollector = (collectStyles, abrowser, apool, className2Author) let selEnd = [-1, -1]; const _isEmpty = (node, state) => { // consider clean blank lines pasted in IE to be empty - if (dom.numChildNodes(node) === 0) return true; - if (dom.numChildNodes(node) === 1 && + if (dom.childNodes(node).length === 0) return true; + if (dom.childNodes(node).length === 1 && getAssoc(node, 'shouldBeEmpty') && dom.innerHTML(node) === ' ' && !getAssoc(node, 'unpasted')) { if (state) { - const child = dom.childNode(node, 0); + const child = dom.childNodes(node)[0]; _reachPoint(child, 0, state); _reachPoint(child, 1, state); } @@ -483,8 +468,7 @@ const makeContentCollector = (collectStyles, abrowser, apool, className2Author) // lists do not need to have a type, so before we make a wrong guess // check if we find a better hint within the node's children if (!rr && !type) { - for (let i = 0; i < dom.numChildNodes(node); i++) { - const child = dom.childNode(node, i); + for (const child of dom.childNodes(node)) { if (dom.tagName(child) !== 'ul') continue; type = dom.getAttribute(child, 'class'); if (type) break; @@ -566,9 +550,7 @@ const makeContentCollector = (collectStyles, abrowser, apool, className2Author) } } - const nc = dom.numChildNodes(node); - for (let i = 0; i < nc; i++) { - const c = dom.childNode(node, i); + for (const c of dom.childNodes(node)) { cc.collectContent(c, state); } From e5b45cc98479470463cb66882f61121807d586c7 Mon Sep 17 00:00:00 2001 From: Richard Hansen Date: Mon, 25 Jan 2021 00:33:18 -0500 Subject: [PATCH 26/26] contentcollector: Delete unnecessary `dom` functions And move the remaining functions out of the `makeContentCollector()` function. --- src/static/js/contentcollector.js | 107 +++++++++++++++--------------- 1 file changed, 52 insertions(+), 55 deletions(-) diff --git a/src/static/js/contentcollector.js b/src/static/js/contentcollector.js index ff9b5d9e4..7ac27168f 100644 --- a/src/static/js/contentcollector.js +++ b/src/static/js/contentcollector.js @@ -32,38 +32,31 @@ const hooks = require('./pluginfw/hooks'); const sanitizeUnicode = (s) => UNorm.nfc(s); -const makeContentCollector = (collectStyles, abrowser, apool, className2Author) => { - // This file is used both in browsers and with cheerio in Node.js (for importing HTML). Cheerio's - // Node-like objects are not 100% API compatible with the DOM Node specification; this `dom` - // object abstracts away the differences. - const dom = { - // .nodeType works with DOM and cheerio 0.22.0. Note: Cheerio 0.22.0 does not provide the - // Node.*_NODE constants, so they cannot be used here. - isElementNode: (n) => n.nodeType === 1, // Node.ELEMENT_NODE - isTextNode: (n) => n.nodeType === 3, // Node.TEXT_NODE - // .tagName works with DOM and cheerio 0.22.0, but: - // * With DOM, .tagName is an uppercase string. - // * With cheerio 0.22.0, .tagName is a lowercase string. - // For consistency, this function always returns a lowercase string. - tagName: (n) => n.tagName && n.tagName.toLowerCase(), - // .nodeValue works with DOM and cheerio 0.22.0. - nodeValue: (n) => n.nodeValue, - // .childNodes works with DOM and cheerio 0.22.0, except in cheerio the .childNodes property - // does not exist on text nodes (and maybe other non-element nodes). - childNodes: (n) => n.childNodes || [], - nodeProp: (n, p) => n[p], - getAttribute: (n, a) => { - // .getAttribute() works with DOM but not with cheerio 0.22.0. - if (n.getAttribute != null) return n.getAttribute(a); - // .attribs[] works with cheerio 0.22.0 but not with DOM. - if (n.attribs != null) return n.attribs[a]; - return null; - }, - // .innerHTML works with DOM but not with cheerio 0.22.0. Cheerio's Element-like objects have no - // equivalent. (Cheerio objects have an .html() method, but that isn't accessible here.) - innerHTML: (n) => n.innerHTML, - }; +// This file is used both in browsers and with cheerio in Node.js (for importing HTML). Cheerio's +// Node-like objects are not 100% API compatible with the DOM specification; the following functions +// abstract away the differences. +// .nodeType works with DOM and cheerio 0.22.0, but cheerio 0.22.0 does not provide the Node.*_NODE +// constants so they cannot be used here. +const isElementNode = (n) => n.nodeType === 1; // Node.ELEMENT_NODE +const isTextNode = (n) => n.nodeType === 3; // Node.TEXT_NODE +// .tagName works with DOM and cheerio 0.22.0, but: +// * With DOM, .tagName is an uppercase string. +// * With cheerio 0.22.0, .tagName is a lowercase string. +// For consistency, this function always returns a lowercase string. +const tagName = (n) => n.tagName && n.tagName.toLowerCase(); +// .childNodes works with DOM and cheerio 0.22.0, except in cheerio the .childNodes property does +// not exist on text nodes (and maybe other non-element nodes). +const childNodes = (n) => n.childNodes || []; +const getAttribute = (n, a) => { + // .getAttribute() works with DOM but not with cheerio 0.22.0. + if (n.getAttribute != null) return n.getAttribute(a); + // .attribs[] works with cheerio 0.22.0 but not with DOM. + if (n.attribs != null) return n.attribs[a]; + return null; +}; + +const makeContentCollector = (collectStyles, abrowser, apool, className2Author) => { const _blockElems = { div: 1, p: 1, @@ -75,7 +68,7 @@ const makeContentCollector = (collectStyles, abrowser, apool, className2Author) _blockElems[element] = 1; }); - const isBlockElement = (n) => !!_blockElems[dom.tagName(n) || '']; + const isBlockElement = (n) => !!_blockElems[tagName(n) || '']; const textify = (str) => sanitizeUnicode( str.replace(/(\n | \n)/g, ' ') @@ -83,7 +76,7 @@ const makeContentCollector = (collectStyles, abrowser, apool, className2Author) .replace(/\xa0/g, ' ') .replace(/\t/g, ' ')); - const getAssoc = (node, name) => dom.nodeProp(node, `_magicdom_${name}`); + const getAssoc = (node, name) => node[`_magicdom_${name}`]; const lines = (() => { const textArray = []; @@ -131,13 +124,17 @@ const makeContentCollector = (collectStyles, abrowser, apool, className2Author) let selEnd = [-1, -1]; const _isEmpty = (node, state) => { // consider clean blank lines pasted in IE to be empty - if (dom.childNodes(node).length === 0) return true; - if (dom.childNodes(node).length === 1 && + if (childNodes(node).length === 0) return true; + if (childNodes(node).length === 1 && getAssoc(node, 'shouldBeEmpty') && - dom.innerHTML(node) === ' ' && + // Note: The .innerHTML property exists on DOM Element objects but not on cheerio's + // Element-like objects (cheerio v0.22.0) so this equality check will always be false. + // Cheerio's Element-like objects have no equivalent to .innerHTML. (Cheerio objects have an + // .html() method, but that isn't accessible here.) + node.innerHTML === ' ' && !getAssoc(node, 'unpasted')) { if (state) { - const child = dom.childNodes(node)[0]; + const child = childNodes(node)[0]; _reachPoint(child, 0, state); _reachPoint(child, 1, state); } @@ -157,7 +154,7 @@ const makeContentCollector = (collectStyles, abrowser, apool, className2Author) }; const _reachBlockPoint = (nd, idx, state) => { - if (!dom.isTextNode(nd)) _reachPoint(nd, idx, state); + if (!isTextNode(nd)) _reachPoint(nd, idx, state); }; const _reachPoint = (nd, idx, state) => { @@ -323,15 +320,15 @@ const makeContentCollector = (collectStyles, abrowser, apool, className2Author) const startLine = lines.length() - 1; _reachBlockPoint(node, 0, state); - if (dom.isTextNode(node)) { - const tname = dom.getAttribute(node.parentNode, 'name'); - const context = {cc: this, state, tname, node, text: dom.nodeValue(node)}; + if (isTextNode(node)) { + const tname = getAttribute(node.parentNode, 'name'); + const context = {cc: this, state, tname, node, text: node.nodeValue}; // Hook functions may either return a string (deprecated) or modify context.text. If any hook // function modifies context.text then all returned strings are ignored. If no hook functions // modify context.text, the first hook function to return a string wins. const [hookTxt] = hooks.callAll('collectContentLineText', context).filter((s) => typeof s === 'string'); - let txt = context.text === dom.nodeValue(node) && hookTxt != null ? hookTxt : context.text; + let txt = context.text === node.nodeValue && hookTxt != null ? hookTxt : context.text; let rest = ''; let x = 0; // offset into original text @@ -381,8 +378,8 @@ const makeContentCollector = (collectStyles, abrowser, apool, className2Author) cc.startNewLine(state); } } - } else if (dom.isElementNode(node)) { - const tname = dom.tagName(node) || ''; + } else if (isElementNode(node)) { + const tname = tagName(node) || ''; if (tname === 'img') { hooks.callAll('collectContentImage', { @@ -400,7 +397,7 @@ const makeContentCollector = (collectStyles, abrowser, apool, className2Author) if (tname === 'br') { this.breakLine = true; - const tvalue = dom.getAttribute(node, 'value'); + const tvalue = getAttribute(node, 'value'); const [startNewLine = true] = hooks.callAll('collectContentLineBreak', { cc: this, state, @@ -415,8 +412,8 @@ const makeContentCollector = (collectStyles, abrowser, apool, className2Author) } else if (tname === 'script' || tname === 'style') { // ignore } else if (!isEmpty) { - let styl = dom.getAttribute(node, 'style'); - let cls = dom.getAttribute(node, 'class'); + let styl = getAttribute(node, 'style'); + let cls = getAttribute(node, 'class'); let isPre = (tname === 'pre'); if ((!isPre) && abrowser && abrowser.safari) { isPre = (styl && /\bwhite-space:\s*pre\b/i.exec(styl)); @@ -463,14 +460,14 @@ const makeContentCollector = (collectStyles, abrowser, apool, className2Author) cc.doAttrib(state, 'strikethrough'); } if (tname === 'ul' || tname === 'ol') { - let type = dom.getAttribute(node, 'class'); + let type = getAttribute(node, 'class'); const rr = cls && /(?:^| )list-([a-z]+[0-9]+)\b/.exec(cls); // lists do not need to have a type, so before we make a wrong guess // check if we find a better hint within the node's children if (!rr && !type) { - for (const child of dom.childNodes(node)) { - if (dom.tagName(child) !== 'ul') continue; - type = dom.getAttribute(child, 'class'); + for (const child of childNodes(node)) { + if (tagName(child) !== 'ul') continue; + type = getAttribute(child, 'class'); if (type) break; } } @@ -478,7 +475,7 @@ const makeContentCollector = (collectStyles, abrowser, apool, className2Author) type = rr[1]; } else { if (tname === 'ul') { - const cls = dom.getAttribute(node, 'class'); + const cls = getAttribute(node, 'class'); if ((type && type.match('indent')) || (cls && cls.match('indent'))) { type = 'indent'; } else { @@ -504,7 +501,7 @@ const makeContentCollector = (collectStyles, abrowser, apool, className2Author) Note how the
        item has to be inside a
      1. Because of this we don't increment the start number */ - if (node.parentNode && dom.tagName(node.parentNode) !== 'ol') { + if (node.parentNode && tagName(node.parentNode) !== 'ol') { /* TODO: start number has to increment based on indentLevel(numberX) This means we have to build an object IE @@ -521,7 +518,7 @@ const makeContentCollector = (collectStyles, abrowser, apool, className2Author) } } // UL list items never modify the start value. - if (node.parentNode && dom.tagName(node.parentNode) === 'ul') { + if (node.parentNode && tagName(node.parentNode) === 'ul') { state.start++; // TODO, this is hacky. // Because if the first item is an UL it will increment a list no? @@ -550,7 +547,7 @@ const makeContentCollector = (collectStyles, abrowser, apool, className2Author) } } - for (const c of dom.childNodes(node)) { + for (const c of childNodes(node)) { cc.collectContent(c, state); }