tests/editor/ul/li/ol/import/export: Introduce contentcollector.js tests & various OL/UL/LI related bugfixes

1. Introduce contentcollector.js backend tests 1. Fix issue with OL LI items not being properly numbered after import 1. Fix issue with nested OL LI items being improperly numbered on export 1. Fix issue with new lines not being introduced after lists in on import #3961 1. Sanitize HTML on the way in (import) 1. Fix ExportHTML CSS because it needs to support OL > LI > OL not OL > OL [The latter being the correct format] 1. Fix backend tests.
2025-07-05 12:22:15 -04:00 · 2020-06-05 20:54:16 +01:00 · 2020-06-05 20:54:16 +01:00 · a4bdcc3392
commit a4bdcc3392
parent fc88f12bba
14 changed files with 10870 additions and 1188 deletions
--- a/src/node/utils/ExportHelper.js
+++ b/src/node/utils/ExportHelper.js
@ -63,6 +63,13 @@ exports._analyzeLine = function(text, aline, apool){
        }
      }
    }
+    var opIter2 = Changeset.opIterator(aline);
+    if (opIter2.hasNext()){
+      var start = Changeset.opAttributeValue(opIter2.next(), 'start', apool);
+      if (start){
+          line.start = start;
+      }
+    }
  }
  if (lineMarker){
    line.text = text.substring(1);
--- a/src/node/utils/ExportHtml.js
+++ b/src/node/utils/ExportHtml.js
@ -298,7 +298,7 @@ function getHTMLFromAtext(pad, atext, authorColors)
      });
    }
    processNextChars(text.length - idx);
-
+    
    return _processSpaces(assem.toString());
  } // end getLineHTML
  var pieces = [css];
@ -316,7 +316,6 @@ function getHTMLFromAtext(pad, atext, authorColors)
    var context;
    var line = _analyzeLine(textLines[i], attribLines[i], apool);
    var lineContent = getLineHTML(line.text, line.aline);
-
    if (line.listLevel)//If we are inside a list
    {
      context = {
@ -361,12 +360,50 @@ function getHTMLFromAtext(pad, atext, authorColors)

            if (prevPiece.indexOf("<ul") === 0 || prevPiece.indexOf("<ol") === 0 || prevPiece.indexOf("</li>") === 0)
            {
-              pieces.push("<li>");
+               /*
+                 uncommenting this breaks nested ols..
+                 if the previous item is NOT a ul, NOT an ol OR closing li then close the list
+                 so we consider this HTML, I inserted ** where it throws a problem in Example Wrong..
+                 <ol><li>one</li><li><ol><li>1.1</li><li><ol><li>1.1.1</li></ol></li></ol></li><li>two</li></ol>
+
+                 Note that closing the li then re-opening for another li item here is wrong.  The correct markup is
+                 <ol><li>one<ol><li>1.1<ol><li>1.1.1</li></ol></li></ol></li><li>two</li></ol>
+
+                 Exmaple Right: <ol class="number"><li>one</li><ol start="2" class="number"><li>1.1</li><ol start="3" class="number"><li>1.1.1</li></ol></li></ol><li>two</li></ol>
+                 Example Wrong: <ol class="number"><li>one</li>**</li>**<ol start="2" class="number"><li>1.1</li>**</li>**<ol start="3" class="number"><li>1.1.1</li></ol></li></ol><li>two</li></ol>
+                 So it's firing wrong where the current piece is an li and the previous piece is an ol and next piece is an ol
+                 So to remedy this we can say if next piece is NOT an OL or UL.
+                 // pieces.push("</li>");
+
+              */
+              if( (nextLine.listTypeName === 'number') && (nextLine.text === '') ){
+                // is the listTypeName check needed here?  null text might be completely fine!
+                // TODO Check against Uls
+                // don't do anything because the next item is a nested ol openener so we need to keep the li open
+              }else{
+                pieces.push("</li>");
+              }
+
+
            }

            if (line.listTypeName === "number")
            {
-              pieces.push("<ol class=\"" + line.listTypeName + "\">");
+              // We introduce line.start here, this is useful for continuing Ordered list line numbers
+              // in case you have a bullet in a list IE you Want
+              // 1. hello
+              //   * foo
+              // 2. world
+              // Without this line.start logic it would be
+              // 1. hello * foo 1. world because the bullet would kill the OL
+
+              // TODO: This logic could also be used to continue OL with indented content
+              // but that's a job for another day....
+              if(line.start){
+                pieces.push("<ol start=\""+Number(line.start)+"\" class=\"" + line.listTypeName + "\">");
+              }else{
+                pieces.push("<ol class=\"" + line.listTypeName + "\">");
+              }
            }
            else
            {
@ -375,13 +412,24 @@ function getHTMLFromAtext(pad, atext, authorColors)
          }
        }
      }
-
-      pieces.push("<li>", context.lineContent);
+      // if we're going up a level we shouldn't be adding..
+      if(context.lineContent){
+        pieces.push("<li>", context.lineContent);
+      }

      // To close list elements
      if (nextLine && nextLine.listLevel === line.listLevel && line.listTypeName === nextLine.listTypeName)
      {
-        pieces.push("</li>");
+        if(context.lineContent){
+          if( (nextLine.listTypeName === 'number') && (nextLine.text === '') ){
+            // is the listTypeName check needed here?  null text might be completely fine!
+            // TODO Check against Uls
+            // don't do anything because the next item is a nested ol openener so we need to keep the li open
+          }else{
+            pieces.push("</li>");
+          }
+
+        }
      }
      if ((!nextLine || !nextLine.listLevel || nextLine.listLevel < line.listLevel) || (nextLine && line.listTypeName !== nextLine.listTypeName))
      {
--- a/src/node/utils/ImportHtml.js
+++ b/src/node/utils/ImportHtml.js
@ -14,15 +14,29 @@
 * limitations under the License.
 */

-var log4js = require('log4js');
-var Changeset = require("ep_etherpad-lite/static/js/Changeset");
-var contentcollector = require("ep_etherpad-lite/static/js/contentcollector");
-var cheerio = require("cheerio");
+const log4js            = require('log4js');
+const Changeset         = require("ep_etherpad-lite/static/js/Changeset");
+const contentcollector  = require("ep_etherpad-lite/static/js/contentcollector");
+const cheerio           = require("cheerio");
+const rehype            = require("rehype")
+const format            = require("rehype-format")
+

 exports.setPadHTML = function(pad, html)
 {
  var apiLogger = log4js.getLogger("ImportHtml");

+  var opts = {
+    indentInitial: false,
+    indent: -1
+  }
+
+  rehype()
+    .use(format, opts)
+    .process(html, function(err, output){
+      html = String(output).replace(/(\r\n|\n|\r)/gm,"");
+  })
+
  var $ = cheerio.load(html);

  // Appends a line break, used by Etherpad to ensure a caret is available
--- a/src/node/utils/Minify.js
+++ b/src/node/utils/Minify.js
@ -23,6 +23,8 @@ var ERR = require("async-stacktrace");
 var settings = require('./Settings');
 var async = require('async');
 var fs = require('fs');
+var StringDecoder = require('string_decoder').StringDecoder;
+var CleanCSS = require('clean-css');
 var path = require('path');
 var plugins = require("ep_etherpad-lite/static/js/pluginfw/plugins");
 var RequireKernel = require('etherpad-require-kernel');