From 60d44cd3df201088538d561795dee56badbcc6b2 Mon Sep 17 00:00:00 2001 From: John McLear Date: Tue, 25 Nov 2014 17:26:09 +0000 Subject: [PATCH 1/6] use cheerio instead of jsdom --- src/node/utils/ImportHtml.js | 14 +++++--------- src/package.json | 2 +- src/static/js/contentcollector.js | 5 +++++ 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/src/node/utils/ImportHtml.js b/src/node/utils/ImportHtml.js index 48188dfd0..79217cb65 100644 --- a/src/node/utils/ImportHtml.js +++ b/src/node/utils/ImportHtml.js @@ -14,22 +14,18 @@ * limitations under the License. */ -var jsdom = require('jsdom-nocontextifiy').jsdom; +var jsdom = require('jsdom').jsdom; var log4js = require('log4js'); var Changeset = require("ep_etherpad-lite/static/js/Changeset"); var contentcollector = require("ep_etherpad-lite/static/js/contentcollector"); +var cheerio = require("cheerio"); function setPadHTML(pad, html, callback) { var apiLogger = log4js.getLogger("ImportHtml"); - // Parse the incoming HTML with jsdom - try{ - var doc = jsdom(html.replace(/>\n+<')); - }catch(e){ - apiLogger.warn("Error importing, possibly caused by malformed HTML"); - var doc = jsdom("
<html><body><div>Error during import, possibly malformed HTML</div></body></html>
"); - } + var $ = cheerio.load(html); + var doc = $('html')[0]; apiLogger.debug('html:'); apiLogger.debug(html); @@ -38,7 +34,7 @@ function setPadHTML(pad, html, callback) // using the content collector object var cc = contentcollector.makeContentCollector(true, null, pad.pool); try{ // we use a try here because if the HTML is bad it will blow up - cc.collectContent(doc.childNodes[0]); + cc.collectContent(doc); }catch(e){ apiLogger.warn("HTML was not properly formed", e); return; // We don't process the HTML because it was bad.. diff --git a/src/package.json b/src/package.json index 4b0ae82b4..e69d97305 100644 --- a/src/package.json +++ b/src/package.json @@ -25,7 +25,7 @@ "formidable" : "1.0.9", "log4js" : "0.6.6", "nodemailer" : "0.3.x", - "jsdom-nocontextifiy" : "0.2.10", + "cheerio" : "0.18.0", "async-stacktrace" : "0.0.2", "npm" : "1.4.x", "ejs" : "0.6.1", diff --git a/src/static/js/contentcollector.js b/src/static/js/contentcollector.js index 5e393670f..048ed4915 100644 --- a/src/static/js/contentcollector.js +++ b/src/static/js/contentcollector.js @@ -54,10 +54,14 @@ function makeContentCollector(collectStyles, browser, apool, domInterface, class }, nodeNumChildren: function(n) { + if(n.childNodes == null) return 0; return n.childNodes.length; }, nodeChild: function(n, i) { + if(n.childNodes.item == null){ + return n.childNodes[i]; + } return n.childNodes.item(i); }, nodeProp: function(n, p) @@ -66,6 +70,7 @@ function makeContentCollector(collectStyles, browser, apool, domInterface, class }, nodeAttr: function(n, a) { + if(n.getAttribute == null) return null; return n.getAttribute(a); }, optNodeInnerHTML: function(n) From c2c0cb690763d1d0752f28ef4bca6738d319c66a Mon Sep 17 00:00:00 2001 From: John McLear Date: Tue, 25 Nov 2014 18:07:34 +0000 Subject: [PATCH 2/6] so yeah, prolly can do this anyway? 
--- src/node/handler/ImportHandler.js | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/node/handler/ImportHandler.js b/src/node/handler/ImportHandler.js index 8c410ecde..22d7a761b 100644 --- a/src/node/handler/ImportHandler.js +++ b/src/node/handler/ImportHandler.js @@ -234,7 +234,8 @@ exports.doImport = function(req, res, padId) ERR(err); //close the connection - res.send("", 200); + // res.send("", 200); + res.send("", 200); }); } From 2a062f8dc361de3845c4f5f33209047ed9ff985e Mon Sep 17 00:00:00 2001 From: John McLear Date: Tue, 25 Nov 2014 18:18:42 +0000 Subject: [PATCH 3/6] mheh no browser requirement at all --- src/node/handler/ImportHandler.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/node/handler/ImportHandler.js b/src/node/handler/ImportHandler.js index 22d7a761b..5cef08292 100644 --- a/src/node/handler/ImportHandler.js +++ b/src/node/handler/ImportHandler.js @@ -234,8 +234,8 @@ exports.doImport = function(req, res, padId) ERR(err); //close the connection - // res.send("", 200); - res.send("", 200); + res.send("", 200); + }); } From 77de2d918bbd61fe8be3d02caa3a62bf9a00733f Mon Sep 17 00:00:00 2001 From: John McLear Date: Tue, 25 Nov 2014 22:14:48 +0000 Subject: [PATCH 4/6] multiline --- src/node/handler/ImportHandler.js | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/src/node/handler/ImportHandler.js b/src/node/handler/ImportHandler.js index 5cef08292..55915d760 100644 --- a/src/node/handler/ImportHandler.js +++ b/src/node/handler/ImportHandler.js @@ -234,8 +234,19 @@ exports.doImport = function(req, res, padId) ERR(err); //close the connection - res.send("", 200); - + res.send( + " \ + \ + \ + " + , 200); }); } From 881763f663e4f9cfa1356e113b02dbeff777451f Mon Sep 17 00:00:00 2001 From: John McLear Date: Tue, 25 Nov 2014 22:15:29 +0000 Subject: [PATCH 5/6] remove jsdom dep --- src/node/utils/ImportHtml.js | 1 - 1 file changed, 1 deletion(-) diff --git 
a/src/node/utils/ImportHtml.js b/src/node/utils/ImportHtml.js index 79217cb65..34e9ad76a 100644 --- a/src/node/utils/ImportHtml.js +++ b/src/node/utils/ImportHtml.js @@ -14,7 +14,6 @@ * limitations under the License. */ -var jsdom = require('jsdom').jsdom; var log4js = require('log4js'); var Changeset = require("ep_etherpad-lite/static/js/Changeset"); var contentcollector = require("ep_etherpad-lite/static/js/contentcollector"); From 0a84379364c76a43fbab4c48af08346ff54a179d Mon Sep 17 00:00:00 2001 From: John McLear Date: Wed, 26 Nov 2014 15:19:22 +0000 Subject: [PATCH 6/6] make additional line break at end of imported doc --- bin/installDeps.sh | 2 +- bin/run.sh | 2 +- src/node/utils/ImportHtml.js | 6 +++++- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/bin/installDeps.sh b/bin/installDeps.sh index a8bc88a87..58eb32f2d 100755 --- a/bin/installDeps.sh +++ b/bin/installDeps.sh @@ -99,7 +99,7 @@ fi echo "Clear minfified cache..." rm -f var/minified* -echo "ensure custom css/js files are created..." +echo "Ensure custom css/js files are created..." for f in "index" "pad" "timeslider" do diff --git a/bin/run.sh b/bin/run.sh index 92ae8d482..f21623101 100755 --- a/bin/run.sh +++ b/bin/run.sh @@ -32,7 +32,7 @@ fi bin/installDeps.sh $* || exit 1 #Move to the node folder and start -echo "start..." +echo "Started Etherpad..." SCRIPTPATH=`pwd -P` node $SCRIPTPATH/node_modules/ep_etherpad-lite/node/server.js $* diff --git a/src/node/utils/ImportHtml.js b/src/node/utils/ImportHtml.js index 34e9ad76a..59802f9bf 100644 --- a/src/node/utils/ImportHtml.js +++ b/src/node/utils/ImportHtml.js @@ -24,8 +24,12 @@ function setPadHTML(pad, html, callback) var apiLogger = log4js.getLogger("ImportHtml"); var $ = cheerio.load(html); - var doc = $('html')[0]; + // Appends a line break, used by Etherpad to ensure a caret is available + // below the last line of an import + $('body').append("

<p></p>"); + + var doc = $('html')[0]; apiLogger.debug('html:'); apiLogger.debug(html);