diff --git a/src/node/db/PadManager.js b/src/node/db/PadManager.js index 5e0af4643..7d546fc71 100644 --- a/src/node/db/PadManager.js +++ b/src/node/db/PadManager.js @@ -146,12 +146,11 @@ exports.getPad = function(id, text, callback) else { pad = new Pad(id); - + //initalize the pad pad.init(text, function(err) { if(ERR(err, callback)) return; - globalPads.set(id, pad); callback(null, pad); }); diff --git a/src/node/handler/ExportHandler.js b/src/node/handler/ExportHandler.js index 1b7fcc26d..8ff5bc488 100644 --- a/src/node/handler/ExportHandler.js +++ b/src/node/handler/ExportHandler.js @@ -20,6 +20,7 @@ var ERR = require("async-stacktrace"); var exporthtml = require("../utils/ExportHtml"); +var exporttxt = require("../utils/ExportTxt"); var exportdokuwiki = require("../utils/ExportDokuWiki"); var padManager = require("../db/PadManager"); var async = require("async"); @@ -48,22 +49,75 @@ exports.doExport = function(req, res, padId, type) res.attachment(padId + "." + type); //if this is a plain text export, we can do this directly + // We have to over engineer this because tabs are stored as attributes and not plain text + if(type == "txt") { - padManager.getPad(padId, function(err, pad) - { - ERR(err); - if(req.params.rev){ - pad.getInternalRevisionAText(req.params.rev, function(junk, text) - { - res.send(text.text ? 
text.text : null); - }); - } - else + var txt; + var randNum; + var srcFile, destFile; + + async.series([ + //render the txt document + function(callback) { - res.send(pad.text()); + exporttxt.getPadTXTDocument(padId, req.params.rev, false, function(err, _txt) + { + if(ERR(err, callback)) return; + txt = _txt; + callback(); + }); + }, + //decide what to do with the txt export + function(callback) + { + //if this is a txt export, we can send this from here directly + res.send(txt); + callback("stop"); + }, + //send the convert job to abiword + function(callback) + { + //ensure html can be collected by the garbage collector + txt = null; + + destFile = tempDirectory + "/eplite_export_" + randNum + "." + type; + abiword.convertFile(srcFile, destFile, type, callback); + }, + //send the file + function(callback) + { + res.sendfile(destFile, null, callback); + }, + //clean up temporary files + function(callback) + { + async.parallel([ + function(callback) + { + fs.unlink(srcFile, callback); + }, + function(callback) + { + //100ms delay to accomidate for slow windows fs + if(os.type().indexOf("Windows") > -1) + { + setTimeout(function() + { + fs.unlink(destFile, callback); + }, 100); + } + else + { + fs.unlink(destFile, callback); + } + } + ], callback); } - }); + ], function(err) + { + if(err && err != "stop") ERR(err); + }) } else if(type == 'dokuwiki') { diff --git a/src/node/utils/ExportHelper.js b/src/node/utils/ExportHelper.js new file mode 100644 index 000000000..a939a8b6e --- /dev/null +++ b/src/node/utils/ExportHelper.js @@ -0,0 +1,87 @@ +/** + * Helpers for export requests + */ + +/* + * 2011 Peter 'Pita' Martischka (Primary Technology Ltd) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS-IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +var async = require("async"); +var Changeset = require("ep_etherpad-lite/static/js/Changeset"); +var padManager = require("../db/PadManager"); +var ERR = require("async-stacktrace"); +var Security = require('ep_etherpad-lite/static/js/security'); +var hooks = require('ep_etherpad-lite/static/js/pluginfw/hooks'); + +exports.getPadPlainText = function(pad, revNum){ + var atext = ((revNum !== undefined) ? pad.getInternalRevisionAText(revNum) : pad.atext()); // NOTE(review): elsewhere in this patch getInternalRevisionAText is called with a callback and pad.atext is read as a property - confirm this synchronous use + var textLines = atext.text.slice(0, -1).split('\n'); + var attribLines = Changeset.splitAttributionLines(atext.attribs, atext.text); + var apool = pad.pool(); + + var pieces = []; + for (var i = 0; i < textLines.length; i++){ + var line = exports._analyzeLine(textLines[i], attribLines[i], apool); // _analyzeLine exists only as an export in this module; a bare call throws ReferenceError + if (line.listLevel){ + var numSpaces = line.listLevel * 2 - 1; + var bullet = '*'; + pieces.push(new Array(numSpaces + 1).join(' '), bullet, ' ', line.text, '\n'); + } + else{ + pieces.push(line.text, '\n'); + } + } + + return pieces.join(''); +} + + +exports._analyzeLine = function(text, aline, apool){ + var line = {}; + + // identify list + var lineMarker = 0; + line.listLevel = 0; + if (aline){ + var opIter = Changeset.opIterator(aline); + if (opIter.hasNext()){ + var listType = Changeset.opAttributeValue(opIter.next(), 'list', apool); + if (listType){ + lineMarker = 1; + listType = /([a-z]+)([12345678])/.exec(listType); + if (listType){ + line.listTypeName = listType[1]; + line.listLevel = Number(listType[2]); + } + } + } + } + if (lineMarker){ + line.text = text.substring(1); + line.aline = 
Changeset.subattribution(aline, 1); + } + else{ + line.text = text; + line.aline = aline; + } + return line; +} + + +exports._encodeWhitespace = function(s){ + return s.replace(/[^\x21-\x7E\s\t\n\r]/g, function(c){ + return "&#" +c.charCodeAt(0) + ";" + }); +} diff --git a/src/node/utils/ExportHtml.js b/src/node/utils/ExportHtml.js index 069194880..585694d4b 100644 --- a/src/node/utils/ExportHtml.js +++ b/src/node/utils/ExportHtml.js @@ -21,31 +21,9 @@ var padManager = require("../db/PadManager"); var ERR = require("async-stacktrace"); var Security = require('ep_etherpad-lite/static/js/security'); var hooks = require('ep_etherpad-lite/static/js/pluginfw/hooks'); -function getPadPlainText(pad, revNum) -{ - var atext = ((revNum !== undefined) ? pad.getInternalRevisionAText(revNum) : pad.atext()); - var textLines = atext.text.slice(0, -1).split('\n'); - var attribLines = Changeset.splitAttributionLines(atext.attribs, atext.text); - var apool = pad.pool(); - - var pieces = []; - for (var i = 0; i < textLines.length; i++) - { - var line = _analyzeLine(textLines[i], attribLines[i], apool); - if (line.listLevel) - { - var numSpaces = line.listLevel * 2 - 1; - var bullet = '*'; - pieces.push(new Array(numSpaces + 1).join(' '), bullet, ' ', line.text, '\n'); - } - else - { - pieces.push(line.text, '\n'); - } - } - - return pieces.join(''); -} +var getPadPlainText = require('./ExportHelper').getPadPlainText +var _analyzeLine = require('./ExportHelper')._analyzeLine; +var _encodeWhitespace = require('./ExportHelper')._encodeWhitespace; function getPadHTML(pad, revNum, callback) { @@ -503,45 +481,6 @@ function getHTMLFromAtext(pad, atext, authorColors) return pieces.join(''); } -function _analyzeLine(text, aline, apool) -{ - var line = {}; - - // identify list - var lineMarker = 0; - line.listLevel = 0; - if (aline) - { - var opIter = Changeset.opIterator(aline); - if (opIter.hasNext()) - { - var listType = Changeset.opAttributeValue(opIter.next(), 'list', apool); - if 
(listType) - { - lineMarker = 1; - listType = /([a-z]+)([12345678])/.exec(listType); - if (listType) - { - line.listTypeName = listType[1]; - line.listLevel = Number(listType[2]); - } - } - } - } - if (lineMarker) - { - line.text = text.substring(1); - line.aline = Changeset.subattribution(aline, 1); - } - else - { - line.text = text; - line.aline = aline; - } - - return line; -} - exports.getPadHTMLDocument = function (padId, revNum, noDocType, callback) { padManager.getPad(padId, function (err, pad) @@ -578,79 +517,6 @@ exports.getPadHTMLDocument = function (padId, revNum, noDocType, callback) }); } -function _encodeWhitespace(s) { - return s.replace(/[^\x21-\x7E\s\t\n\r]/g, function(c) - { - return "&#" +c.charCodeAt(0) + ";" - }); -} - -// copied from ACE - - -function _processSpaces(s) -{ - var doesWrap = true; - if (s.indexOf("<") < 0 && !doesWrap) - { - // short-cut - return s.replace(/ /g, ' '); - } - var parts = []; - s.replace(/<[^>]*>?| |[^ <]+/g, function (m) - { - parts.push(m); - }); - if (doesWrap) - { - var endOfLine = true; - var beforeSpace = false; - // last space in a run is normal, others are nbsp, - // end of line is nbsp - for (var i = parts.length - 1; i >= 0; i--) - { - var p = parts[i]; - if (p == " ") - { - if (endOfLine || beforeSpace) parts[i] = ' '; - endOfLine = false; - beforeSpace = true; - } - else if (p.charAt(0) != "<") - { - endOfLine = false; - beforeSpace = false; - } - } - // beginning of line is nbsp - for (var i = 0; i < parts.length; i++) - { - var p = parts[i]; - if (p == " ") - { - parts[i] = ' '; - break; - } - else if (p.charAt(0) != "<") - { - break; - } - } - } - else - { - for (var i = 0; i < parts.length; i++) - { - var p = parts[i]; - if (p == " ") - { - parts[i] = ' '; - } - } - } - return parts.join(''); -} - // copied from ACE var _REGEX_WORDCHAR = 
/[\u0030-\u0039\u0041-\u005A\u0061-\u007A\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF\u0100-\u1FFF\u3040-\u9FFF\uF900-\uFDFF\uFE70-\uFEFE\uFF10-\uFF19\uFF21-\uFF3A\uFF41-\uFF5A\uFF66-\uFFDC]/; @@ -676,3 +542,57 @@ function _findURLs(text) return urls; } + + +// copied from ACE +function _processSpaces(s){ + var doesWrap = true; + if (s.indexOf("<") < 0 && !doesWrap){ + // short-cut + return s.replace(/ /g, ' '); + } + var parts = []; + s.replace(/<[^>]*>?| |[^ <]+/g, function (m){ + parts.push(m); + }); + if (doesWrap){ + var endOfLine = true; + var beforeSpace = false; + // last space in a run is normal, others are nbsp, + // end of line is nbsp + for (var i = parts.length - 1; i >= 0; i--){ + var p = parts[i]; + if (p == " "){ + if (endOfLine || beforeSpace) parts[i] = ' '; + endOfLine = false; + beforeSpace = true; + } + else if (p.charAt(0) != "<"){ + endOfLine = false; + beforeSpace = false; + } + } + // beginning of line is nbsp + for (var i = 0; i < parts.length; i++){ + var p = parts[i]; + if (p == " "){ + parts[i] = ' '; + break; + } + else if (p.charAt(0) != "<"){ + break; + } + } + } + else + { + for (var i = 0; i < parts.length; i++){ + var p = parts[i]; + if (p == " "){ + parts[i] = ' '; + } + } + } + return parts.join(''); +} + diff --git a/src/node/utils/ExportTxt.js b/src/node/utils/ExportTxt.js new file mode 100644 index 000000000..c57424f1d --- /dev/null +++ b/src/node/utils/ExportTxt.js @@ -0,0 +1,293 @@ +/** + * TXT export + */ + +/* + * 2013 John McLear + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS-IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +var async = require("async"); +var Changeset = require("ep_etherpad-lite/static/js/Changeset"); +var padManager = require("../db/PadManager"); +var ERR = require("async-stacktrace"); +var Security = require('ep_etherpad-lite/static/js/security'); +var hooks = require('ep_etherpad-lite/static/js/pluginfw/hooks'); +var getPadPlainText = require('./ExportHelper').getPadPlainText; +var _analyzeLine = require('./ExportHelper')._analyzeLine; + +// This is slightly different than the HTML method as it passes the output to getTXTFromAText +function getPadTXT(pad, revNum, callback) +{ + var atext = pad.atext; + var html; + async.waterfall([ + // fetch revision atext + + + function (callback) + { + if (revNum != undefined) + { + pad.getInternalRevisionAText(revNum, function (err, revisionAtext) + { + if(ERR(err, callback)) return; + atext = revisionAtext; + callback(); + }); + } + else + { + callback(null); + } + }, + + // convert atext to html + + + function (callback) + { + html = getTXTFromAtext(pad, atext); // only this line is different to the HTML function + callback(null); + }], + // run final callback + + + function (err) + { + if(ERR(err, callback)) return; + callback(null, html); + }); +} + +exports.getPadTXT = getPadTXT; + + +// This is different than the functionality provided in ExportHtml as it provides formatting +// functionality that is designed specifically for TXT exports +function getTXTFromAtext(pad, atext, authorColors) +{ + var apool = pad.apool(); + var textLines = atext.text.slice(0, -1).split('\n'); + var attribLines = Changeset.splitAttributionLines(atext.attribs, atext.text); + + var tags = ['h1', 'h2', 'strong', 'em', 'u', 's']; + var props = ['heading1', 'heading2', 'bold', 'italic', 'underline', 'strikethrough']; + var anumMap = {}; + var css = ""; + + props.forEach(function (propName, i) + { + var propTrueNum = 
apool.putAttrib([propName, true], true); + if (propTrueNum >= 0) + { + anumMap[propTrueNum] = i; + } + }); + + function getLineTXT(text, attribs) + { + var propVals = [false, false, false]; + var ENTER = 1; + var STAY = 2; + var LEAVE = 0; + + // Use order of tags (b/i/u) as order of nesting, for simplicity + // and decent nesting. For example, + // Just bold Bold and italics Just italics + // becomes + // Just bold Bold and italics Just italics + var taker = Changeset.stringIterator(text); + var assem = Changeset.stringAssembler(); + var openTags = []; + + var idx = 0; + + function processNextChars(numChars) + { + if (numChars <= 0) + { + return; + } + + var iter = Changeset.opIterator(Changeset.subattribution(attribs, idx, idx + numChars)); + idx += numChars; + + while (iter.hasNext()) + { + var o = iter.next(); + var propChanged = false; + Changeset.eachAttribNumber(o.attribs, function (a) + { + if (a in anumMap) + { + var i = anumMap[a]; // i = 0 => bold, etc. + if (!propVals[i]) + { + propVals[i] = ENTER; + propChanged = true; + } + else + { + propVals[i] = STAY; + } + } + }); + for (var i = 0; i < propVals.length; i++) + { + if (propVals[i] === true) + { + propVals[i] = LEAVE; + propChanged = true; + } + else if (propVals[i] === STAY) + { + propVals[i] = true; // set it back + } + } + // now each member of propVal is in {false,LEAVE,ENTER,true} + // according to what happens at start of span + if (propChanged) + { + // leaving bold (e.g.) also leaves italics, etc. 
+ var left = false; + for (var i = 0; i < propVals.length; i++) + { + var v = propVals[i]; + if (!left) + { + if (v === LEAVE) + { + left = true; + } + } + else + { + if (v === true) + { + propVals[i] = STAY; // tag will be closed and re-opened + } + } + } + + var tags2close = []; + + for (var i = propVals.length - 1; i >= 0; i--) + { + if (propVals[i] === LEAVE) + { + //emitCloseTag(i); + tags2close.push(i); + propVals[i] = false; + } + else if (propVals[i] === STAY) + { + //emitCloseTag(i); + tags2close.push(i); + } + } + + for (var i = 0; i < propVals.length; i++) + { + if (propVals[i] === ENTER || propVals[i] === STAY) + { + propVals[i] = true; + } + } + // propVals is now all {true,false} again + } // end if (propChanged) + + var chars = o.chars; + if (o.lines) + { + chars--; // exclude newline at end of line, if present + } + + var s = taker.take(chars); + + // removes the characters with the code 12. Don't know where they come + // from but they break the abiword parser and are completly useless + // s = s.replace(String.fromCharCode(12), ""); + + // remove * from s, it's just not needed on a blank line.. This stops + // plugins from being able to display * at the beginning of a line + // s = s.replace("*", ""); // Then remove it + + assem.append(s); + } // end iteration over spans in line + + var tags2close = []; + for (var i = propVals.length - 1; i >= 0; i--) + { + if (propVals[i]) + { + tags2close.push(i); + propVals[i] = false; + } + } + + } // end processNextChars + processNextChars(text.length - idx); + return(assem.toString()); + } // end getLineHTML + var pieces = [css]; + + // Need to deal with constraints imposed on HTML lists; can + // only gain one level of nesting at once, can't change type + // mid-list, etc. + // People might use weird indenting, e.g. skip a level, + // so we want to do something reasonable there. We also + // want to deal gracefully with blank lines. 
+ // => keeps track of the parents level of indentation + var lists = []; // e.g. [[1,'bullet'], [3,'bullet'], ...] + for (var i = 0; i < textLines.length; i++) + { + var line = _analyzeLine(textLines[i], attribLines[i], apool); + var lineContent = getLineTXT(line.text, line.aline); + if(line.listTypeName == "bullet"){ + lineContent = "* " + lineContent; // add a bullet + } + if(line.listLevel > 0){ + for (var j = line.listLevel - 1; j >= 0; j--){ + pieces.push('\t'); + } + if(line.listTypeName == "number"){ + pieces.push(line.listLevel + ". "); + // This is bad because it doesn't truly reflect what the user + // sees because browsers do magic on nested <ol><li>s
+ } + pieces.push(lineContent, '\n'); + }else{ + pieces.push(lineContent, '\n'); + } + } + + return pieces.join(''); +} +exports.getTXTFromAtext = getTXTFromAtext; + +exports.getPadTXTDocument = function (padId, revNum, noDocType, callback) +{ + padManager.getPad(padId, function (err, pad) + { + if(ERR(err, callback)) return; + + getPadTXT(pad, revNum, function (err, html) + { + if(ERR(err, callback)) return; + callback(null, html); + }); + }); +} +