From d16b15f3eb7d5753b60fbc7907e85ca996674bdb Mon Sep 17 00:00:00 2001 From: John McLear Date: Sun, 10 Feb 2013 17:34:34 +0000 Subject: [PATCH 01/13] begin support for better txt output --- src/node/handler/ExportHandler.js | 78 ++++- src/node/utils/ExportTxt.js | 477 ++++++++++++++++++++++++++++++ 2 files changed, 543 insertions(+), 12 deletions(-) create mode 100644 src/node/utils/ExportTxt.js diff --git a/src/node/handler/ExportHandler.js b/src/node/handler/ExportHandler.js index 1b7fcc26d..8ff5bc488 100644 --- a/src/node/handler/ExportHandler.js +++ b/src/node/handler/ExportHandler.js @@ -20,6 +20,7 @@ var ERR = require("async-stacktrace"); var exporthtml = require("../utils/ExportHtml"); +var exporttxt = require("../utils/ExportTxt"); var exportdokuwiki = require("../utils/ExportDokuWiki"); var padManager = require("../db/PadManager"); var async = require("async"); @@ -48,22 +49,75 @@ exports.doExport = function(req, res, padId, type) res.attachment(padId + "." + type); //if this is a plain text export, we can do this directly + // We have to over engineer this because tabs are stored as attributes and not plain text + if(type == "txt") { - padManager.getPad(padId, function(err, pad) - { - ERR(err); - if(req.params.rev){ - pad.getInternalRevisionAText(req.params.rev, function(junk, text) - { - res.send(text.text ? text.text : null); - }); - } - else + var txt; + var randNum; + var srcFile, destFile; + + async.series([ + //render the txt document + function(callback) { - res.send(pad.text()); + exporttxt.getPadTXTDocument(padId, req.params.rev, false, function(err, _txt) + { + if(ERR(err, callback)) return; + txt = _txt; + callback(); + }); + }, + //decide what to do with the txt export + function(callback) + { + //if this is a txt export, we can send this from here directly + res.send(txt); + callback("stop"); + }, + //send the convert job to abiword + function(callback) + { + //ensure html can be collected by the garbage collector + txt = null; + + destFile = tempDirectory + "/eplite_export_" + randNum + "." + type; + abiword.convertFile(srcFile, destFile, type, callback); + }, + //send the file + function(callback) + { + res.sendfile(destFile, null, callback); + }, + //clean up temporary files + function(callback) + { + async.parallel([ + function(callback) + { + fs.unlink(srcFile, callback); + }, + function(callback) + { + //100ms delay to accomidate for slow windows fs + if(os.type().indexOf("Windows") > -1) + { + setTimeout(function() + { + fs.unlink(destFile, callback); + }, 100); + } + else + { + fs.unlink(destFile, callback); + } + } + ], callback); } - }); + ], function(err) + { + if(err && err != "stop") ERR(err); + }) } else if(type == 'dokuwiki') { diff --git a/src/node/utils/ExportTxt.js b/src/node/utils/ExportTxt.js new file mode 100644 index 000000000..99f6085e3 --- /dev/null +++ b/src/node/utils/ExportTxt.js @@ -0,0 +1,477 @@ +/** + * Copyright 2009 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS-IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +var async = require("async"); +var Changeset = require("ep_etherpad-lite/static/js/Changeset"); +var padManager = require("../db/PadManager"); +var ERR = require("async-stacktrace"); +var Security = require('ep_etherpad-lite/static/js/security'); +var hooks = require('ep_etherpad-lite/static/js/pluginfw/hooks'); +function getPadPlainText(pad, revNum) +{ + var atext = ((revNum !== undefined) ? pad.getInternalRevisionAText(revNum) : pad.atext()); + var textLines = atext.text.slice(0, -1).split('\n'); + var attribLines = Changeset.splitAttributionLines(atext.attribs, atext.text); + var apool = pad.pool(); + + var pieces = []; + for (var i = 0; i < textLines.length; i++) + { + var line = _analyzeLine(textLines[i], attribLines[i], apool); + if (line.listLevel) + { + var numSpaces = line.listLevel * 2 - 1; + var bullet = '*'; + pieces.push(new Array(numSpaces + 1).join(' '), bullet, ' ', line.text, '\n'); + } + else + { + pieces.push(line.text, '\n'); + } + } + + return pieces.join(''); +} + +function getPadTXT(pad, revNum, callback) +{ + var atext = pad.atext; + var html; + async.waterfall([ + // fetch revision atext + + + function (callback) + { + if (revNum != undefined) + { + pad.getInternalRevisionAText(revNum, function (err, revisionAtext) + { + if(ERR(err, callback)) return; + atext = revisionAtext; + callback(); + }); + } + else + { + callback(null); + } + }, + + // convert atext to html + + + function (callback) + { + html = getTXTFromAtext(pad, atext); + callback(null); + }], + // run final callback + + + function (err) + { + if(ERR(err, callback)) return; + callback(null, html); + }); +} + +exports.getPadTXT = getPadTXT; +exports.getTXTFromAtext = getTXTFromAtext; + +function getTXTFromAtext(pad, atext, authorColors) +{ + var apool = pad.apool(); + var textLines = atext.text.slice(0, -1).split('\n'); + var attribLines = Changeset.splitAttributionLines(atext.attribs, atext.text); + + var tags = ['h1', 'h2', 'strong', 'em', 'u', 's']; + var props = ['heading1', 'heading2', 'bold', 'italic', 'underline', 'strikethrough']; + var anumMap = {}; + var css = ""; + + props.forEach(function (propName, i) + { + var propTrueNum = apool.putAttrib([propName, true], true); + if (propTrueNum >= 0) + { + anumMap[propTrueNum] = i; + } + }); + + function getLineTXT(text, attribs) + { + var propVals = [false, false, false]; + var ENTER = 1; + var STAY = 2; + var LEAVE = 0; + + // Use order of tags (b/i/u) as order of nesting, for simplicity + // and decent nesting. For example, + // Just bold Bold and italics Just italics + // becomes + // Just bold Bold and italics Just italics + var taker = Changeset.stringIterator(text); + var assem = Changeset.stringAssembler(); + var openTags = []; + + var urls = _findURLs(text); + + var idx = 0; + + function processNextChars(numChars) + { + if (numChars <= 0) + { + return; + } + + var iter = Changeset.opIterator(Changeset.subattribution(attribs, idx, idx + numChars)); + idx += numChars; + + while (iter.hasNext()) + { + var o = iter.next(); + var propChanged = false; + Changeset.eachAttribNumber(o.attribs, function (a) + { + if (a in anumMap) + { + var i = anumMap[a]; // i = 0 => bold, etc. + if (!propVals[i]) + { + propVals[i] = ENTER; + propChanged = true; + } + else + { + propVals[i] = STAY; + } + } + }); + for (var i = 0; i < propVals.length; i++) + { + if (propVals[i] === true) + { + propVals[i] = LEAVE; + propChanged = true; + } + else if (propVals[i] === STAY) + { + propVals[i] = true; // set it back + } + } + // now each member of propVal is in {false,LEAVE,ENTER,true} + // according to what happens at start of span + if (propChanged) + { + // leaving bold (e.g.) also leaves italics, etc. + var left = false; + for (var i = 0; i < propVals.length; i++) + { + var v = propVals[i]; + if (!left) + { + if (v === LEAVE) + { + left = true; + } + } + else + { + if (v === true) + { + propVals[i] = STAY; // tag will be closed and re-opened + } + } + } + + var tags2close = []; + + for (var i = propVals.length - 1; i >= 0; i--) + { + if (propVals[i] === LEAVE) + { + //emitCloseTag(i); + tags2close.push(i); + propVals[i] = false; + } + else if (propVals[i] === STAY) + { + //emitCloseTag(i); + tags2close.push(i); + } + } + + for (var i = 0; i < propVals.length; i++) + { + if (propVals[i] === ENTER || propVals[i] === STAY) + { + emitOpenTag(i); + propVals[i] = true; + } + } + // propVals is now all {true,false} again + } // end if (propChanged) + var chars = o.chars; + if (o.lines) + { + chars--; // exclude newline at end of line, if present + } + + var s = taker.take(chars); + + //removes the characters with the code 12. Don't know where they come + //from but they break the abiword parser and are completly useless + s = s.replace(String.fromCharCode(12), ""); + + assem.append(_encodeWhitespace(Security.escapeHTML(s))); + } // end iteration over spans in line + + var tags2close = []; + for (var i = propVals.length - 1; i >= 0; i--) + { + if (propVals[i]) + { + tags2close.push(i); + propVals[i] = false; + } + } + + } // end processNextChars + if (urls) + { + urls.forEach(function (urlData) + { + var startIndex = urlData[0]; + var url = urlData[1]; + var urlLength = url.length; + processNextChars(startIndex - idx); + console.warn(url); + // assem.append(''); + assem.append(url); + processNextChars(urlLength); + // assem.append(''); + }); + } + processNextChars(text.length - idx); + + return _processSpaces(assem.toString()); + } // end getLineHTML + var pieces = [css]; + + // Need to deal with constraints imposed on HTML lists; can + // only gain one level of nesting at once, can't change type + // mid-list, etc. + // People might use weird indenting, e.g. skip a level, + // so we want to do something reasonable there. We also + // want to deal gracefully with blank lines. + // => keeps track of the parents level of indentation + var lists = []; // e.g. [[1,'bullet'], [3,'bullet'], ...] + for (var i = 0; i < textLines.length; i++) + { + var line = _analyzeLine(textLines[i], attribLines[i], apool); + var lineContent = getLineTXT(line.text, line.aline); + if(line.listTypeName == "bullet"){ + lineContent = "* " + lineContent; // add a bullet + } + if(line.listLevel > 0){ + for (var j = line.listLevel - 1; j >= 0; j--){ + pieces.push('\t'); + } + if(line.listTypeName == "number"){ + pieces.push(line.listLevel + ". "); + // This is bad because it doesn't truly reflect what the user + // sees because browsers do magic on nested
  1. s + } + pieces.push(lineContent, '\n'); + }else{ + console.warn(line); + pieces.push(lineContent, '\n'); + } + + // I'm not too keen about using teh HTML export filters here, they could cause real pain in the future + // I'd suggest supporting getLineTXTForExport + var lineContentFromHook = hooks.callAllStr("getLineHTMLForExport", + { + line: line, + apool: apool, + attribLine: attribLines[i], + text: textLines[i] + }, " ", " ", ""); + if (lineContentFromHook) + { + pieces.push(lineContentFromHook, ''); + } + else + { + // pieces.push(lineContent, '\n'); + } + } + + return pieces.join(''); +} + +function _analyzeLine(text, aline, apool) +{ + var line = {}; + + // identify list + var lineMarker = 0; + line.listLevel = 0; + if (aline) + { + var opIter = Changeset.opIterator(aline); + if (opIter.hasNext()) + { + var listType = Changeset.opAttributeValue(opIter.next(), 'list', apool); + if (listType) + { + lineMarker = 1; + listType = /([a-z]+)([12345678])/.exec(listType); + if (listType) + { + line.listTypeName = listType[1]; + line.listLevel = Number(listType[2]); + } + } + } + } + if (lineMarker) + { + line.text = text.substring(1); + line.aline = Changeset.subattribution(aline, 1); + } + else + { + line.text = text; + line.aline = aline; + } + + return line; +} + +exports.getPadTXTDocument = function (padId, revNum, noDocType, callback) +{ + padManager.getPad(padId, function (err, pad) + { + if(ERR(err, callback)) return; + + getPadTXT(pad, revNum, function (err, html) + { + if(ERR(err, callback)) return; + callback(null, html); + }); + }); +} + +function _encodeWhitespace(s) { + return s.replace(/[^\x21-\x7E\s\t\n\r]/g, function(c) + { + return "&#" +c.charCodeAt(0) + ";" + }); +} + +// copied from ACE +function _processSpaces(s) +{ + var doesWrap = true; + if (s.indexOf("<") < 0 && !doesWrap) + { + // short-cut + return s.replace(/ /g, ' '); + } + var parts = []; + s.replace(/<[^>]*>?| |[^ <]+/g, function (m) + { + parts.push(m); + }); + if (doesWrap) + { + var endOfLine = true; + var beforeSpace = false; + // last space in a run is normal, others are nbsp, + // end of line is nbsp + for (var i = parts.length - 1; i >= 0; i--) + { + var p = parts[i]; + if (p == " ") + { + if (endOfLine || beforeSpace) parts[i] = ' '; + endOfLine = false; + beforeSpace = true; + } + else if (p.charAt(0) != "<") + { + endOfLine = false; + beforeSpace = false; + } + } + // beginning of line is nbsp + for (var i = 0; i < parts.length; i++) + { + var p = parts[i]; + if (p == " ") + { + parts[i] = ' '; + break; + } + else if (p.charAt(0) != "<") + { + break; + } + } + } + else + { + for (var i = 0; i < parts.length; i++) + { + var p = parts[i]; + if (p == " ") + { + parts[i] = ' '; + } + } + } + return parts.join(''); +} + + +// copied from ACE +var _REGEX_WORDCHAR = /[\u0030-\u0039\u0041-\u005A\u0061-\u007A\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF\u0100-\u1FFF\u3040-\u9FFF\uF900-\uFDFF\uFE70-\uFEFE\uFF10-\uFF19\uFF21-\uFF3A\uFF41-\uFF5A\uFF66-\uFFDC]/; +var _REGEX_SPACE = /\s/; +var _REGEX_URLCHAR = new RegExp('(' + /[-:@a-zA-Z0-9_.,~%+\/\\?=&#;()$]/.source + '|' + _REGEX_WORDCHAR.source + ')'); +var _REGEX_URL = new RegExp(/(?:(?:https?|s?ftp|ftps|file|smb|afp|nfs|(x-)?man|gopher|txmt):\/\/|mailto:)/.source + _REGEX_URLCHAR.source + '*(?![:.,;])' + _REGEX_URLCHAR.source, 'g'); + +// returns null if no URLs, or [[startIndex1, url1], [startIndex2, url2], ...] + +function _findURLs(text) +{ + _REGEX_URL.lastIndex = 0; + var urls = null; + var execResult; + while ((execResult = _REGEX_URL.exec(text))) + { + urls = (urls || []); + var startIndex = execResult.index; + var url = execResult[0]; + urls.push([startIndex, url]); + } + + return urls; +} + From a378f48c00fca7b78d888e3fd5957668fbff8811 Mon Sep 17 00:00:00 2001 From: John McLear Date: Sun, 10 Feb 2013 17:39:02 +0000 Subject: [PATCH 02/13] remove console warns --- src/node/utils/ExportTxt.js | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/node/utils/ExportTxt.js b/src/node/utils/ExportTxt.js index 99f6085e3..d9ee708f9 100644 --- a/src/node/utils/ExportTxt.js +++ b/src/node/utils/ExportTxt.js @@ -261,7 +261,6 @@ function getTXTFromAtext(pad, atext, authorColors) var url = urlData[1]; var urlLength = url.length; processNextChars(startIndex - idx); - console.warn(url); // assem.append(''); assem.append(url); processNextChars(urlLength); @@ -300,7 +299,6 @@ function getTXTFromAtext(pad, atext, authorColors) } pieces.push(lineContent, '\n'); }else{ - console.warn(line); pieces.push(lineContent, '\n'); } From a67a0950dd05bbe494ede5dfd696fcc47dcc9107 Mon Sep 17 00:00:00 2001 From: John McLear Date: Sun, 10 Feb 2013 19:21:27 +0000 Subject: [PATCH 03/13] stop urls being encoded, not sure about other security implications here... --- src/node/utils/ExportTxt.js | 40 ++----------------------------------- 1 file changed, 2 insertions(+), 38 deletions(-) diff --git a/src/node/utils/ExportTxt.js b/src/node/utils/ExportTxt.js index d9ee708f9..462583d3a 100644 --- a/src/node/utils/ExportTxt.js +++ b/src/node/utils/ExportTxt.js @@ -129,8 +129,6 @@ function getTXTFromAtext(pad, atext, authorColors) var assem = Changeset.stringAssembler(); var openTags = []; - var urls = _findURLs(text); - var idx = 0; function processNextChars(numChars) @@ -239,7 +237,8 @@ function getTXTFromAtext(pad, atext, authorColors) //from but they break the abiword parser and are completly useless s = s.replace(String.fromCharCode(12), ""); - assem.append(_encodeWhitespace(Security.escapeHTML(s))); + // assem.append(_encodeWhitespace(Security.escapeHTML(s))); + assem.append(_encodeWhitespace(s)); } // end iteration over spans in line var tags2close = []; @@ -253,20 +252,6 @@ function getTXTFromAtext(pad, atext, authorColors) } } // end processNextChars - if (urls) - { - urls.forEach(function (urlData) - { - var startIndex = urlData[0]; - var url = urlData[1]; - var urlLength = url.length; - processNextChars(startIndex - idx); - // assem.append(''); - assem.append(url); - processNextChars(urlLength); - // assem.append(''); - }); - } processNextChars(text.length - idx); return _processSpaces(assem.toString()); @@ -452,24 +437,3 @@ function _processSpaces(s) // copied from ACE var _REGEX_WORDCHAR = /[\u0030-\u0039\u0041-\u005A\u0061-\u007A\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF\u0100-\u1FFF\u3040-\u9FFF\uF900-\uFDFF\uFE70-\uFEFE\uFF10-\uFF19\uFF21-\uFF3A\uFF41-\uFF5A\uFF66-\uFFDC]/; var _REGEX_SPACE = /\s/; -var _REGEX_URLCHAR = new RegExp('(' + /[-:@a-zA-Z0-9_.,~%+\/\\?=&#;()$]/.source + '|' + _REGEX_WORDCHAR.source + ')'); -var _REGEX_URL = new RegExp(/(?:(?:https?|s?ftp|ftps|file|smb|afp|nfs|(x-)?man|gopher|txmt):\/\/|mailto:)/.source + _REGEX_URLCHAR.source + '*(?![:.,;])' + _REGEX_URLCHAR.source, 'g'); - -// returns null if no URLs, or [[startIndex1, url1], [startIndex2, url2], ...] - -function _findURLs(text) -{ - _REGEX_URL.lastIndex = 0; - var urls = null; - var execResult; - while ((execResult = _REGEX_URL.exec(text))) - { - urls = (urls || []); - var startIndex = execResult.index; - var url = execResult[0]; - urls.push([startIndex, url]); - } - - return urls; -} - From 626ee97669767ea0f18fc41a99591211c35238ec Mon Sep 17 00:00:00 2001 From: John McLear Date: Sun, 10 Feb 2013 19:36:46 +0000 Subject: [PATCH 04/13] kinda brutal way of stopping plugins being able to pass *s instead of attributes --- src/node/db/PadManager.js | 4 ++-- src/node/utils/ExportTxt.js | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/node/db/PadManager.js b/src/node/db/PadManager.js index 5e0af4643..2be9da369 100644 --- a/src/node/db/PadManager.js +++ b/src/node/db/PadManager.js @@ -146,12 +146,12 @@ exports.getPad = function(id, text, callback) else { pad = new Pad(id); - + //initalize the pad pad.init(text, function(err) { if(ERR(err, callback)) return; - + console.warn(pad); globalPads.set(id, pad); callback(null, pad); }); diff --git a/src/node/utils/ExportTxt.js b/src/node/utils/ExportTxt.js index 462583d3a..c236df479 100644 --- a/src/node/utils/ExportTxt.js +++ b/src/node/utils/ExportTxt.js @@ -236,6 +236,10 @@ function getTXTFromAtext(pad, atext, authorColors) //removes the characters with the code 12. Don't know where they come //from but they break the abiword parser and are completly useless s = s.replace(String.fromCharCode(12), ""); + + // remove * from s, it's just not needed on a blank line.. This stops + // plugins from being able to display * at the beginning of a line + s = s.replace("*", ""); // assem.append(_encodeWhitespace(Security.escapeHTML(s))); assem.append(_encodeWhitespace(s)); From bcf9c23b4e9b52bd86e4b7936b713d71c7cd3e93 Mon Sep 17 00:00:00 2001 From: John McLear Date: Sun, 10 Feb 2013 19:38:16 +0000 Subject: [PATCH 05/13] dont use HTML filter hooks on txt export --- src/node/utils/ExportTxt.js | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/src/node/utils/ExportTxt.js b/src/node/utils/ExportTxt.js index c236df479..9451fd6e8 100644 --- a/src/node/utils/ExportTxt.js +++ b/src/node/utils/ExportTxt.js @@ -290,24 +290,6 @@ function getTXTFromAtext(pad, atext, authorColors) }else{ pieces.push(lineContent, '\n'); } - - // I'm not too keen about using teh HTML export filters here, they could cause real pain in the future - // I'd suggest supporting getLineTXTForExport - var lineContentFromHook = hooks.callAllStr("getLineHTMLForExport", - { - line: line, - apool: apool, - attribLine: attribLines[i], - text: textLines[i] - }, " ", " ", ""); - if (lineContentFromHook) - { - pieces.push(lineContentFromHook, ''); - } - else - { - // pieces.push(lineContent, '\n'); - } } return pieces.join(''); From 28f6d50011abd7c12b2b3948b14bd547d2ef4aee Mon Sep 17 00:00:00 2001 From: John McLear Date: Sun, 10 Feb 2013 22:14:05 +0000 Subject: [PATCH 06/13] remove console warn --- src/node/db/PadManager.js | 1 - 1 file changed, 1 deletion(-) diff --git a/src/node/db/PadManager.js b/src/node/db/PadManager.js index 2be9da369..7d546fc71 100644 --- a/src/node/db/PadManager.js +++ b/src/node/db/PadManager.js @@ -151,7 +151,6 @@ exports.getPad = function(id, text, callback) pad.init(text, function(err) { if(ERR(err, callback)) return; - console.warn(pad); globalPads.set(id, pad); callback(null, pad); }); From 60ef5f210ae47529325abf0968966ddf8348bfa7 Mon Sep 17 00:00:00 2001 From: John McLear Date: Sun, 10 Feb 2013 23:41:14 +0000 Subject: [PATCH 07/13] remove duplicate code to the best of my ability right now --- src/node/utils/ExportHtml.js | 7 +- src/node/utils/ExportTxt.js | 145 +++-------------------------------- 2 files changed, 15 insertions(+), 137 deletions(-) diff --git a/src/node/utils/ExportHtml.js b/src/node/utils/ExportHtml.js index 069194880..74b6546f1 100644 --- a/src/node/utils/ExportHtml.js +++ b/src/node/utils/ExportHtml.js @@ -92,6 +92,7 @@ function getPadHTML(pad, revNum, callback) exports.getPadHTML = getPadHTML; exports.getHTMLFromAtext = getHTMLFromAtext; +exports.getPadPlainText = getPadPlainText; function getHTMLFromAtext(pad, atext, authorColors) { @@ -542,6 +543,8 @@ function _analyzeLine(text, aline, apool) return line; } +exports._analyzeLine = _analyzeLine; + exports.getPadHTMLDocument = function (padId, revNum, noDocType, callback) { padManager.getPad(padId, function (err, pad) @@ -584,10 +587,9 @@ function _encodeWhitespace(s) { return "&#" +c.charCodeAt(0) + ";" }); } +exports._encodeWhitespace = _encodeWhitespace; // copied from ACE - - function _processSpaces(s) { var doesWrap = true; @@ -651,6 +653,7 @@ function _processSpaces(s) return parts.join(''); } +exports._processSpaces = _processSpaces; // copied from ACE var _REGEX_WORDCHAR = /[\u0030-\u0039\u0041-\u005A\u0061-\u007A\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF\u0100-\u1FFF\u3040-\u9FFF\uF900-\uFDFF\uFE70-\uFEFE\uFF10-\uFF19\uFF21-\uFF3A\uFF41-\uFF5A\uFF66-\uFFDC]/; diff --git a/src/node/utils/ExportTxt.js b/src/node/utils/ExportTxt.js index 9451fd6e8..edcccfd77 100644 --- a/src/node/utils/ExportTxt.js +++ b/src/node/utils/ExportTxt.js @@ -21,32 +21,12 @@ var padManager = require("../db/PadManager"); var ERR = require("async-stacktrace"); var Security = require('ep_etherpad-lite/static/js/security'); var hooks = require('ep_etherpad-lite/static/js/pluginfw/hooks'); -function getPadPlainText(pad, revNum) -{ - var atext = ((revNum !== undefined) ? pad.getInternalRevisionAText(revNum) : pad.atext()); - var textLines = atext.text.slice(0, -1).split('\n'); - var attribLines = Changeset.splitAttributionLines(atext.attribs, atext.text); - var apool = pad.pool(); - - var pieces = []; - for (var i = 0; i < textLines.length; i++) - { - var line = _analyzeLine(textLines[i], attribLines[i], apool); - if (line.listLevel) - { - var numSpaces = line.listLevel * 2 - 1; - var bullet = '*'; - pieces.push(new Array(numSpaces + 1).join(' '), bullet, ' ', line.text, '\n'); - } - else - { - pieces.push(line.text, '\n'); - } - } - - return pieces.join(''); -} +var getPadPlainText = require('./ExportHtml').getPadPlainText; +var _processSpaces = require('./ExportHtml')._processSpaces; +var _analyzeLine = require('./ExportHtml')._analyzeLine; +var _encodeWhitespace = require('./ExportHtml')._encodeWhitespace; +// This is slightly different than the HTML method as it passes the output to getTXTFromAText function getPadTXT(pad, revNum, callback) { var atext = pad.atext; @@ -77,7 +57,7 @@ function getPadTXT(pad, revNum, callback) function (callback) { - html = getTXTFromAtext(pad, atext); + html = getTXTFromAtext(pad, atext); // only this line is different to the HTML function callback(null); }], // run final callback @@ -91,8 +71,10 @@ function getPadTXT(pad, revNum, callback) } exports.getPadTXT = getPadTXT; -exports.getTXTFromAtext = getTXTFromAtext; + +// This is different than the functionality provided in ExportHtml as it provides formatting +// functionality that is designed specifically for TXT exports function getTXTFromAtext(pad, atext, authorColors) { var apool = pad.apool(); @@ -294,45 +276,7 @@ function getTXTFromAtext(pad, atext, authorColors) return pieces.join(''); } - -function _analyzeLine(text, aline, apool) -{ - var line = {}; - - // identify list - var lineMarker = 0; - line.listLevel = 0; - if (aline) - { - var opIter = Changeset.opIterator(aline); - if (opIter.hasNext()) - { - var listType = Changeset.opAttributeValue(opIter.next(), 'list', apool); - if (listType) - { - lineMarker = 1; - listType = /([a-z]+)([12345678])/.exec(listType); - if (listType) - { - line.listTypeName = listType[1]; - line.listLevel = Number(listType[2]); - } - } - } - } - if (lineMarker) - { - line.text = text.substring(1); - line.aline = Changeset.subattribution(aline, 1); - } - else - { - line.text = text; - line.aline = aline; - } - - return line; -} +exports.getTXTFromAtext = getTXTFromAtext; exports.getPadTXTDocument = function (padId, revNum, noDocType, callback) { @@ -354,72 +298,3 @@ function _encodeWhitespace(s) { return "&#" +c.charCodeAt(0) + ";" }); } - -// copied from ACE -function _processSpaces(s) -{ - var doesWrap = true; - if (s.indexOf("<") < 0 && !doesWrap) - { - // short-cut - return s.replace(/ /g, ' '); - } - var parts = []; - s.replace(/<[^>]*>?| |[^ <]+/g, function (m) - { - parts.push(m); - }); - if (doesWrap) - { - var endOfLine = true; - var beforeSpace = false; - // last space in a run is normal, others are nbsp, - // end of line is nbsp - for (var i = parts.length - 1; i >= 0; i--) - { - var p = parts[i]; - if (p == " ") - { - if (endOfLine || beforeSpace) parts[i] = ' '; - endOfLine = false; - beforeSpace = true; - } - else if (p.charAt(0) != "<") - { - endOfLine = false; - beforeSpace = false; - } - } - // beginning of line is nbsp - for (var i = 0; i < parts.length; i++) - { - var p = parts[i]; - if (p == " ") - { - parts[i] = ' '; - break; - } - else if (p.charAt(0) != "<") - { - break; - } - } - } - else - { - for (var i = 0; i < parts.length; i++) - { - var p = parts[i]; - if (p == " ") - { - parts[i] = ' '; - } - } - } - return parts.join(''); -} - - -// copied from ACE -var _REGEX_WORDCHAR = /[\u0030-\u0039\u0041-\u005A\u0061-\u007A\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF\u0100-\u1FFF\u3040-\u9FFF\uF900-\uFDFF\uFE70-\uFEFE\uFF10-\uFF19\uFF21-\uFF3A\uFF41-\uFF5A\uFF66-\uFFDC]/; -var _REGEX_SPACE = /\s/; From 0b5c948549a03e858302bb9740529741c3be4e22 Mon Sep 17 00:00:00 2001 From: John McLear Date: Tue, 12 Feb 2013 19:45:46 +0000 Subject: [PATCH 08/13] Move code from Html export to a Helper file --- src/node/utils/ExportHelper.js | 139 +++++++++++++++++++++++++++++++ src/node/utils/ExportHtml.js | 144 +-------------------------------- src/node/utils/ExportTxt.js | 8 +- 3 files changed, 147 insertions(+), 144 deletions(-) create mode 100644 src/node/utils/ExportHelper.js diff --git a/src/node/utils/ExportHelper.js b/src/node/utils/ExportHelper.js new file mode 100644 index 000000000..030b0dc75 --- /dev/null +++ b/src/node/utils/ExportHelper.js @@ -0,0 +1,139 @@ +/** + * Helpers for export requests + */ + +/* + * 2011 Peter 'Pita' Martischka (Primary Technology Ltd) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS-IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +var async = require("async"); +var Changeset = require("ep_etherpad-lite/static/js/Changeset"); +var padManager = require("../db/PadManager"); +var ERR = require("async-stacktrace"); +var Security = require('ep_etherpad-lite/static/js/security'); +var hooks = require('ep_etherpad-lite/static/js/pluginfw/hooks'); + +exports.getPadPlainText = function(pad, revNum){ + var atext = ((revNum !== undefined) ? pad.getInternalRevisionAText(revNum) : pad.atext()); + var textLines = atext.text.slice(0, -1).split('\n'); + var attribLines = Changeset.splitAttributionLines(atext.attribs, atext.text); + var apool = pad.pool(); + + var pieces = []; + for (var i = 0; i < textLines.length; i++){ + var line = _analyzeLine(textLines[i], attribLines[i], apool); + if (line.listLevel){ + var numSpaces = line.listLevel * 2 - 1; + var bullet = '*'; + pieces.push(new Array(numSpaces + 1).join(' '), bullet, ' ', line.text, '\n'); + } + else{ + pieces.push(line.text, '\n'); + } + } + + return pieces.join(''); +} + +// copied from ACE +exports._processSpaces = function(s){ + var doesWrap = true; + if (s.indexOf("<") < 0 && !doesWrap){ + // short-cut + return s.replace(/ /g, ' '); + } + var parts = []; + s.replace(/<[^>]*>?| |[^ <]+/g, function (m){ + parts.push(m); + }); + if (doesWrap){ + var endOfLine = true; + var beforeSpace = false; + // last space in a run is normal, others are nbsp, + // end of line is nbsp + for (var i = parts.length - 1; i >= 0; i--){ + var p = parts[i]; + if (p == " "){ + if (endOfLine || beforeSpace) parts[i] = ' '; + endOfLine = false; + beforeSpace = true; + } + else if (p.charAt(0) != "<"){ + endOfLine = false; + beforeSpace = false; + } + } + // beginning of line is nbsp + for (var i = 0; i < parts.length; i++){ + var p = parts[i]; + if (p == " "){ + parts[i] = ' '; + break; + } + else if (p.charAt(0) != "<"){ + break; + } + } + } + else + { + for (var i = 0; i < parts.length; i++){ + var p = parts[i]; + if (p == " "){ + parts[i] = ' '; + } + } + } + return parts.join(''); +} + + +exports._analyzeLine = function(text, aline, apool){ + var line = {}; + + // identify list + var lineMarker = 0; + line.listLevel = 0; + if (aline){ + var opIter = Changeset.opIterator(aline); + if (opIter.hasNext()){ + var listType = Changeset.opAttributeValue(opIter.next(), 'list', apool); + if (listType){ + lineMarker = 1; + listType = /([a-z]+)([12345678])/.exec(listType); + if (listType){ + line.listTypeName = listType[1]; + line.listLevel = Number(listType[2]); + } + } + } + } + if (lineMarker){ + line.text = text.substring(1); + line.aline = Changeset.subattribution(aline, 1); + } + else{ + line.text = text; + line.aline = aline; + } + return line; +} + + +exports._encodeWhitespace = function(s){ + return s.replace(/[^\x21-\x7E\s\t\n\r]/g, function(c){ + return "&#" +c.charCodeAt(0) + ";" + }); +} diff --git a/src/node/utils/ExportHtml.js b/src/node/utils/ExportHtml.js index 74b6546f1..a54f566c2 100644 --- a/src/node/utils/ExportHtml.js +++ b/src/node/utils/ExportHtml.js @@ -21,31 +21,10 @@ var padManager = require("../db/PadManager"); var ERR = require("async-stacktrace"); var Security = require('ep_etherpad-lite/static/js/security'); var hooks = require('ep_etherpad-lite/static/js/pluginfw/hooks'); -function getPadPlainText(pad, revNum) -{ - var atext = ((revNum !== undefined) ? pad.getInternalRevisionAText(revNum) : pad.atext()); - var textLines = atext.text.slice(0, -1).split('\n'); - var attribLines = Changeset.splitAttributionLines(atext.attribs, atext.text); - var apool = pad.pool(); - - var pieces = []; - for (var i = 0; i < textLines.length; i++) - { - var line = _analyzeLine(textLines[i], attribLines[i], apool); - if (line.listLevel) - { - var numSpaces = line.listLevel * 2 - 1; - var bullet = '*'; - pieces.push(new Array(numSpaces + 1).join(' '), bullet, ' ', line.text, '\n'); - } - else - { - pieces.push(line.text, '\n'); - } - } - - return pieces.join(''); -} +var getPadPlainText = require('./ExportHelper').getPadPlainText +var _processSpaces = require('./ExportHelper')._processSpaces; +var _analyzeLine = require('./ExportHelper')._analyzeLine; +var _encodeWhitespace = require('./ExportHelper')._encodeWhitespace; function getPadHTML(pad, revNum, callback) { @@ -92,7 +71,6 @@ function getPadHTML(pad, revNum, callback) exports.getPadHTML = getPadHTML; exports.getHTMLFromAtext = getHTMLFromAtext; -exports.getPadPlainText = getPadPlainText; function getHTMLFromAtext(pad, atext, authorColors) { @@ -504,47 +482,6 @@ function getHTMLFromAtext(pad, atext, authorColors) return pieces.join(''); } -function _analyzeLine(text, aline, apool) -{ - var line = {}; - - // identify list - var lineMarker = 0; - line.listLevel = 0; - if (aline) - { - var opIter = Changeset.opIterator(aline); - if (opIter.hasNext()) - { - var listType = Changeset.opAttributeValue(opIter.next(), 'list', apool); - if (listType) - { - lineMarker = 1; - listType = /([a-z]+)([12345678])/.exec(listType); - if (listType) - { - line.listTypeName = listType[1]; - line.listLevel = Number(listType[2]); - } - } - } - } - if (lineMarker) - { - line.text = text.substring(1); - line.aline = Changeset.subattribution(aline, 1); - } - else - { - line.text = text; - line.aline = aline; - } - - return line; -} - -exports._analyzeLine = _analyzeLine; - exports.getPadHTMLDocument = function (padId, revNum, noDocType, callback) { padManager.getPad(padId, function (err, pad) @@ -581,79 +518,6 @@ exports.getPadHTMLDocument = function (padId, revNum, noDocType, callback) }); } -function _encodeWhitespace(s) { - return s.replace(/[^\x21-\x7E\s\t\n\r]/g, function(c) - { - return "&#" +c.charCodeAt(0) + ";" - }); -} -exports._encodeWhitespace = _encodeWhitespace; - -// copied from ACE -function _processSpaces(s) -{ - var doesWrap = true; - if (s.indexOf("<") < 0 && !doesWrap) - { - // short-cut - return s.replace(/ /g, ' '); - } - var parts = []; - s.replace(/<[^>]*>?| |[^ <]+/g, function (m) - { - parts.push(m); - }); - if (doesWrap) - { - var endOfLine = true; - var beforeSpace = false; - // last space in a run is normal, others are nbsp, - // end of line is nbsp - for (var i = parts.length - 1; i >= 0; i--) - { - var p = parts[i]; - if (p == " ") - { - if (endOfLine || beforeSpace) parts[i] = ' '; - endOfLine = false; - beforeSpace = true; - } - else if (p.charAt(0) != "<") - { - endOfLine = false; - beforeSpace = false; - } - } - // beginning of line is nbsp - for (var i = 0; i < parts.length; i++) - { - var p = parts[i]; - if (p == " ") - { - parts[i] = ' '; - break; - } - else if (p.charAt(0) != "<") - { - break; - } - } - } - else - { - for (var i = 0; i < parts.length; i++) - { - var p = parts[i]; - if (p == " ") - { - parts[i] = ' '; - } - } - } - return parts.join(''); -} - -exports._processSpaces = _processSpaces; // copied from ACE var _REGEX_WORDCHAR = /[\u0030-\u0039\u0041-\u005A\u0061-\u007A\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF\u0100-\u1FFF\u3040-\u9FFF\uF900-\uFDFF\uFE70-\uFEFE\uFF10-\uFF19\uFF21-\uFF3A\uFF41-\uFF5A\uFF66-\uFFDC]/; diff --git a/src/node/utils/ExportTxt.js b/src/node/utils/ExportTxt.js index edcccfd77..0f3b1a634 100644 --- a/src/node/utils/ExportTxt.js +++ b/src/node/utils/ExportTxt.js @@ -21,10 +21,10 @@ var padManager = require("../db/PadManager"); var ERR = require("async-stacktrace"); var Security = require('ep_etherpad-lite/static/js/security'); var hooks = require('ep_etherpad-lite/static/js/pluginfw/hooks'); -var getPadPlainText = require('./ExportHtml').getPadPlainText; -var _processSpaces = require('./ExportHtml')._processSpaces; -var _analyzeLine = require('./ExportHtml')._analyzeLine; -var _encodeWhitespace = require('./ExportHtml')._encodeWhitespace; +var getPadPlainText = require('./ExportHelper').getPadPlainText; +var _processSpaces = require('./ExportHelper')._processSpaces; +var _analyzeLine = require('./ExportHelper')._analyzeLine; +var _encodeWhitespace = require('./ExportHelper')._encodeWhitespace; // This is slightly different than the HTML method as it passes the output to getTXTFromAText function getPadTXT(pad, revNum, callback) From da246d183daa1a5a012937f0dc0f54ca96873553 Mon Sep 17 00:00:00 2001 From: John McLear Date: Tue, 12 Feb 2013 19:47:53 +0000 Subject: [PATCH 09/13] Correct license header --- src/node/utils/ExportTxt.js | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/node/utils/ExportTxt.js b/src/node/utils/ExportTxt.js index 0f3b1a634..30673a981 100644 --- a/src/node/utils/ExportTxt.js +++ b/src/node/utils/ExportTxt.js @@ -1,5 +1,9 @@ /** - * Copyright 2009 Google Inc. + * TXT export + */ + +/* + * 2013 John McLear * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,7 +18,6 @@ * limitations under the License. */ - var async = require("async"); var Changeset = require("ep_etherpad-lite/static/js/Changeset"); var padManager = require("../db/PadManager"); From d3f730e2ba060b0d2b26d03b55088f266fefb1b9 Mon Sep 17 00:00:00 2001 From: John McLear Date: Wed, 13 Feb 2013 18:01:15 +0000 Subject: [PATCH 10/13] fix various issues dont stop random *'s appearing --- src/node/utils/ExportHelper.js | 52 -------------------------------- src/node/utils/ExportHtml.js | 55 +++++++++++++++++++++++++++++++++- src/node/utils/ExportTxt.js | 19 +++++------- 3 files changed, 62 insertions(+), 64 deletions(-) diff --git a/src/node/utils/ExportHelper.js b/src/node/utils/ExportHelper.js index 030b0dc75..a939a8b6e 100644 --- a/src/node/utils/ExportHelper.js +++ b/src/node/utils/ExportHelper.js @@ -47,58 +47,6 @@ exports.getPadPlainText = function(pad, revNum){ return pieces.join(''); } -// copied from ACE -exports._processSpaces = function(s){ - var doesWrap = true; - if (s.indexOf("<") < 0 && !doesWrap){ - // short-cut - return s.replace(/ /g, ' '); - } - var parts = []; - s.replace(/<[^>]*>?| |[^ <]+/g, function (m){ - parts.push(m); - }); - if (doesWrap){ - var endOfLine = true; - var beforeSpace = false; - // last space in a run is normal, others are nbsp, - // end of line is nbsp - for (var i = parts.length - 1; i >= 0; i--){ - var p = parts[i]; - if (p == " "){ - if (endOfLine || beforeSpace) parts[i] = ' '; - endOfLine = false; - beforeSpace = true; - } - else if (p.charAt(0) != "<"){ - endOfLine = false; - beforeSpace = false; - } - } - // beginning of line is nbsp - for (var i = 0; i < parts.length; i++){ - var p = parts[i]; - if (p == " "){ - parts[i] = ' '; - break; - } - else if (p.charAt(0) != "<"){ - break; - } - } - } - else - { - for (var i = 0; i < parts.length; i++){ - var p = parts[i]; - if (p == " "){ - parts[i] = ' '; - } - } - } - return parts.join(''); -} - exports._analyzeLine = function(text, aline, apool){ var line = {}; diff --git a/src/node/utils/ExportHtml.js b/src/node/utils/ExportHtml.js index a54f566c2..585694d4b 100644 --- a/src/node/utils/ExportHtml.js +++ b/src/node/utils/ExportHtml.js @@ -22,7 +22,6 @@ var ERR = require("async-stacktrace"); var Security = require('ep_etherpad-lite/static/js/security'); var hooks = require('ep_etherpad-lite/static/js/pluginfw/hooks'); var getPadPlainText = require('./ExportHelper').getPadPlainText -var _processSpaces = require('./ExportHelper')._processSpaces; var _analyzeLine = require('./ExportHelper')._analyzeLine; var _encodeWhitespace = require('./ExportHelper')._encodeWhitespace; @@ -543,3 +542,57 @@ function _findURLs(text) return urls; } + + +// copied from ACE +function _processSpaces(s){ + var doesWrap = true; + if (s.indexOf("<") < 0 && !doesWrap){ + // short-cut + return s.replace(/ /g, ' '); + } + var parts = []; + s.replace(/<[^>]*>?| |[^ <]+/g, function (m){ + parts.push(m); + }); + if (doesWrap){ + var endOfLine = true; + var beforeSpace = false; + // last space in a run is normal, others are nbsp, + // end of line is nbsp + for (var i = parts.length - 1; i >= 0; i--){ + var p = parts[i]; + if (p == " "){ + if (endOfLine || beforeSpace) parts[i] = ' '; + endOfLine = false; + beforeSpace = true; + } + else if (p.charAt(0) != "<"){ + endOfLine = false; + beforeSpace = false; + } + } + // beginning of line is nbsp + for (var i = 0; i < parts.length; i++){ + var p = parts[i]; + if (p == " "){ + parts[i] = ' '; + break; + } + else if (p.charAt(0) != "<"){ + break; + } + } + } + else + { + for (var i = 0; i < parts.length; i++){ + var p = parts[i]; + if (p == " "){ + parts[i] = ' '; + } + } + } + return parts.join(''); +} + diff --git a/src/node/utils/ExportTxt.js b/src/node/utils/ExportTxt.js index 30673a981..05847f162 100644 --- a/src/node/utils/ExportTxt.js +++ b/src/node/utils/ExportTxt.js @@ -25,7 +25,6 @@ var ERR = require("async-stacktrace"); var Security = require('ep_etherpad-lite/static/js/security'); var hooks = require('ep_etherpad-lite/static/js/pluginfw/hooks'); var getPadPlainText = require('./ExportHelper').getPadPlainText; -var _processSpaces = require('./ExportHelper')._processSpaces; var _analyzeLine = require('./ExportHelper')._analyzeLine; var _encodeWhitespace = require('./ExportHelper')._encodeWhitespace; @@ -204,12 +203,12 @@ function getTXTFromAtext(pad, atext, authorColors) { if (propVals[i] === ENTER || propVals[i] === STAY) { - emitOpenTag(i); propVals[i] = true; } } // propVals is now all {true,false} again } // end if (propChanged) + var chars = o.chars; if (o.lines) { @@ -217,16 +216,15 @@ function getTXTFromAtext(pad, atext, authorColors) } var s = taker.take(chars); - - //removes the characters with the code 12. Don't know where they come - //from but they break the abiword parser and are completly useless - s = s.replace(String.fromCharCode(12), ""); + + // removes the characters with the code 12. Don't know where they come + // from but they break the abiword parser and are completly useless + // s = s.replace(String.fromCharCode(12), ""); // remove * from s, it's just not needed on a blank line.. This stops // plugins from being able to display * at the beginning of a line - s = s.replace("*", ""); - - // assem.append(_encodeWhitespace(Security.escapeHTML(s))); + // s = s.replace("*", ""); // Then remove it + assem.append(_encodeWhitespace(s)); } // end iteration over spans in line @@ -242,8 +240,7 @@ function getTXTFromAtext(pad, atext, authorColors) } // end processNextChars processNextChars(text.length - idx); - - return _processSpaces(assem.toString()); + return(assem.toString()); } // end getLineHTML var pieces = [css]; From be56272e66f4921f68fe12cd05cbaad8eb54894b Mon Sep 17 00:00:00 2001 From: John McLear Date: Wed, 13 Feb 2013 18:30:55 +0000 Subject: [PATCH 11/13] allow non ascii chars in txt export --- src/node/utils/ExportHelper.js | 6 +- src/node/utils/ExportHtml.js | 7 +- src/node/utils/ExportTxt.js | 10 +- src/node/utils/padDiffHTML.js | 554 +++++++++++++++++++++++++++++++++ 4 files changed, 562 insertions(+), 15 deletions(-) create mode 100644 src/node/utils/padDiffHTML.js diff --git a/src/node/utils/ExportHelper.js b/src/node/utils/ExportHelper.js index a939a8b6e..41d440f30 100644 --- a/src/node/utils/ExportHelper.js +++ b/src/node/utils/ExportHelper.js @@ -80,8 +80,4 @@ exports._analyzeLine = function(text, aline, apool){ } -exports._encodeWhitespace = function(s){ - return s.replace(/[^\x21-\x7E\s\t\n\r]/g, function(c){ - return "&#" +c.charCodeAt(0) + ";" - }); -} + diff --git a/src/node/utils/ExportHtml.js b/src/node/utils/ExportHtml.js index 585694d4b..51a4b2c3d 100644 --- a/src/node/utils/ExportHtml.js +++ b/src/node/utils/ExportHtml.js @@ -23,7 +23,6 @@ var Security = require('ep_etherpad-lite/static/js/security'); var hooks = require('ep_etherpad-lite/static/js/pluginfw/hooks'); var getPadPlainText = require('./ExportHelper').getPadPlainText var _analyzeLine = require('./ExportHelper')._analyzeLine; -var _encodeWhitespace = require('./ExportHelper')._encodeWhitespace; function getPadHTML(pad, revNum, callback) { @@ -596,3 +595,9 @@ function _processSpaces(s){ return parts.join(''); } +function _encodeWhitespace(s){ + return s.replace(/[^\x21-\x7E\s\t\n\r]/g, function(c){ + return "&#" +c.charCodeAt(0) + ";" + }); +} + diff --git a/src/node/utils/ExportTxt.js b/src/node/utils/ExportTxt.js index 05847f162..4a3e458b4 100644 --- a/src/node/utils/ExportTxt.js +++ b/src/node/utils/ExportTxt.js @@ -26,7 +26,6 @@ var Security = require('ep_etherpad-lite/static/js/security'); var hooks = require('ep_etherpad-lite/static/js/pluginfw/hooks'); var getPadPlainText = require('./ExportHelper').getPadPlainText; var _analyzeLine = require('./ExportHelper')._analyzeLine; -var _encodeWhitespace = require('./ExportHelper')._encodeWhitespace; // This is slightly different than the HTML method as it passes the output to getTXTFromAText function getPadTXT(pad, revNum, callback) @@ -112,7 +111,6 @@ function getTXTFromAtext(pad, atext, authorColors) var taker = Changeset.stringIterator(text); var assem = Changeset.stringAssembler(); var openTags = []; - var idx = 0; function processNextChars(numChars) @@ -225,7 +223,7 @@ function getTXTFromAtext(pad, atext, authorColors) // plugins from being able to display * at the beginning of a line // s = s.replace("*", ""); // Then remove it - assem.append(_encodeWhitespace(s)); + assem.append(s); } // end iteration over spans in line var tags2close = []; @@ -292,9 +290,3 @@ exports.getPadTXTDocument = function (padId, revNum, noDocType, callback) }); } -function _encodeWhitespace(s) { - return s.replace(/[^\x21-\x7E\s\t\n\r]/g, function(c) - { - return "&#" +c.charCodeAt(0) + ";" - }); -} diff --git a/src/node/utils/padDiffHTML.js b/src/node/utils/padDiffHTML.js new file mode 100644 index 000000000..00e967287 --- /dev/null +++ b/src/node/utils/padDiffHTML.js @@ -0,0 +1,554 @@ +var Changeset = require("../../static/js/Changeset"); +var async = require("async"); +var exportHtml = require('./ExportHtml'); + +function padDiffHTML (pad, fromRev, toRev){ + //check parameters + if(!pad || !pad.id || !pad.atext || !pad.pool) + { + throw new Error('Invalid pad'); + } + + var range = pad.getValidRevisionRange(fromRev, toRev); + if(!range) { throw new Error('Invalid revision range.' + + ' startRev: ' + fromRev + + ' endRev: ' + toRev); } + + this._pad = pad; + this._fromRev = range.startRev; + this._toRev = range.endRev; + this._html = null; + this._authors = []; +} + +padDiffHTML.prototype._isClearAuthorship = function(changeset){ + //unpack + var unpacked = Changeset.unpack(changeset); + + //check if there is nothing in the charBank + if(unpacked.charBank !== "") + return false; + + //check if oldLength == newLength + if(unpacked.oldLen !== unpacked.newLen) + return false; + + //lets iterator over the operators + var iterator = Changeset.opIterator(unpacked.ops); + + //get the first operator, this should be a clear operator + var clearOperator = iterator.next(); + + //check if there is only one operator + if(iterator.hasNext() === true) + return false; + + //check if this operator doesn't change text + if(clearOperator.opcode !== "=") + return false; + + //check that this operator applys to the complete text + //if the text ends with a new line, its exactly one character less, else it has the same length + if(clearOperator.chars !== unpacked.oldLen-1 && clearOperator.chars !== unpacked.oldLen) + return false; + + var attributes = []; + Changeset.eachAttribNumber(changeset, function(attrNum){ + attributes.push(attrNum); + }); + + //check that this changeset uses only one attribute + if(attributes.length !== 1) + return false; + + var appliedAttribute = this._pad.pool.getAttrib(attributes[0]); + + //check if the applied attribute is an anonymous author attribute + if(appliedAttribute[0] !== "author" || appliedAttribute[1] !== "") + return false; + + return true; +} + +padDiffHTML.prototype._createClearAuthorship = function(rev, callback){ + var self = this; + this._pad.getInternalRevisionAText(rev, function(err, atext){ + if(err){ + return callback(err); + } + + //build clearAuthorship changeset + var builder = Changeset.builder(atext.text.length); + builder.keepText(atext.text, [['author','']], self._pad.pool); + var changeset = builder.toString(); + + callback(null, changeset); + }); +} + +padDiffHTML.prototype._createClearStartAtext = function(rev, callback){ + var self = this; + + //get the atext of this revision + this._pad.getInternalRevisionAText(rev, function(err, atext){ + if(err){ + return callback(err); + } + + //create the clearAuthorship changeset + self._createClearAuthorship(rev, function(err, changeset){ + if(err){ + return callback(err); + } + + //apply the clearAuthorship changeset + var newAText = Changeset.applyToAText(changeset, atext, self._pad.pool); + + callback(null, newAText); + }); + }); +} + +padDiffHTML.prototype._getChangesetsInBulk = function(startRev, count, callback) { + var self = this; + + //find out which revisions we need + var revisions = []; + for(var i=startRev;i<(startRev+count) && i<=this._pad.head;i++){ + revisions.push(i); + } + + var changesets = [], authors = []; + + //get all needed revisions + async.forEach(revisions, function(rev, callback){ + self._pad.getRevision(rev, function(err, revision){ + if(err){ + return callback(err) + } + + var arrayNum = rev-startRev; + + changesets[arrayNum] = revision.changeset; + authors[arrayNum] = revision.meta.author; + + callback(); + }); + }, function(err){ + callback(err, changesets, authors); + }); +} + +padDiffHTML.prototype._addAuthors = function(authors) { + var self = this; + //add to array if not in the array + authors.forEach(function(author){ + if(self._authors.indexOf(author) == -1){ + self._authors.push(author); + } + }); +} + +padDiffHTML.prototype._createDiffAtext = function(callback) { + var self = this; + var bulkSize = 100; + + //get the cleaned startAText + self._createClearStartAtext(self._fromRev, function(err, atext){ + if(err) { return callback(err); } + + var superChangeset = null; + + var rev = self._fromRev + 1; + + //async while loop + async.whilst( + //loop condition + function () { return rev <= self._toRev; }, + + //loop body + function (callback) { + //get the bulk + self._getChangesetsInBulk(rev,bulkSize,function(err, changesets, authors){ + var addedAuthors = []; + + //run trough all changesets + for(var i=0;i= curChar) { + curLineNextOp.chars -= (curChar - indexIntoLine); + done = true; + } else { + indexIntoLine += curLineNextOp.chars; + } + } + } + + while (numChars > 0) { + if ((!curLineNextOp.chars) && (!curLineOpIter.hasNext())) { + curLine++; + curChar = 0; + curLineOpIterLine = curLine; + curLineNextOp.chars = 0; + curLineOpIter = Changeset.opIterator(alines_get(curLine)); + } + if (!curLineNextOp.chars) { + curLineOpIter.next(curLineNextOp); + } + var charsToUse = Math.min(numChars, curLineNextOp.chars); + func(charsToUse, curLineNextOp.attribs, charsToUse == curLineNextOp.chars && curLineNextOp.lines > 0); + numChars -= charsToUse; + curLineNextOp.chars -= charsToUse; + curChar += charsToUse; + } + + if ((!curLineNextOp.chars) && (!curLineOpIter.hasNext())) { + curLine++; + curChar = 0; + } + } + + function skip(N, L) { + if (L) { + curLine += L; + curChar = 0; + } else { + if (curLineOpIter && curLineOpIterLine == curLine) { + consumeAttribRuns(N, function () {}); + } else { + curChar += N; + } + } + } + + function nextText(numChars) { + var len = 0; + var assem = Changeset.stringAssembler(); + var firstString = lines_get(curLine).substring(curChar); + len += firstString.length; + assem.append(firstString); + + var lineNum = curLine + 1; + while (len < numChars) { + var nextString = lines_get(lineNum); + len += nextString.length; + assem.append(nextString); + lineNum++; + } + + return assem.toString().substring(0, numChars); + } + + function cachedStrFunc(func) { + var cache = {}; + return function (s) { + if (!cache[s]) { + cache[s] = func(s); + } + return cache[s]; + }; + } + + var attribKeys = []; + var attribValues = []; + + //iterate over all operators of this changeset + while (csIter.hasNext()) { + var csOp = csIter.next(); + + if (csOp.opcode == '=') { + var textBank = nextText(csOp.chars); + + // decide if this equal operator is an attribution change or not. We can see this by checkinf if attribs is set. + // If the text this operator applies to is only a star, than this is a false positive and should be ignored + if (csOp.attribs && textBank != "*") { + var deletedAttrib = apool.putAttrib(["removed", true]); + var authorAttrib = apool.putAttrib(["author", ""]);; + + attribKeys.length = 0; + attribValues.length = 0; + Changeset.eachAttribNumber(csOp.attribs, function (n) { + attribKeys.push(apool.getAttribKey(n)); + attribValues.push(apool.getAttribValue(n)); + + if(apool.getAttribKey(n) === "author"){ + authorAttrib = n; + }; + }); + + var undoBackToAttribs = cachedStrFunc(function (attribs) { + var backAttribs = []; + for (var i = 0; i < attribKeys.length; i++) { + var appliedKey = attribKeys[i]; + var appliedValue = attribValues[i]; + var oldValue = Changeset.attribsAttributeValue(attribs, appliedKey, apool); + if (appliedValue != oldValue) { + backAttribs.push([appliedKey, oldValue]); + } + } + return Changeset.makeAttribsString('=', backAttribs, apool); + }); + + var oldAttribsAddition = "*" + Changeset.numToString(deletedAttrib) + "*" + Changeset.numToString(authorAttrib); + + var textLeftToProcess = textBank; + + while(textLeftToProcess.length > 0){ + //process till the next line break or process only one line break + var lengthToProcess = textLeftToProcess.indexOf("\n"); + var lineBreak = false; + switch(lengthToProcess){ + case -1: + lengthToProcess=textLeftToProcess.length; + break; + case 0: + lineBreak = true; + lengthToProcess=1; + break; + } + + //get the text we want to procceed in this step + var processText = textLeftToProcess.substr(0, lengthToProcess); + textLeftToProcess = textLeftToProcess.substr(lengthToProcess); + + if(lineBreak){ + builder.keep(1, 1); //just skip linebreaks, don't do a insert + keep for a linebreak + + //consume the attributes of this linebreak + consumeAttribRuns(1, function(){}); + } else { + //add the old text via an insert, but add a deletion attribute + the author attribute of the author who deleted it + var textBankIndex = 0; + consumeAttribRuns(lengthToProcess, function (len, attribs, endsLine) { + //get the old attributes back + var attribs = (undoBackToAttribs(attribs) || "") + oldAttribsAddition; + + builder.insert(processText.substr(textBankIndex, len), attribs); + textBankIndex += len; + }); + + builder.keep(lengthToProcess, 0); + } + } + } else { + skip(csOp.chars, csOp.lines); + builder.keep(csOp.chars, csOp.lines); + } + } else if (csOp.opcode == '+') { + builder.keep(csOp.chars, csOp.lines); + } else if (csOp.opcode == '-') { + var textBank = nextText(csOp.chars); + var textBankIndex = 0; + + consumeAttribRuns(csOp.chars, function (len, attribs, endsLine) { + builder.insert(textBank.substr(textBankIndex, len), attribs + csOp.attribs); + textBankIndex += len; + }); + } + } + + return Changeset.checkRep(builder.toString()); +}; + +//export the constructor +module.exports = padDiffHTML; From dea892213e080b7637955413222384959b3b267f Mon Sep 17 00:00:00 2001 From: John McLear Date: Wed, 13 Feb 2013 18:41:04 +0000 Subject: [PATCH 12/13] Revert "allow non ascii chars in txt export" This reverts commit be56272e66f4921f68fe12cd05cbaad8eb54894b. --- src/node/utils/ExportHelper.js | 6 +- src/node/utils/ExportHtml.js | 7 +- src/node/utils/ExportTxt.js | 10 +- src/node/utils/padDiffHTML.js | 554 --------------------------------- 4 files changed, 15 insertions(+), 562 deletions(-) delete mode 100644 src/node/utils/padDiffHTML.js diff --git a/src/node/utils/ExportHelper.js b/src/node/utils/ExportHelper.js index 41d440f30..a939a8b6e 100644 --- a/src/node/utils/ExportHelper.js +++ b/src/node/utils/ExportHelper.js @@ -80,4 +80,8 @@ exports._analyzeLine = function(text, aline, apool){ } - +exports._encodeWhitespace = function(s){ + return s.replace(/[^\x21-\x7E\s\t\n\r]/g, function(c){ + return "&#" +c.charCodeAt(0) + ";" + }); +} diff --git a/src/node/utils/ExportHtml.js b/src/node/utils/ExportHtml.js index 51a4b2c3d..585694d4b 100644 --- a/src/node/utils/ExportHtml.js +++ b/src/node/utils/ExportHtml.js @@ -23,6 +23,7 @@ var Security = require('ep_etherpad-lite/static/js/security'); var hooks = require('ep_etherpad-lite/static/js/pluginfw/hooks'); var getPadPlainText = require('./ExportHelper').getPadPlainText var _analyzeLine = require('./ExportHelper')._analyzeLine; +var _encodeWhitespace = require('./ExportHelper')._encodeWhitespace; function getPadHTML(pad, revNum, callback) { @@ -595,9 +596,3 @@ function _processSpaces(s){ return parts.join(''); } -function _encodeWhitespace(s){ - return s.replace(/[^\x21-\x7E\s\t\n\r]/g, function(c){ - return "&#" +c.charCodeAt(0) + ";" - }); -} - diff --git a/src/node/utils/ExportTxt.js b/src/node/utils/ExportTxt.js index 4a3e458b4..05847f162 100644 --- a/src/node/utils/ExportTxt.js +++ b/src/node/utils/ExportTxt.js @@ -26,6 +26,7 @@ var Security = require('ep_etherpad-lite/static/js/security'); var hooks = require('ep_etherpad-lite/static/js/pluginfw/hooks'); var getPadPlainText = require('./ExportHelper').getPadPlainText; var _analyzeLine = require('./ExportHelper')._analyzeLine; +var _encodeWhitespace = require('./ExportHelper')._encodeWhitespace; // This is slightly different than the HTML method as it passes the output to getTXTFromAText function getPadTXT(pad, revNum, callback) @@ -111,6 +112,7 @@ function getTXTFromAtext(pad, atext, authorColors) var taker = Changeset.stringIterator(text); var assem = Changeset.stringAssembler(); var openTags = []; + var idx = 0; function processNextChars(numChars) @@ -223,7 +225,7 @@ function getTXTFromAtext(pad, atext, authorColors) // plugins from being able to display * at the beginning of a line // s = s.replace("*", ""); // Then remove it - assem.append(s); + assem.append(_encodeWhitespace(s)); } // end iteration over spans in line var tags2close = []; @@ -290,3 +292,9 @@ exports.getPadTXTDocument = function (padId, revNum, noDocType, callback) }); } +function _encodeWhitespace(s) { + return s.replace(/[^\x21-\x7E\s\t\n\r]/g, function(c) + { + return "&#" +c.charCodeAt(0) + ";" + }); +} diff --git a/src/node/utils/padDiffHTML.js b/src/node/utils/padDiffHTML.js deleted file mode 100644 index 00e967287..000000000 --- a/src/node/utils/padDiffHTML.js +++ /dev/null @@ -1,554 +0,0 @@ -var Changeset = require("../../static/js/Changeset"); -var async = require("async"); -var exportHtml = require('./ExportHtml'); - -function padDiffHTML (pad, fromRev, toRev){ - //check parameters - if(!pad || !pad.id || !pad.atext || !pad.pool) - { - throw new Error('Invalid pad'); - } - - var range = pad.getValidRevisionRange(fromRev, toRev); - if(!range) { throw new Error('Invalid revision range.' + - ' startRev: ' + fromRev + - ' endRev: ' + toRev); } - - this._pad = pad; - this._fromRev = range.startRev; - this._toRev = range.endRev; - this._html = null; - this._authors = []; -} - -padDiffHTML.prototype._isClearAuthorship = function(changeset){ - //unpack - var unpacked = Changeset.unpack(changeset); - - //check if there is nothing in the charBank - if(unpacked.charBank !== "") - return false; - - //check if oldLength == newLength - if(unpacked.oldLen !== unpacked.newLen) - return false; - - //lets iterator over the operators - var iterator = Changeset.opIterator(unpacked.ops); - - //get the first operator, this should be a clear operator - var clearOperator = iterator.next(); - - //check if there is only one operator - if(iterator.hasNext() === true) - return false; - - //check if this operator doesn't change text - if(clearOperator.opcode !== "=") - return false; - - //check that this operator applys to the complete text - //if the text ends with a new line, its exactly one character less, else it has the same length - if(clearOperator.chars !== unpacked.oldLen-1 && clearOperator.chars !== unpacked.oldLen) - return false; - - var attributes = []; - Changeset.eachAttribNumber(changeset, function(attrNum){ - attributes.push(attrNum); - }); - - //check that this changeset uses only one attribute - if(attributes.length !== 1) - return false; - - var appliedAttribute = this._pad.pool.getAttrib(attributes[0]); - - //check if the applied attribute is an anonymous author attribute - if(appliedAttribute[0] !== "author" || appliedAttribute[1] !== "") - return false; - - return true; -} - -padDiffHTML.prototype._createClearAuthorship = function(rev, callback){ - var self = this; - this._pad.getInternalRevisionAText(rev, function(err, atext){ - if(err){ - return callback(err); - } - - //build clearAuthorship changeset - var builder = Changeset.builder(atext.text.length); - builder.keepText(atext.text, [['author','']], self._pad.pool); - var changeset = builder.toString(); - - callback(null, changeset); - }); -} - -padDiffHTML.prototype._createClearStartAtext = function(rev, callback){ - var self = this; - - //get the atext of this revision - this._pad.getInternalRevisionAText(rev, function(err, atext){ - if(err){ - return callback(err); - } - - //create the clearAuthorship changeset - self._createClearAuthorship(rev, function(err, changeset){ - if(err){ - return callback(err); - } - - //apply the clearAuthorship changeset - var newAText = Changeset.applyToAText(changeset, atext, self._pad.pool); - - callback(null, newAText); - }); - }); -} - -padDiffHTML.prototype._getChangesetsInBulk = function(startRev, count, callback) { - var self = this; - - //find out which revisions we need - var revisions = []; - for(var i=startRev;i<(startRev+count) && i<=this._pad.head;i++){ - revisions.push(i); - } - - var changesets = [], authors = []; - - //get all needed revisions - async.forEach(revisions, function(rev, callback){ - self._pad.getRevision(rev, function(err, revision){ - if(err){ - return callback(err) - } - - var arrayNum = rev-startRev; - - changesets[arrayNum] = revision.changeset; - authors[arrayNum] = revision.meta.author; - - callback(); - }); - }, function(err){ - callback(err, changesets, authors); - }); -} - -padDiffHTML.prototype._addAuthors = function(authors) { - var self = this; - //add to array if not in the array - authors.forEach(function(author){ - if(self._authors.indexOf(author) == -1){ - self._authors.push(author); - } - }); -} - -padDiffHTML.prototype._createDiffAtext = function(callback) { - var self = this; - var bulkSize = 100; - - //get the cleaned startAText - self._createClearStartAtext(self._fromRev, function(err, atext){ - if(err) { return callback(err); } - - var superChangeset = null; - - var rev = self._fromRev + 1; - - //async while loop - async.whilst( - //loop condition - function () { return rev <= self._toRev; }, - - //loop body - function (callback) { - //get the bulk - self._getChangesetsInBulk(rev,bulkSize,function(err, changesets, authors){ - var addedAuthors = []; - - //run trough all changesets - for(var i=0;i= curChar) { - curLineNextOp.chars -= (curChar - indexIntoLine); - done = true; - } else { - indexIntoLine += curLineNextOp.chars; - } - } - } - - while (numChars > 0) { - if ((!curLineNextOp.chars) && (!curLineOpIter.hasNext())) { - curLine++; - curChar = 0; - curLineOpIterLine = curLine; - curLineNextOp.chars = 0; - curLineOpIter = Changeset.opIterator(alines_get(curLine)); - } - if (!curLineNextOp.chars) { - curLineOpIter.next(curLineNextOp); - } - var charsToUse = Math.min(numChars, curLineNextOp.chars); - func(charsToUse, curLineNextOp.attribs, charsToUse == curLineNextOp.chars && curLineNextOp.lines > 0); - numChars -= charsToUse; - curLineNextOp.chars -= charsToUse; - curChar += charsToUse; - } - - if ((!curLineNextOp.chars) && (!curLineOpIter.hasNext())) { - curLine++; - curChar = 0; - } - } - - function skip(N, L) { - if (L) { - curLine += L; - curChar = 0; - } else { - if (curLineOpIter && curLineOpIterLine == curLine) { - consumeAttribRuns(N, function () {}); - } else { - curChar += N; - } - } - } - - function nextText(numChars) { - var len = 0; - var assem = Changeset.stringAssembler(); - var firstString = lines_get(curLine).substring(curChar); - len += firstString.length; - assem.append(firstString); - - var lineNum = curLine + 1; - while (len < numChars) { - var nextString = lines_get(lineNum); - len += nextString.length; - assem.append(nextString); - lineNum++; - } - - return assem.toString().substring(0, numChars); - } - - function cachedStrFunc(func) { - var cache = {}; - return function (s) { - if (!cache[s]) { - cache[s] = func(s); - } - return cache[s]; - }; - } - - var attribKeys = []; - var attribValues = []; - - //iterate over all operators of this changeset - while (csIter.hasNext()) { - var csOp = csIter.next(); - - if (csOp.opcode == '=') { - var textBank = nextText(csOp.chars); - - // decide if this equal operator is an attribution change or not. We can see this by checkinf if attribs is set. - // If the text this operator applies to is only a star, than this is a false positive and should be ignored - if (csOp.attribs && textBank != "*") { - var deletedAttrib = apool.putAttrib(["removed", true]); - var authorAttrib = apool.putAttrib(["author", ""]);; - - attribKeys.length = 0; - attribValues.length = 0; - Changeset.eachAttribNumber(csOp.attribs, function (n) { - attribKeys.push(apool.getAttribKey(n)); - attribValues.push(apool.getAttribValue(n)); - - if(apool.getAttribKey(n) === "author"){ - authorAttrib = n; - }; - }); - - var undoBackToAttribs = cachedStrFunc(function (attribs) { - var backAttribs = []; - for (var i = 0; i < attribKeys.length; i++) { - var appliedKey = attribKeys[i]; - var appliedValue = attribValues[i]; - var oldValue = Changeset.attribsAttributeValue(attribs, appliedKey, apool); - if (appliedValue != oldValue) { - backAttribs.push([appliedKey, oldValue]); - } - } - return Changeset.makeAttribsString('=', backAttribs, apool); - }); - - var oldAttribsAddition = "*" + Changeset.numToString(deletedAttrib) + "*" + Changeset.numToString(authorAttrib); - - var textLeftToProcess = textBank; - - while(textLeftToProcess.length > 0){ - //process till the next line break or process only one line break - var lengthToProcess = textLeftToProcess.indexOf("\n"); - var lineBreak = false; - switch(lengthToProcess){ - case -1: - lengthToProcess=textLeftToProcess.length; - break; - case 0: - lineBreak = true; - lengthToProcess=1; - break; - } - - //get the text we want to procceed in this step - var processText = textLeftToProcess.substr(0, lengthToProcess); - textLeftToProcess = textLeftToProcess.substr(lengthToProcess); - - if(lineBreak){ - builder.keep(1, 1); //just skip linebreaks, don't do a insert + keep for a linebreak - - //consume the attributes of this linebreak - consumeAttribRuns(1, function(){}); - } else { - //add the old text via an insert, but add a deletion attribute + the author attribute of the author who deleted it - var textBankIndex = 0; - consumeAttribRuns(lengthToProcess, function (len, attribs, endsLine) { - //get the old attributes back - var attribs = (undoBackToAttribs(attribs) || "") + oldAttribsAddition; - - builder.insert(processText.substr(textBankIndex, len), attribs); - textBankIndex += len; - }); - - builder.keep(lengthToProcess, 0); - } - } - } else { - skip(csOp.chars, csOp.lines); - builder.keep(csOp.chars, csOp.lines); - } - } else if (csOp.opcode == '+') { - builder.keep(csOp.chars, csOp.lines); - } else if (csOp.opcode == '-') { - var textBank = nextText(csOp.chars); - var textBankIndex = 0; - - consumeAttribRuns(csOp.chars, function (len, attribs, endsLine) { - builder.insert(textBank.substr(textBankIndex, len), attribs + csOp.attribs); - textBankIndex += len; - }); - } - } - - return Changeset.checkRep(builder.toString()); -}; - -//export the constructor -module.exports = padDiffHTML; From aefd8d8d0dd33a93a4a930816555332f66156c47 Mon Sep 17 00:00:00 2001 From: John McLear Date: Wed, 13 Feb 2013 18:45:45 +0000 Subject: [PATCH 13/13] nice chars n no cruft --- src/node/utils/ExportTxt.js | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/src/node/utils/ExportTxt.js b/src/node/utils/ExportTxt.js index 05847f162..c57424f1d 100644 --- a/src/node/utils/ExportTxt.js +++ b/src/node/utils/ExportTxt.js @@ -26,7 +26,6 @@ var Security = require('ep_etherpad-lite/static/js/security'); var hooks = require('ep_etherpad-lite/static/js/pluginfw/hooks'); var getPadPlainText = require('./ExportHelper').getPadPlainText; var _analyzeLine = require('./ExportHelper')._analyzeLine; -var _encodeWhitespace = require('./ExportHelper')._encodeWhitespace; // This is slightly different than the HTML method as it passes the output to getTXTFromAText function getPadTXT(pad, revNum, callback) @@ -225,7 +224,7 @@ function getTXTFromAtext(pad, atext, authorColors) // plugins from being able to display * at the beginning of a line // s = s.replace("*", ""); // Then remove it - assem.append(_encodeWhitespace(s)); + assem.append(s); } // end iteration over spans in line var tags2close = []; @@ -292,9 +291,3 @@ exports.getPadTXTDocument = function (padId, revNum, noDocType, callback) }); } -function _encodeWhitespace(s) { - return s.replace(/[^\x21-\x7E\s\t\n\r]/g, function(c) - { - return "&#" +c.charCodeAt(0) + ";" - }); -}