From dea892213e080b7637955413222384959b3b267f Mon Sep 17 00:00:00 2001 From: John McLear Date: Wed, 13 Feb 2013 18:41:04 +0000 Subject: [PATCH] Revert "allow non ascii chars in txt export" This reverts commit be56272e66f4921f68fe12cd05cbaad8eb54894b. --- src/node/utils/ExportHelper.js | 6 +- src/node/utils/ExportHtml.js | 7 +- src/node/utils/ExportTxt.js | 10 +- src/node/utils/padDiffHTML.js | 554 --------------------------------- 4 files changed, 15 insertions(+), 562 deletions(-) delete mode 100644 src/node/utils/padDiffHTML.js diff --git a/src/node/utils/ExportHelper.js b/src/node/utils/ExportHelper.js index 41d440f30..a939a8b6e 100644 --- a/src/node/utils/ExportHelper.js +++ b/src/node/utils/ExportHelper.js @@ -80,4 +80,8 @@ exports._analyzeLine = function(text, aline, apool){ } - +exports._encodeWhitespace = function(s){ + return s.replace(/[^\x21-\x7E\s\t\n\r]/g, function(c){ + return "&#" +c.charCodeAt(0) + ";" + }); +} diff --git a/src/node/utils/ExportHtml.js b/src/node/utils/ExportHtml.js index 51a4b2c3d..585694d4b 100644 --- a/src/node/utils/ExportHtml.js +++ b/src/node/utils/ExportHtml.js @@ -23,6 +23,7 @@ var Security = require('ep_etherpad-lite/static/js/security'); var hooks = require('ep_etherpad-lite/static/js/pluginfw/hooks'); var getPadPlainText = require('./ExportHelper').getPadPlainText var _analyzeLine = require('./ExportHelper')._analyzeLine; +var _encodeWhitespace = require('./ExportHelper')._encodeWhitespace; function getPadHTML(pad, revNum, callback) { @@ -595,9 +596,3 @@ function _processSpaces(s){ return parts.join(''); } -function _encodeWhitespace(s){ - return s.replace(/[^\x21-\x7E\s\t\n\r]/g, function(c){ - return "&#" +c.charCodeAt(0) + ";" - }); -} - diff --git a/src/node/utils/ExportTxt.js b/src/node/utils/ExportTxt.js index 4a3e458b4..05847f162 100644 --- a/src/node/utils/ExportTxt.js +++ b/src/node/utils/ExportTxt.js @@ -26,6 +26,7 @@ var Security = require('ep_etherpad-lite/static/js/security'); var hooks = require('ep_etherpad-lite/static/js/pluginfw/hooks'); var getPadPlainText = require('./ExportHelper').getPadPlainText; var _analyzeLine = require('./ExportHelper')._analyzeLine; +var _encodeWhitespace = require('./ExportHelper')._encodeWhitespace; // This is slightly different than the HTML method as it passes the output to getTXTFromAText function getPadTXT(pad, revNum, callback) @@ -111,6 +112,7 @@ function getTXTFromAtext(pad, atext, authorColors) var taker = Changeset.stringIterator(text); var assem = Changeset.stringAssembler(); var openTags = []; + var idx = 0; function processNextChars(numChars) @@ -223,7 +225,7 @@ function getTXTFromAtext(pad, atext, authorColors) // plugins from being able to display * at the beginning of a line // s = s.replace("*", ""); // Then remove it - assem.append(s); + assem.append(_encodeWhitespace(s)); } // end iteration over spans in line var tags2close = []; @@ -290,3 +292,9 @@ exports.getPadTXTDocument = function (padId, revNum, noDocType, callback) }); } +function _encodeWhitespace(s) { + return s.replace(/[^\x21-\x7E\s\t\n\r]/g, function(c) + { + return "&#" +c.charCodeAt(0) + ";" + }); +} diff --git a/src/node/utils/padDiffHTML.js b/src/node/utils/padDiffHTML.js deleted file mode 100644 index 00e967287..000000000 --- a/src/node/utils/padDiffHTML.js +++ /dev/null @@ -1,554 +0,0 @@ -var Changeset = require("../../static/js/Changeset"); -var async = require("async"); -var exportHtml = require('./ExportHtml'); - -function padDiffHTML (pad, fromRev, toRev){ - //check parameters - if(!pad || !pad.id || !pad.atext || !pad.pool) - { - throw new Error('Invalid pad'); - } - - var range = pad.getValidRevisionRange(fromRev, toRev); - if(!range) { throw new Error('Invalid revision range.' + - ' startRev: ' + fromRev + - ' endRev: ' + toRev); } - - this._pad = pad; - this._fromRev = range.startRev; - this._toRev = range.endRev; - this._html = null; - this._authors = []; -} - -padDiffHTML.prototype._isClearAuthorship = function(changeset){ - //unpack - var unpacked = Changeset.unpack(changeset); - - //check if there is nothing in the charBank - if(unpacked.charBank !== "") - return false; - - //check if oldLength == newLength - if(unpacked.oldLen !== unpacked.newLen) - return false; - - //lets iterator over the operators - var iterator = Changeset.opIterator(unpacked.ops); - - //get the first operator, this should be a clear operator - var clearOperator = iterator.next(); - - //check if there is only one operator - if(iterator.hasNext() === true) - return false; - - //check if this operator doesn't change text - if(clearOperator.opcode !== "=") - return false; - - //check that this operator applys to the complete text - //if the text ends with a new line, its exactly one character less, else it has the same length - if(clearOperator.chars !== unpacked.oldLen-1 && clearOperator.chars !== unpacked.oldLen) - return false; - - var attributes = []; - Changeset.eachAttribNumber(changeset, function(attrNum){ - attributes.push(attrNum); - }); - - //check that this changeset uses only one attribute - if(attributes.length !== 1) - return false; - - var appliedAttribute = this._pad.pool.getAttrib(attributes[0]); - - //check if the applied attribute is an anonymous author attribute - if(appliedAttribute[0] !== "author" || appliedAttribute[1] !== "") - return false; - - return true; -} - -padDiffHTML.prototype._createClearAuthorship = function(rev, callback){ - var self = this; - this._pad.getInternalRevisionAText(rev, function(err, atext){ - if(err){ - return callback(err); - } - - //build clearAuthorship changeset - var builder = Changeset.builder(atext.text.length); - builder.keepText(atext.text, [['author','']], self._pad.pool); - var changeset = builder.toString(); - - callback(null, changeset); - }); -} - -padDiffHTML.prototype._createClearStartAtext = function(rev, callback){ - var self = this; - - //get the atext of this revision - this._pad.getInternalRevisionAText(rev, function(err, atext){ - if(err){ - return callback(err); - } - - //create the clearAuthorship changeset - self._createClearAuthorship(rev, function(err, changeset){ - if(err){ - return callback(err); - } - - //apply the clearAuthorship changeset - var newAText = Changeset.applyToAText(changeset, atext, self._pad.pool); - - callback(null, newAText); - }); - }); -} - -padDiffHTML.prototype._getChangesetsInBulk = function(startRev, count, callback) { - var self = this; - - //find out which revisions we need - var revisions = []; - for(var i=startRev;i<(startRev+count) && i<=this._pad.head;i++){ - revisions.push(i); - } - - var changesets = [], authors = []; - - //get all needed revisions - async.forEach(revisions, function(rev, callback){ - self._pad.getRevision(rev, function(err, revision){ - if(err){ - return callback(err) - } - - var arrayNum = rev-startRev; - - changesets[arrayNum] = revision.changeset; - authors[arrayNum] = revision.meta.author; - - callback(); - }); - }, function(err){ - callback(err, changesets, authors); - }); -} - -padDiffHTML.prototype._addAuthors = function(authors) { - var self = this; - //add to array if not in the array - authors.forEach(function(author){ - if(self._authors.indexOf(author) == -1){ - self._authors.push(author); - } - }); -} - -padDiffHTML.prototype._createDiffAtext = function(callback) { - var self = this; - var bulkSize = 100; - - //get the cleaned startAText - self._createClearStartAtext(self._fromRev, function(err, atext){ - if(err) { return callback(err); } - - var superChangeset = null; - - var rev = self._fromRev + 1; - - //async while loop - async.whilst( - //loop condition - function () { return rev <= self._toRev; }, - - //loop body - function (callback) { - //get the bulk - self._getChangesetsInBulk(rev,bulkSize,function(err, changesets, authors){ - var addedAuthors = []; - - //run trough all changesets - for(var i=0;i= curChar) { - curLineNextOp.chars -= (curChar - indexIntoLine); - done = true; - } else { - indexIntoLine += curLineNextOp.chars; - } - } - } - - while (numChars > 0) { - if ((!curLineNextOp.chars) && (!curLineOpIter.hasNext())) { - curLine++; - curChar = 0; - curLineOpIterLine = curLine; - curLineNextOp.chars = 0; - curLineOpIter = Changeset.opIterator(alines_get(curLine)); - } - if (!curLineNextOp.chars) { - curLineOpIter.next(curLineNextOp); - } - var charsToUse = Math.min(numChars, curLineNextOp.chars); - func(charsToUse, curLineNextOp.attribs, charsToUse == curLineNextOp.chars && curLineNextOp.lines > 0); - numChars -= charsToUse; - curLineNextOp.chars -= charsToUse; - curChar += charsToUse; - } - - if ((!curLineNextOp.chars) && (!curLineOpIter.hasNext())) { - curLine++; - curChar = 0; - } - } - - function skip(N, L) { - if (L) { - curLine += L; - curChar = 0; - } else { - if (curLineOpIter && curLineOpIterLine == curLine) { - consumeAttribRuns(N, function () {}); - } else { - curChar += N; - } - } - } - - function nextText(numChars) { - var len = 0; - var assem = Changeset.stringAssembler(); - var firstString = lines_get(curLine).substring(curChar); - len += firstString.length; - assem.append(firstString); - - var lineNum = curLine + 1; - while (len < numChars) { - var nextString = lines_get(lineNum); - len += nextString.length; - assem.append(nextString); - lineNum++; - } - - return assem.toString().substring(0, numChars); - } - - function cachedStrFunc(func) { - var cache = {}; - return function (s) { - if (!cache[s]) { - cache[s] = func(s); - } - return cache[s]; - }; - } - - var attribKeys = []; - var attribValues = []; - - //iterate over all operators of this changeset - while (csIter.hasNext()) { - var csOp = csIter.next(); - - if (csOp.opcode == '=') { - var textBank = nextText(csOp.chars); - - // decide if this equal operator is an attribution change or not. We can see this by checkinf if attribs is set. - // If the text this operator applies to is only a star, than this is a false positive and should be ignored - if (csOp.attribs && textBank != "*") { - var deletedAttrib = apool.putAttrib(["removed", true]); - var authorAttrib = apool.putAttrib(["author", ""]);; - - attribKeys.length = 0; - attribValues.length = 0; - Changeset.eachAttribNumber(csOp.attribs, function (n) { - attribKeys.push(apool.getAttribKey(n)); - attribValues.push(apool.getAttribValue(n)); - - if(apool.getAttribKey(n) === "author"){ - authorAttrib = n; - }; - }); - - var undoBackToAttribs = cachedStrFunc(function (attribs) { - var backAttribs = []; - for (var i = 0; i < attribKeys.length; i++) { - var appliedKey = attribKeys[i]; - var appliedValue = attribValues[i]; - var oldValue = Changeset.attribsAttributeValue(attribs, appliedKey, apool); - if (appliedValue != oldValue) { - backAttribs.push([appliedKey, oldValue]); - } - } - return Changeset.makeAttribsString('=', backAttribs, apool); - }); - - var oldAttribsAddition = "*" + Changeset.numToString(deletedAttrib) + "*" + Changeset.numToString(authorAttrib); - - var textLeftToProcess = textBank; - - while(textLeftToProcess.length > 0){ - //process till the next line break or process only one line break - var lengthToProcess = textLeftToProcess.indexOf("\n"); - var lineBreak = false; - switch(lengthToProcess){ - case -1: - lengthToProcess=textLeftToProcess.length; - break; - case 0: - lineBreak = true; - lengthToProcess=1; - break; - } - - //get the text we want to procceed in this step - var processText = textLeftToProcess.substr(0, lengthToProcess); - textLeftToProcess = textLeftToProcess.substr(lengthToProcess); - - if(lineBreak){ - builder.keep(1, 1); //just skip linebreaks, don't do a insert + keep for a linebreak - - //consume the attributes of this linebreak - consumeAttribRuns(1, function(){}); - } else { - //add the old text via an insert, but add a deletion attribute + the author attribute of the author who deleted it - var textBankIndex = 0; - consumeAttribRuns(lengthToProcess, function (len, attribs, endsLine) { - //get the old attributes back - var attribs = (undoBackToAttribs(attribs) || "") + oldAttribsAddition; - - builder.insert(processText.substr(textBankIndex, len), attribs); - textBankIndex += len; - }); - - builder.keep(lengthToProcess, 0); - } - } - } else { - skip(csOp.chars, csOp.lines); - builder.keep(csOp.chars, csOp.lines); - } - } else if (csOp.opcode == '+') { - builder.keep(csOp.chars, csOp.lines); - } else if (csOp.opcode == '-') { - var textBank = nextText(csOp.chars); - var textBankIndex = 0; - - consumeAttribRuns(csOp.chars, function (len, attribs, endsLine) { - builder.insert(textBank.substr(textBankIndex, len), attribs + csOp.attribs); - textBankIndex += len; - }); - } - } - - return Changeset.checkRep(builder.toString()); -}; - -//export the constructor -module.exports = padDiffHTML;