diff --git a/src/node/utils/ExportHelper.js b/src/node/utils/ExportHelper.js index a939a8b6e..41d440f30 100644 --- a/src/node/utils/ExportHelper.js +++ b/src/node/utils/ExportHelper.js @@ -80,8 +80,4 @@ exports._analyzeLine = function(text, aline, apool){ } -exports._encodeWhitespace = function(s){ - return s.replace(/[^\x21-\x7E\s\t\n\r]/g, function(c){ - return "&#" +c.charCodeAt(0) + ";" - }); -} + diff --git a/src/node/utils/ExportHtml.js b/src/node/utils/ExportHtml.js index 585694d4b..51a4b2c3d 100644 --- a/src/node/utils/ExportHtml.js +++ b/src/node/utils/ExportHtml.js @@ -23,7 +23,6 @@ var Security = require('ep_etherpad-lite/static/js/security'); var hooks = require('ep_etherpad-lite/static/js/pluginfw/hooks'); var getPadPlainText = require('./ExportHelper').getPadPlainText var _analyzeLine = require('./ExportHelper')._analyzeLine; -var _encodeWhitespace = require('./ExportHelper')._encodeWhitespace; function getPadHTML(pad, revNum, callback) { @@ -596,3 +595,9 @@ function _processSpaces(s){ return parts.join(''); } +function _encodeWhitespace(s){ + return s.replace(/[^\x21-\x7E\s\t\n\r]/g, function(c){ + return "&#" +c.charCodeAt(0) + ";" + }); +} + diff --git a/src/node/utils/ExportTxt.js b/src/node/utils/ExportTxt.js index 05847f162..4a3e458b4 100644 --- a/src/node/utils/ExportTxt.js +++ b/src/node/utils/ExportTxt.js @@ -26,7 +26,6 @@ var Security = require('ep_etherpad-lite/static/js/security'); var hooks = require('ep_etherpad-lite/static/js/pluginfw/hooks'); var getPadPlainText = require('./ExportHelper').getPadPlainText; var _analyzeLine = require('./ExportHelper')._analyzeLine; -var _encodeWhitespace = require('./ExportHelper')._encodeWhitespace; // This is slightly different than the HTML method as it passes the output to getTXTFromAText function getPadTXT(pad, revNum, callback) @@ -112,7 +111,6 @@ function getTXTFromAtext(pad, atext, authorColors) var taker = Changeset.stringIterator(text); var assem = Changeset.stringAssembler(); var openTags = []; - var idx = 0; function processNextChars(numChars) @@ -225,7 +223,7 @@ function getTXTFromAtext(pad, atext, authorColors) // plugins from being able to display * at the beginning of a line // s = s.replace("*", ""); // Then remove it - assem.append(_encodeWhitespace(s)); + assem.append(s); } // end iteration over spans in line var tags2close = []; @@ -292,9 +290,3 @@ exports.getPadTXTDocument = function (padId, revNum, noDocType, callback) }); } -function _encodeWhitespace(s) { - return s.replace(/[^\x21-\x7E\s\t\n\r]/g, function(c) - { - return "&#" +c.charCodeAt(0) + ";" - }); -} diff --git a/src/node/utils/padDiffHTML.js b/src/node/utils/padDiffHTML.js new file mode 100644 index 000000000..00e967287 --- /dev/null +++ b/src/node/utils/padDiffHTML.js @@ -0,0 +1,554 @@ +var Changeset = require("../../static/js/Changeset"); +var async = require("async"); +var exportHtml = require('./ExportHtml'); + +function padDiffHTML (pad, fromRev, toRev){ + //check parameters + if(!pad || !pad.id || !pad.atext || !pad.pool) + { + throw new Error('Invalid pad'); + } + + var range = pad.getValidRevisionRange(fromRev, toRev); + if(!range) { throw new Error('Invalid revision range.' + + ' startRev: ' + fromRev + + ' endRev: ' + toRev); } + + this._pad = pad; + this._fromRev = range.startRev; + this._toRev = range.endRev; + this._html = null; + this._authors = []; +} + +padDiffHTML.prototype._isClearAuthorship = function(changeset){ + //unpack + var unpacked = Changeset.unpack(changeset); + + //check if there is nothing in the charBank + if(unpacked.charBank !== "") + return false; + + //check if oldLength == newLength + if(unpacked.oldLen !== unpacked.newLen) + return false; + + //lets iterator over the operators + var iterator = Changeset.opIterator(unpacked.ops); + + //get the first operator, this should be a clear operator + var clearOperator = iterator.next(); + + //check if there is only one operator + if(iterator.hasNext() === true) + return false; + + //check if this operator doesn't change text + if(clearOperator.opcode !== "=") + return false; + + //check that this operator applys to the complete text + //if the text ends with a new line, its exactly one character less, else it has the same length + if(clearOperator.chars !== unpacked.oldLen-1 && clearOperator.chars !== unpacked.oldLen) + return false; + + var attributes = []; + Changeset.eachAttribNumber(changeset, function(attrNum){ + attributes.push(attrNum); + }); + + //check that this changeset uses only one attribute + if(attributes.length !== 1) + return false; + + var appliedAttribute = this._pad.pool.getAttrib(attributes[0]); + + //check if the applied attribute is an anonymous author attribute + if(appliedAttribute[0] !== "author" || appliedAttribute[1] !== "") + return false; + + return true; +} + +padDiffHTML.prototype._createClearAuthorship = function(rev, callback){ + var self = this; + this._pad.getInternalRevisionAText(rev, function(err, atext){ + if(err){ + return callback(err); + } + + //build clearAuthorship changeset + var builder = Changeset.builder(atext.text.length); + builder.keepText(atext.text, [['author','']], self._pad.pool); + var changeset = builder.toString(); + + callback(null, changeset); + }); +} + +padDiffHTML.prototype._createClearStartAtext = function(rev, callback){ + var self = this; + + //get the atext of this revision + this._pad.getInternalRevisionAText(rev, function(err, atext){ + if(err){ + return callback(err); + } + + //create the clearAuthorship changeset + self._createClearAuthorship(rev, function(err, changeset){ + if(err){ + return callback(err); + } + + //apply the clearAuthorship changeset + var newAText = Changeset.applyToAText(changeset, atext, self._pad.pool); + + callback(null, newAText); + }); + }); +} + +padDiffHTML.prototype._getChangesetsInBulk = function(startRev, count, callback) { + var self = this; + + //find out which revisions we need + var revisions = []; + for(var i=startRev;i<(startRev+count) && i<=this._pad.head;i++){ + revisions.push(i); + } + + var changesets = [], authors = []; + + //get all needed revisions + async.forEach(revisions, function(rev, callback){ + self._pad.getRevision(rev, function(err, revision){ + if(err){ + return callback(err) + } + + var arrayNum = rev-startRev; + + changesets[arrayNum] = revision.changeset; + authors[arrayNum] = revision.meta.author; + + callback(); + }); + }, function(err){ + callback(err, changesets, authors); + }); +} + +padDiffHTML.prototype._addAuthors = function(authors) { + var self = this; + //add to array if not in the array + authors.forEach(function(author){ + if(self._authors.indexOf(author) == -1){ + self._authors.push(author); + } + }); +} + +padDiffHTML.prototype._createDiffAtext = function(callback) { + var self = this; + var bulkSize = 100; + + //get the cleaned startAText + self._createClearStartAtext(self._fromRev, function(err, atext){ + if(err) { return callback(err); } + + var superChangeset = null; + + var rev = self._fromRev + 1; + + //async while loop + async.whilst( + //loop condition + function () { return rev <= self._toRev; }, + + //loop body + function (callback) { + //get the bulk + self._getChangesetsInBulk(rev,bulkSize,function(err, changesets, authors){ + var addedAuthors = []; + + //run trough all changesets + for(var i=0;i= curChar) { + curLineNextOp.chars -= (curChar - indexIntoLine); + done = true; + } else { + indexIntoLine += curLineNextOp.chars; + } + } + } + + while (numChars > 0) { + if ((!curLineNextOp.chars) && (!curLineOpIter.hasNext())) { + curLine++; + curChar = 0; + curLineOpIterLine = curLine; + curLineNextOp.chars = 0; + curLineOpIter = Changeset.opIterator(alines_get(curLine)); + } + if (!curLineNextOp.chars) { + curLineOpIter.next(curLineNextOp); + } + var charsToUse = Math.min(numChars, curLineNextOp.chars); + func(charsToUse, curLineNextOp.attribs, charsToUse == curLineNextOp.chars && curLineNextOp.lines > 0); + numChars -= charsToUse; + curLineNextOp.chars -= charsToUse; + curChar += charsToUse; + } + + if ((!curLineNextOp.chars) && (!curLineOpIter.hasNext())) { + curLine++; + curChar = 0; + } + } + + function skip(N, L) { + if (L) { + curLine += L; + curChar = 0; + } else { + if (curLineOpIter && curLineOpIterLine == curLine) { + consumeAttribRuns(N, function () {}); + } else { + curChar += N; + } + } + } + + function nextText(numChars) { + var len = 0; + var assem = Changeset.stringAssembler(); + var firstString = lines_get(curLine).substring(curChar); + len += firstString.length; + assem.append(firstString); + + var lineNum = curLine + 1; + while (len < numChars) { + var nextString = lines_get(lineNum); + len += nextString.length; + assem.append(nextString); + lineNum++; + } + + return assem.toString().substring(0, numChars); + } + + function cachedStrFunc(func) { + var cache = {}; + return function (s) { + if (!cache[s]) { + cache[s] = func(s); + } + return cache[s]; + }; + } + + var attribKeys = []; + var attribValues = []; + + //iterate over all operators of this changeset + while (csIter.hasNext()) { + var csOp = csIter.next(); + + if (csOp.opcode == '=') { + var textBank = nextText(csOp.chars); + + // decide if this equal operator is an attribution change or not. We can see this by checkinf if attribs is set. + // If the text this operator applies to is only a star, than this is a false positive and should be ignored + if (csOp.attribs && textBank != "*") { + var deletedAttrib = apool.putAttrib(["removed", true]); + var authorAttrib = apool.putAttrib(["author", ""]);; + + attribKeys.length = 0; + attribValues.length = 0; + Changeset.eachAttribNumber(csOp.attribs, function (n) { + attribKeys.push(apool.getAttribKey(n)); + attribValues.push(apool.getAttribValue(n)); + + if(apool.getAttribKey(n) === "author"){ + authorAttrib = n; + }; + }); + + var undoBackToAttribs = cachedStrFunc(function (attribs) { + var backAttribs = []; + for (var i = 0; i < attribKeys.length; i++) { + var appliedKey = attribKeys[i]; + var appliedValue = attribValues[i]; + var oldValue = Changeset.attribsAttributeValue(attribs, appliedKey, apool); + if (appliedValue != oldValue) { + backAttribs.push([appliedKey, oldValue]); + } + } + return Changeset.makeAttribsString('=', backAttribs, apool); + }); + + var oldAttribsAddition = "*" + Changeset.numToString(deletedAttrib) + "*" + Changeset.numToString(authorAttrib); + + var textLeftToProcess = textBank; + + while(textLeftToProcess.length > 0){ + //process till the next line break or process only one line break + var lengthToProcess = textLeftToProcess.indexOf("\n"); + var lineBreak = false; + switch(lengthToProcess){ + case -1: + lengthToProcess=textLeftToProcess.length; + break; + case 0: + lineBreak = true; + lengthToProcess=1; + break; + } + + //get the text we want to procceed in this step + var processText = textLeftToProcess.substr(0, lengthToProcess); + textLeftToProcess = textLeftToProcess.substr(lengthToProcess); + + if(lineBreak){ + builder.keep(1, 1); //just skip linebreaks, don't do a insert + keep for a linebreak + + //consume the attributes of this linebreak + consumeAttribRuns(1, function(){}); + } else { + //add the old text via an insert, but add a deletion attribute + the author attribute of the author who deleted it + var textBankIndex = 0; + consumeAttribRuns(lengthToProcess, function (len, attribs, endsLine) { + //get the old attributes back + var attribs = (undoBackToAttribs(attribs) || "") + oldAttribsAddition; + + builder.insert(processText.substr(textBankIndex, len), attribs); + textBankIndex += len; + }); + + builder.keep(lengthToProcess, 0); + } + } + } else { + skip(csOp.chars, csOp.lines); + builder.keep(csOp.chars, csOp.lines); + } + } else if (csOp.opcode == '+') { + builder.keep(csOp.chars, csOp.lines); + } else if (csOp.opcode == '-') { + var textBank = nextText(csOp.chars); + var textBankIndex = 0; + + consumeAttribRuns(csOp.chars, function (len, attribs, endsLine) { + builder.insert(textBank.substr(textBankIndex, len), attribs + csOp.attribs); + textBankIndex += len; + }); + } + } + + return Changeset.checkRep(builder.toString()); +}; + +//export the constructor +module.exports = padDiffHTML;