etherpad-lite/src/node/utils/padDiff.js

477 lines
14 KiB
JavaScript
Raw Normal View History

2021-01-21 21:06:52 +00:00
'use strict';
2020-11-23 13:24:19 -05:00
const Changeset = require('../../static/js/Changeset');
const exportHtml = require('./ExportHtml');
2020-11-23 13:24:19 -05:00
/**
 * Creates a diff generator for a revision range of a pad.
 *
 * The requested range is handed to `pad.getValidRevisionRange()`; the
 * `startRev`/`endRev` it returns are the revisions actually diffed.
 *
 * @param {object} pad - Pad object; must expose `id`, `atext`, `pool` and
 *     `getValidRevisionRange(fromRev, toRev)`.
 * @param {*} fromRev - Requested start revision.
 * @param {*} toRev - Requested end revision.
 * @throws {Error} 'Invalid pad' if `pad` or one of `id`/`atext`/`pool` is missing.
 * @throws {Error} If `pad.getValidRevisionRange()` returns a falsy value.
 */
function PadDiff(pad, fromRev, toRev) {
  // check parameters
  if (!pad || !pad.id || !pad.atext || !pad.pool) {
    throw new Error('Invalid pad');
  }

  const range = pad.getValidRevisionRange(fromRev, toRev);
  if (!range) throw new Error(`Invalid revision range. startRev: ${fromRev} endRev: ${toRev}`);

  this._pad = pad;
  this._fromRev = range.startRev;
  this._toRev = range.endRev;
  // filled lazily by getHtml()
  this._html = null;
  // filled while the diff atext is built
  this._authors = [];
}
2020-11-23 13:24:19 -05:00
/**
 * Tells whether a changeset is a pure "clear authorship" changeset: a single
 * keep operator over the whole text whose only attribute is `['author', '']`.
 *
 * @param {string} changeset - Packed changeset.
 * @returns {boolean} true if every check below passes.
 */
PadDiff.prototype._isClearAuthorship = function (changeset) {
  // unpack
  const unpacked = Changeset.unpack(changeset);

  // check if there is nothing in the charBank
  if (unpacked.charBank !== '') {
    return false;
  }

  // check if oldLength == newLength
  if (unpacked.oldLen !== unpacked.newLen) {
    return false;
  }

  // iterate over the operators
  const iterator = Changeset.opIterator(unpacked.ops);

  // get the first operator, this should be a clear operator
  const clearOperator = iterator.next();

  // check if there is only one operator
  if (iterator.hasNext()) {
    return false;
  }

  // check if this operator doesn't change text
  if (clearOperator.opcode !== '=') {
    return false;
  }

  // check that this operator applies to the complete text
  // if the text ends with a new line, it's exactly one character less, else it has the same length
  if (clearOperator.chars !== unpacked.oldLen - 1 && clearOperator.chars !== unpacked.oldLen) {
    return false;
  }

  const attributes = [];
  Changeset.eachAttribNumber(changeset, (attrNum) => {
    attributes.push(attrNum);
  });

  // check that this changeset uses only one attribute
  if (attributes.length !== 1) {
    return false;
  }

  const appliedAttribute = this._pad.pool.getAttrib(attributes[0]);

  // check if the applied attribute is an anonymous author attribute
  if (appliedAttribute[0] !== 'author' || appliedAttribute[1] !== '') {
    return false;
  }

  return true;
};
2020-11-23 13:24:19 -05:00
/**
 * Builds a changeset that keeps the whole text of revision `rev` while
 * applying the attribute `['author', '']` to it.
 *
 * @param {number} rev - Revision whose atext is loaded from the pad.
 * @returns {Promise<string>} The changeset as produced by the builder's `toString()`.
 */
PadDiff.prototype._createClearAuthorship = async function (rev) {
  const atext = await this._pad.getInternalRevisionAText(rev);

  // build clearAuthorship changeset
  const builder = Changeset.builder(atext.text.length);
  builder.keepText(atext.text, [['author', '']], this._pad.pool);
  return builder.toString();
};
2020-11-23 13:24:19 -05:00
/**
 * Returns the atext of revision `rev` with the clear-authorship changeset
 * from `_createClearAuthorship(rev)` applied to it.
 *
 * @param {number} rev - Revision to start the diff from.
 * @returns {Promise<object>} The atext returned by `Changeset.applyToAText()`.
 */
PadDiff.prototype._createClearStartAtext = async function (rev) {
  // get the atext of this revision
  const atext = await this._pad.getInternalRevisionAText(rev);

  // create the clearAuthorship changeset
  const changeset = await this._createClearAuthorship(rev);

  // apply the clearAuthorship changeset
  return Changeset.applyToAText(changeset, atext, this._pad.pool);
};
2020-11-23 13:24:19 -05:00
/**
 * Loads up to `count` consecutive revisions starting at `startRev`, never
 * going past `this._pad.head`. The revisions are requested in parallel.
 *
 * @param {number} startRev - First revision to load.
 * @param {number} count - Maximum number of revisions to load.
 * @returns {Promise<{changesets: Array, authors: Array}>} Two arrays where
 *     index `i` holds the changeset / `meta.author` of revision `startRev + i`.
 */
PadDiff.prototype._getChangesetsInBulk = async function (startRev, count) {
  // find out which revisions we need
  const revisions = [];
  for (let rev = startRev; rev < (startRev + count) && rev <= this._pad.head; rev++) {
    revisions.push(rev);
  }

  // get all needed revisions (in parallel); Promise.all keeps the request order,
  // so result index i belongs to revision startRev + i
  const loaded = await Promise.all(revisions.map((rev) => this._pad.getRevision(rev)));
  const changesets = loaded.map((revision) => revision.changeset);
  const authors = loaded.map((revision) => revision.meta.author);
  return {changesets, authors};
};
2020-11-23 13:24:19 -05:00
/**
 * Appends every author from `authors` that is not yet in `this._authors`,
 * preserving first-seen order.
 *
 * @param {Array} authors - Author ids to merge into the author list.
 */
PadDiff.prototype._addAuthors = function (authors) {
  // add to array if not in the array
  for (const author of authors) {
    if (!this._authors.includes(author)) {
      this._authors.push(author);
    }
  }
};
2020-11-23 13:24:19 -05:00
/**
 * Builds the atext that represents the diff between `_fromRev` and `_toRev`.
 *
 * Starting from the authorship-cleared atext of `_fromRev`, all changesets up
 * to `_toRev` (except clear-authorship changesets) are extended with their
 * author and composed into one changeset. That changeset and the deletion
 * changeset derived from it are then applied to the start atext. The authors
 * of the processed changesets are collected via `_addAuthors()`.
 *
 * @returns {Promise<object>} The resulting atext.
 */
PadDiff.prototype._createDiffAtext = async function () {
  const bulkSize = 100;

  // get the cleaned startAText
  let atext = await this._createClearStartAtext(this._fromRev);

  let superChangeset = null;

  for (let rev = this._fromRev + 1; rev <= this._toRev; rev += bulkSize) {
    // get the bulk, but never request revisions beyond _toRev: they would be
    // loaded and then ignored by the loop below
    const wanted = Math.min(bulkSize, this._toRev - rev + 1);
    const {changesets, authors} = await this._getChangesetsInBulk(rev, wanted);

    const addedAuthors = [];

    // run through all changesets
    for (let i = 0; i < changesets.length && (rev + i) <= this._toRev; ++i) {
      let changeset = changesets[i];

      // skip clearAuthorship Changesets
      if (this._isClearAuthorship(changeset)) {
        continue;
      }

      changeset = this._extendChangesetWithAuthor(changeset, authors[i], this._pad.pool);

      // add this author to the author array
      addedAuthors.push(authors[i]);

      // compose it with the superChangeset
      if (superChangeset == null) {
        superChangeset = changeset;
      } else {
        superChangeset = Changeset.compose(superChangeset, changeset, this._pad.pool);
      }
    }

    // add the authors to the PadDiff author array
    this._addAuthors(addedAuthors);
  }

  // if there are only clearAuthorship changesets, we don't get a superChangeset,
  // so we can skip this step
  if (superChangeset) {
    const deletionChangeset = this._createDeletionChangeset(superChangeset, atext, this._pad.pool);

    // apply the superChangeset, which includes all additions
    atext = Changeset.applyToAText(superChangeset, atext, this._pad.pool);

    // apply the deletionChangeset, which adds the deletions
    atext = Changeset.applyToAText(deletionChangeset, atext, this._pad.pool);
  }

  return atext;
};
2019-01-31 14:38:56 +00:00
2020-11-23 13:24:19 -05:00
/**
 * Returns the diff rendered as HTML. The result is computed on first use and
 * cached in `this._html` for later calls.
 *
 * @returns {Promise<*>} Whatever `exportHtml.getHTMLFromAtext()` resolves to.
 */
PadDiff.prototype.getHtml = async function () {
  // cache the html
  if (this._html != null) {
    return this._html;
  }

  // get the diff atext
  const atext = await this._createDiffAtext();

  // get the authorColor table
  const authorColors = await this._pad.getAllAuthorColors();

  // convert the atext to html
  this._html = await exportHtml.getHTMLFromAtext(this._pad, atext, authorColors);

  return this._html;
};
2019-01-31 14:38:56 +00:00
2020-11-23 13:24:19 -05:00
/**
 * Returns the authors that contributed to the diffed revision range.
 *
 * The author list is filled as a side effect of building the diff, so the
 * HTML is generated first if that has not happened yet.
 *
 * @returns {Promise<Array>} The author list (`this._authors`).
 */
PadDiff.prototype.getAuthors = async function () {
  // check if html was already produced, if not produce it, this generates
  // the author array at the same time
  if (this._html == null) {
    await this.getHtml();
  }

  // must read from `this`: no `self` variable is declared in this function
  return this._authors;
};
2021-01-21 21:06:52 +00:00
/**
 * Re-packs a changeset so that its operators carry authorship information:
 * - every '-' operator gets its attribs replaced by author + `['removed', true]`,
 * - every '=' operator that already has attribs gets the author attrib appended,
 * - all other operators are copied unchanged.
 *
 * @param {string} changeset - Packed changeset.
 * @param {string} [author] - Author id; a falsy value is stored as ''.
 * @param {object} apool - Attribute pool; the author and removed attributes
 *     are put into it.
 * @returns {string} The re-packed changeset (same lengths and charBank).
 */
PadDiff.prototype._extendChangesetWithAuthor = (changeset, author, apool) => {
  // unpack
  const unpacked = Changeset.unpack(changeset);

  const iterator = Changeset.opIterator(unpacked.ops);
  const assem = Changeset.opAssembler();

  // create deleted attribs
  const authorAttrib = apool.putAttrib(['author', author || '']);
  const deletedAttrib = apool.putAttrib(['removed', true]);
  const authorAttribStr = `*${Changeset.numToString(authorAttrib)}`;
  const deletionAttribs = `${authorAttribStr}*${Changeset.numToString(deletedAttrib)}`;

  // iterate over the operators of the changeset
  while (iterator.hasNext()) {
    const operator = iterator.next();

    if (operator.opcode === '-') {
      // this is a delete operator, extend it with the author
      operator.attribs = deletionAttribs;
    } else if (operator.opcode === '=' && operator.attribs) {
      // this operator changes only attributes, let's mark which author did that
      operator.attribs += authorAttribStr;
    }

    // append the new operator to our assembler
    assem.append(operator);
  }

  // return the modified changeset
  return Changeset.pack(unpacked.oldLen, unpacked.newLen, assem.toString(), unpacked.charBank);
};
2021-01-21 21:06:52 +00:00
// this method is 80% like Changeset.inverse. I just changed so instead of reverting,
// it adds deletions and attribute changes to the atext.
/**
 * Builds a changeset that re-inserts, into the text produced by `cs`, the text
 * that `cs` deleted or re-attributed, so that it stays visible in the diff.
 *
 * Walks the operators of `cs` side by side with the lines and attribution
 * lines of `startAText`:
 * - '+' operators are kept as they are;
 * - '-' operators re-insert the removed text with its old attribs followed by
 *   the attribs of the '-' operator;
 * - '=' operators with attribs re-insert the affected text (line breaks are
 *   only kept) with the old values of the changed attributes plus a
 *   `['removed', true]` attribute and an author attribute;
 * - '=' operators without attribs are skipped over and kept.
 *
 * @param {string} cs - Packed changeset to derive the deletions from.
 * @param {{text: string, attribs: string}} startAText - The atext `cs` applies to.
 * @param {object} apool - Attribute pool used to look up and store attributes.
 * @returns {*} The result of `Changeset.checkRep()` on the built changeset.
 */
PadDiff.prototype._createDeletionChangeset = function (cs, startAText, apool) {
  const lines = Changeset.splitTextLines(startAText.text);
  const alines = Changeset.splitAttributionLines(startAText.attribs, startAText.text);

  // lines and alines are what the exports is meant to apply to.
  // They may be arrays or objects with .get(i) and .length methods.
  // They include final newlines on lines.

  const linesGet = (idx) => (lines.get ? lines.get(idx) : lines[idx]);
  const aLinesGet = (idx) => (alines.get ? alines.get(idx) : alines[idx]);

  // cursor into startAText: current line, column, and the attribution
  // operator iterator (plus the partially consumed operator) of that line
  let curLine = 0;
  let curChar = 0;
  let curLineOpIter = null;
  let curLineOpIterLine;
  let curLineNextOp = Changeset.newOp('+');

  const unpacked = Changeset.unpack(cs);
  const csIter = Changeset.opIterator(unpacked.ops);
  const builder = Changeset.builder(unpacked.newLen);

  // Advances the cursor by numChars characters and calls func once per
  // attribution run that is crossed.
  const consumeAttribRuns = (numChars, func /* (len, attribs, endsLine)*/) => {
    if ((!curLineOpIter) || (curLineOpIterLine !== curLine)) {
      // create curLineOpIter and advance it to curChar
      curLineOpIter = Changeset.opIterator(aLinesGet(curLine));
      curLineOpIterLine = curLine;
      let indexIntoLine = 0;
      while (curLineOpIter.hasNext()) {
        curLineNextOp = curLineOpIter.next();
        if (indexIntoLine + curLineNextOp.chars >= curChar) {
          curLineNextOp.chars -= (curChar - indexIntoLine);
          break;
        }
        indexIntoLine += curLineNextOp.chars;
      }
    }

    let remaining = numChars;
    while (remaining > 0) {
      if ((!curLineNextOp.chars) && (!curLineOpIter.hasNext())) {
        // current line is used up, continue on the next one
        curLine++;
        curChar = 0;
        curLineOpIterLine = curLine;
        curLineNextOp.chars = 0;
        curLineOpIter = Changeset.opIterator(aLinesGet(curLine));
      }

      if (!curLineNextOp.chars) {
        curLineNextOp = curLineOpIter.hasNext() ? curLineOpIter.next() : Changeset.newOp();
      }

      const charsToUse = Math.min(remaining, curLineNextOp.chars);

      func(charsToUse, curLineNextOp.attribs,
          charsToUse === curLineNextOp.chars && curLineNextOp.lines > 0);
      remaining -= charsToUse;
      curLineNextOp.chars -= charsToUse;
      curChar += charsToUse;
    }

    if ((!curLineNextOp.chars) && (!curLineOpIter.hasNext())) {
      curLine++;
      curChar = 0;
    }
  };

  // Moves the cursor forward by N chars / L lines without emitting anything.
  const skip = (N, L) => {
    if (L) {
      curLine += L;
      curChar = 0;
    } else if (curLineOpIter && curLineOpIterLine === curLine) {
      consumeAttribRuns(N, () => {});
    } else {
      curChar += N;
    }
  };

  // Returns the next numChars characters of the start text from the cursor
  // position, without moving the cursor.
  const nextText = (numChars) => {
    let len = 0;
    const assem = Changeset.stringAssembler();
    const firstString = linesGet(curLine).substring(curChar);
    len += firstString.length;
    assem.append(firstString);

    let lineNum = curLine + 1;
    while (len < numChars) {
      const nextString = linesGet(lineNum);
      len += nextString.length;
      assem.append(nextString);
      lineNum++;
    }

    return assem.toString().substring(0, numChars);
  };

  // Memoizes a string -> value function. A Map with has() is used so that
  // falsy results (such as an empty attribs string) are cached as well.
  const cachedStrFunc = (func) => {
    const cache = new Map();
    return (s) => {
      if (!cache.has(s)) {
        cache.set(s, func(s));
      }
      return cache.get(s);
    };
  };

  const attribKeys = [];
  const attribValues = [];

  // iterate over all operators of this changeset
  while (csIter.hasNext()) {
    const csOp = csIter.next();

    if (csOp.opcode === '=') {
      const textBank = nextText(csOp.chars);

      // decide if this equal operator is an attribution change or not.
      // We can see this by checking if attribs is set.
      // If the text this operator applies to is only a star,
      // then this is a false positive and should be ignored
      if (csOp.attribs && textBank !== '*') {
        const deletedAttrib = apool.putAttrib(['removed', true]);
        let authorAttrib = apool.putAttrib(['author', '']);

        attribKeys.length = 0;
        attribValues.length = 0;
        Changeset.eachAttribNumber(csOp.attribs, (n) => {
          attribKeys.push(apool.getAttribKey(n));
          attribValues.push(apool.getAttribValue(n));

          if (apool.getAttribKey(n) === 'author') {
            authorAttrib = n;
          }
        });

        // maps an old attribs string to the attribs that restore the old
        // values of every attribute this operator changed
        const undoBackToAttribs = cachedStrFunc((attribs) => {
          const backAttribs = [];
          for (let i = 0; i < attribKeys.length; i++) {
            const appliedKey = attribKeys[i];
            const appliedValue = attribValues[i];
            const oldValue = Changeset.attribsAttributeValue(attribs, appliedKey, apool);

            if (appliedValue !== oldValue) {
              backAttribs.push([appliedKey, oldValue]);
            }
          }

          return Changeset.makeAttribsString('=', backAttribs, apool);
        });

        const oldAttribsAddition =
            `*${Changeset.numToString(deletedAttrib)}*${Changeset.numToString(authorAttrib)}`;

        let textLeftToProcess = textBank;

        while (textLeftToProcess.length > 0) {
          // process till the next line break or process only one line break
          let lengthToProcess = textLeftToProcess.indexOf('\n');
          let lineBreak = false;
          switch (lengthToProcess) {
            case -1:
              lengthToProcess = textLeftToProcess.length;
              break;
            case 0:
              lineBreak = true;
              lengthToProcess = 1;
              break;
          }

          // get the text we want to process in this step
          const processText = textLeftToProcess.substring(0, lengthToProcess);

          textLeftToProcess = textLeftToProcess.substring(lengthToProcess);

          if (lineBreak) {
            builder.keep(1, 1); // just skip linebreaks, don't do a insert + keep for a linebreak

            // consume the attributes of this linebreak
            consumeAttribRuns(1, () => {});
          } else {
            // add the old text via an insert, but add a deletion attribute +
            // the author attribute of the author who deleted it
            let textBankIndex = 0;
            consumeAttribRuns(lengthToProcess, (len, attribs) => {
              // get the old attributes back
              const oldAttribs = (undoBackToAttribs(attribs) || '') + oldAttribsAddition;

              builder.insert(processText.substring(textBankIndex, textBankIndex + len), oldAttribs);
              textBankIndex += len;
            });

            builder.keep(lengthToProcess, 0);
          }
        }
      } else {
        skip(csOp.chars, csOp.lines);
        builder.keep(csOp.chars, csOp.lines);
      }
    } else if (csOp.opcode === '+') {
      builder.keep(csOp.chars, csOp.lines);
    } else if (csOp.opcode === '-') {
      const textBank = nextText(csOp.chars);
      let textBankIndex = 0;

      consumeAttribRuns(csOp.chars, (len, attribs) => {
        builder.insert(
            textBank.substring(textBankIndex, textBankIndex + len), attribs + csOp.attribs);
        textBankIndex += len;
      });
    }
  }

  return Changeset.checkRep(builder.toString());
};
// export the constructor
2013-01-22 23:16:49 +00:00
module.exports = PadDiff;