From 2f81029e096bd0d1214ec4a74ce3ce6731f5ba30 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Stefan=20M=C3=BCller?=
Date: Sun, 9 Jun 2024 12:17:30 +0200
Subject: [PATCH] Add initial code for revision cleanup

---
 src/node/types/Revision.ts |  13 ++
 src/node/utils/Cleanup.ts  | 242 +++++++++++++++++++++++++++++++++++++
 2 files changed, 255 insertions(+)
 create mode 100644 src/node/types/Revision.ts
 create mode 100644 src/node/utils/Cleanup.ts

diff --git a/src/node/types/Revision.ts b/src/node/types/Revision.ts
new file mode 100644
index 000000000..8a9d65e29
--- /dev/null
+++ b/src/node/types/Revision.ts
@@ -0,0 +1,13 @@
+import {AChangeSet} from "./PadType";
+
+/**
+ * Shape of a pad revision record as used by utils/Cleanup.ts: loaded through
+ * `pad.getRevision()` and written back to the `pad:PADID:revs:N` keys.
+ */
+export type Revision = {
+  changeset: AChangeSet,
+  meta: {
+    author: string,
+    timestamp: number,
+  }
+}
diff --git a/src/node/utils/Cleanup.ts b/src/node/utils/Cleanup.ts
new file mode 100644
index 000000000..5bc5176e7
--- /dev/null
+++ b/src/node/utils/Cleanup.ts
@@ -0,0 +1,242 @@
+'use strict'
+
+import {AChangeSet, PadType} from "../types/PadType";
+import {MapArrayType} from "../types/MapType";
+import {ChangeSet} from "../types/ChangeSet";
+import {Revision} from "../types/Revision";
+
+const promises = require('./promises');
+const AttributePool = require('ep_etherpad-lite/static/js/AttributePool');
+
+const padManager = require('ep_etherpad-lite/node/db/PadManager');
+const db = require('ep_etherpad-lite/node/db/DB');
+const Changeset = require('ep_etherpad-lite/static/js/Changeset');
+const padMessageHandler = require('ep_etherpad-lite/node/handler/PadMessageHandler');
+
+
+/**
+ * Composes the changesets of the revisions `startNum` (inclusive) up to
+ * `endNum` (exclusive) of a pad into one changeset.
+ *
+ * The range is clamped to the existing revisions `0..head`. If the clamped
+ * range is empty, the result is `undefined`.
+ *
+ * @param pad the pad whose revisions are composed
+ * @param startNum first revision to include
+ * @param endNum revision number after the last revision to include
+ * @returns the composed changeset
+ */
+const composePadChangesets = async (pad: PadType, startNum: number, endNum: number) => {
+  // clamp the requested range to the revisions that exist
+  const headNum = pad.getHeadRevisionNumber();
+  endNum = Math.min(endNum, headNum + 1);
+  startNum = Math.max(startNum, 0);
+
+  // fetch all needed changesets concurrently, keyed by revision number
+  const changesets: MapArrayType<ChangeSet> = {};
+  const pending: Promise<void>[] = [];
+  for (let r = startNum; r < endNum; r++) {
+    pending.push((async () => {
+      changesets[r] = await pad.getRevisionChangeset(r);
+    })());
+  }
+  await Promise.all(pending);
+
+  // compose the changesets in revision order
+  const pool = pad.apool();
+  let changeset = changesets[startNum];
+  for (let r = startNum + 1; r < endNum; r++) {
+    changeset = Changeset.compose(changeset, changesets[r], pool);
+  }
+  return changeset;
+};
+
+/**
+ * Removes the whole revision history of a pad: the pad is copied without
+ * history to a temporary pad, copied back from there over the original pad
+ * id, and the temporary pad is removed afterwards.
+ *
+ * @param padID id of the pad whose history is dropped
+ */
+exports.deleteAllRevisions = async (padID: string): Promise<void> => {
+  // temporary pad id, made unique with a random suffix
+  const randomPadId = padID + 'aertdfdf' + Math.random().toString(10)
+
+  let pad = await padManager.getPad(padID);
+  await pad.copyPadWithoutHistory(randomPadId, false);
+  pad = await padManager.getPad(randomPadId);
+  // NOTE(review): the second argument presumably allows overwriting the
+  // existing pad - confirm against Pad.copyPadWithoutHistory
+  await pad.copyPadWithoutHistory(padID, true);
+  await pad.remove();
+}
+
+/**
+ * Builds the database record of one revision.
+ *
+ * @param aChangeset changeset of the revision
+ * @param timestamp timestamp stored in the revision meta data
+ * @param isKeyRev whether `pool` and `atext` are stored in the meta data
+ * @param authorId author of the revision; registered in `pool` if not empty
+ * @param atext atext stored in the meta data of a key revision
+ * @param pool attribute pool stored in the meta data of a key revision
+ * @returns the revision record
+ */
+const createRevision = async (aChangeset: AChangeSet, timestamp: number, isKeyRev: boolean, authorId = '', atext: any = null, pool: any = null) => {
+  // pool defaults to null, so it must be checked before it is used
+  if (authorId !== '' && pool != null) pool.putAttrib(['author', authorId]);
+
+  return {
+    changeset: aChangeset,
+    meta: {
+      author: authorId,
+      timestamp: timestamp,
+      ...(isKeyRev ? {pool: pool, atext: atext} : {}),
+    },
+  };
+}
+
+/**
+ * Shrinks the history of a pad to its last `keepRevisions` revisions.
+ *
+ * All revisions up to `head - keepRevisions` are composed into a new
+ * revision 0, the remaining revisions are renumbered to `1..keepRevisions`
+ * and the head of the pad is set to `keepRevisions`. Connected sessions are
+ * kicked from the pad before the database is changed.
+ *
+ * @param padId id of the pad to clean up
+ * @param keepRevisions number of most recent revisions to keep
+ * @throws RangeError if `keepRevisions` is not a non-negative integer
+ */
+exports.deleteRevisions = async (padId: string, keepRevisions: number): Promise<void> => {
+  if (!Number.isInteger(keepRevisions) || keepRevisions < 0) {
+    throw new RangeError(`keepRevisions must be a non-negative integer: ${keepRevisions}`);
+  }
+
+  const pad = await padManager.getPad(padId);
+  // await the check so that a broken pad aborts the cleanup
+  await pad.check()
+
+  console.log('Initial pad is valid')
+
+  const oldHead: number = pad.head
+  const cleanupUntilRevision = oldHead - keepRevisions
+  if (cleanupUntilRevision <= 0) {
+    // the pad has no more revisions than should be kept
+    console.log('Nothing to clean up: ', padId)
+    return
+  }
+
+  padMessageHandler.kickSessionsFromPad(padId)
+
+  console.log('Composing changesets: ', cleanupUntilRevision)
+  const changeset = await composePadChangesets(pad, 0, cleanupUntilRevision + 1)
+
+  // load all revisions into memory before any of them is deleted
+  const revisions: Revision[] = [];
+
+  for (let rev = 0; rev <= oldHead; ++rev) {
+    revisions[rev] = await pad.getRevision(rev)
+  }
+
+  console.log('Loaded revisions: ', revisions.length)
+
+  // delete every old revision record, also those above the new head that
+  // are not overwritten below, so that no stale records stay in the database
+  await promises.timesLimit(oldHead + 1, 500, async (i: number) => {
+    console.log('Delete revision: ', i)
+    await db.remove(`pad:${padId}:revs:${i}`, null);
+  });
+
+  const padContent = await db.get(`pad:${padId}`)
+  padContent.head = keepRevisions
+  await db.set(`pad:${padId}`, padContent);
+
+  // rebuild the atext of the new revision 0 from an empty pad
+  // NOTE(review): the changesets are applied with a new, empty pool instead
+  // of the pool of the pad - confirm that this is intended
+  let newAText = Changeset.makeAText('\n');
+  const newPool = new AttributePool()
+
+  for (let rev = 0; rev <= cleanupUntilRevision; ++rev) {
+    newAText = Changeset.applyToAText(revisions[rev].changeset, newAText, newPool);
+  }
+
+  const newRevisions = [await createRevision(
+    changeset,
+    revisions[cleanupUntilRevision].meta.timestamp,
+    0 === pad.getKeyRevisionNumber(0),
+    '',
+    newAText,
+    newPool
+  )];
+  console.log('Create revision 0: ', newRevisions[0]);
+
+  // build the kept revisions strictly in order because each atext depends
+  // on the atext of the revision before
+  for (let newRev = 1; newRev <= keepRevisions; ++newRev) {
+    const rev = newRev + cleanupUntilRevision
+    console.log('Map revision: ' + rev + ' => ' + newRev)
+
+    newAText = Changeset.applyToAText(revisions[rev].changeset, newAText, newPool);
+
+    newRevisions[newRev] = await createRevision(
+      revisions[rev].changeset,
+      revisions[rev].meta.timestamp,
+      newRev === pad.getKeyRevisionNumber(newRev),
+      revisions[rev].meta.author,
+      newAText,
+      newPool
+    );
+    console.log('Create revision: ', newRev, newRevisions[newRev]);
+  }
+
+  // the records are independent of each other and can be written concurrently
+  await promises.timesLimit(newRevisions.length, 500, async (i: number) => {
+    await db.set(`pad:${padId}:revs:${i}`, newRevisions[i]);
+  });
+
+  console.log('Finished migration. Checking pad now')
+
+  padManager.unloadPad(padId);
+
+  const newPad = await padManager.getPad(padId);
+  await newPad.check();
+}
+
+/**
+ * Cleans up the history of all pads that qualify for it.
+ *
+ * After an initial delay of 5 seconds, every pad is inspected. A pad is
+ * skipped if its head is below 10000, if users are connected to it or if
+ * its head revision is less than 24 hours old. Otherwise its history is cut
+ * down to the last 100 revisions. Errors thrown by that cleanup are logged
+ * per pad and do not stop the cleanup of the other pads.
+ */
+exports.checkTodos = async () => {
+  await new Promise<void>(resolve => setTimeout(resolve, 5000));
+
+  await Promise.all((await padManager.listAllPads()).padIDs.map(async (padId: string) => {
+    const pad = await padManager.getPad(padId);
+
+    const users = padMessageHandler.padUsersCount(padId)
+    // NOTE(review): padUsersCount() appears to return `{padUsersCount: n}` in
+    // Etherpad rather than a plain number - both shapes are handled; confirm
+    const userCount: number = typeof users === 'number' ? users : (users?.padUsersCount ?? 0)
+    console.log('pad user count', padId, users)
+    const revisionDate = await pad.getRevisionDate(pad.getHeadRevisionNumber())
+    console.log('pad last modified', padId, Date.now() - revisionDate)
+
+    if (pad.head < 10000 || userCount > 0 || Date.now() < revisionDate + 1000 * 60 * 60 * 24) {
+      return
+    }
+
+    try {
+      await exports.deleteRevisions(padId, 100)
+      console.log('successful cleaned up pad: ', padId)
+    } catch (err: any) {
+      console.error(`Error in pad ${padId}: ${err.stack || err}`);
+      return;
+    }
+  }));
+}