import/export: conversion to Promises/async

NB1: needs additional review and testing - no abiword available on my test bed
NB2: in ImportHandler.js, delete the file directly and handle any resulting
     error afterwards: checking for existence beforehand is prone to race
     conditions, and does not handle any errors anyway.
This commit is contained in:
Ray Bellis 2019-01-31 08:55:36 +00:00
parent 5192a0c498
commit 62345ac8f7
8 changed files with 379 additions and 570 deletions

View file

@ -19,18 +19,20 @@
* limitations under the License.
*/
var ERR = require("async-stacktrace");
var exporthtml = require("../utils/ExportHtml");
var exporttxt = require("../utils/ExportTxt");
var exportEtherpad = require("../utils/ExportEtherpad");
var async = require("async");
var fs = require("fs");
var settings = require('../utils/Settings');
var os = require('os');
var hooks = require("ep_etherpad-lite/static/js/pluginfw/hooks");
var TidyHtml = require('../utils/TidyHtml');
const util = require("util");
var convertor = null;
const fsp_writeFile = util.promisify(fs.writeFile);
const fsp_unlink = util.promisify(fs.unlink);
let convertor = null;
// load abiword only if it is enabled
if (settings.abiword != null) {
@ -47,122 +49,92 @@ const tempDirectory = os.tmpdir();
/**
* do a requested export
*/
exports.doExport = function(req, res, padId, type)
async function doExport(req, res, padId, type)
{
var fileName = padId;
// allow fileName to be overwritten by a hook, the type type is kept static for security reasons
hooks.aCallFirst("exportFileName", padId,
function(err, hookFileName){
// if fileName is set then set it to the padId, note that fileName is returned as an array.
if (hookFileName.length) {
fileName = hookFileName;
}
let hookFileName = await hooks.aCallFirst("exportFileName", padId);
// tell the browser that this is a downloadable file
res.attachment(fileName + "." + type);
// if fileName is set then set it to the padId, note that fileName is returned as an array.
if (hookFileName.length) {
fileName = hookFileName;
}
// if this is a plain text export, we can do this directly
// We have to over engineer this because tabs are stored as attributes and not plain text
if (type == "etherpad") {
exportEtherpad.getPadRaw(padId, function(err, pad) {
if (!err) {
res.send(pad);
// return;
}
});
} else if (type == "txt") {
exporttxt.getPadTXTDocument(padId, req.params.rev, function(err, txt) {
if (!err) {
res.send(txt);
}
});
} else {
var html;
var randNum;
var srcFile, destFile;
// tell the browser that this is a downloadable file
res.attachment(fileName + "." + type);
async.series([
// render the html document
function(callback) {
exporthtml.getPadHTMLDocument(padId, req.params.rev, function(err, _html) {
if (ERR(err, callback)) return;
html = _html;
callback();
});
},
// if this is a plain text export, we can do this directly
// We have to over engineer this because tabs are stored as attributes and not plain text
if (type === "etherpad") {
let pad = await exportEtherpad.getPadRaw(padId);
res.send(pad);
} else if (type === "txt") {
let txt = await exporttxt.getPadTXTDocument(padId, req.params.rev);
res.send(txt);
} else {
// render the html document
let html = await exporthtml.getPadHTMLDocument(padId, req.params.rev);
// decide what to do with the html export
function(callback) {
// if this is a html export, we can send this from here directly
if (type == "html") {
// do any final changes the plugin might want to make
hooks.aCallFirst("exportHTMLSend", html, function(err, newHTML) {
if (newHTML.length) html = newHTML;
res.send(html);
callback("stop");
});
} else {
// write the html export to a file
randNum = Math.floor(Math.random()*0xFFFFFFFF);
srcFile = tempDirectory + "/etherpad_export_" + randNum + ".html";
fs.writeFile(srcFile, html, callback);
}
},
// decide what to do with the html export
// Tidy up the exported HTML
function(callback) {
// ensure html can be collected by the garbage collector
html = null;
TidyHtml.tidy(srcFile, callback);
},
// send the convert job to the convertor (abiword, libreoffice, ..)
function(callback) {
destFile = tempDirectory + "/etherpad_export_" + randNum + "." + type;
// Allow plugins to overwrite the convert in export process
hooks.aCallAll("exportConvert", { srcFile: srcFile, destFile: destFile, req: req, res: res }, function(err, result) {
if (!err && result.length > 0) {
// console.log("export handled by plugin", destFile);
handledByPlugin = true;
callback();
} else {
convertor.convertFile(srcFile, destFile, type, callback);
}
});
},
// send the file
function(callback) {
res.sendFile(destFile, null, callback);
},
// clean up temporary files
function(callback) {
async.parallel([
function(callback) {
fs.unlink(srcFile, callback);
},
function(callback) {
// 100ms delay to accommodate for slow windows fs
if (os.type().indexOf("Windows") > -1) {
setTimeout(function() {
fs.unlink(destFile, callback);
}, 100);
} else {
fs.unlink(destFile, callback);
}
}
], callback);
}
],
function(err) {
if (err && err != "stop") ERR(err);
})
}
// if this is a html export, we can send this from here directly
if (type === "html") {
// do any final changes the plugin might want to make
let newHTML = await hooks.aCallFirst("exportHTMLSend", html);
if (newHTML.length) html = newHTML;
res.send(html);
throw "stop";
}
);
};
// else write the html export to a file
let randNum = Math.floor(Math.random()*0xFFFFFFFF);
let srcFile = tempDirectory + "/etherpad_export_" + randNum + ".html";
await fsp_writeFile(srcFile, html);
// Tidy up the exported HTML
// ensure html can be collected by the garbage collector
html = null;
await TidyHtml.tidy(srcFile);
// send the convert job to the convertor (abiword, libreoffice, ..)
let destFile = tempDirectory + "/etherpad_export_" + randNum + "." + type;
// Allow plugins to overwrite the convert in export process
let result = await hooks.aCallAll("exportConvert", { srcFile, destFile, req, res });
if (result.length > 0) {
// console.log("export handled by plugin", destFile);
handledByPlugin = true;
} else {
// @TODO no Promise interface for convertors (yet)
await new Promise((resolve, reject) => {
convertor.convertFile(srcFile, destFile, type, function(err) {
err ? reject("convertFailed") : resolve();
});
});
}
// send the file
let sendFile = util.promisify(res.sendFile);
await res.sendFile(destFile, null);
// clean up temporary files
await fsp_unlink(srcFile);
// 100ms delay to accommodate for slow windows fs
if (os.type().indexOf("Windows") > -1) {
await new Promise(resolve => setTimeout(resolve, 100));
}
await fsp_unlink(destFile);
}
}
/**
 * Public, callback-free entry point for exports.
 *
 * Kicks off the async doExport() and inspects its rejection reason:
 * the string "stop" is the sentinel doExport() throws after it has already
 * answered an HTML export, so it is silently dropped; every other reason is
 * rethrown unchanged.
 */
exports.doExport = function(req, res, padId, type)
{
  const onRejected = (reason) => {
    if (reason === "stop") {
      // early-exit sentinel, not a failure
      return;
    }
    throw reason;
  };

  doExport(req, res, padId, type).then(undefined, onRejected);
}

View file

@ -20,10 +20,8 @@
* limitations under the License.
*/
var ERR = require("async-stacktrace")
, padManager = require("../db/PadManager")
var padManager = require("../db/PadManager")
, padMessageHandler = require("./PadMessageHandler")
, async = require("async")
, fs = require("fs")
, path = require("path")
, settings = require('../utils/Settings')
@ -32,10 +30,16 @@ var ERR = require("async-stacktrace")
, importHtml = require("../utils/ImportHtml")
, importEtherpad = require("../utils/ImportEtherpad")
, log4js = require("log4js")
, hooks = require("ep_etherpad-lite/static/js/pluginfw/hooks.js");
, hooks = require("ep_etherpad-lite/static/js/pluginfw/hooks.js")
, util = require("util");
var convertor = null;
var exportExtension = "htm";
let fsp_exists = util.promisify(fs.exists);
let fsp_rename = util.promisify(fs.rename);
let fsp_readFile = util.promisify(fs.readFile);
let fsp_unlink = util.promisify(fs.unlink)
let convertor = null;
let exportExtension = "htm";
// load abiword only if it is enabled and if soffice is disabled
if (settings.abiword != null && settings.soffice === null) {
@ -53,292 +57,213 @@ const tmpDirectory = os.tmpdir();
/**
* do a requested import
*/
exports.doImport = function(req, res, padId)
async function doImport(req, res, padId)
{
var apiLogger = log4js.getLogger("ImportHandler");
// pipe to a file
// convert file to html via abiword or soffice
// set html in the pad
var srcFile, destFile
, pad
, text
, importHandledByPlugin
, directDatabaseAccess
, useConvertor;
var randNum = Math.floor(Math.random()*0xFFFFFFFF);
// setting flag for whether to use convertor or not
useConvertor = (convertor != null);
let useConvertor = (convertor != null);
async.series([
// save the uploaded file to /tmp
function(callback) {
var form = new formidable.IncomingForm();
form.keepExtensions = true;
form.uploadDir = tmpDirectory;
let form = new formidable.IncomingForm();
form.keepExtensions = true;
form.uploadDir = tmpDirectory;
form.parse(req, function(err, fields, files) {
if (err || files.file === undefined) {
// the upload failed, stop at this point
if (err) {
console.warn("Uploading Error: " + err.stack);
}
callback("uploadFailed");
return;
// locally wrapped Promise, since form.parse requires a callback
let srcFile = await new Promise((resolve, reject) => {
form.parse(req, function(err, fields, files) {
if (err || files.file === undefined) {
// the upload failed, stop at this point
if (err) {
console.warn("Uploading Error: " + err.stack);
}
// everything ok, continue
// save the path of the uploaded file
srcFile = files.file.path;
callback();
});
},
// ensure this is a file ending we know, else we change the file ending to .txt
// this allows us to accept source code files like .c or .java
function(callback) {
var fileEnding = path.extname(srcFile).toLowerCase()
, knownFileEndings = [".txt", ".doc", ".docx", ".pdf", ".odt", ".html", ".htm", ".etherpad", ".rtf"]
, fileEndingKnown = (knownFileEndings.indexOf(fileEnding) > -1);
// if the file ending is known, continue as normal
if (fileEndingKnown) {
callback();
return;
reject("uploadFailed");
}
resolve(files.file.path);
});
});
// ensure this is a file ending we know, else we change the file ending to .txt
// this allows us to accept source code files like .c or .java
let fileEnding = path.extname(srcFile).toLowerCase()
, knownFileEndings = [".txt", ".doc", ".docx", ".pdf", ".odt", ".html", ".htm", ".etherpad", ".rtf"]
, fileEndingUnknown = (knownFileEndings.indexOf(fileEnding) < 0);
if (fileEndingUnknown) {
// the file ending is not known
if (settings.allowUnknownFileEnds === true) {
// we need to rename this file with a .txt ending
if (settings.allowUnknownFileEnds === true) {
var oldSrcFile = srcFile;
srcFile = path.join(path.dirname(srcFile), path.basename(srcFile, fileEnding) + ".txt");
fs.rename(oldSrcFile, srcFile, callback);
} else {
console.warn("Not allowing unknown file type to be imported", fileEnding);
callback("uploadFailed");
}
},
let oldSrcFile = srcFile;
function(callback) {
destFile = path.join(tmpDirectory, "etherpad_import_" + randNum + "." + exportExtension);
srcFile = path.join(path.dirname(srcFile), path.basename(srcFile, fileEnding) + ".txt");
await fs.rename(oldSrcFile, srcFile);
} else {
console.warn("Not allowing unknown file type to be imported", fileEnding);
throw "uploadFailed";
}
}
// Logic for allowing external Import Plugins
hooks.aCallAll("import", { srcFile: srcFile, destFile: destFile }, function(err, result) {
if (ERR(err, callback)) return callback();
let destFile = path.join(tmpDirectory, "etherpad_import_" + randNum + "." + exportExtension);
if (result.length > 0) { // This feels hacky and wrong..
importHandledByPlugin = true;
}
callback();
});
},
// Logic for allowing external Import Plugins
let result = await hooks.aCallAll("import", { srcFile, destFile });
let importHandledByPlugin = (result.length > 0); // This feels hacky and wrong..
function(callback) {
var fileEnding = path.extname(srcFile).toLowerCase()
var fileIsNotEtherpad = (fileEnding !== ".etherpad");
let fileIsEtherpad = (fileEnding === ".etherpad");
let fileIsHTML = (fileEnding === ".html" || fileEnding === ".htm");
let fileIsTXT = (fileEnding === ".txt");
if (fileIsNotEtherpad) {
callback();
let directDatabaseAccess = false;
return;
}
if (fileIsEtherpad) {
// we do this here so we can see if the pad has quite a few edits
let _pad = await padManager.getPad(padId);
let headCount = _pad.head;
// we do this here so we can see if the pad has quite a few edits
padManager.getPad(padId, function(err, _pad) {
var headCount = _pad.head;
if (headCount >= 10) {
apiLogger.warn("Direct database Import attempt of a pad that already has content, we won't be doing this");
return callback("padHasData");
}
if (headCount >= 10) {
apiLogger.warn("Direct database Import attempt of a pad that already has content, we won't be doing this");
throw "padHasData";
}
fs.readFile(srcFile, "utf8", function(err, _text) {
directDatabaseAccess = true;
importEtherpad.setPadRaw(padId, _text, function(err) {
callback();
});
const fsp_readFile = util.promisify(fs.readFile);
let _text = await fsp_readFile(srcFile, "utf8");
directDatabaseAccess = true;
await importEtherpad.setPadRaw(padId, _text);
}
// convert file to html if necessary
if (!importHandledByPlugin && !directDatabaseAccess) {
if (fileIsTXT) {
// Don't use convertor for text files
useConvertor = false;
}
// See https://github.com/ether/etherpad-lite/issues/2572
if (fileIsHTML || !useConvertor) {
// if no convertor only rename
fs.renameSync(srcFile, destFile);
} else {
// @TODO - no Promise interface for convertors (yet)
await new Promise((resolve, reject) => {
convertor.convertFile(srcFile, destFile, exportExtension, function(err) {
// catch convert errors
if (err) {
console.warn("Converting Error:", err);
reject("convertFailed");
}
resolve();
});
});
},
// convert file to html if necessary
function(callback) {
if (importHandledByPlugin || directDatabaseAccess) {
callback();
return;
}
var fileEnding = path.extname(srcFile).toLowerCase();
var fileIsHTML = (fileEnding === ".html" || fileEnding === ".htm");
var fileIsTXT = (fileEnding === ".txt");
if (fileIsTXT) useConvertor = false; // Don't use convertor for text files
// See https://github.com/ether/etherpad-lite/issues/2572
if (fileIsHTML || (useConvertor === false)) {
// if no convertor only rename
fs.rename(srcFile, destFile, callback);
return;
}
convertor.convertFile(srcFile, destFile, exportExtension, function(err) {
// catch convert errors
if (err) {
console.warn("Converting Error:", err);
return callback("convertFailed");
}
callback();
});
},
function(callback) {
if (useConvertor || directDatabaseAccess) {
callback();
return;
}
// Read the file with no encoding for raw buffer access.
fs.readFile(destFile, function(err, buf) {
if (err) throw err;
var isAscii = true;
// Check if there are only ascii chars in the uploaded file
for (var i=0, len=buf.length; i<len; i++) {
if (buf[i] > 240) {
isAscii=false;
break;
}
}
if (!isAscii) {
callback("uploadFailed");
return;
}
callback();
});
},
// get the pad object
function(callback) {
padManager.getPad(padId, function(err, _pad) {
if (ERR(err, callback)) return;
pad = _pad;
callback();
});
},
// read the text
function(callback) {
if (directDatabaseAccess) {
callback();
return;
}
fs.readFile(destFile, "utf8", function(err, _text) {
if (ERR(err, callback)) return;
text = _text;
// Title needs to be stripped out else it appends it to the pad..
text = text.replace("<title>", "<!-- <title>");
text = text.replace("</title>","</title>-->");
// node on windows has a delay on releasing of the file lock.
// We add a 100ms delay to work around this
if (os.type().indexOf("Windows") > -1) {
setTimeout(function() {callback();}, 100);
} else {
callback();
}
});
},
// change text of the pad and broadcast the changeset
function(callback) {
if (!directDatabaseAccess) {
var fileEnding = path.extname(srcFile).toLowerCase();
if (importHandledByPlugin || useConvertor || fileEnding == ".htm" || fileEnding == ".html") {
importHtml.setPadHTML(pad, text, function(e){
if (e) {
apiLogger.warn("Error importing, possibly caused by malformed HTML");
}
});
} else {
pad.setText(text);
}
}
// Load the Pad into memory then broadcast updates to all clients
padManager.unloadPad(padId);
padManager.getPad(padId, function(err, _pad) {
var pad = _pad;
padManager.unloadPad(padId);
// direct Database Access means a pad user should perform a switchToPad
// and not attempt to receive updated pad data
if (directDatabaseAccess) {
callback();
return;
}
// @TODO: not waiting for updatePadClients to finish
padMessageHandler.updatePadClients(pad);
callback();
});
},
// clean up temporary files
function(callback) {
if (directDatabaseAccess) {
callback();
return;
}
try {
fs.unlinkSync(srcFile);
} catch (e) {
console.log(e);
}
try {
fs.unlinkSync(destFile);
} catch (e) {
console.log(e);
}
callback();
}
], function(err) {
var status = "ok";
}
if (!useConvertor && !directDatabaseAccess) {
// Read the file with no encoding for raw buffer access.
let buf = await fsp_readFile(destFile);
// Check if there are only ascii chars in the uploaded file
let isAscii = ! Array.prototype.some.call(buf, c => (c > 240));
if (!isAscii) {
throw "uploadFailed";
}
}
// get the pad object
let pad = await padManager.getPad(padId);
// read the text
let text;
if (!directDatabaseAccess) {
text = await fsp_readFile(destFile, "utf8");
// Title needs to be stripped out else it appends it to the pad..
text = text.replace("<title>", "<!-- <title>");
text = text.replace("</title>","</title>-->");
// node on windows has a delay on releasing of the file lock.
// We add a 100ms delay to work around this
if (os.type().indexOf("Windows") > -1){
await new Promise(resolve => setTimeout(resolve, 100));
}
}
// change text of the pad and broadcast the changeset
if (!directDatabaseAccess) {
if (importHandledByPlugin || useConvertor || fileIsHTML) {
try {
importHtml.setPadHTML(pad, text);
} catch (e) {
apiLogger.warn("Error importing, possibly caused by malformed HTML");
}
} else {
pad.setText(text);
}
}
// Load the Pad into memory then broadcast updates to all clients
padManager.unloadPad(padId);
pad = await padManager.getPad(padId);
padManager.unloadPad(padId);
// direct Database Access means a pad user should perform a switchToPad
// and not attempt to receive updated pad data
if (!directDatabaseAccess) {
// tell clients to update
await padMessageHandler.updatePadClients(pad);
}
if (!directDatabaseAccess) {
// clean up temporary files
/*
* TODO: directly delete the file and handle the eventual error. Checking
* before for existence is prone to race conditions, and does not handle any
* errors anyway.
*/
if (await fsp_exists(srcFile)) {
fsp_unlink(srcFile);
}
if (await fsp_exists(destFile)) {
fsp_unlink(destFile);
}
}
return directDatabaseAccess;
}
/**
 * Public entry point for imports.
 *
 * Runs the async doImport() and, only once it has settled, answers the
 * request with a small HTML page whose script reports the outcome to the
 * parent window via padimpexp.handleFrameCall(directDatabaseAccess, status).
 *
 * The response is sent exactly once and after the import has finished, so
 * that `status` and `directDatabaseAccess` reflect the real result instead
 * of their initial values.
 *
 * @param req   the incoming request (handed to doImport, which parses the upload)
 * @param res   the response object; res.send() is called with the status page
 * @param padId id of the pad to import into
 */
exports.doImport = function (req, res, padId)
{
  let status = "ok";
  let directDatabaseAccess;

  doImport(req, res, padId).then(result => {
    directDatabaseAccess = result;
  }).catch(err => {
    // check for known errors and replace the status
    if (err === "uploadFailed" || err === "convertFailed" || err === "padHasData") {
      status = err;
    } else {
      // unknown failure: propagate it; the status page below is then skipped
      throw err;
    }
  }).then(() => {
    // close the connection
    res.send(
"<head> \
<script type='text/javascript' src='../../static/js/jquery.js'></script> \
</head> \
<script> \
$(window).load(function(){ \
var impexp = window.parent.padimpexp.handleFrameCall('" + directDatabaseAccess +"', '" + status + "'); \
}) \
</script>"
    );
  });
}