etherpad-lite/src/node/handler/ImportHandler.js

251 lines
8 KiB
JavaScript
Raw Normal View History

'use strict';
2011-07-21 20:13:58 +01:00
/**
* Handles the import requests
*/
/*
* 2011 Peter 'Pita' Martischka (Primary Technology Ltd)
2012-11-27 00:11:45 +01:00
* 2012 Iván Eixarch
2014-12-30 00:12:26 +01:00
* 2014 John McLear (Etherpad Foundation / McLear Ltd)
2011-07-21 20:13:58 +01:00
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS-IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
2020-11-23 13:24:19 -05:00
const padManager = require('../db/PadManager');
const padMessageHandler = require('./PadMessageHandler');
const fs = require('fs').promises;
2020-11-23 13:24:19 -05:00
const path = require('path');
const settings = require('../utils/Settings');
2022-01-28 21:16:05 -05:00
const {Formidable} = require('formidable');
2020-11-23 13:24:19 -05:00
const os = require('os');
const importHtml = require('../utils/ImportHtml');
const importEtherpad = require('../utils/ImportEtherpad');
const log4js = require('log4js');
const hooks = require('../../static/js/pluginfw/hooks.js');
const logger = log4js.getLogger('ImportHandler');
// `status` must be a string supported by `importErrorMessage()` in `src/static/js/pad_impexp.js`.
class ImportError extends Error {
constructor(status, ...args) {
super(...args);
if (Error.captureStackTrace) Error.captureStackTrace(this, ImportError);
this.name = 'ImportError';
this.status = status;
const msg = this.message == null ? '' : String(this.message);
if (status !== '') this.message = msg === '' ? status : `${status}: ${msg}`;
}
}
const rm = async (path) => {
try {
await fs.unlink(path);
} catch (err) {
if (err.code !== 'ENOENT') throw err;
}
};
let converter = null;
2020-11-23 13:24:19 -05:00
let exportExtension = 'htm';
// load abiword only if it is enabled and if soffice is disabled
if (settings.abiword != null && settings.soffice == null) {
converter = require('../utils/Abiword');
}
// load soffice only if it is enabled
if (settings.soffice != null) {
converter = require('../utils/LibreOffice');
2020-11-23 13:24:19 -05:00
exportExtension = 'html';
}
const tmpDirectory = os.tmpdir();
2011-07-21 20:13:58 +01:00
/**
* do a requested import
*/
const doImport = async (req, res, padId) => {
// pipe to a file
// convert file to html via abiword or soffice
// set html in the pad
2020-11-23 13:24:19 -05:00
const randNum = Math.floor(Math.random() * 0xFFFFFFFF);
// setting flag for whether to use converter or not
let useConverter = (converter != null);
2022-01-28 21:16:05 -05:00
const form = new Formidable({
keepExtensions: true,
uploadDir: tmpDirectory,
maxFileSize: settings.importMaxFileSize,
});
// locally wrapped Promise, since form.parse requires a callback
let srcFile = await new Promise((resolve, reject) => {
2020-11-23 13:24:19 -05:00
form.parse(req, (err, fields, files) => {
2021-02-18 03:42:41 -05:00
if (err != null) {
logger.warn(`Import failed due to form error: ${err.stack || err}`);
// I hate doing indexOf here but I can't see anything to use...
2020-11-23 13:24:19 -05:00
if (err && err.stack && err.stack.indexOf('maxFileSize') !== -1) {
return reject(new ImportError('maxFileSize'));
}
return reject(new ImportError('uploadFailed'));
}
2021-02-18 03:42:41 -05:00
if (!files.file) {
logger.warn('Import failed because form had no file');
return reject(new ImportError('uploadFailed'));
2020-04-03 10:30:12 +00:00
}
2022-01-28 21:16:05 -05:00
resolve(files.file.filepath);
});
});
// ensure this is a file ending we know, else we change the file ending to .txt
// this allows us to accept source code files like .c or .java
2020-11-23 13:24:19 -05:00
const fileEnding = path.extname(srcFile).toLowerCase();
const knownFileEndings =
['.txt', '.doc', '.docx', '.pdf', '.odt', '.html', '.htm', '.etherpad', '.rtf'];
2020-11-23 13:24:19 -05:00
const fileEndingUnknown = (knownFileEndings.indexOf(fileEnding) < 0);
if (fileEndingUnknown) {
// the file ending is not known
if (settings.allowUnknownFileEnds === true) {
// we need to rename this file with a .txt ending
2020-11-23 13:24:19 -05:00
const oldSrcFile = srcFile;
2020-11-23 13:24:19 -05:00
srcFile = path.join(path.dirname(srcFile), `${path.basename(srcFile, fileEnding)}.txt`);
await fs.rename(oldSrcFile, srcFile);
} else {
2021-02-18 03:42:41 -05:00
logger.warn(`Not allowing unknown file type to be imported: ${fileEnding}`);
throw new ImportError('uploadFailed');
}
}
2020-11-23 13:24:19 -05:00
const destFile = path.join(tmpDirectory, `etherpad_import_${randNum}.${exportExtension}`);
const context = {srcFile, destFile, fileEnding, padId, ImportError};
const importHandledByPlugin = (await hooks.aCallAll('import', context)).some((x) => x);
2020-11-23 13:24:19 -05:00
const fileIsEtherpad = (fileEnding === '.etherpad');
const fileIsHTML = (fileEnding === '.html' || fileEnding === '.htm');
const fileIsTXT = (fileEnding === '.txt');
let directDatabaseAccess = false;
if (fileIsEtherpad) {
const pad = await padManager.getPad(padId);
const headCount = pad.head;
if (headCount >= 10) {
logger.warn('Aborting direct database import attempt of a pad that already has content');
throw new ImportError('padHasData');
}
const text = await fs.readFile(srcFile, 'utf8');
directDatabaseAccess = true;
await importEtherpad.setPadRaw(padId, text);
}
// convert file to html if necessary
if (!importHandledByPlugin && !directDatabaseAccess) {
if (fileIsTXT) {
// Don't use converter for text files
useConverter = false;
}
// See https://github.com/ether/etherpad-lite/issues/2572
if (fileIsHTML || !useConverter) {
// if no converter only rename
await fs.rename(srcFile, destFile);
} else {
try {
await converter.convertFile(srcFile, destFile, exportExtension);
} catch (err) {
logger.warn(`Converting Error: ${err.stack || err}`);
throw new ImportError('convertFailed');
}
}
}
if (!useConverter && !directDatabaseAccess) {
// Read the file with no encoding for raw buffer access.
const buf = await fs.readFile(destFile);
// Check if there are only ascii chars in the uploaded file
2020-11-23 13:24:19 -05:00
const isAscii = !Array.prototype.some.call(buf, (c) => (c > 240));
if (!isAscii) {
2021-02-18 03:42:41 -05:00
logger.warn('Attempt to import non-ASCII file');
throw new ImportError('uploadFailed');
}
}
// get the pad object
let pad = await padManager.getPad(padId);
// read the text
let text;
if (!directDatabaseAccess) {
text = await fs.readFile(destFile, 'utf8');
2014-12-29 20:57:58 +01:00
// node on windows has a delay on releasing of the file lock.
// We add a 100ms delay to work around this
2020-11-23 13:24:19 -05:00
if (os.type().indexOf('Windows') > -1) {
await new Promise((resolve) => setTimeout(resolve, 100));
}
}
// change text of the pad and broadcast the changeset
if (!directDatabaseAccess) {
if (importHandledByPlugin || useConverter || fileIsHTML) {
try {
await importHtml.setPadHTML(pad, text);
2021-02-18 03:42:41 -05:00
} catch (err) {
logger.warn(`Error importing, possibly caused by malformed HTML: ${err.stack || err}`);
}
} else {
await pad.setText(text);
}
}
// Load the Pad into memory then broadcast updates to all clients
padManager.unloadPad(padId);
pad = await padManager.getPad(padId);
padManager.unloadPad(padId);
2014-12-29 20:57:58 +01:00
// Direct database access means a pad user should reload the pad and not attempt to receive
// updated pad data.
if (directDatabaseAccess) return true;
// tell clients to update
await padMessageHandler.updatePadClients(pad);
// clean up temporary files
rm(srcFile);
rm(destFile);
return false;
};
exports.doImport = async (req, res, padId) => {
let httpStatus = 200;
let code = 0;
let message = 'ok';
let directDatabaseAccess;
try {
directDatabaseAccess = await doImport(req, res, padId);
} catch (err) {
const known = err instanceof ImportError && err.status;
if (!known) logger.error(`Internal error during import: ${err.stack || err}`);
httpStatus = known ? 400 : 500;
code = known ? 1 : 2;
message = known ? err.status : 'internalError';
}
res.status(httpStatus).json({code, message, data: {directDatabaseAccess}});
2020-11-23 13:24:19 -05:00
};