'use strict'; /** * Handles the import requests */ /* * 2011 Peter 'Pita' Martischka (Primary Technology Ltd) * 2012 Iván Eixarch * 2014 John McLear (Etherpad Foundation / McLear Ltd) * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS-IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ const padManager = require('../db/PadManager'); const padMessageHandler = require('./PadMessageHandler'); const fs = require('fs').promises; const path = require('path'); const settings = require('../utils/Settings'); const formidable = require('formidable'); const os = require('os'); const importHtml = require('../utils/ImportHtml'); const importEtherpad = require('../utils/ImportEtherpad'); const log4js = require('log4js'); const hooks = require('../../static/js/pluginfw/hooks.js'); // `status` must be a string supported by `importErrorMessage()` in `src/static/js/pad_impexp.js`. class ImportError extends Error { constructor(status, ...args) { super(...args); if (Error.captureStackTrace) Error.captureStackTrace(this, ImportError); this.name = 'ImportError'; this.status = status; const msg = this.message == null ? '' : String(this.message); if (status !== '') this.message = msg === '' ? status : `${status}: ${msg}`; } } const rm = async (path) => { try { await fs.unlink(path); } catch (err) { if (err.code !== 'ENOENT') throw err; } }; let convertor = null; let exportExtension = 'htm'; // load abiword only if it is enabled and if soffice is disabled if (settings.abiword != null && settings.soffice == null) { convertor = require('../utils/Abiword'); } // load soffice only if it is enabled if (settings.soffice != null) { convertor = require('../utils/LibreOffice'); exportExtension = 'html'; } const tmpDirectory = os.tmpdir(); /** * do a requested import */ const doImport = async (req, res, padId) => { const apiLogger = log4js.getLogger('ImportHandler'); // pipe to a file // convert file to html via abiword or soffice // set html in the pad const randNum = Math.floor(Math.random() * 0xFFFFFFFF); // setting flag for whether to use convertor or not let useConvertor = (convertor != null); const form = new formidable.IncomingForm(); form.keepExtensions = true; form.uploadDir = tmpDirectory; form.maxFileSize = settings.importMaxFileSize; // Ref: https://github.com/node-formidable/formidable/issues/469 // Crash in Etherpad was Uploading Error: Error: Request aborted // [ERR_STREAM_DESTROYED]: Cannot call write after a stream was destroyed form.onPart = (part) => { form.handlePart(part); if (part.filename !== undefined) { form.openedFiles[form.openedFiles.length - 1]._writeStream.on('error', (err) => { form.emit('error', err); }); } }; // locally wrapped Promise, since form.parse requires a callback let srcFile = await new Promise((resolve, reject) => { form.parse(req, (err, fields, files) => { if (err || files.file === undefined) { // the upload failed, stop at this point if (err) { console.warn(`Uploading Error: ${err.stack}`); } // I hate doing indexOf here but I can't see anything to use... if (err && err.stack && err.stack.indexOf('maxFileSize') !== -1) { return reject(new ImportError('maxFileSize')); } return reject(new ImportError('uploadFailed')); } if (!files.file) { // might not be a graceful fix but it works return reject(new ImportError('uploadFailed')); } resolve(files.file.path); }); }); // ensure this is a file ending we know, else we change the file ending to .txt // this allows us to accept source code files like .c or .java const fileEnding = path.extname(srcFile).toLowerCase(); const knownFileEndings = ['.txt', '.doc', '.docx', '.pdf', '.odt', '.html', '.htm', '.etherpad', '.rtf']; const fileEndingUnknown = (knownFileEndings.indexOf(fileEnding) < 0); if (fileEndingUnknown) { // the file ending is not known if (settings.allowUnknownFileEnds === true) { // we need to rename this file with a .txt ending const oldSrcFile = srcFile; srcFile = path.join(path.dirname(srcFile), `${path.basename(srcFile, fileEnding)}.txt`); await fs.rename(oldSrcFile, srcFile); } else { console.warn('Not allowing unknown file type to be imported', fileEnding); throw new ImportError('uploadFailed'); } } const destFile = path.join(tmpDirectory, `etherpad_import_${randNum}.${exportExtension}`); // Logic for allowing external Import Plugins const result = await hooks.aCallAll('import', {srcFile, destFile, fileEnding}); const importHandledByPlugin = (result.length > 0); // This feels hacky and wrong.. const fileIsEtherpad = (fileEnding === '.etherpad'); const fileIsHTML = (fileEnding === '.html' || fileEnding === '.htm'); const fileIsTXT = (fileEnding === '.txt'); if (fileIsEtherpad) { // we do this here so we can see if the pad has quite a few edits const _pad = await padManager.getPad(padId); const headCount = _pad.head; if (headCount >= 10) { apiLogger.warn('Aborting direct database import attempt of a pad that already has content'); throw new ImportError('padHasData'); } const _text = await fs.readFile(srcFile, 'utf8'); req.directDatabaseAccess = true; await importEtherpad.setPadRaw(padId, _text); } // convert file to html if necessary if (!importHandledByPlugin && !req.directDatabaseAccess) { if (fileIsTXT) { // Don't use convertor for text files useConvertor = false; } // See https://github.com/ether/etherpad-lite/issues/2572 if (fileIsHTML || !useConvertor) { // if no convertor only rename await fs.rename(srcFile, destFile); } else { // @TODO - no Promise interface for convertors (yet) await new Promise((resolve, reject) => { convertor.convertFile(srcFile, destFile, exportExtension, (err) => { // catch convert errors if (err) { console.warn('Converting Error:', err); return reject(new ImportError('convertFailed')); } resolve(); }); }); } } if (!useConvertor && !req.directDatabaseAccess) { // Read the file with no encoding for raw buffer access. const buf = await fs.readFile(destFile); // Check if there are only ascii chars in the uploaded file const isAscii = !Array.prototype.some.call(buf, (c) => (c > 240)); if (!isAscii) { throw new ImportError('uploadFailed'); } } // get the pad object let pad = await padManager.getPad(padId); // read the text let text; if (!req.directDatabaseAccess) { text = await fs.readFile(destFile, 'utf8'); // node on windows has a delay on releasing of the file lock. // We add a 100ms delay to work around this if (os.type().indexOf('Windows') > -1) { await new Promise((resolve) => setTimeout(resolve, 100)); } } // change text of the pad and broadcast the changeset if (!req.directDatabaseAccess) { if (importHandledByPlugin || useConvertor || fileIsHTML) { try { await importHtml.setPadHTML(pad, text); } catch (e) { apiLogger.warn('Error importing, possibly caused by malformed HTML'); } } else { await pad.setText(text); } } // Load the Pad into memory then broadcast updates to all clients padManager.unloadPad(padId); pad = await padManager.getPad(padId); padManager.unloadPad(padId); // direct Database Access means a pad user should perform a switchToPad // and not attempt to receive updated pad data if (req.directDatabaseAccess) { return; } // tell clients to update await padMessageHandler.updatePadClients(pad); // clean up temporary files rm(srcFile); rm(destFile); }; exports.doImport = (req, res, padId) => { /** * NB: abuse the 'req' object by storing an additional * 'directDatabaseAccess' property on it so that it can * be passed back in the HTML below. * * this is necessary because in the 'throw' paths of * the function above there's no other way to return * a value to the caller. */ let status = 'ok'; doImport(req, res, padId).catch((err) => { if (!(err instanceof ImportError) || !err.status) throw err; status = err.status; }).then(() => { // close the connection res.send([ '', ].join('\n')); }); };