mirror of
https://github.com/ether/etherpad-lite.git
synced 2025-04-23 08:56:17 -04:00
Merge pull request #2668 from simong/tidy
Tidy HTML before trying to convert it with abiword
This commit is contained in:
commit
5615bab0d9
5 changed files with 156 additions and 35 deletions
|
@ -90,6 +90,10 @@
|
||||||
Abiword is needed to enable advanced import/export features of pads*/
|
Abiword is needed to enable advanced import/export features of pads*/
|
||||||
"abiword" : null,
|
"abiword" : null,
|
||||||
|
|
||||||
|
/* This is the path to the Tidy executable. Setting it to null disables Tidy.
|
||||||
|
Tidy is used to improve the quality of exported pads*/
|
||||||
|
"tidyHtml" : null,
|
||||||
|
|
||||||
/* Allow import of file types other than the supported types: txt, doc, docx, rtf, odt, html & htm */
|
/* Allow import of file types other than the supported types: txt, doc, docx, rtf, odt, html & htm */
|
||||||
"allowUnknownFileEnds" : true,
|
"allowUnknownFileEnds" : true,
|
||||||
|
|
||||||
|
|
|
@ -28,6 +28,7 @@ var fs = require("fs");
|
||||||
var settings = require('../utils/Settings');
|
var settings = require('../utils/Settings');
|
||||||
var os = require('os');
|
var os = require('os');
|
||||||
var hooks = require("ep_etherpad-lite/static/js/pluginfw/hooks");
|
var hooks = require("ep_etherpad-lite/static/js/pluginfw/hooks");
|
||||||
|
var TidyHtml = require('../utils/TidyHtml');
|
||||||
|
|
||||||
//load abiword only if it's enabled
|
//load abiword only if it's enabled
|
||||||
if(settings.abiword != null)
|
if(settings.abiword != null)
|
||||||
|
@ -172,12 +173,19 @@ exports.doExport = function(req, res, padId, type)
|
||||||
fs.writeFile(srcFile, html, callback);
|
fs.writeFile(srcFile, html, callback);
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
//send the convert job to abiword
|
|
||||||
|
// Tidy up the exported HTML
|
||||||
function(callback)
|
function(callback)
|
||||||
{
|
{
|
||||||
//ensure html can be collected by the garbage collector
|
//ensure html can be collected by the garbage collector
|
||||||
html = null;
|
html = null;
|
||||||
|
|
||||||
|
TidyHtml.tidy(srcFile, callback);
|
||||||
|
},
|
||||||
|
|
||||||
|
//send the convert job to abiword
|
||||||
|
function(callback)
|
||||||
|
{
|
||||||
destFile = tempDirectory + "/etherpad_export_" + randNum + "." + type;
|
destFile = tempDirectory + "/etherpad_export_" + randNum + "." + type;
|
||||||
abiword.convertFile(srcFile, destFile, type, callback);
|
abiword.convertFile(srcFile, destFile, type, callback);
|
||||||
},
|
},
|
||||||
|
|
|
@ -152,6 +152,11 @@ exports.minify = true;
|
||||||
*/
|
*/
|
||||||
exports.abiword = null;
|
exports.abiword = null;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The path of the tidy executable
|
||||||
|
*/
|
||||||
|
exports.tidyHtml = null;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Should we support non-natively supported file types on import?
|
* Should we support non-natively supported file types on import?
|
||||||
*/
|
*/
|
||||||
|
|
41
src/node/utils/TidyHtml.js
Normal file
41
src/node/utils/TidyHtml.js
Normal file
|
@ -0,0 +1,41 @@
|
||||||
|
/**
|
||||||
|
* Tidy up the HTML in a given file
|
||||||
|
*/
|
||||||
|
|
||||||
|
var log4js = require('log4js');
|
||||||
|
var settings = require('./Settings');
|
||||||
|
var spawn = require('child_process').spawn;
|
||||||
|
|
||||||
|
exports.tidy = function(srcFile, callback) {
|
||||||
|
var logger = log4js.getLogger('TidyHtml');
|
||||||
|
|
||||||
|
// Don't do anything if Tidy hasn't been enabled
|
||||||
|
if (!settings.tidyHtml) {
|
||||||
|
logger.debug('tidyHtml has not been configured yet, ignoring tidy request');
|
||||||
|
return callback(null);
|
||||||
|
}
|
||||||
|
|
||||||
|
var errMessage = '';
|
||||||
|
|
||||||
|
// Spawn a new tidy instance that cleans up the file inline
|
||||||
|
logger.debug('Tidying ' + srcFile);
|
||||||
|
var tidy = spawn(settings.tidyHtml, ['-modify', srcFile]);
|
||||||
|
|
||||||
|
// Keep track of any error messages
|
||||||
|
tidy.stderr.on('data', function (data) {
|
||||||
|
errMessage += data.toString();
|
||||||
|
});
|
||||||
|
|
||||||
|
// Wait until Tidy is done
|
||||||
|
tidy.on('close', function(code) {
|
||||||
|
// Tidy returns a 0 when no errors occur and a 1 exit code when
|
||||||
|
// the file could be tidied but a few warnings were generated
|
||||||
|
if (code === 0 || code === 1) {
|
||||||
|
logger.debug('Tidied ' + srcFile + ' successfully');
|
||||||
|
return callback(null);
|
||||||
|
} else {
|
||||||
|
logger.error('Failed to tidy ' + srcFile + '\n' + errMessage);
|
||||||
|
return callback('Tidy died with exit code ' + code);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
};
|
63
tests/backend/specs/api/tidy.js
Normal file
63
tests/backend/specs/api/tidy.js
Normal file
|
@ -0,0 +1,63 @@
|
||||||
|
// Every name below is part of a single `var` statement. The comma after the
// first declarator matters: without it the statement ends there and the
// remaining names are assigned as implicit globals.
// TidyHtml and Settings start out as null and are assigned in the `before` hook.
var assert = require('assert'),
    fs = require('fs'),
    path = require('path'),
    TidyHtml = null,
    Settings = null;
|
||||||
|
|
||||||
|
var npm = require("../../../../src/node_modules/npm/lib/npm.js");
|
||||||
|
|
||||||
|
// Backend tests for the TidyHtml utility. Both tests need a configured Tidy
// executable and are skipped when `Settings.tidyHtml` is not set.
describe('tidyHtml', function() {
  before(function(done) {
    // The modules under test are only required once npm has been loaded
    npm.load({}, function(err) {
      assert.ok(!err);
      TidyHtml = require('../../../../src/node/utils/TidyHtml');
      Settings = require('../../../../src/node/utils/Settings');
      return done();
    });
  });

  it('Tidies HTML', function(done) {
    // If the user hasn't configured Tidy, we skip this test as it's required for this test
    if (!Settings.tidyHtml) {
      return this.skip();
    }

    // Try to tidy up a bad HTML file
    var tmpDir = process.env.TEMP || "/tmp";
    var tmpFile = path.join(tmpDir, 'tmp_' + (Math.floor(Math.random() * 1000000)) + '.html');
    fs.writeFileSync(tmpFile, '<html><body><p>a paragraph</p><li>List without outer UL</li>trailing closing p</p></body></html>');
    TidyHtml.tidy(tmpFile, function(err){
      var cleanedHtml;
      try {
        assert.ok(!err);

        // Read the file again
        cleanedHtml = fs.readFileSync(tmpFile).toString();
      } finally {
        // Remove the temporary file whether or not tidying succeeded, so
        // repeated test runs do not litter the temp directory
        fs.unlinkSync(tmpFile);
      }

      var expectedHtml = [
        '<title></title>',
        '</head>',
        '<body>',
        '<p>a paragraph</p>',
        '<ul>',
        '<li>List without outer UL</li>',
        '<li style="list-style: none">trailing closing p</li>',
        '</ul>',
        '</body>',
        '</html>',
      ].join('\n');
      assert.notStrictEqual(cleanedHtml.indexOf(expectedHtml), -1);
      return done();
    });
  });

  it('can deal with errors', function(done) {
    // If the user hasn't configured Tidy, we skip this test as it's required for this test
    if (!Settings.tidyHtml) {
      return this.skip();
    }

    // Tidying a file that does not exist must report an error to the callback
    TidyHtml.tidy('/some/none/existing/file.html', function(err) {
      assert.ok(err);
      return done();
    });
  });
});
|
Loading…
Add table
Add a link
Reference in a new issue