mirror of
https://github.com/ether/etherpad-lite.git
synced 2025-04-21 16:06:16 -04:00
begin support for better txt output
This commit is contained in:
parent
ab04a4e511
commit
d16b15f3eb
2 changed files with 543 additions and 12 deletions
|
@ -20,6 +20,7 @@
|
|||
|
||||
var ERR = require("async-stacktrace");
|
||||
var exporthtml = require("../utils/ExportHtml");
|
||||
var exporttxt = require("../utils/ExportTxt");
|
||||
var exportdokuwiki = require("../utils/ExportDokuWiki");
|
||||
var padManager = require("../db/PadManager");
|
||||
var async = require("async");
|
||||
|
@ -48,22 +49,75 @@ exports.doExport = function(req, res, padId, type)
|
|||
res.attachment(padId + "." + type);
|
||||
|
||||
//if this is a plain text export, we can do this directly
|
||||
// We have to over engineer this because tabs are stored as attributes and not plain text
|
||||
|
||||
if(type == "txt")
|
||||
{
|
||||
padManager.getPad(padId, function(err, pad)
|
||||
{
|
||||
ERR(err);
|
||||
if(req.params.rev){
|
||||
pad.getInternalRevisionAText(req.params.rev, function(junk, text)
|
||||
{
|
||||
res.send(text.text ? text.text : null);
|
||||
});
|
||||
}
|
||||
else
|
||||
var txt;
|
||||
var randNum;
|
||||
var srcFile, destFile;
|
||||
|
||||
async.series([
|
||||
//render the txt document
|
||||
function(callback)
|
||||
{
|
||||
res.send(pad.text());
|
||||
exporttxt.getPadTXTDocument(padId, req.params.rev, false, function(err, _txt)
|
||||
{
|
||||
if(ERR(err, callback)) return;
|
||||
txt = _txt;
|
||||
callback();
|
||||
});
|
||||
},
|
||||
//decide what to do with the txt export
|
||||
function(callback)
|
||||
{
|
||||
//if this is a txt export, we can send this from here directly
|
||||
res.send(txt);
|
||||
callback("stop");
|
||||
},
|
||||
//send the convert job to abiword
|
||||
function(callback)
|
||||
{
|
||||
//ensure html can be collected by the garbage collector
|
||||
txt = null;
|
||||
|
||||
destFile = tempDirectory + "/eplite_export_" + randNum + "." + type;
|
||||
abiword.convertFile(srcFile, destFile, type, callback);
|
||||
},
|
||||
//send the file
|
||||
function(callback)
|
||||
{
|
||||
res.sendfile(destFile, null, callback);
|
||||
},
|
||||
//clean up temporary files
|
||||
function(callback)
|
||||
{
|
||||
async.parallel([
|
||||
function(callback)
|
||||
{
|
||||
fs.unlink(srcFile, callback);
|
||||
},
|
||||
function(callback)
|
||||
{
|
||||
//100ms delay to accomidate for slow windows fs
|
||||
if(os.type().indexOf("Windows") > -1)
|
||||
{
|
||||
setTimeout(function()
|
||||
{
|
||||
fs.unlink(destFile, callback);
|
||||
}, 100);
|
||||
}
|
||||
else
|
||||
{
|
||||
fs.unlink(destFile, callback);
|
||||
}
|
||||
}
|
||||
], callback);
|
||||
}
|
||||
});
|
||||
], function(err)
|
||||
{
|
||||
if(err && err != "stop") ERR(err);
|
||||
})
|
||||
}
|
||||
else if(type == 'dokuwiki')
|
||||
{
|
||||
|
|
477
src/node/utils/ExportTxt.js
Normal file
477
src/node/utils/ExportTxt.js
Normal file
|
@ -0,0 +1,477 @@
|
|||
/**
|
||||
* Copyright 2009 Google Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS-IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
|
||||
var async = require("async");
|
||||
var Changeset = require("ep_etherpad-lite/static/js/Changeset");
|
||||
var padManager = require("../db/PadManager");
|
||||
var ERR = require("async-stacktrace");
|
||||
var Security = require('ep_etherpad-lite/static/js/security');
|
||||
var hooks = require('ep_etherpad-lite/static/js/pluginfw/hooks');
|
||||
function getPadPlainText(pad, revNum)
|
||||
{
|
||||
var atext = ((revNum !== undefined) ? pad.getInternalRevisionAText(revNum) : pad.atext());
|
||||
var textLines = atext.text.slice(0, -1).split('\n');
|
||||
var attribLines = Changeset.splitAttributionLines(atext.attribs, atext.text);
|
||||
var apool = pad.pool();
|
||||
|
||||
var pieces = [];
|
||||
for (var i = 0; i < textLines.length; i++)
|
||||
{
|
||||
var line = _analyzeLine(textLines[i], attribLines[i], apool);
|
||||
if (line.listLevel)
|
||||
{
|
||||
var numSpaces = line.listLevel * 2 - 1;
|
||||
var bullet = '*';
|
||||
pieces.push(new Array(numSpaces + 1).join(' '), bullet, ' ', line.text, '\n');
|
||||
}
|
||||
else
|
||||
{
|
||||
pieces.push(line.text, '\n');
|
||||
}
|
||||
}
|
||||
|
||||
return pieces.join('');
|
||||
}
|
||||
|
||||
function getPadTXT(pad, revNum, callback)
|
||||
{
|
||||
var atext = pad.atext;
|
||||
var html;
|
||||
async.waterfall([
|
||||
// fetch revision atext
|
||||
|
||||
|
||||
function (callback)
|
||||
{
|
||||
if (revNum != undefined)
|
||||
{
|
||||
pad.getInternalRevisionAText(revNum, function (err, revisionAtext)
|
||||
{
|
||||
if(ERR(err, callback)) return;
|
||||
atext = revisionAtext;
|
||||
callback();
|
||||
});
|
||||
}
|
||||
else
|
||||
{
|
||||
callback(null);
|
||||
}
|
||||
},
|
||||
|
||||
// convert atext to html
|
||||
|
||||
|
||||
function (callback)
|
||||
{
|
||||
html = getTXTFromAtext(pad, atext);
|
||||
callback(null);
|
||||
}],
|
||||
// run final callback
|
||||
|
||||
|
||||
function (err)
|
||||
{
|
||||
if(ERR(err, callback)) return;
|
||||
callback(null, html);
|
||||
});
|
||||
}
|
||||
|
||||
exports.getPadTXT = getPadTXT;
|
||||
exports.getTXTFromAtext = getTXTFromAtext;
|
||||
|
||||
function getTXTFromAtext(pad, atext, authorColors)
|
||||
{
|
||||
var apool = pad.apool();
|
||||
var textLines = atext.text.slice(0, -1).split('\n');
|
||||
var attribLines = Changeset.splitAttributionLines(atext.attribs, atext.text);
|
||||
|
||||
var tags = ['h1', 'h2', 'strong', 'em', 'u', 's'];
|
||||
var props = ['heading1', 'heading2', 'bold', 'italic', 'underline', 'strikethrough'];
|
||||
var anumMap = {};
|
||||
var css = "";
|
||||
|
||||
props.forEach(function (propName, i)
|
||||
{
|
||||
var propTrueNum = apool.putAttrib([propName, true], true);
|
||||
if (propTrueNum >= 0)
|
||||
{
|
||||
anumMap[propTrueNum] = i;
|
||||
}
|
||||
});
|
||||
|
||||
function getLineTXT(text, attribs)
|
||||
{
|
||||
var propVals = [false, false, false];
|
||||
var ENTER = 1;
|
||||
var STAY = 2;
|
||||
var LEAVE = 0;
|
||||
|
||||
// Use order of tags (b/i/u) as order of nesting, for simplicity
|
||||
// and decent nesting. For example,
|
||||
// <b>Just bold<b> <b><i>Bold and italics</i></b> <i>Just italics</i>
|
||||
// becomes
|
||||
// <b>Just bold <i>Bold and italics</i></b> <i>Just italics</i>
|
||||
var taker = Changeset.stringIterator(text);
|
||||
var assem = Changeset.stringAssembler();
|
||||
var openTags = [];
|
||||
|
||||
var urls = _findURLs(text);
|
||||
|
||||
var idx = 0;
|
||||
|
||||
function processNextChars(numChars)
|
||||
{
|
||||
if (numChars <= 0)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
var iter = Changeset.opIterator(Changeset.subattribution(attribs, idx, idx + numChars));
|
||||
idx += numChars;
|
||||
|
||||
while (iter.hasNext())
|
||||
{
|
||||
var o = iter.next();
|
||||
var propChanged = false;
|
||||
Changeset.eachAttribNumber(o.attribs, function (a)
|
||||
{
|
||||
if (a in anumMap)
|
||||
{
|
||||
var i = anumMap[a]; // i = 0 => bold, etc.
|
||||
if (!propVals[i])
|
||||
{
|
||||
propVals[i] = ENTER;
|
||||
propChanged = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
propVals[i] = STAY;
|
||||
}
|
||||
}
|
||||
});
|
||||
for (var i = 0; i < propVals.length; i++)
|
||||
{
|
||||
if (propVals[i] === true)
|
||||
{
|
||||
propVals[i] = LEAVE;
|
||||
propChanged = true;
|
||||
}
|
||||
else if (propVals[i] === STAY)
|
||||
{
|
||||
propVals[i] = true; // set it back
|
||||
}
|
||||
}
|
||||
// now each member of propVal is in {false,LEAVE,ENTER,true}
|
||||
// according to what happens at start of span
|
||||
if (propChanged)
|
||||
{
|
||||
// leaving bold (e.g.) also leaves italics, etc.
|
||||
var left = false;
|
||||
for (var i = 0; i < propVals.length; i++)
|
||||
{
|
||||
var v = propVals[i];
|
||||
if (!left)
|
||||
{
|
||||
if (v === LEAVE)
|
||||
{
|
||||
left = true;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (v === true)
|
||||
{
|
||||
propVals[i] = STAY; // tag will be closed and re-opened
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var tags2close = [];
|
||||
|
||||
for (var i = propVals.length - 1; i >= 0; i--)
|
||||
{
|
||||
if (propVals[i] === LEAVE)
|
||||
{
|
||||
//emitCloseTag(i);
|
||||
tags2close.push(i);
|
||||
propVals[i] = false;
|
||||
}
|
||||
else if (propVals[i] === STAY)
|
||||
{
|
||||
//emitCloseTag(i);
|
||||
tags2close.push(i);
|
||||
}
|
||||
}
|
||||
|
||||
for (var i = 0; i < propVals.length; i++)
|
||||
{
|
||||
if (propVals[i] === ENTER || propVals[i] === STAY)
|
||||
{
|
||||
emitOpenTag(i);
|
||||
propVals[i] = true;
|
||||
}
|
||||
}
|
||||
// propVals is now all {true,false} again
|
||||
} // end if (propChanged)
|
||||
var chars = o.chars;
|
||||
if (o.lines)
|
||||
{
|
||||
chars--; // exclude newline at end of line, if present
|
||||
}
|
||||
|
||||
var s = taker.take(chars);
|
||||
|
||||
//removes the characters with the code 12. Don't know where they come
|
||||
//from but they break the abiword parser and are completly useless
|
||||
s = s.replace(String.fromCharCode(12), "");
|
||||
|
||||
assem.append(_encodeWhitespace(Security.escapeHTML(s)));
|
||||
} // end iteration over spans in line
|
||||
|
||||
var tags2close = [];
|
||||
for (var i = propVals.length - 1; i >= 0; i--)
|
||||
{
|
||||
if (propVals[i])
|
||||
{
|
||||
tags2close.push(i);
|
||||
propVals[i] = false;
|
||||
}
|
||||
}
|
||||
|
||||
} // end processNextChars
|
||||
if (urls)
|
||||
{
|
||||
urls.forEach(function (urlData)
|
||||
{
|
||||
var startIndex = urlData[0];
|
||||
var url = urlData[1];
|
||||
var urlLength = url.length;
|
||||
processNextChars(startIndex - idx);
|
||||
console.warn(url);
|
||||
// assem.append('<a href="' + Security.escapeHTMLAttribute(url) + '">');
|
||||
assem.append(url);
|
||||
processNextChars(urlLength);
|
||||
// assem.append('</a>');
|
||||
});
|
||||
}
|
||||
processNextChars(text.length - idx);
|
||||
|
||||
return _processSpaces(assem.toString());
|
||||
} // end getLineHTML
|
||||
var pieces = [css];
|
||||
|
||||
// Need to deal with constraints imposed on HTML lists; can
|
||||
// only gain one level of nesting at once, can't change type
|
||||
// mid-list, etc.
|
||||
// People might use weird indenting, e.g. skip a level,
|
||||
// so we want to do something reasonable there. We also
|
||||
// want to deal gracefully with blank lines.
|
||||
// => keeps track of the parents level of indentation
|
||||
var lists = []; // e.g. [[1,'bullet'], [3,'bullet'], ...]
|
||||
for (var i = 0; i < textLines.length; i++)
|
||||
{
|
||||
var line = _analyzeLine(textLines[i], attribLines[i], apool);
|
||||
var lineContent = getLineTXT(line.text, line.aline);
|
||||
if(line.listTypeName == "bullet"){
|
||||
lineContent = "* " + lineContent; // add a bullet
|
||||
}
|
||||
if(line.listLevel > 0){
|
||||
for (var j = line.listLevel - 1; j >= 0; j--){
|
||||
pieces.push('\t');
|
||||
}
|
||||
if(line.listTypeName == "number"){
|
||||
pieces.push(line.listLevel + ". ");
|
||||
// This is bad because it doesn't truly reflect what the user
|
||||
// sees because browsers do magic on nested <ol><li>s
|
||||
}
|
||||
pieces.push(lineContent, '\n');
|
||||
}else{
|
||||
console.warn(line);
|
||||
pieces.push(lineContent, '\n');
|
||||
}
|
||||
|
||||
// I'm not too keen about using teh HTML export filters here, they could cause real pain in the future
|
||||
// I'd suggest supporting getLineTXTForExport
|
||||
var lineContentFromHook = hooks.callAllStr("getLineHTMLForExport",
|
||||
{
|
||||
line: line,
|
||||
apool: apool,
|
||||
attribLine: attribLines[i],
|
||||
text: textLines[i]
|
||||
}, " ", " ", "");
|
||||
if (lineContentFromHook)
|
||||
{
|
||||
pieces.push(lineContentFromHook, '');
|
||||
}
|
||||
else
|
||||
{
|
||||
// pieces.push(lineContent, '\n');
|
||||
}
|
||||
}
|
||||
|
||||
return pieces.join('');
|
||||
}
|
||||
|
||||
function _analyzeLine(text, aline, apool)
|
||||
{
|
||||
var line = {};
|
||||
|
||||
// identify list
|
||||
var lineMarker = 0;
|
||||
line.listLevel = 0;
|
||||
if (aline)
|
||||
{
|
||||
var opIter = Changeset.opIterator(aline);
|
||||
if (opIter.hasNext())
|
||||
{
|
||||
var listType = Changeset.opAttributeValue(opIter.next(), 'list', apool);
|
||||
if (listType)
|
||||
{
|
||||
lineMarker = 1;
|
||||
listType = /([a-z]+)([12345678])/.exec(listType);
|
||||
if (listType)
|
||||
{
|
||||
line.listTypeName = listType[1];
|
||||
line.listLevel = Number(listType[2]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (lineMarker)
|
||||
{
|
||||
line.text = text.substring(1);
|
||||
line.aline = Changeset.subattribution(aline, 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
line.text = text;
|
||||
line.aline = aline;
|
||||
}
|
||||
|
||||
return line;
|
||||
}
|
||||
|
||||
exports.getPadTXTDocument = function (padId, revNum, noDocType, callback)
|
||||
{
|
||||
padManager.getPad(padId, function (err, pad)
|
||||
{
|
||||
if(ERR(err, callback)) return;
|
||||
|
||||
getPadTXT(pad, revNum, function (err, html)
|
||||
{
|
||||
if(ERR(err, callback)) return;
|
||||
callback(null, html);
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
function _encodeWhitespace(s) {
|
||||
return s.replace(/[^\x21-\x7E\s\t\n\r]/g, function(c)
|
||||
{
|
||||
return "&#" +c.charCodeAt(0) + ";"
|
||||
});
|
||||
}
|
||||
|
||||
// copied from ACE
|
||||
function _processSpaces(s)
|
||||
{
|
||||
var doesWrap = true;
|
||||
if (s.indexOf("<") < 0 && !doesWrap)
|
||||
{
|
||||
// short-cut
|
||||
return s.replace(/ /g, ' ');
|
||||
}
|
||||
var parts = [];
|
||||
s.replace(/<[^>]*>?| |[^ <]+/g, function (m)
|
||||
{
|
||||
parts.push(m);
|
||||
});
|
||||
if (doesWrap)
|
||||
{
|
||||
var endOfLine = true;
|
||||
var beforeSpace = false;
|
||||
// last space in a run is normal, others are nbsp,
|
||||
// end of line is nbsp
|
||||
for (var i = parts.length - 1; i >= 0; i--)
|
||||
{
|
||||
var p = parts[i];
|
||||
if (p == " ")
|
||||
{
|
||||
if (endOfLine || beforeSpace) parts[i] = ' ';
|
||||
endOfLine = false;
|
||||
beforeSpace = true;
|
||||
}
|
||||
else if (p.charAt(0) != "<")
|
||||
{
|
||||
endOfLine = false;
|
||||
beforeSpace = false;
|
||||
}
|
||||
}
|
||||
// beginning of line is nbsp
|
||||
for (var i = 0; i < parts.length; i++)
|
||||
{
|
||||
var p = parts[i];
|
||||
if (p == " ")
|
||||
{
|
||||
parts[i] = ' ';
|
||||
break;
|
||||
}
|
||||
else if (p.charAt(0) != "<")
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (var i = 0; i < parts.length; i++)
|
||||
{
|
||||
var p = parts[i];
|
||||
if (p == " ")
|
||||
{
|
||||
parts[i] = ' ';
|
||||
}
|
||||
}
|
||||
}
|
||||
return parts.join('');
|
||||
}
|
||||
|
||||
|
||||
// copied from ACE
|
||||
var _REGEX_WORDCHAR = /[\u0030-\u0039\u0041-\u005A\u0061-\u007A\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF\u0100-\u1FFF\u3040-\u9FFF\uF900-\uFDFF\uFE70-\uFEFE\uFF10-\uFF19\uFF21-\uFF3A\uFF41-\uFF5A\uFF66-\uFFDC]/;
|
||||
var _REGEX_SPACE = /\s/;
|
||||
var _REGEX_URLCHAR = new RegExp('(' + /[-:@a-zA-Z0-9_.,~%+\/\\?=&#;()$]/.source + '|' + _REGEX_WORDCHAR.source + ')');
|
||||
var _REGEX_URL = new RegExp(/(?:(?:https?|s?ftp|ftps|file|smb|afp|nfs|(x-)?man|gopher|txmt):\/\/|mailto:)/.source + _REGEX_URLCHAR.source + '*(?![:.,;])' + _REGEX_URLCHAR.source, 'g');
|
||||
|
||||
// returns null if no URLs, or [[startIndex1, url1], [startIndex2, url2], ...]
|
||||
|
||||
function _findURLs(text)
|
||||
{
|
||||
_REGEX_URL.lastIndex = 0;
|
||||
var urls = null;
|
||||
var execResult;
|
||||
while ((execResult = _REGEX_URL.exec(text)))
|
||||
{
|
||||
urls = (urls || []);
|
||||
var startIndex = execResult.index;
|
||||
var url = execResult[0];
|
||||
urls.push([startIndex, url]);
|
||||
}
|
||||
|
||||
return urls;
|
||||
}
|
||||
|
Loading…
Add table
Add a link
Reference in a new issue