Tweaks to 'XPath expression' and 'CSS selector' operations. Closes #13.

This commit is contained in:
n1474335 2016-12-20 18:49:25 +00:00
parent 650fd9a940
commit 39d50093ae
9 changed files with 189 additions and 169 deletions

View file

@ -243,6 +243,8 @@ const Categories = [
"SQL Minify",
"CSS Beautify",
"CSS Minify",
"XPath expression",
"CSS selector",
"Strip HTML tags",
"Diff",
]

View file

@ -1939,38 +1939,38 @@ const OperationConfig = {
]
},
"XPath expression": {
description: "Extract information from an xml document with an XPath query",
run: Extract.run_xpath,
description: "Extract information from an XML document with an XPath query",
run: Code.run_xpath,
input_type: "string",
output_type: "string",
args: [
{
name: "XPath",
type: "string",
value: Extract.XPATH_INITIAL
value: Code.XPATH_INITIAL
},
{
name: "Result delimiter",
type: "binary_short_string",
value: Extract.XPATH_DELIMITER
value: Code.XPATH_DELIMITER
}
]
},
"CSS selector": {
description: "Extract information from an HTML document with an CSS selector",
run: Extract.run_css_query,
description: "Extract information from an HTML document with a CSS selector",
run: Code.run_css_query,
input_type: "string",
output_type: "string",
args: [
{
name: "CSS selector",
type: "string",
value: Extract.SELECTOR_INITIAL
value: Code.CSS_SELECTOR_INITIAL
},
{
name: "Delimiter",
type: "binary_short_string",
value: Extract.CSS_QUERY_DELIMITER
value: Code.CSS_QUERY_DELIMITER
},
]
},

16
src/js/lib/xpath.js Normal file → Executable file
View file

@ -1,11 +1,11 @@
(function(){/*
* XPath.js - Pure JavaScript implementation of XPath 2.0 parser and evaluator
*
* Copyright (c) 2012 Sergey Ilinsky
* Dual licensed under the MIT and GPL licenses.
*
*
*/
/** @license
========================================================================
XPath.js - Pure JavaScript implementation of XPath 2.0 parser and evaluator
Copyright (c) 2012 Sergey Ilinsky
Dual licensed under the MIT and GPL licenses.
*/
(function(){
// Javascript objects
var cString = window.String,

View file

@ -1,4 +1,4 @@
/* globals prettyPrintOne, vkbeautify */
/* globals prettyPrintOne, vkbeautify, xpath */
/**
* Code operations.
@ -304,4 +304,119 @@ var Code = {
}
},
/**
 * Initial value of the XPath query argument: an empty query.
 *
 * @constant
 * @default
 */
XPATH_INITIAL: "",
/**
 * Initial value of the result delimiter argument.
 * The literal holds a backslash followed by "n" (two characters), not a line feed;
 * presumably the argument type unescapes it before run_xpath sees it - TODO confirm.
 *
 * @constant
 * @default
 */
XPATH_DELIMITER: "\\n",
/**
* XPath expression operation.
*
* @author Mikescher (https://github.com/Mikescher | https://mikescher.com)
* @param {string} input
* @param {Object[]} args
* @returns {string}
*/
run_xpath:function(input, args) {
const query = args[0],
delimiter = args[1];
var xml;
try {
xml = $.parseXML(input);
} catch (err) {
return "Invalid input XML.";
}
var result;
try {
result = xpath.evaluate(xml, query);
} catch (err) {
return "Invalid XPath. Details:\n" + err.message;
}
const serializer = new XMLSerializer();
const node_to_string = function(node) {
switch (node.nodeType) {
case Node.ELEMENT_NODE: return serializer.serializeToString(node);
case Node.ATTRIBUTE_NODE: return node.value;
case Node.COMMENT_NODE: return node.data;
case Node.DOCUMENT_NODE: return serializer.serializeToString(node);
default: throw new Error("Unknown Node Type: " + node.nodeType);
}
};
return Object.keys(result).map(function(key) {
return result[key];
}).slice(0, -1) // all values except last (length)
.map(node_to_string)
.join(delimiter);
},
/**
 * Initial value of the CSS selector argument: an empty selector.
 *
 * @constant
 * @default
 */
CSS_SELECTOR_INITIAL: "",
/**
 * Initial value of the delimiter argument.
 * The literal holds a backslash followed by "n" (two characters), not a line feed;
 * presumably the argument type unescapes it before run_css_query sees it - TODO confirm.
 *
 * @constant
 * @default
 */
CSS_QUERY_DELIMITER: "\\n",
/**
* CSS selector operation.
*
* @author Mikescher (https://github.com/Mikescher | https://mikescher.com)
* @param {string} input
* @param {Object[]} args
* @returns {string}
*/
run_css_query: function(input, args) {
const query = args[0],
delimiter = args[1];
var html;
try {
html = $.parseHTML(input);
} catch (err) {
return "Invalid input HTML.";
}
var result;
try {
result = $(html).find(query);
} catch (err) {
return "Invalid CSS Selector. Details:\n" + err.message;
}
const node_to_string = function(node) {
switch (node.nodeType) {
case Node.ELEMENT_NODE: return node.outerHTML;
case Node.ATTRIBUTE_NODE: return node.value;
case Node.COMMENT_NODE: return node.data;
case Node.TEXT_NODE: return node.wholeText;
case Node.DOCUMENT_NODE: return node.outerHTML;
default: throw new Error("Unknown Node Type: " + node.nodeType);
}
};
return Array.apply(null, Array(result.length))
.map(function(_, i) {
return result[i];
})
.map(node_to_string)
.join(delimiter);
},
};

View file

@ -1,5 +1,3 @@
/* globals xpath */
/**
* Identifier extraction operations.
*
@ -296,117 +294,4 @@ var Extract = {
return output;
},
/**
 * Initial value of the XPath query argument: an empty query.
 *
 * @constant
 * @default
 */
XPATH_INITIAL: "",
/**
 * Initial value of the result delimiter argument.
 * The literal holds a backslash followed by "n" (two characters), not a line feed;
 * presumably the argument type unescapes it before run_xpath sees it - TODO confirm.
 *
 * @constant
 * @default
 */
XPATH_DELIMITER: "\\n",
/**
* Extract information (from an xml document) with an XPath query
*
* @author Mikescher (https://github.com/Mikescher | https://mikescher.com)
*
* @param {string} input
* @param {Object[]} args
* @returns {string}
*/
run_xpath:function(input, args) {
const query = args[0];
const delimiter = args[1];
var xml;
try {
xml = $.parseXML(input);
} catch (err) {
return "Invalid input XML.";
}
var result;
try {
result = xpath.evaluate(xml, query);
} catch (err) {
return "Invalid XPath. Details:\n" + err.message;
}
const serializer = new XMLSerializer();
const nodeToString = function(node) {
switch (node.nodeType) {
case Node.ELEMENT_NODE: return serializer.serializeToString(node);
case Node.ATTRIBUTE_NODE: return node.value;
case Node.COMMENT_NODE: return node.data;
case Node.DOCUMENT_NODE: return serializer.serializeToString(node);
default: throw new Error("Unknown Node Type: " + node.nodeType);
}
};
return Object.values(result).slice(0, -1) // all values except last (length)
.map(nodeToString)
.join(delimiter);
},
/**
 * Initial value of the CSS selector argument: an empty selector.
 *
 * @constant
 * @default
 */
SELECTOR_INITIAL: "",
/**
 * Initial value of the delimiter argument.
 * The literal holds a backslash followed by "n" (two characters), not a line feed;
 * presumably the argument type unescapes it before run_css_query sees it - TODO confirm.
 *
 * @constant
 * @default
 */
CSS_QUERY_DELIMITER: "\\n",
/**
* Extract information (from an hmtl document) with an css selector
*
* @author Mikescher (https://github.com/Mikescher | https://mikescher.com)
*
* @param {string} input
* @param {Object[]} args
* @returns {string}
*/
run_css_query: function(input, args) {
const query = args[0];
const delimiter = args[1];
var html;
try {
html = $.parseHTML(input);
} catch (err) {
return "Invalid input HTML.";
}
var result;
try {
result = $(html).find(query);
} catch (err) {
return "Invalid CSS Selector. Details:\n" + err.message;
}
const nodeToString = function(node) {
switch (node.nodeType) {
case Node.ELEMENT_NODE: return node.outerHTML;
case Node.ATTRIBUTE_NODE: return node.value;
case Node.COMMENT_NODE: return node.data;
case Node.TEXT_NODE: return node.wholeText;
case Node.DOCUMENT_NODE: return node.outerHTML;
default: throw new Error("Unknown Node Type: " + node.nodeType);
}
};
return Array.apply(null, Array(result.length))
.map(function(_, i) {
return result[i];
})
.map(nodeToString)
.join(delimiter);
},
};