added innerHTML/attributes/node options to xpath/css filters
This commit is contained in:
parent
306b41a11f
commit
102fce7c17
4 changed files with 172 additions and 42 deletions
70
scraping.go
70
scraping.go
|
@ -338,6 +338,10 @@ func getURLContent(filter *Filter, fetchURL string) (string, error) {
|
|||
}
|
||||
|
||||
func getFilterResultXPath(filter *Filter) {
|
||||
selectType := "node"
|
||||
if filter.Var2 != nil {
|
||||
selectType = *filter.Var2
|
||||
}
|
||||
for _, parent := range filter.Parents {
|
||||
for _, result := range parent.Results {
|
||||
doc, err := htmlquery.Parse(strings.NewReader(result))
|
||||
|
@ -347,9 +351,40 @@ func getFilterResultXPath(filter *Filter) {
|
|||
}
|
||||
nodes, _ := htmlquery.QueryAll(doc, filter.Var1)
|
||||
for _, node := range nodes {
|
||||
switch selectType {
|
||||
case "inner":
|
||||
{
|
||||
// if the first child is a text node, assume there is nothing else (?) and just append its text
|
||||
if node.FirstChild != nil && node.FirstChild.Type == html.TextNode {
|
||||
filter.Results = append(filter.Results, html.UnescapeString(node.FirstChild.Data))
|
||||
continue
|
||||
}
|
||||
// otherwise there are more nodes; turn them all into a single string and add that as one result
|
||||
var result bytes.Buffer
|
||||
for child := node.FirstChild; child != nil; child = child.NextSibling {
|
||||
var b bytes.Buffer
|
||||
html.Render(&b, node)
|
||||
result.WriteString(b.String())
|
||||
}
|
||||
filter.Results = append(filter.Results, html.UnescapeString(result.String()))
|
||||
break
|
||||
}
|
||||
case "attr":
|
||||
{
|
||||
for _, attr := range node.Attr {
|
||||
result := fmt.Sprintf("%s=\"%s\"", attr.Key, attr.Val)
|
||||
filter.Results = append(filter.Results, html.UnescapeString(result))
|
||||
}
|
||||
break
|
||||
}
|
||||
default:
|
||||
{
|
||||
var b bytes.Buffer
|
||||
html.Render(&b, node)
|
||||
filter.Results = append(filter.Results, html.UnescapeString(b.String()))
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -366,6 +401,10 @@ func getFilterResultJSON(filter *Filter) {
|
|||
}
|
||||
|
||||
func getFilterResultCSS(filter *Filter) {
|
||||
selectType := "node"
|
||||
if filter.Var2 != nil {
|
||||
selectType = *filter.Var2
|
||||
}
|
||||
for _, parent := range filter.Parents {
|
||||
for _, result := range parent.Results {
|
||||
doc, err := html.Parse(strings.NewReader(result))
|
||||
|
@ -379,9 +418,40 @@ func getFilterResultCSS(filter *Filter) {
|
|||
continue
|
||||
}
|
||||
for _, node := range cascadia.QueryAll(doc, sel) {
|
||||
switch selectType {
|
||||
case "inner":
|
||||
{
|
||||
// if the first child is a text node, assume there is nothing else (?) and just append its text
|
||||
if node.FirstChild != nil && node.FirstChild.Type == html.TextNode {
|
||||
filter.Results = append(filter.Results, html.UnescapeString(node.FirstChild.Data))
|
||||
continue
|
||||
}
|
||||
// otherwise there are more nodes; turn them all into a single string and add that as one result
|
||||
var result bytes.Buffer
|
||||
for child := node.FirstChild; child != nil; child = child.NextSibling {
|
||||
var b bytes.Buffer
|
||||
html.Render(&b, node)
|
||||
result.WriteString(b.String())
|
||||
}
|
||||
filter.Results = append(filter.Results, html.UnescapeString(result.String()))
|
||||
break
|
||||
}
|
||||
case "attr":
|
||||
{
|
||||
for _, attr := range node.Attr {
|
||||
result := fmt.Sprintf("%s=\"%s\"", attr.Key, attr.Val)
|
||||
filter.Results = append(filter.Results, html.UnescapeString(result))
|
||||
}
|
||||
break
|
||||
}
|
||||
default:
|
||||
{
|
||||
var b bytes.Buffer
|
||||
html.Render(&b, node)
|
||||
filter.Results = append(filter.Results, html.UnescapeString(b.String()))
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -123,15 +123,30 @@ function onTypeChange(node) {
|
|||
var1Label.innerHTML = "XPath";
|
||||
var1Input.placeholder = "//a[@class='price]";
|
||||
var1Div.appendChild(var1Input);
|
||||
var var2Input = document.createElement("input");
|
||||
var2Input.name = "var2";
|
||||
var2Input.id = "var2Input";
|
||||
var2Input.value = var2Value;
|
||||
var2Input.classList.add("form-control");
|
||||
var2Input.disabled = true;
|
||||
var2Input.placeholder = "";
|
||||
var2Label.innerHTML = "-";
|
||||
var2Div.appendChild(var2Input);
|
||||
var select_1 = document.createElement("select");
|
||||
select_1.name = "var2";
|
||||
select_1.id = "var2Input";
|
||||
select_1.classList.add("form-control");
|
||||
var innerHTML = document.createElement("option");
|
||||
innerHTML.value = "inner";
|
||||
innerHTML.innerHTML = "innerHTML";
|
||||
select_1.appendChild(innerHTML);
|
||||
var attributes = document.createElement("option");
|
||||
attributes.value = "attr";
|
||||
attributes.innerHTML = "Attributes";
|
||||
select_1.appendChild(attributes);
|
||||
var node_1 = document.createElement("option");
|
||||
node_1.value = "node";
|
||||
node_1.innerHTML = "Node";
|
||||
select_1.appendChild(node_1);
|
||||
var2Div.appendChild(select_1);
|
||||
var2Label.innerHTML = "Select";
|
||||
if (var2Value == "") {
|
||||
select_1.value = "inner";
|
||||
}
|
||||
else {
|
||||
select_1.value = var2Value;
|
||||
}
|
||||
var var3Input = document.createElement("input");
|
||||
var3Input.name = "var3";
|
||||
var3Input.id = "var3Input";
|
||||
|
@ -179,14 +194,30 @@ function onTypeChange(node) {
|
|||
var1Label.innerHTML = "Selector";
|
||||
var1Input.placeholder = ".price";
|
||||
var1Div.appendChild(var1Input);
|
||||
var var2Input = document.createElement("input");
|
||||
var2Input.name = "var2";
|
||||
var2Input.id = "var2Input";
|
||||
var2Input.value = var2Value;
|
||||
var2Input.classList.add("form-control");
|
||||
var2Input.disabled = true;
|
||||
var2Label.innerHTML = "-";
|
||||
var2Div.appendChild(var2Input);
|
||||
var select_2 = document.createElement("select");
|
||||
select_2.name = "var2";
|
||||
select_2.id = "var2Input";
|
||||
select_2.classList.add("form-control");
|
||||
var innerHTML = document.createElement("option");
|
||||
innerHTML.value = "inner";
|
||||
innerHTML.innerHTML = "innerHTML";
|
||||
select_2.appendChild(innerHTML);
|
||||
var attributes = document.createElement("option");
|
||||
attributes.value = "attr";
|
||||
attributes.innerHTML = "Attributes";
|
||||
select_2.appendChild(attributes);
|
||||
var node_2 = document.createElement("option");
|
||||
node_2.value = "node";
|
||||
node_2.innerHTML = "Node";
|
||||
select_2.appendChild(node_2);
|
||||
var2Div.appendChild(select_2);
|
||||
var2Label.innerHTML = "Select";
|
||||
if (var2Value == "") {
|
||||
select_2.value = "inner";
|
||||
}
|
||||
else {
|
||||
select_2.value = var2Value;
|
||||
}
|
||||
var var3Input = document.createElement("input");
|
||||
var3Input.name = "var3";
|
||||
var3Input.id = "var3Input";
|
||||
|
@ -748,13 +779,13 @@ function onConditionChange(node) {
|
|||
filterSelect.classList.add("form-control");
|
||||
try {
|
||||
for (var _b = __values(_diagram.nodes.values()), _c = _b.next(); !_c.done; _c = _b.next()) {
|
||||
var node_1 = _c.value;
|
||||
if (node_1.type != "store") {
|
||||
var node_3 = _c.value;
|
||||
if (node_3.type != "store") {
|
||||
continue;
|
||||
}
|
||||
var nodeOption = document.createElement("option");
|
||||
nodeOption.value = node_1.label;
|
||||
nodeOption.innerHTML = node_1.label;
|
||||
nodeOption.value = node_3.label;
|
||||
nodeOption.innerHTML = node_3.label;
|
||||
filterSelect.appendChild(nodeOption);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -100,15 +100,29 @@ function onTypeChange(node: DiagramNode | null = null){
|
|||
var1Input.placeholder = "//a[@class='price]";
|
||||
var1Div.appendChild(var1Input);
|
||||
|
||||
let var2Input = document.createElement("input");
|
||||
var2Input.name = "var2";
|
||||
var2Input.id = "var2Input";
|
||||
var2Input.value = var2Value;
|
||||
var2Input.classList.add("form-control")
|
||||
var2Input.disabled = true;
|
||||
var2Input.placeholder = "";
|
||||
var2Label.innerHTML = "-";
|
||||
var2Div.appendChild(var2Input);
|
||||
let select = document.createElement("select");
|
||||
select.name = "var2";
|
||||
select.id = "var2Input";
|
||||
select.classList.add("form-control");
|
||||
let innerHTML = document.createElement("option");
|
||||
innerHTML.value = "inner"
|
||||
innerHTML.innerHTML = "innerHTML";
|
||||
select.appendChild(innerHTML);
|
||||
let attributes = document.createElement("option");
|
||||
attributes.value = "attr"
|
||||
attributes.innerHTML = "Attributes";
|
||||
select.appendChild(attributes);
|
||||
let node = document.createElement("option");
|
||||
node.value = "node"
|
||||
node.innerHTML = "Node";
|
||||
select.appendChild(node);
|
||||
var2Div.appendChild(select);
|
||||
var2Label.innerHTML = "Select";
|
||||
if (var2Value == ""){
|
||||
select.value = "inner";
|
||||
} else {
|
||||
select.value = var2Value;
|
||||
}
|
||||
|
||||
let var3Input = document.createElement("input");
|
||||
var3Input.name = "var3";
|
||||
|
@ -160,14 +174,29 @@ function onTypeChange(node: DiagramNode | null = null){
|
|||
var1Input.placeholder = ".price";
|
||||
var1Div.appendChild(var1Input);
|
||||
|
||||
let var2Input = document.createElement("input");
|
||||
var2Input.name = "var2";
|
||||
var2Input.id = "var2Input";
|
||||
var2Input.value = var2Value;
|
||||
var2Input.classList.add("form-control")
|
||||
var2Input.disabled = true;
|
||||
var2Label.innerHTML = "-";
|
||||
var2Div.appendChild(var2Input);
|
||||
let select = document.createElement("select");
|
||||
select.name = "var2";
|
||||
select.id = "var2Input";
|
||||
select.classList.add("form-control");
|
||||
let innerHTML = document.createElement("option");
|
||||
innerHTML.value = "inner"
|
||||
innerHTML.innerHTML = "innerHTML";
|
||||
select.appendChild(innerHTML);
|
||||
let attributes = document.createElement("option");
|
||||
attributes.value = "attr"
|
||||
attributes.innerHTML = "Attributes";
|
||||
select.appendChild(attributes);
|
||||
let node = document.createElement("option");
|
||||
node.value = "node"
|
||||
node.innerHTML = "Node";
|
||||
select.appendChild(node);
|
||||
var2Div.appendChild(select);
|
||||
var2Label.innerHTML = "Select";
|
||||
if (var2Value == ""){
|
||||
select.value = "inner";
|
||||
} else {
|
||||
select.value = var2Value;
|
||||
}
|
||||
|
||||
let var3Input = document.createElement("input");
|
||||
var3Input.name = "var3";
|
||||
|
|
8
todo.md
8
todo.md
|
@ -9,8 +9,8 @@
|
|||
- json
|
||||
- add index to docs/compose to fix link in pages
|
||||
- safe escape {{ }} for pages
|
||||
- xpath/css innerHTML option?
|
||||
- inner
|
||||
- attributes
|
||||
- node
|
||||
- ~~xpath/css innerHTML option?~~
|
||||
- ~~inner~~
|
||||
- ~~attributes~~
|
||||
- ~~node~~
|
||||
- tests for all of it
|
Loading…
Add table
Reference in a new issue