diff --git a/scraping.go b/scraping.go
index 4ceda6a..fe9c254 100644
--- a/scraping.go
+++ b/scraping.go
@@ -338,6 +338,10 @@ func getURLContent(filter *Filter, fetchURL string) (string, error) {
 }
 
 func getFilterResultXPath(filter *Filter) {
+	selectType := "node"
+	if filter.Var2 != nil {
+		selectType = *filter.Var2
+	}
 	for _, parent := range filter.Parents {
 		for _, result := range parent.Results {
 			doc, err := htmlquery.Parse(strings.NewReader(result))
@@ -347,9 +351,40 @@ func getFilterResultXPath(filter *Filter) {
 			}
 			nodes, _ := htmlquery.QueryAll(doc, filter.Var1)
 			for _, node := range nodes {
+				switch selectType {
+				case "inner":
+					{
+						// fast path: the only child is a text node, so its text is the whole inner content
+						if node.FirstChild != nil && node.FirstChild.Type == html.TextNode && node.FirstChild.NextSibling == nil {
+							filter.Results = append(filter.Results, html.UnescapeString(node.FirstChild.Data))
+							continue
+						}
+						// otherwise render every child (not the node itself) and join them into a single result
+						var inner bytes.Buffer
+						for child := node.FirstChild; child != nil; child = child.NextSibling {
+							var b bytes.Buffer
+							html.Render(&b, child)
+							inner.WriteString(b.String())
+						}
+						filter.Results = append(filter.Results, html.UnescapeString(inner.String()))
+						break
+					}
+				case "attr":
+					{
+						for _, attr := range node.Attr {
+							pair := fmt.Sprintf("%s=\"%s\"", attr.Key, attr.Val)
+							filter.Results = append(filter.Results, html.UnescapeString(pair))
+						}
+						break
+					}
+				default:
+					{
 				var b bytes.Buffer
 				html.Render(&b, node)
 				filter.Results = append(filter.Results, html.UnescapeString(b.String()))
+						break
+					}
+				}
 			}
 		}
 	}
@@ -366,6 +401,10 @@ func getFilterResultJSON(filter *Filter) {
 }
 
 func getFilterResultCSS(filter *Filter) {
+	selectType := "node"
+	if filter.Var2 != nil {
+		selectType = *filter.Var2
+	}
 	for _, parent := range filter.Parents {
 		for _, result := range parent.Results {
 			doc, err := html.Parse(strings.NewReader(result))
@@ -379,9 +418,40 @@ func getFilterResultCSS(filter *Filter) {
 				continue
 			}
 			for _, node := range cascadia.QueryAll(doc, sel) {
+				switch selectType {
+				case "inner":
+					{
+						// fast path: the only child is a text node, so its text is the whole inner content
+						if node.FirstChild != nil && node.FirstChild.Type == html.TextNode && node.FirstChild.NextSibling == nil {
+							filter.Results = append(filter.Results, html.UnescapeString(node.FirstChild.Data))
+							continue
+						}
+						// otherwise render every child (not the node itself) and join them into a single result
+						var inner bytes.Buffer
+						for child := node.FirstChild; child != nil; child = child.NextSibling {
+							var b bytes.Buffer
+							html.Render(&b, child)
+							inner.WriteString(b.String())
+						}
+						filter.Results = append(filter.Results, html.UnescapeString(inner.String()))
+						break
+					}
+				case "attr":
+					{
+						for _, attr := range node.Attr {
+							pair := fmt.Sprintf("%s=\"%s\"", attr.Key, attr.Val)
+							filter.Results = append(filter.Results, html.UnescapeString(pair))
+						}
+						break
+					}
+				default:
+					{
 				var b bytes.Buffer
 				html.Render(&b, node)
 				filter.Results = append(filter.Results, html.UnescapeString(b.String()))
+						break
+					}
+				}
 			}
 		}
 	}
diff --git a/static/edit.js b/static/edit.js
index 9f66562..cceee1f 100644
--- a/static/edit.js
+++ b/static/edit.js
@@ -123,15 +123,30 @@ function onTypeChange(node) {
             var1Label.innerHTML = "XPath";
             var1Input.placeholder = "//a[@class='price]";
             var1Div.appendChild(var1Input);
-            var var2Input = document.createElement("input");
-            var2Input.name = "var2";
-            var2Input.id = "var2Input";
-            var2Input.value = var2Value;
-            var2Input.classList.add("form-control");
-            var2Input.disabled = true;
-            var2Input.placeholder = "";
-            var2Label.innerHTML = "-";
-            var2Div.appendChild(var2Input);
+            var select_1 = document.createElement("select");
+            select_1.name = "var2";
+            select_1.id = "var2Input";
+            select_1.classList.add("form-control");
+            var innerHTML = document.createElement("option");
+            innerHTML.value = "inner";
+            innerHTML.innerHTML = "innerHTML";
+            select_1.appendChild(innerHTML);
+            var attributes = document.createElement("option");
+            attributes.value = "attr";
+            attributes.innerHTML = "Attributes";
+            select_1.appendChild(attributes);
+            var node_1 = document.createElement("option");
+            node_1.value = "node";
+            node_1.innerHTML = "Node";
+            select_1.appendChild(node_1);
+            var2Div.appendChild(select_1);
+            var2Label.innerHTML = "Select";
+            if (var2Value == "") {
+                select_1.value = "inner";
+            }
+            else {
+                select_1.value = var2Value;
+            }
             var var3Input = document.createElement("input");
             var3Input.name = "var3";
             var3Input.id = "var3Input";
@@ -179,14 +194,30 @@ function onTypeChange(node) {
             var1Label.innerHTML = "Selector";
             var1Input.placeholder = ".price";
             var1Div.appendChild(var1Input);
-            var var2Input = document.createElement("input");
-            var2Input.name = "var2";
-            var2Input.id = "var2Input";
-            var2Input.value = var2Value;
-            var2Input.classList.add("form-control");
-            var2Input.disabled = true;
-            var2Label.innerHTML = "-";
-            var2Div.appendChild(var2Input);
+            var select_2 = document.createElement("select");
+            select_2.name = "var2";
+            select_2.id = "var2Input";
+            select_2.classList.add("form-control");
+            var innerHTML = document.createElement("option");
+            innerHTML.value = "inner";
+            innerHTML.innerHTML = "innerHTML";
+            select_2.appendChild(innerHTML);
+            var attributes = document.createElement("option");
+            attributes.value = "attr";
+            attributes.innerHTML = "Attributes";
+            select_2.appendChild(attributes);
+            var node_2 = document.createElement("option");
+            node_2.value = "node";
+            node_2.innerHTML = "Node";
+            select_2.appendChild(node_2);
+            var2Div.appendChild(select_2);
+            var2Label.innerHTML = "Select";
+            if (var2Value == "") {
+                select_2.value = "inner";
+            }
+            else {
+                select_2.value = var2Value;
+            }
             var var3Input = document.createElement("input");
             var3Input.name = "var3";
             var3Input.id = "var3Input";
@@ -748,13 +779,13 @@ function onConditionChange(node) {
             filterSelect.classList.add("form-control");
             try {
                 for (var _b = __values(_diagram.nodes.values()), _c = _b.next(); !_c.done; _c = _b.next()) {
-                    var node_1 = _c.value;
-                    if (node_1.type != "store") {
+                    var node_3 = _c.value;
+                    if (node_3.type != "store") {
                         continue;
                     }
                     var nodeOption = document.createElement("option");
-                    nodeOption.value = node_1.label;
-                    nodeOption.innerHTML = node_1.label;
+                    nodeOption.value = node_3.label;
+                    nodeOption.innerHTML = node_3.label;
                     filterSelect.appendChild(nodeOption);
                 }
             }
diff --git a/static/edit.ts b/static/edit.ts
index 86a458c..42aa373 100644
--- a/static/edit.ts
+++ b/static/edit.ts
@@ -100,15 +100,29 @@ function onTypeChange(node: DiagramNode | null = null){
             var1Input.placeholder = "//a[@class='price]";
             var1Div.appendChild(var1Input);
 
-            let var2Input = document.createElement("input");
-            var2Input.name = "var2";
-            var2Input.id = "var2Input";
-            var2Input.value = var2Value;
-            var2Input.classList.add("form-control")
-            var2Input.disabled = true;
-            var2Input.placeholder = "";
-            var2Label.innerHTML = "-";
-            var2Div.appendChild(var2Input);
+            let select = document.createElement("select");
+            select.name = "var2";
+            select.id = "var2Input";
+            select.classList.add("form-control");
+            let innerHTML = document.createElement("option");
+            innerHTML.value = "inner"
+            innerHTML.innerHTML = "innerHTML";
+            select.appendChild(innerHTML);
+            let attributes = document.createElement("option");
+            attributes.value = "attr"
+            attributes.innerHTML = "Attributes";
+            select.appendChild(attributes);
+            let node = document.createElement("option");
+            node.value = "node"
+            node.innerHTML = "Node";
+            select.appendChild(node);
+            var2Div.appendChild(select);
+            var2Label.innerHTML = "Select";
+            if (var2Value == ""){
+                select.value = "inner";
+            } else {
+                select.value = var2Value;
+            }
 
             let var3Input = document.createElement("input");
             var3Input.name = "var3";
@@ -160,14 +174,29 @@ function onTypeChange(node: DiagramNode | null = null){
             var1Input.placeholder = ".price";
             var1Div.appendChild(var1Input);
 
-            let var2Input = document.createElement("input");
-            var2Input.name = "var2";
-            var2Input.id = "var2Input";
-            var2Input.value = var2Value;
-            var2Input.classList.add("form-control")
-            var2Input.disabled = true;
-            var2Label.innerHTML = "-";
-            var2Div.appendChild(var2Input);
+            let select = document.createElement("select");
+            select.name = "var2";
+            select.id = "var2Input";
+            select.classList.add("form-control");
+            let innerHTML = document.createElement("option");
+            innerHTML.value = "inner"
+            innerHTML.innerHTML = "innerHTML";
+            select.appendChild(innerHTML);
+            let attributes = document.createElement("option");
+            attributes.value = "attr"
+            attributes.innerHTML = "Attributes";
+            select.appendChild(attributes);
+            let node = document.createElement("option");
+            node.value = "node"
+            node.innerHTML = "Node";
+            select.appendChild(node);
+            var2Div.appendChild(select);
+            var2Label.innerHTML = "Select";
+            if (var2Value == ""){
+                select.value = "inner";
+            } else {
+                select.value = var2Value;
+            }
 
             let var3Input = document.createElement("input");
             var3Input.name = "var3";
diff --git a/todo.md b/todo.md
index f81e46b..0d06c67 100644
--- a/todo.md
+++ b/todo.md
@@ -9,8 +9,8 @@
   - json
 - add index to docs/compose to fix link in pages
 - safe escape {{ }} for pages
-- xpath/css innerthtml option?
-  - inner
-  - attributes
-  - node
+- ~~xpath/css innerthtml option?~~
+  - ~~inner~~
+  - ~~attributes~~
+  - ~~node~~
 - tests for all of it
\ No newline at end of file