diff --git a/main.go b/main.go index 98e775c..aef5923 100644 --- a/main.go +++ b/main.go @@ -72,6 +72,9 @@ func (web Web) watchView(c *gin.Context) { var connections []FilterConnection web.db.Model(&FilterConnection{}).Where("watch_id = ?", watch.ID).Find(&connections) + buildFilterTree(filters, connections) + fillFilterResults(filters) + c.HTML(http.StatusOK, "watchView", gin.H{ "Watch": watch, "Filters": filters, diff --git a/models.go b/models.go index 6903194..3d729fd 100644 --- a/models.go +++ b/models.go @@ -12,7 +12,7 @@ type Filter struct { Name string `form:"filter_name" yaml:"filter_name" json:"filter_name" binding:"required" validate:"min=1"` X int `form:"x" yaml:"x" json:"x" validate:"default=0"` Y int `form:"y" yaml:"y" json:"y" validate:"default=0"` - Type string `form:"filter_type" yaml:"filter_type" json:"filter_type" binding:"required" validate:"oneof=url xpath json css replace match substring"` + Type string `form:"filter_type" yaml:"filter_type" json:"filter_type" binding:"required" validate:"oneof=url xpath json css replace match substring min max average count"` Var1 string `form:"var1" yaml:"var1" json:"var1" binding:"required"` Var2 *string `form:"var2" yaml:"var2" json:"var2"` Var3 *string `form:"var3" yaml:"var3" json:"var3"` diff --git a/scraping.go b/scraping.go index 33d373f..60aa5e2 100644 --- a/scraping.go +++ b/scraping.go @@ -2,7 +2,11 @@ package main import ( "bytes" + "fmt" + "io/ioutil" "log" + "math" + "net/http" "regexp" "strconv" "strings" @@ -13,8 +17,37 @@ import ( "golang.org/x/net/html" ) +func fillFilterResults(filters []Filter) { + processedMap := make(map[uint]bool, len(filters)) + for len(filters) > 0 { + filter := &filters[0] + filters = filters[1:] + var allParentsProcessed = true + for _, parent := range filter.Parents { + if _, contains := processedMap[parent.ID]; !contains { + allParentsProcessed = false + break + } + } + if !allParentsProcessed { + filters = append(filters, *filter) + continue + } + getFilterResult(filter) + processedMap[filter.ID] = true + } +} + func getFilterResult(filter *Filter) { switch { + case filter.Type == "gurl": + { + getFilterResultURL(filter) + } + case filter.Type == "gurls": + { + getFilterResultURL(filter) + } case filter.Type == "xpath": { getFilterResultXPath(filter) @@ -39,11 +72,42 @@ func getFilterResult(filter *Filter) { { getFilterResultSubstring(filter) } + case filter.Type == "min": + { + getFilterResultMin(filter) + } + case filter.Type == "max": + { + getFilterResultMax(filter) + } + case filter.Type == "average": + { + getFilterResultAverage(filter) + } + case filter.Type == "count": + { + getFilterResultCount(filter) + } default: - + log.Println("getFilterResult called with filter.Type == ", filter.Type) } } +func getFilterResultURL(filter *Filter) { + url := filter.Var1 + resp, err := http.Get(url) + if err != nil { + log.Println("Could not fetch url", url) + log.Println("Reason:", err) + } + body, err := ioutil.ReadAll(resp.Body) + if err != nil { + log.Println("Could not fetch url", url) + log.Println("Reason:", err) + } + filter.Results = append(filter.Results, string(body)) +} + func getFilterResultXPath(filter *Filter) { if filter.Parents == nil { log.Println("Filter", filter.Name, "called without parents for", filter.Type) @@ -100,7 +164,6 @@ func getFilterResultCSS(filter *Filter) { for _, node := range cascadia.QueryAll(doc, sel) { var b bytes.Buffer html.Render(&b, node) - log.Println(b.String()) filter.Results = append(filter.Results, html.UnescapeString(b.String())) } } @@ -140,9 +203,7 @@ func getFilterResultMatch(filter *Filter) { } for _, parent := range filter.Parents { for _, result := range parent.Results { - log.Println(">", result) for _, str := range r.FindAllString(result, -1) { - log.Println(">>", str) filter.Results = append(filter.Results, str) } } @@ -209,3 +270,64 @@ func getFilterResultSubstring(filter *Filter) { } } } + +func getFilterResultMin(filter *Filter) { + var min = math.MaxFloat64 + var setMin = false + for _, parent := range filter.Parents { + for _, result := range parent.Results { + if number, err := strconv.ParseFloat(result, 64); err == nil { + if number < min { + min = number + setMin = true + } + } + } + } + + if setMin { + filter.Results = append(filter.Results, fmt.Sprintf("%f", min)) + } +} + +func getFilterResultMax(filter *Filter) { + var max = -math.MaxFloat64 + var setMax = false + for _, parent := range filter.Parents { + for _, result := range parent.Results { + if number, err := strconv.ParseFloat(result, 64); err == nil { + if number > max { + max = number + setMax = true + } + } + } + } + + if setMax { + filter.Results = append(filter.Results, fmt.Sprintf("%f", max)) + } +} + +func getFilterResultAverage(filter *Filter) { + var sum float64 = 0.0 + var count float64 = 0.0 + for _, parent := range filter.Parents { + for _, result := range parent.Results { + if number, err := strconv.ParseFloat(result, 64); err == nil { + sum += number + count++ + } + } + } + filter.Results = append(filter.Results, fmt.Sprintf("%f", sum/count)) +} + +func getFilterResultCount(filter *Filter) { + var count = 0 + for _, parent := range filter.Parents { + count += len(parent.Children) + } + log.Println(fmt.Sprintf("%d", count)) + filter.Results = append(filter.Results, fmt.Sprintf("%d", count)) +} diff --git a/static/edit.js b/static/edit.js index 4b8a65f..2920192 100644 --- a/static/edit.js +++ b/static/edit.js @@ -32,14 +32,47 @@ var __values = (this && this.__values) || function(o) { function onTypeChange() { var select = document.getElementById("typeInput"); var type = select.value; + var var1Div = document.getElementById("var1Div"); var var1Input = document.getElementById("var1Input"); var var1Label = document.getElementById("var1Label"); + var var2Div = document.getElementById("var2Div"); var var2Input = document.getElementById("var2Input"); var var2Label = document.getElementById("var2Label"); + var var3Div = document.getElementById("var3Div"); var var3Input = document.getElementById("var3Input"); var var3Label = document.getElementById("var3Label"); switch (type) { + case "gurl": { + var1Div.innerHTML = ""; + var1Div.appendChild(var1Input); + var1Label.innerHTML = "URL"; + var1Input.placeholder = "https://shopping.website.com"; + var2Input.disabled = true; + var2Input.placeholder = ""; + var2Label.innerHTML = "-"; + var3Input.disabled = true; + var3Input.placeholder = ""; + var3Label.innerHTML = "-"; + break; + } + case "gurls": { + var1Div.innerHTML = ""; + var1Div.appendChild(var1Input); + var1Label.innerHTML = "-"; + var1Input.placeholder = "From parents"; + var1Input.value = "-"; + var1Input.disabled = true; + var2Input.disabled = true; + var2Input.placeholder = ""; + var2Label.innerHTML = "-"; + var3Input.disabled = true; + var3Input.placeholder = ""; + var3Label.innerHTML = "-"; + break; + } case "xpath": { + var1Div.innerHTML = ""; + var1Div.appendChild(var1Input); var1Label.innerHTML = "XPath"; var1Input.placeholder = "//a[@class='price"; var2Input.disabled = true; @@ -51,6 +84,8 @@ function onTypeChange() { break; } case "json": { + var1Div.innerHTML = ""; + var1Div.appendChild(var1Input); var1Label.innerHTML = "JSON"; var1Input.placeholder = "products.#.price"; var2Input.disabled = true; @@ -60,6 +95,8 @@ function onTypeChange() { break; } case "css": { + var1Div.innerHTML = ""; + var1Div.appendChild(var1Input); var1Label.innerHTML = "Selector"; var1Input.placeholder = ".price"; var2Input.disabled = true; @@ -69,6 +106,8 @@ function onTypeChange() { break; } case "replace": { + var1Div.innerHTML = ""; + var1Div.appendChild(var1Input); var1Label.innerHTML = "Regex"; var1Input.placeholder = "So[mM]e(thing|where)"; var2Input.disabled = false; @@ -78,6 +117,8 @@ function onTypeChange() { break; } case "match": { + var1Div.innerHTML = ""; + var1Div.appendChild(var1Input); var1Label.innerHTML = "Regex"; var1Input.placeholder = "So[mM]e(thing|where)"; var2Input.disabled = true; @@ -95,6 +136,36 @@ function onTypeChange() { var3Label.innerHTML = "-"; break; } + case "math": { + var mathSelect = document.createElement("select"); + mathSelect.classList.add("form-control"); + var mathOptionMin = document.createElement("option"); + mathOptionMin.value = "min"; + mathOptionMin.innerHTML = "Min"; + mathSelect.appendChild(mathOptionMin); + var mathOptionMax = document.createElement("option"); + mathOptionMax.value = "max"; + mathOptionMax.innerHTML = "Max"; + mathSelect.appendChild(mathOptionMax); + var mathOptionAvg = document.createElement("option"); + mathOptionAvg.value = "average"; + mathOptionAvg.innerHTML = "Average"; + mathSelect.appendChild(mathOptionAvg); + var mathOptionCount = document.createElement("option"); + mathOptionCount.value = "count"; + mathOptionCount.innerHTML = "Count"; + mathSelect.appendChild(mathOptionCount); + var1Input.remove(); + var1Div.appendChild(mathSelect); + var1Label.innerHTML = "Function"; + var2Input.disabled = true; + var2Input.placeholder = ""; + var2Label.innerHTML = "-"; + var3Input.disabled = true; + var3Input.placeholder = ""; + var3Label.innerHTML = "-"; + break; + } } } function onSubmitNewFilter() { diff --git a/static/edit.ts b/static/edit.ts index 53eeb40..473cbcd 100644 --- a/static/edit.ts +++ b/static/edit.ts @@ -2,17 +2,50 @@ function onTypeChange(){ let select = document.getElementById("typeInput") as HTMLSelectElement; let type = select.value; + let var1Div = document.getElementById("var1Div") as HTMLDivElement; let var1Input = document.getElementById("var1Input") as HTMLInputElement; let var1Label = document.getElementById("var1Label") as HTMLLabelElement; + let var2Div = document.getElementById("var2Div") as HTMLDivElement; let var2Input = document.getElementById("var2Input") as HTMLInputElement; let var2Label = document.getElementById("var2Label") as HTMLLabelElement; - + + let var3Div = document.getElementById("var3Div") as HTMLDivElement; let var3Input = document.getElementById("var3Input") as HTMLInputElement; let var3Label = document.getElementById("var3Label") as HTMLLabelElement; switch(type){ + case "gurl": { + var1Div.innerHTML = ""; + var1Div.appendChild(var1Input); + var1Label.innerHTML = "URL"; + var1Input.placeholder = "https://shopping.website.com"; + var2Input.disabled = true; + var2Input.placeholder = "" + var2Label.innerHTML = "-"; + var3Input.disabled = true; + var3Input.placeholder = "" + var3Label.innerHTML = "-"; + break; + } + case "gurls": { + var1Div.innerHTML = ""; + var1Div.appendChild(var1Input); + var1Label.innerHTML = "-"; + var1Input.placeholder = "From parents"; + var1Input.value = "-"; + var1Input.disabled = true; + var2Input.disabled = true; + var2Input.placeholder = "" + var2Label.innerHTML = "-"; + var3Input.disabled = true; + var3Input.placeholder = "" + var3Label.innerHTML = "-"; + break; + } case "xpath": { + var1Div.innerHTML = ""; + var1Div.appendChild(var1Input); var1Label.innerHTML = "XPath"; var1Input.placeholder = "//a[@class='price"; var2Input.disabled = true; @@ -24,6 +57,8 @@ function onTypeChange(){ break; } case "json": { + var1Div.innerHTML = ""; + var1Div.appendChild(var1Input); var1Label.innerHTML = "JSON"; var1Input.placeholder = "products.#.price"; var2Input.disabled = true; @@ -33,6 +68,8 @@ function onTypeChange(){ break; } case "css": { + var1Div.innerHTML = ""; + var1Div.appendChild(var1Input); var1Label.innerHTML = "Selector"; var1Input.placeholder = ".price"; var2Input.disabled = true; @@ -42,6 +79,8 @@ function onTypeChange(){ break; } case "replace": { + var1Div.innerHTML = ""; + var1Div.appendChild(var1Input); var1Label.innerHTML = "Regex"; var1Input.placeholder = "So[mM]e(thing|where)"; var2Input.disabled = false; @@ -51,6 +90,8 @@ function onTypeChange(){ break; } case "match": { + var1Div.innerHTML = ""; + var1Div.appendChild(var1Input); var1Label.innerHTML = "Regex"; var1Input.placeholder = "So[mM]e(thing|where)"; var2Input.disabled = true; @@ -68,6 +109,38 @@ function onTypeChange(){ var3Label.innerHTML = "-"; break; } + case "math": { + let mathSelect = document.createElement("select"); + mathSelect.classList.add("form-control"); + let mathOptionMin = document.createElement("option"); + mathOptionMin.value = "min" + mathOptionMin.innerHTML = "Min"; + mathSelect.appendChild(mathOptionMin); + let mathOptionMax = document.createElement("option") + mathOptionMax.value = "max"; + mathOptionMax.innerHTML = "Max"; + mathSelect.appendChild(mathOptionMax); + let mathOptionAvg = document.createElement("option") + mathOptionAvg.value = "average"; + mathOptionAvg.innerHTML = "Average"; + mathSelect.appendChild(mathOptionAvg); + let mathOptionCount = document.createElement("option") + mathOptionCount.value = "count"; + mathOptionCount.innerHTML = "Count"; + mathSelect.appendChild(mathOptionCount); + + var1Input.remove(); + var1Div.appendChild(mathSelect); + + var1Label.innerHTML = "Function"; + var2Input.disabled = true; + var2Input.placeholder = "" + var2Label.innerHTML = "-"; + var3Input.disabled = true; + var3Input.placeholder = "" + var3Label.innerHTML = "-"; + break; + } } } diff --git a/templates/watch/view.html b/templates/watch/view.html index 91d14e8..9e12630 100644 --- a/templates/watch/view.html +++ b/templates/watch/view.html @@ -37,24 +37,27 @@