diff --git a/main.go b/main.go index 0937f17..3702b43 100644 --- a/main.go +++ b/main.go @@ -1,6 +1,7 @@ package main import ( + "encoding/json" "fmt" "log" "net/http" @@ -59,41 +60,64 @@ func (web Web) deleteWatch(c *gin.Context) { c.Redirect(http.StatusSeeOther, "/") } +type FilterDepth struct { + Filter Filter + Depth int +} + func (web Web) viewWatch(c *gin.Context) { id := c.Param("id") var watch Watch - web.db.Model(&Watch{}).Preload("URLs.GroupFilters.Filters").First(&watch, id) - c.HTML(http.StatusOK, "viewWatch", watch) -} + web.db.Model(&Watch{}).First(&watch, id) -func (web Web) createURL(c *gin.Context) { - var url URL - errMap, err := bindAndValidateURL(&url, c) - if err != nil { - log.Print(err) - c.HTML(http.StatusInternalServerError, "500", errMap) - return - } - web.db.Create(&url) - c.Redirect(http.StatusSeeOther, fmt.Sprintf("/watch/view/%d", url.WatchID)) -} + var filters []Filter + web.db.Model(&Filter{}).Find(&filters) -func (web Web) createFilterGroup(c *gin.Context) { - watch_id, err := strconv.ParseUint(c.PostForm("w_id"), 10, 64) - if err != nil { - log.Print(err) - c.HTML(http.StatusInternalServerError, "500", gin.H{}) - return + queuedFilters := []*Filter{} + filterMap := make(map[uint]*Filter) + for _, filter := range filters { + filterMap[filter.ID] = &filter + if filter.ParentID == nil { + queuedFilters = append(queuedFilters, &filter) + } + s, _ := json.MarshalIndent(filter, "", "\t") + fmt.Println(s) } - var group FilterGroup - errMap, err := bindAndValidateGroup(&group, c) - if err != nil { - c.HTML(http.StatusBadRequest, "500", errMap) - return + + for _, filter := range filterMap { + if filter.Parent != nil { + parent := filterMap[*filter.ParentID] + parent.Filters = append(parent.Filters, *filter) + } } - web.db.Create(&group) - c.Redirect(http.StatusSeeOther, fmt.Sprintf("/watch/view/%d", watch_id)) + + nextFilters := []*Filter{} + bftFilters := []FilterDepth{} + depth := 0 + for len(queuedFilters) > 0 { + for _, f1 := range queuedFilters { + bftFilters = append(bftFilters, FilterDepth{ + Filter: *f1, + Depth: depth, + }) + for _, f2 := range f1.Filters { + nextFilters = append(nextFilters, &f2) + } + } + log.Println(nextFilters) + queuedFilters = nextFilters + log.Println(queuedFilters) + nextFilters = []*Filter{} + log.Println(nextFilters) + depth += 1 + } + + c.HTML(http.StatusOK, "viewWatch", gin.H{ + "Watch": watch, + "Filters": bftFilters, + "MaxDepth": depth, + }) } func (web Web) createFilter(c *gin.Context) { @@ -105,7 +129,7 @@ func (web Web) createFilter(c *gin.Context) { return } web.db.Create(&filter) - c.Redirect(http.StatusSeeOther, fmt.Sprintf("/group/edit/%d", filter.FilterGroupID)) + c.Redirect(http.StatusSeeOther, "/group/edit") } func (web Web) updateFilter(c *gin.Context) { @@ -120,10 +144,10 @@ func (web Web) updateFilter(c *gin.Context) { web.db.First(&filter, filterUpdate.ID) filter.Name = filterUpdate.Name filter.Type = filterUpdate.Type - filter.From = filterUpdate.From - filter.To = filterUpdate.To + filter.Var1 = filterUpdate.From + filter.Var2 = &filterUpdate.To web.db.Save(&filter) - c.Redirect(http.StatusSeeOther, fmt.Sprintf("/group/edit/%d", +filter.FilterGroupID)) + c.Redirect(http.StatusSeeOther, "/group/edit/") } func (web Web) deleteFilter(c *gin.Context) { @@ -138,37 +162,6 @@ func (web Web) deleteFilter(c *gin.Context) { c.Redirect(http.StatusSeeOther, "/group/edit/"+group_id) } -func (web Web) editGroup(c *gin.Context) { - group_id, err := strconv.ParseUint(c.Param("id"), 10, 64) - if err != nil { - c.Redirect(http.StatusSeeOther, "/watch/new") - return // TODO response - } - var group FilterGroup - web.db.Preload("URL.Watch").Preload("Filters").Preload("URL").First(&group, group_id) - - c.HTML(http.StatusOK, "editGroup", gin.H{ - "Group": group, - "currentResult": getGroupResult(&group), - }) -} - -func (web Web) updateGroup(c *gin.Context) { - var groupUpdate FilterGroupUpdate - errMap, err := bindAndValidateGroupUpdate(&groupUpdate, c) - if err != nil { - log.Print(err) - c.HTML(http.StatusBadRequest, "500", errMap) - return - } - var group FilterGroup - web.db.First(&group, groupUpdate.ID) - group.Name = groupUpdate.Name - group.Type = groupUpdate.Type - web.db.Save(&group) - c.Redirect(http.StatusSeeOther, fmt.Sprintf("/group/edit/%d", +group.ID)) -} - func passiveBot(bot *tgbotapi.BotAPI) { u := tgbotapi.NewUpdate(0) u.Timeout = 60 @@ -212,7 +205,35 @@ func main() { } db, _ := gorm.Open(sqlite.Open(viper.GetString("database.dsn"))) - db.AutoMigrate(&Watch{}, &URL{}, &FilterGroup{}, &Filter{}) + db.AutoMigrate(&Watch{}, &Filter{}) + + filters := []Filter{} + watch := Watch{ + Name: "LG C2 42", + Interval: 60, + Filters: filters, + } + db.Create(&watch) + + urlFilter := Filter{ + WatchID: watch.ID, + ParentID: nil, + Parent: nil, + Name: "PriceWatch Fetch", + Type: "url", + Var1: "https://tweakers.net/pricewatch/1799060/lg-c2-42-inch-donkerzilveren-voet-zwart.html", + } + db.Create(&urlFilter) + + xpathFilter := Filter{ + WatchID: watch.ID, + Watch: watch, + ParentID: &urlFilter.ID, + Name: "price select", + Type: "xpath", + Var1: "//td[@class='shop-price']", + } + db.Create(&xpathFilter) //bot, _ := tgbotapi.NewBotAPI(viper.GetString("telegram.token")) @@ -244,10 +265,6 @@ func main() { router.POST("/watch/create", web.createWatch) router.POST("/watch/delete", web.deleteWatch) router.GET("/watch/view/:id/", web.viewWatch) - router.POST("/url/create/", web.createURL) - router.POST("/group/create/", web.createFilterGroup) - router.GET("/group/edit/:id", web.editGroup) - router.POST("/group/update", web.updateGroup) router.POST("/filter/create/", web.createFilter) router.POST("/filter/update/", web.updateFilter) router.POST("/filter/delete/", web.deleteFilter) diff --git a/models.go b/models.go index 7dd06e3..56012c5 100644 --- a/models.go +++ b/models.go @@ -8,33 +8,20 @@ type Watch struct { gorm.Model Name string `form:"watch_name" yaml:"watch_name" binding:"required" validate:"min=1"` Interval int `form:"interval" yaml:"interval" binding:"required"` - URLs []URL -} - -type URL struct { - gorm.Model - WatchID uint `form:"url_watch_id" yaml:"url_watch_id" binding:"required"` - Watch *Watch `form:"watch" yaml:"watch" validate:"omitempty"` - Name string `form:"url_name" yaml:"url_name" binding:"required" validate:"min=1"` - URL string `form:"url" yaml:"url" binding:"required,url" validate:"min=1"` - GroupFilters []FilterGroup -} - -type FilterGroup struct { - gorm.Model - URLID uint `form:"group_url_id" yaml:"group_url_id" binding:"required"` - URL *URL - Name string `form:"group_name" yaml:"group_name" binding:"required" validate:"min=1"` - Type string `form:"group_type" yaml:"group_type" binding:"required" validate:"oneof=diff enum number bool"` - Filters []Filter + Filters []Filter } type Filter struct { gorm.Model - FilterGroupID uint `form:"filter_group_id" yaml:"filter_group_id" binding:"required"` - FilterGroup *FilterGroup - Name string `form:"filter_name" yaml:"filter_name" binding:"required" validate:"min=1"` - Type string `form:"filter_type" yaml:"filter_type" binding:"required" validate:"oneof=xpath json css replace match substring"` - From string `form:"from" yaml:"from" binding:"required"` - To string `form:"to" yaml:"to" binding:"required"` + WatchID uint `form:"filter_watch_id" yaml:"filter_watch_id" binding:"required"` + Watch Watch + ParentID *uint `form:"parent_id" yaml:"parent_id"` + Parent *Filter `form:"parent_id" yaml:"parent_id"` + Name string `form:"filter_name" yaml:"filter_name" binding:"required" validate:"min=1"` + Type string `form:"filter_type" yaml:"filter_type" binding:"required" validate:"oneof=url xpath json css replace match substring"` + Var1 string `form:"var1" yaml:"var1" binding:"required"` + Var2 *string `form:"var2" yaml:"var2"` + Var3 *string `form:"var3" yaml:"var3"` + Filters []Filter `gorm:"-:all"` + results []string `gorm:"-:all"` } diff --git a/scraping.go b/scraping.go index 19234ec..3339250 100644 --- a/scraping.go +++ b/scraping.go @@ -2,9 +2,7 @@ package main import ( "bytes" - "io/ioutil" "log" - "net/http" "regexp" "strconv" "strings" @@ -15,170 +13,191 @@ import ( "golang.org/x/net/html" ) -func getGroupResult(group *FilterGroup) []string { - resp, err := http.Get(group.URL.URL) - if err != nil { - log.Print("Something went wrong loading", group.URL.URL) - return []string{} +func getFilterResults(filter *Filter) { + getFilterResult(filter) + for _, filter := range filter.Filters { + getFilterResults(&filter) } - defer resp.Body.Close() - html, err := ioutil.ReadAll(resp.Body) - if err != nil { - log.Print("Something went wrong loading ", group.URL.URL) - return []string{} - } - resultStrings := []string{string(html)} - newStrings := []string{} - for _, filter := range group.Filters { - for _, resultString := range resultStrings { - getFilterResult(resultString, &filter, &newStrings) - } - resultStrings = newStrings - newStrings = nil - } - return resultStrings } -func getFilterResult(s string, filter *Filter, newStrings *[]string) { +func getFilterResult(filter *Filter) { switch { case filter.Type == "xpath": { - getFilterResultXPath(s, filter, newStrings) + getFilterResultXPath(filter) } case filter.Type == "json": { - getFilterResultJSON(s, filter, newStrings) + getFilterResultJSON(filter) } case filter.Type == "css": { - getFilterResultCSS(s, filter, newStrings) + getFilterResultCSS(filter) } case filter.Type == "replace": { - getFilterResultReplace(s, filter, newStrings) + getFilterResultReplace(filter) } case filter.Type == "match": { - getFilterResultMatch(s, filter, newStrings) + getFilterResultMatch(filter) } case filter.Type == "substring": { - getFilterResultSubstring(s, filter, newStrings) + getFilterResultSubstring(filter) } default: } } -func getFilterResultXPath(s string, filter *Filter, newStrings *[]string) { - doc, err := htmlquery.Parse(strings.NewReader(s)) - if err != nil { - log.Print(err) +func getFilterResultXPath(filter *Filter) { + if filter.Parent == nil { + log.Println("Filter", filter.Name, "called without parent for", filter.Type) return } - nodes, _ := htmlquery.QueryAll(doc, filter.From) - for _, node := range nodes { - var b bytes.Buffer - html.Render(&b, node) - *newStrings = append(*newStrings, html.UnescapeString(b.String())) - } -} - -func getFilterResultJSON(s string, filter *Filter, newStrings *[]string) { - - for _, result := range gjson.Get(s, filter.From).Array() { - *newStrings = append(*newStrings, result.String()) - } -} - -func getFilterResultCSS(s string, filter *Filter, newStrings *[]string) { - doc, err := html.Parse(strings.NewReader(s)) - if err != nil { - log.Print(err) - return - } - sel, err := cascadia.Parse(filter.From) - if err != nil { - log.Print(err) - return - } - for _, node := range cascadia.QueryAll(doc, sel) { - var b bytes.Buffer - html.Render(&b, node) - *newStrings = append(*newStrings, html.UnescapeString(b.String())) - } -} - -func getFilterResultReplace(s string, filter *Filter, newStrings *[]string) { - r, err := regexp.Compile(filter.From) - if err != nil { - log.Print(err) - return - } - *newStrings = append(*newStrings, r.ReplaceAllString(s, filter.To)) -} - -func getFilterResultMatch(s string, filter *Filter, newStrings *[]string) { - r, err := regexp.Compile(filter.From) - if err != nil { - log.Print(err) - return - } - for _, str := range r.FindAllString(s, -1) { - - *newStrings = append(*newStrings, str) - } -} - -func getFilterResultSubstring(s string, filter *Filter, newStrings *[]string) { - substrings := strings.Split(filter.From, ",") - var sb strings.Builder - asRunes := []rune(s) - - for _, substring := range substrings { - if strings.Contains(substring, ":") { - from_to := strings.Split(substring, ":") - if len(from_to) != 2 { - return - } - fromStr := from_to[0] - var hasFrom bool = true - if fromStr == "" { - hasFrom = false - } - from64, err := strconv.ParseInt(fromStr, 10, 32) - var from = int(from64) - if hasFrom && err != nil { - return - } else if from < 0 { - from = len(asRunes) + from - } - toStr := from_to[1] - var hasTo bool = true - if toStr == "" { - hasTo = false - } - to64, err := strconv.ParseInt(toStr, 10, 32) - var to = int(to64) - if hasTo && err != nil { - return - } else if to < 0 { - to = len(asRunes) + to - } - if hasFrom && hasTo { - sb.WriteString(string(asRunes[from:to])) - } else if hasFrom { - sb.WriteString(string(asRunes[from:])) - } else if hasTo { - sb.WriteString(string(asRunes[:to])) - } - } else { - pos, err := strconv.ParseInt(substring, 10, 32) - if err != nil || pos < 0 { - return - } - sb.WriteRune(asRunes[pos]) + for _, result := range filter.Parent.results { + doc, err := htmlquery.Parse(strings.NewReader(result)) + if err != nil { + log.Print(err) + continue + } + nodes, _ := htmlquery.QueryAll(doc, filter.Var1) + for _, node := range nodes { + var b bytes.Buffer + html.Render(&b, node) + filter.results = append(filter.results, html.UnescapeString(b.String())) } } - *newStrings = append(*newStrings, sb.String()) +} + +func getFilterResultJSON(filter *Filter) { + if filter.Parent == nil { + log.Println("Filter", filter.Name, "called without parent for", filter.Type) + return + } + for _, result := range filter.Parent.results { + for _, match := range gjson.Get(result, filter.Var1).Array() { + filter.results = append(filter.results, match.String()) + } + } +} + +func getFilterResultCSS(filter *Filter) { + if filter.Parent == nil { + log.Println("Filter", filter.Name, "called without parent for", filter.Type) + return + } + for _, result := range filter.results { + doc, err := html.Parse(strings.NewReader(result)) + if err != nil { + log.Print(err) + continue + } + sel, err := cascadia.Parse(filter.Var1) + if err != nil { + log.Print(err) + continue + } + for _, node := range cascadia.QueryAll(doc, sel) { + var b bytes.Buffer + html.Render(&b, node) + filter.results = append(filter.results, html.UnescapeString(b.String())) + } + } +} + +func getFilterResultReplace(filter *Filter) { + if filter.Parent == nil { + log.Println("Filter", filter.Name, "called without parent for", filter.Type) + return + } + for _, result := range filter.results { + r, err := regexp.Compile(filter.Var1) + if err != nil { + log.Print(err) + continue + } + if filter.Var2 == nil { + filter.results = append(filter.results, r.ReplaceAllString(result, "")) + } else { + filter.results = append(filter.results, r.ReplaceAllString(result, *filter.Var2)) + } + } +} + +func getFilterResultMatch(filter *Filter) { + if filter.Parent == nil { + log.Println("Filter", filter.Name, "called without parent for", filter.Type) + return + } + for _, result := range filter.results { + r, err := regexp.Compile(filter.Var1) + if err != nil { + log.Print(err) + continue + } + for _, str := range r.FindAllString(result, -1) { + filter.results = append(filter.results, str) + } + } +} + +func getFilterResultSubstring(filter *Filter) { + if filter.Parent == nil { + log.Println("Filter", filter.Name, "called without parent for", filter.Type) + return + } + for _, result := range filter.results { + substrings := strings.Split(filter.Var1, ",") + var sb strings.Builder + asRunes := []rune(result) + + for _, substring := range substrings { + if strings.Contains(substring, ":") { + from_to := strings.Split(substring, ":") + if len(from_to) != 2 { + return + } + fromStr := from_to[0] + var hasFrom bool = true + if fromStr == "" { + hasFrom = false + } + from64, err := strconv.ParseInt(fromStr, 10, 32) + var from = int(from64) + if hasFrom && err != nil { + return + } else if from < 0 { + from = len(asRunes) + from + } + toStr := from_to[1] + var hasTo bool = true + if toStr == "" { + hasTo = false + } + to64, err := strconv.ParseInt(toStr, 10, 32) + var to = int(to64) + if hasTo && err != nil { + return + } else if to < 0 { + to = len(asRunes) + to + } + if hasFrom && hasTo { + sb.WriteString(string(asRunes[from:to])) + } else if hasFrom { + sb.WriteString(string(asRunes[from:])) + } else if hasTo { + sb.WriteString(string(asRunes[:to])) + } + } else { + pos, err := strconv.ParseInt(substring, 10, 32) + if err != nil || pos < 0 { + return + } + sb.WriteRune(asRunes[pos]) + } + } + filter.results = append(filter.results, sb.String()) + } } diff --git a/scraping_test.go b/scraping_test.go index 0ea4c8f..6e4e92b 100644 --- a/scraping_test.go +++ b/scraping_test.go @@ -57,11 +57,9 @@ func TestFilterXPath(t *testing.T) { t.Run(testname, func(t *testing.T) { want := []string{} getFilterResultXPath( - HTML_STRING, &Filter{ - From: test.Query, + Var1: test.Query, }, - &want, ) if !reflect.DeepEqual(test.Want, want) { t.Errorf("Got %s, want %s", want, test.Want) @@ -86,11 +84,9 @@ func TestFilterJSON(t *testing.T) { t.Run(testname, func(t *testing.T) { want := []string{} getFilterResultJSON( - JSON_STRING, &Filter{ - From: test.Query, + Var1: test.Query, }, - &want, ) if !reflect.DeepEqual(test.Want, want) { t.Errorf("Got %s, want %s", want, test.Want) @@ -116,11 +112,9 @@ func TestFilterCSS(t *testing.T) { t.Run(testname, func(t *testing.T) { want := []string{} getFilterResultCSS( - HTML_STRING, &Filter{ - From: test.Query, + Var1: test.Query, }, - &want, ) if !reflect.DeepEqual(test.Want, want) { t.Errorf("Got %s, want %s", want, test.Want) @@ -149,11 +143,9 @@ func TestFilterReplace(t *testing.T) { t.Run(testname, func(t *testing.T) { want := []string{test.Want} getFilterResultReplace( - test.Input, &Filter{ - From: test.Query, + Var1: test.Query, }, - &want, ) if want[0] != test.Want { t.Errorf("Got %s, want %s", want[0], test.Want) @@ -180,11 +172,9 @@ func TestFilterMatch(t *testing.T) { t.Run(testname, func(t *testing.T) { want := []string{} getFilterResultMatch( - test.Input, &Filter{ - From: test.Query, + Var1: test.Query, }, - &want, ) if !reflect.DeepEqual(test.Want, want) { t.Errorf("Got %s, want %s", want, test.Want) @@ -229,11 +219,9 @@ func TestFilterSubstring(t *testing.T) { t.Run(testname, func(t *testing.T) { want := []string{test.Want} getFilterResultSubstring( - test.Input, &Filter{ - From: test.Query, + Var1: test.Query, }, - &want, ) if want[0] != test.Want { t.Errorf("Got %s, want %s", want[0], test.Want) diff --git a/templates/viewWatch.html b/templates/viewWatch.html index dad4119..7faeb30 100644 --- a/templates/viewWatch.html +++ b/templates/viewWatch.html @@ -2,103 +2,23 @@
- {{ .Name }} + {{ .Watch.Name }}
- {{ $.Interval }} + {{ .Watch.Interval }}
-{{ range .URLs }} -
-
-
{{ .Name }}
-
-
-
{{ .URL }}
-
-
- {{ range .GroupFilters }} -
-
-
-
{{ .Name }}
- -
-
- {{ .Type }} -
-
-
- - - - - {{ if .Filters }} - {{ range .Filters }} - - - - - - - - {{ end }} - {{ else }} - - - - {{ end }} - -
{{ .Name }}{{ .Type }}{{ .From }}{{ .To }}
No filters yet, click "Edit" to add
-
-
- {{ end }} -
- - - - - - - - - - -
- - - - - -
-
-
-
-{{ end }} -
-
-
New URL
-
-
-
-
- -
-
- -
- - - -
-
-
+ + +{{ range .Filters }} + + + + + + +{{ end }} +
{{ .Depth }}{{ .Filter.ID }}{{ .Filter.ParentID }}{{ .Filter.Name }}
{{ end }} diff --git a/util.go b/util.go index b54906e..0c7a19f 100644 --- a/util.go +++ b/util.go @@ -12,16 +12,6 @@ func bindAndValidateWatch(watch *Watch, c *gin.Context) (map[string]string, erro return validate(err), err } -func bindAndValidateURL(url *URL, c *gin.Context) (map[string]string, error) { - err := c.ShouldBind(url) - return validate(err), err -} - -func bindAndValidateGroup(group *FilterGroup, c *gin.Context) (map[string]string, error) { - err := c.ShouldBind(group) - return validate(err), err -} - func bindAndValidateFilter(filter *Filter, c *gin.Context) (map[string]string, error) { err := c.ShouldBind(filter) return validate(err), err