diff --git a/main.go b/main.go
index 0937f17..3702b43 100644
--- a/main.go
+++ b/main.go
@@ -1,6 +1,7 @@
package main
import (
+ "encoding/json"
"fmt"
"log"
"net/http"
@@ -59,41 +60,64 @@ func (web Web) deleteWatch(c *gin.Context) {
c.Redirect(http.StatusSeeOther, "/")
}
+// FilterDepth pairs a Filter with the level at which viewWatch's
+// breadth-first walk reached it: filters without a parent are at depth 0,
+// their children at depth 1, and so on. It is the element type of the
+// "Filters" list handed to the viewWatch template.
+type FilterDepth struct {
+ Filter Filter
+ Depth int
+}
+
+// viewWatch renders the "viewWatch" template for the watch named by the :id
+// route parameter. Besides the watch itself the template receives:
+//
+//   - "Filters":  the watch's filters in breadth-first order, each wrapped in
+//     a FilterDepth carrying its level in the parent/child tree;
+//   - "MaxDepth": the number of levels that were visited.
+//
+// Filters whose parent chain never reaches a root (a ParentID that points at
+// a missing row, or a cycle) are not listed.
func (web Web) viewWatch(c *gin.Context) {
id := c.Param("id")
var watch Watch
- web.db.Model(&Watch{}).Preload("URLs.GroupFilters.Filters").First(&watch, id)
- c.HTML(http.StatusOK, "viewWatch", watch)
-}
+	// NOTE(review): the lookup error is still ignored; an unknown id renders
+	// the page with a zero-valued watch and no filters.
+	web.db.Model(&Watch{}).First(&watch, id)
-func (web Web) createURL(c *gin.Context) {
- var url URL
- errMap, err := bindAndValidateURL(&url, c)
- if err != nil {
- log.Print(err)
- c.HTML(http.StatusInternalServerError, "500", errMap)
- return
- }
- web.db.Create(&url)
- c.Redirect(http.StatusSeeOther, fmt.Sprintf("/watch/view/%d", url.WatchID))
-}
+	// Only the filters that belong to this watch are shown.
+	var filters []Filter
+	web.db.Model(&Filter{}).Where("watch_id = ?", watch.ID).Find(&filters)
-func (web Web) createFilterGroup(c *gin.Context) {
- watch_id, err := strconv.ParseUint(c.PostForm("w_id"), 10, 64)
- if err != nil {
- log.Print(err)
- c.HTML(http.StatusInternalServerError, "500", gin.H{})
- return
+	// Roots seed the traversal; every other filter is indexed under its
+	// parent's ID. Pointers are taken by index into the slice so that each
+	// one refers to a distinct element rather than to the loop variable.
+	queuedFilters := []*Filter{}
+	children := make(map[uint][]*Filter, len(filters))
+	for i := range filters {
+		filter := &filters[i]
+		if filter.ParentID == nil {
+			queuedFilters = append(queuedFilters, filter)
+		} else {
+			children[*filter.ParentID] = append(children[*filter.ParentID], filter)
+		}
+		// Debug dump of every loaded filter. The marshalled bytes are
+		// converted to a string so the JSON text is printed instead of a
+		// list of byte values.
+		if s, err := json.MarshalIndent(filter, "", "\t"); err != nil {
+			log.Print(err)
+		} else {
+			fmt.Println(string(s))
+		}
}
- var group FilterGroup
- errMap, err := bindAndValidateGroup(&group, c)
- if err != nil {
- c.HTML(http.StatusBadRequest, "500", errMap)
- return
+
+	// Breadth-first walk, one tree level per outer iteration. The children
+	// index is consulted directly, so the result does not depend on the
+	// order in which the rows were returned.
+	bftFilters := []FilterDepth{}
+	depth := 0
+	for len(queuedFilters) > 0 {
+		nextFilters := []*Filter{}
+		for _, f := range queuedFilters {
+			bftFilters = append(bftFilters, FilterDepth{
+				Filter: *f,
+				Depth:  depth,
+			})
+			nextFilters = append(nextFilters, children[f.ID]...)
+		}
+		queuedFilters = nextFilters
+		depth++
}
- web.db.Create(&group)
- c.Redirect(http.StatusSeeOther, fmt.Sprintf("/watch/view/%d", watch_id))
+
+	c.HTML(http.StatusOK, "viewWatch", gin.H{
+		"Watch":    watch,
+		"Filters":  bftFilters,
+		"MaxDepth": depth,
+	})
}
func (web Web) createFilter(c *gin.Context) {
@@ -105,7 +129,7 @@ func (web Web) createFilter(c *gin.Context) {
return
}
web.db.Create(&filter)
- c.Redirect(http.StatusSeeOther, fmt.Sprintf("/group/edit/%d", filter.FilterGroupID))
+ c.Redirect(http.StatusSeeOther, "/group/edit")
}
func (web Web) updateFilter(c *gin.Context) {
@@ -120,10 +144,10 @@ func (web Web) updateFilter(c *gin.Context) {
web.db.First(&filter, filterUpdate.ID)
filter.Name = filterUpdate.Name
filter.Type = filterUpdate.Type
- filter.From = filterUpdate.From
- filter.To = filterUpdate.To
+ filter.Var1 = filterUpdate.From
+ filter.Var2 = &filterUpdate.To
web.db.Save(&filter)
- c.Redirect(http.StatusSeeOther, fmt.Sprintf("/group/edit/%d", +filter.FilterGroupID))
+ c.Redirect(http.StatusSeeOther, "/group/edit/")
}
func (web Web) deleteFilter(c *gin.Context) {
@@ -138,37 +162,6 @@ func (web Web) deleteFilter(c *gin.Context) {
c.Redirect(http.StatusSeeOther, "/group/edit/"+group_id)
}
-func (web Web) editGroup(c *gin.Context) {
- group_id, err := strconv.ParseUint(c.Param("id"), 10, 64)
- if err != nil {
- c.Redirect(http.StatusSeeOther, "/watch/new")
- return // TODO response
- }
- var group FilterGroup
- web.db.Preload("URL.Watch").Preload("Filters").Preload("URL").First(&group, group_id)
-
- c.HTML(http.StatusOK, "editGroup", gin.H{
- "Group": group,
- "currentResult": getGroupResult(&group),
- })
-}
-
-func (web Web) updateGroup(c *gin.Context) {
- var groupUpdate FilterGroupUpdate
- errMap, err := bindAndValidateGroupUpdate(&groupUpdate, c)
- if err != nil {
- log.Print(err)
- c.HTML(http.StatusBadRequest, "500", errMap)
- return
- }
- var group FilterGroup
- web.db.First(&group, groupUpdate.ID)
- group.Name = groupUpdate.Name
- group.Type = groupUpdate.Type
- web.db.Save(&group)
- c.Redirect(http.StatusSeeOther, fmt.Sprintf("/group/edit/%d", +group.ID))
-}
-
func passiveBot(bot *tgbotapi.BotAPI) {
u := tgbotapi.NewUpdate(0)
u.Timeout = 60
@@ -212,7 +205,35 @@ func main() {
}
db, _ := gorm.Open(sqlite.Open(viper.GetString("database.dsn")))
- db.AutoMigrate(&Watch{}, &URL{}, &FilterGroup{}, &Filter{})
+ db.AutoMigrate(&Watch{}, &Filter{})
+
+ filters := []Filter{}
+ watch := Watch{
+ Name: "LG C2 42",
+ Interval: 60,
+ Filters: filters,
+ }
+ db.Create(&watch)
+
+ urlFilter := Filter{
+ WatchID: watch.ID,
+ ParentID: nil,
+ Parent: nil,
+ Name: "PriceWatch Fetch",
+ Type: "url",
+ Var1: "https://tweakers.net/pricewatch/1799060/lg-c2-42-inch-donkerzilveren-voet-zwart.html",
+ }
+ db.Create(&urlFilter)
+
+ xpathFilter := Filter{
+ WatchID: watch.ID,
+ Watch: watch,
+ ParentID: &urlFilter.ID,
+ Name: "price select",
+ Type: "xpath",
+ Var1: "//td[@class='shop-price']",
+ }
+ db.Create(&xpathFilter)
//bot, _ := tgbotapi.NewBotAPI(viper.GetString("telegram.token"))
@@ -244,10 +265,6 @@ func main() {
router.POST("/watch/create", web.createWatch)
router.POST("/watch/delete", web.deleteWatch)
router.GET("/watch/view/:id/", web.viewWatch)
- router.POST("/url/create/", web.createURL)
- router.POST("/group/create/", web.createFilterGroup)
- router.GET("/group/edit/:id", web.editGroup)
- router.POST("/group/update", web.updateGroup)
router.POST("/filter/create/", web.createFilter)
router.POST("/filter/update/", web.updateFilter)
router.POST("/filter/delete/", web.deleteFilter)
diff --git a/models.go b/models.go
index 7dd06e3..56012c5 100644
--- a/models.go
+++ b/models.go
@@ -8,33 +8,20 @@ type Watch struct {
gorm.Model
Name string `form:"watch_name" yaml:"watch_name" binding:"required" validate:"min=1"`
Interval int `form:"interval" yaml:"interval" binding:"required"`
- URLs []URL
-}
-
-type URL struct {
- gorm.Model
- WatchID uint `form:"url_watch_id" yaml:"url_watch_id" binding:"required"`
- Watch *Watch `form:"watch" yaml:"watch" validate:"omitempty"`
- Name string `form:"url_name" yaml:"url_name" binding:"required" validate:"min=1"`
- URL string `form:"url" yaml:"url" binding:"required,url" validate:"min=1"`
- GroupFilters []FilterGroup
-}
-
-type FilterGroup struct {
- gorm.Model
- URLID uint `form:"group_url_id" yaml:"group_url_id" binding:"required"`
- URL *URL
- Name string `form:"group_name" yaml:"group_name" binding:"required" validate:"min=1"`
- Type string `form:"group_type" yaml:"group_type" binding:"required" validate:"oneof=diff enum number bool"`
- Filters []Filter
+ Filters []Filter
}
+// Filter is one step in a watch's processing tree. A filter without a
+// ParentID is a root; every other filter consumes the results produced by
+// its parent and produces its own.
type Filter struct {
gorm.Model
- FilterGroupID uint `form:"filter_group_id" yaml:"filter_group_id" binding:"required"`
- FilterGroup *FilterGroup
- Name string `form:"filter_name" yaml:"filter_name" binding:"required" validate:"min=1"`
- Type string `form:"filter_type" yaml:"filter_type" binding:"required" validate:"oneof=xpath json css replace match substring"`
- From string `form:"from" yaml:"from" binding:"required"`
- To string `form:"to" yaml:"to" binding:"required"`
+	// WatchID / Watch: the watch this filter belongs to.
+	WatchID uint `form:"filter_watch_id" yaml:"filter_watch_id" binding:"required"`
+	Watch   Watch
+	// ParentID is nil for a root filter.
+	ParentID *uint `form:"parent_id" yaml:"parent_id"`
+	// Parent is the in-memory/ORM association for ParentID. It is excluded
+	// from form and YAML mapping: it previously reused the "parent_id" key,
+	// which duplicated ParentID's key and pointed a scalar form value at a
+	// struct field.
+	Parent *Filter `form:"-" yaml:"-"`
+	Name   string  `form:"filter_name" yaml:"filter_name" binding:"required" validate:"min=1"`
+	Type   string  `form:"filter_type" yaml:"filter_type" binding:"required" validate:"oneof=url xpath json css replace match substring"`
+	// Var1 is the filter's primary argument: the XPath/gjson/CSS expression,
+	// the regular expression, or the substring spec, depending on Type.
+	Var1 string `form:"var1" yaml:"var1" binding:"required"`
+	// Var2 is optional; the "replace" type uses it as the replacement text.
+	Var2 *string `form:"var2" yaml:"var2"`
+	// Var3 is optional. NOTE(review): no code in this change reads it.
+	Var3 *string `form:"var3" yaml:"var3"`
+	// Filters (child filters) and results (evaluation output) are
+	// in-memory only; gorm ignores both.
+	Filters []Filter `gorm:"-:all"`
+	results []string `gorm:"-:all"`
}
diff --git a/scraping.go b/scraping.go
index 19234ec..3339250 100644
--- a/scraping.go
+++ b/scraping.go
@@ -2,9 +2,7 @@ package main
import (
"bytes"
- "io/ioutil"
"log"
- "net/http"
"regexp"
"strconv"
"strings"
@@ -15,170 +13,191 @@ import (
"golang.org/x/net/html"
)
-func getGroupResult(group *FilterGroup) []string {
- resp, err := http.Get(group.URL.URL)
- if err != nil {
- log.Print("Something went wrong loading", group.URL.URL)
- return []string{}
+// getFilterResults evaluates filter and then, depth-first, every filter
+// below it in filter.Filters.
+//
+// Children are visited through a pointer into filter.Filters so that the
+// results they compute stay on the tree; ranging by value would evaluate
+// throw-away copies. A child whose Parent is not set is linked to filter
+// first, because the per-type evaluators read their input from
+// Parent.results.
+func getFilterResults(filter *Filter) {
+	getFilterResult(filter)
+	for i := range filter.Filters {
+		child := &filter.Filters[i]
+		if child.Parent == nil {
+			child.Parent = filter
+		}
+		getFilterResults(child)
}
- defer resp.Body.Close()
- html, err := ioutil.ReadAll(resp.Body)
- if err != nil {
- log.Print("Something went wrong loading ", group.URL.URL)
- return []string{}
- }
- resultStrings := []string{string(html)}
- newStrings := []string{}
- for _, filter := range group.Filters {
- for _, resultString := range resultStrings {
- getFilterResult(resultString, &filter, &newStrings)
- }
- resultStrings = newStrings
- newStrings = nil
- }
- return resultStrings
}
-func getFilterResult(s string, filter *Filter, newStrings *[]string) {
+// getFilterResult evaluates a single filter by dispatching on filter.Type
+// to the matching getFilterResult* function. It returns nothing; the
+// evaluators append their output to the filter itself.
+func getFilterResult(filter *Filter) {
switch {
case filter.Type == "xpath":
{
- getFilterResultXPath(s, filter, newStrings)
+ getFilterResultXPath(filter)
}
case filter.Type == "json":
{
- getFilterResultJSON(s, filter, newStrings)
+ getFilterResultJSON(filter)
}
case filter.Type == "css":
{
- getFilterResultCSS(s, filter, newStrings)
+ getFilterResultCSS(filter)
}
case filter.Type == "replace":
{
- getFilterResultReplace(s, filter, newStrings)
+ getFilterResultReplace(filter)
}
case filter.Type == "match":
{
- getFilterResultMatch(s, filter, newStrings)
+ getFilterResultMatch(filter)
}
case filter.Type == "substring":
{
- getFilterResultSubstring(s, filter, newStrings)
+ getFilterResultSubstring(filter)
}
default:
+ // Any other type is silently ignored. That includes "url", which the
+ // Filter model accepts but which has no case here.
}
}
-func getFilterResultXPath(s string, filter *Filter, newStrings *[]string) {
- doc, err := htmlquery.Parse(strings.NewReader(s))
- if err != nil {
- log.Print(err)
+// getFilterResultXPath parses each of the parent's results as HTML, runs
+// the XPath expression in filter.Var1 against it and appends every matching
+// node, rendered back to HTML and unescaped, to filter.results.
+//
+// A filter without a parent is logged and skipped. An input that cannot be
+// parsed is logged and skipped; an expression that htmlquery rejects is
+// logged once and ends the evaluation, since it would fail for every input.
+func getFilterResultXPath(filter *Filter) {
+	if filter.Parent == nil {
+		log.Println("Filter", filter.Name, "called without parent for", filter.Type)
return
}
- nodes, _ := htmlquery.QueryAll(doc, filter.From)
- for _, node := range nodes {
- var b bytes.Buffer
- html.Render(&b, node)
- *newStrings = append(*newStrings, html.UnescapeString(b.String()))
- }
-}
-
-func getFilterResultJSON(s string, filter *Filter, newStrings *[]string) {
-
- for _, result := range gjson.Get(s, filter.From).Array() {
- *newStrings = append(*newStrings, result.String())
- }
-}
-
-func getFilterResultCSS(s string, filter *Filter, newStrings *[]string) {
- doc, err := html.Parse(strings.NewReader(s))
- if err != nil {
- log.Print(err)
- return
- }
- sel, err := cascadia.Parse(filter.From)
- if err != nil {
- log.Print(err)
- return
- }
- for _, node := range cascadia.QueryAll(doc, sel) {
- var b bytes.Buffer
- html.Render(&b, node)
- *newStrings = append(*newStrings, html.UnescapeString(b.String()))
- }
-}
-
-func getFilterResultReplace(s string, filter *Filter, newStrings *[]string) {
- r, err := regexp.Compile(filter.From)
- if err != nil {
- log.Print(err)
- return
- }
- *newStrings = append(*newStrings, r.ReplaceAllString(s, filter.To))
-}
-
-func getFilterResultMatch(s string, filter *Filter, newStrings *[]string) {
- r, err := regexp.Compile(filter.From)
- if err != nil {
- log.Print(err)
- return
- }
- for _, str := range r.FindAllString(s, -1) {
-
- *newStrings = append(*newStrings, str)
- }
-}
-
-func getFilterResultSubstring(s string, filter *Filter, newStrings *[]string) {
- substrings := strings.Split(filter.From, ",")
- var sb strings.Builder
- asRunes := []rune(s)
-
- for _, substring := range substrings {
- if strings.Contains(substring, ":") {
- from_to := strings.Split(substring, ":")
- if len(from_to) != 2 {
- return
- }
- fromStr := from_to[0]
- var hasFrom bool = true
- if fromStr == "" {
- hasFrom = false
- }
- from64, err := strconv.ParseInt(fromStr, 10, 32)
- var from = int(from64)
- if hasFrom && err != nil {
- return
- } else if from < 0 {
- from = len(asRunes) + from
- }
- toStr := from_to[1]
- var hasTo bool = true
- if toStr == "" {
- hasTo = false
- }
- to64, err := strconv.ParseInt(toStr, 10, 32)
- var to = int(to64)
- if hasTo && err != nil {
- return
- } else if to < 0 {
- to = len(asRunes) + to
- }
- if hasFrom && hasTo {
- sb.WriteString(string(asRunes[from:to]))
- } else if hasFrom {
- sb.WriteString(string(asRunes[from:]))
- } else if hasTo {
- sb.WriteString(string(asRunes[:to]))
- }
- } else {
- pos, err := strconv.ParseInt(substring, 10, 32)
- if err != nil || pos < 0 {
- return
- }
- sb.WriteRune(asRunes[pos])
+	for _, result := range filter.Parent.results {
+		doc, err := htmlquery.Parse(strings.NewReader(result))
+		if err != nil {
+			log.Print(err)
+			continue
+		}
+		nodes, err := htmlquery.QueryAll(doc, filter.Var1)
+		if err != nil {
+			// The expression itself is invalid; no input can succeed.
+			log.Print(err)
+			return
+		}
+		for _, node := range nodes {
+			var b bytes.Buffer
+			if err := html.Render(&b, node); err != nil {
+				log.Print(err)
+				continue
+			}
+			filter.results = append(filter.results, html.UnescapeString(b.String()))
}
}
- *newStrings = append(*newStrings, sb.String())
+}
+
+// getFilterResultJSON evaluates the gjson path in filter.Var1 against each
+// of the parent's results and appends the string form of every element of
+// the match to filter.results. A filter without a parent is logged and
+// left untouched.
+func getFilterResultJSON(filter *Filter) {
+	parent := filter.Parent
+	if parent == nil {
+		log.Println("Filter", filter.Name, "called without parent for", filter.Type)
+		return
+	}
+	inputs := parent.results
+	for i := range inputs {
+		matches := gjson.Get(inputs[i], filter.Var1).Array()
+		for j := range matches {
+			filter.results = append(filter.results, matches[j].String())
+		}
+	}
+}
+
+// getFilterResultCSS parses each of the parent's results as HTML, applies
+// the CSS selector in filter.Var1 and appends every matching node, rendered
+// back to HTML and unescaped, to filter.results.
+//
+// The input is the parent's results; the previous version ranged over the
+// filter's own (initially empty) results and therefore never produced
+// anything. A filter without a parent is logged and skipped, an invalid
+// selector is logged once and ends the evaluation, and an input that cannot
+// be parsed is logged and skipped.
+func getFilterResultCSS(filter *Filter) {
+	if filter.Parent == nil {
+		log.Println("Filter", filter.Name, "called without parent for", filter.Type)
+		return
+	}
+	// The selector does not depend on the input, so parse it once.
+	sel, err := cascadia.Parse(filter.Var1)
+	if err != nil {
+		log.Print(err)
+		return
+	}
+	for _, result := range filter.Parent.results {
+		doc, err := html.Parse(strings.NewReader(result))
+		if err != nil {
+			log.Print(err)
+			continue
+		}
+		for _, node := range cascadia.QueryAll(doc, sel) {
+			var b bytes.Buffer
+			if err := html.Render(&b, node); err != nil {
+				log.Print(err)
+				continue
+			}
+			filter.results = append(filter.results, html.UnescapeString(b.String()))
+		}
+	}
+}
+
+// getFilterResultReplace applies the regular expression in filter.Var1 to
+// each of the parent's results, replacing every match with filter.Var2 (or
+// with the empty string when Var2 is nil), and appends one output string
+// per input to filter.results.
+//
+// The input is the parent's results; the previous version ranged over the
+// filter's own (initially empty) results and therefore never produced
+// anything. A filter without a parent, or with a pattern that does not
+// compile, is logged and skipped.
+func getFilterResultReplace(filter *Filter) {
+	if filter.Parent == nil {
+		log.Println("Filter", filter.Name, "called without parent for", filter.Type)
+		return
+	}
+	// The pattern is the same for every input, so compile it once.
+	r, err := regexp.Compile(filter.Var1)
+	if err != nil {
+		log.Print(err)
+		return
+	}
+	replacement := ""
+	if filter.Var2 != nil {
+		replacement = *filter.Var2
+	}
+	for _, result := range filter.Parent.results {
+		filter.results = append(filter.results, r.ReplaceAllString(result, replacement))
+	}
+}
+
+// getFilterResultMatch appends to filter.results every non-overlapping
+// match of the regular expression in filter.Var1 found in each of the
+// parent's results.
+//
+// The input is the parent's results; the previous version ranged over the
+// filter's own (initially empty) results and therefore never produced
+// anything. A filter without a parent, or with a pattern that does not
+// compile, is logged and skipped.
+func getFilterResultMatch(filter *Filter) {
+	if filter.Parent == nil {
+		log.Println("Filter", filter.Name, "called without parent for", filter.Type)
+		return
+	}
+	// The pattern is the same for every input, so compile it once.
+	r, err := regexp.Compile(filter.Var1)
+	if err != nil {
+		log.Print(err)
+		return
+	}
+	for _, result := range filter.Parent.results {
+		filter.results = append(filter.results, r.FindAllString(result, -1)...)
+	}
+}
+
+// getFilterResultSubstring builds one output string per parent result by
+// concatenating the rune ranges named in filter.Var1 and appends it to
+// filter.results.
+//
+// Var1 is a comma-separated list of specs, each either a single
+// non-negative rune index ("3") or a range "from:to", "from:" or ":to"
+// whose negative bounds count from the end of the string. A lone ":"
+// selects nothing.
+//
+// The input is the parent's results; the previous version ranged over the
+// filter's own (initially empty) results and therefore never produced
+// anything. A malformed spec ends the evaluation, as before. A well-formed
+// spec that falls outside a particular input used to panic; that input is
+// now logged and skipped.
+func getFilterResultSubstring(filter *Filter) {
+	if filter.Parent == nil {
+		log.Println("Filter", filter.Name, "called without parent for", filter.Type)
+		return
+	}
+	specs := strings.Split(filter.Var1, ",")
+	for _, result := range filter.Parent.results {
+		asRunes := []rune(result)
+		var sb strings.Builder
+		inRange := true
+		for _, spec := range specs {
+			from, to, valid := substringRange(spec, len(asRunes))
+			if !valid {
+				return
+			}
+			if from < 0 || to > len(asRunes) || from > to {
+				inRange = false
+				break
+			}
+			sb.WriteString(string(asRunes[from:to]))
+		}
+		if !inRange {
+			log.Println("Filter", filter.Name, "substring out of range for input of", len(asRunes), "runes")
+			continue
+		}
+		filter.results = append(filter.results, sb.String())
+	}
+}
+
+// substringRange resolves one substring spec against a string of n runes
+// into the half-open rune range [from, to). valid is false when the spec is
+// malformed (not a number, a negative single index, or more than one ':').
+// The returned bounds are not checked against n; the caller does that.
+func substringRange(spec string, n int) (from, to int, valid bool) {
+	if !strings.Contains(spec, ":") {
+		pos, err := strconv.ParseInt(spec, 10, 32)
+		if err != nil || pos < 0 {
+			return 0, 0, false
+		}
+		return int(pos), int(pos) + 1, true
+	}
+	parts := strings.Split(spec, ":")
+	if len(parts) != 2 {
+		return 0, 0, false
+	}
+	if parts[0] == "" && parts[1] == "" {
+		// ":" has always selected nothing rather than the whole string.
+		return 0, 0, true
+	}
+	// An omitted bound defaults to the corresponding end of the string.
+	from, to = 0, n
+	if parts[0] != "" {
+		v, err := strconv.ParseInt(parts[0], 10, 32)
+		if err != nil {
+			return 0, 0, false
+		}
+		from = int(v)
+		if from < 0 {
+			from += n
+		}
+	}
+	if parts[1] != "" {
+		v, err := strconv.ParseInt(parts[1], 10, 32)
+		if err != nil {
+			return 0, 0, false
+		}
+		to = int(v)
+		if to < 0 {
+			to += n
+		}
+	}
+	return from, to, true
+}
diff --git a/scraping_test.go b/scraping_test.go
index 0ea4c8f..6e4e92b 100644
--- a/scraping_test.go
+++ b/scraping_test.go
@@ -57,11 +57,9 @@ func TestFilterXPath(t *testing.T) {
t.Run(testname, func(t *testing.T) {
want := []string{}
getFilterResultXPath(
- HTML_STRING,
&Filter{
- From: test.Query,
+ Var1: test.Query,
},
- &want,
)
if !reflect.DeepEqual(test.Want, want) {
t.Errorf("Got %s, want %s", want, test.Want)
@@ -86,11 +84,9 @@ func TestFilterJSON(t *testing.T) {
t.Run(testname, func(t *testing.T) {
want := []string{}
getFilterResultJSON(
- JSON_STRING,
&Filter{
- From: test.Query,
+ Var1: test.Query,
},
- &want,
)
if !reflect.DeepEqual(test.Want, want) {
t.Errorf("Got %s, want %s", want, test.Want)
@@ -116,11 +112,9 @@ func TestFilterCSS(t *testing.T) {
t.Run(testname, func(t *testing.T) {
want := []string{}
getFilterResultCSS(
- HTML_STRING,
&Filter{
- From: test.Query,
+ Var1: test.Query,
},
- &want,
)
if !reflect.DeepEqual(test.Want, want) {
t.Errorf("Got %s, want %s", want, test.Want)
@@ -149,11 +143,9 @@ func TestFilterReplace(t *testing.T) {
t.Run(testname, func(t *testing.T) {
want := []string{test.Want}
getFilterResultReplace(
- test.Input,
&Filter{
- From: test.Query,
+ Var1: test.Query,
},
- &want,
)
if want[0] != test.Want {
t.Errorf("Got %s, want %s", want[0], test.Want)
@@ -180,11 +172,9 @@ func TestFilterMatch(t *testing.T) {
t.Run(testname, func(t *testing.T) {
want := []string{}
getFilterResultMatch(
- test.Input,
&Filter{
- From: test.Query,
+ Var1: test.Query,
},
- &want,
)
if !reflect.DeepEqual(test.Want, want) {
t.Errorf("Got %s, want %s", want, test.Want)
@@ -229,11 +219,9 @@ func TestFilterSubstring(t *testing.T) {
t.Run(testname, func(t *testing.T) {
want := []string{test.Want}
getFilterResultSubstring(
- test.Input,
&Filter{
- From: test.Query,
+ Var1: test.Query,
},
- &want,
)
if want[0] != test.Want {
t.Errorf("Got %s, want %s", want[0], test.Want)
diff --git a/templates/viewWatch.html b/templates/viewWatch.html
index dad4119..7faeb30 100644
--- a/templates/viewWatch.html
+++ b/templates/viewWatch.html
@@ -2,103 +2,23 @@
- {{ .Name }}
+ {{ .Watch.Name }}
- {{ $.Interval }}
+ {{ .Watch.Interval }}
-{{ range .URLs }}
-
-
-
-
- {{ range .GroupFilters }}
-
-
-
-
-
-
-
- {{ if .Filters }}
- {{ range .Filters }}
-
- {{ .Name }} |
- {{ .Type }} |
- {{ .From }} |
- {{ .To }} |
- |
-
- {{ end }}
- {{ else }}
-
- No filters yet, click "Edit" to add |
-
- {{ end }}
-
-
-
-
- {{ end }}
-
-
-
-{{ end }}
-
+
+
+{{ range .Filters }}
+
+ {{ .Depth }} |
+ {{ .Filter.ID }} |
+ {{ .Filter.ParentID }} |
+ {{ .Filter.Name }} |
+
+{{ end }}
+
{{ end }}
diff --git a/util.go b/util.go
index b54906e..0c7a19f 100644
--- a/util.go
+++ b/util.go
@@ -12,16 +12,6 @@ func bindAndValidateWatch(watch *Watch, c *gin.Context) (map[string]string, erro
return validate(err), err
}
-func bindAndValidateURL(url *URL, c *gin.Context) (map[string]string, error) {
- err := c.ShouldBind(url)
- return validate(err), err
-}
-
-func bindAndValidateGroup(group *FilterGroup, c *gin.Context) (map[string]string, error) {
- err := c.ShouldBind(group)
- return validate(err), err
-}
-
func bindAndValidateFilter(filter *Filter, c *gin.Context) (map[string]string, error) {
err := c.ShouldBind(filter)
return validate(err), err