From 242b10e21b811e39e5b2f1062c2ba3307f24eaf6 Mon Sep 17 00:00:00 2001 From: BroodjeAap Date: Mon, 1 Aug 2022 19:40:11 +0000 Subject: [PATCH] converted to FilterGroups with filters per URL --- forms.go | 9 +- main.go | 55 +++++----- models.go | 31 +++--- scraping.go | 63 ++++++++---- templates/{editQuery.html => editGroup.html} | 103 +++++++++++++------ templates/viewWatch.html | 37 +++---- util.go | 8 +- 7 files changed, 183 insertions(+), 123 deletions(-) rename templates/{editQuery.html => editGroup.html} (58%) diff --git a/forms.go b/forms.go index 046832c..75db0ab 100644 --- a/forms.go +++ b/forms.go @@ -1,8 +1,7 @@ package main -type QueryUpdate struct { - ID uint `form:"query_id" binding:"required"` - Name string `form:"query_name" binding:"required" validate:"min=1"` - Type string `form:"query_type" binding:"required" validate:"oneof=css xpath regex json"` - Query string `form:"query" binding:"required"` +type FilterGroupUpdate struct { + ID uint `form:"group_id" binding:"required"` + Name string `form:"group_name" binding:"required" validate:"min=1"` + Type string `form:"group_type" binding:"required" validate:"oneof=diff enum number"` } diff --git a/main.go b/main.go index 6b976ff..32cc32c 100644 --- a/main.go +++ b/main.go @@ -63,7 +63,7 @@ func (web Web) viewWatch(c *gin.Context) { id := c.Param("id") var watch Watch - web.db.Model(&Watch{}).Preload("URLs.Queries.Filters").First(&watch, id) + web.db.Model(&Watch{}).Preload("URLs.GroupFilters.Filters").First(&watch, id) c.HTML(http.StatusOK, "viewWatch", watch) } @@ -79,20 +79,20 @@ func (web Web) createURL(c *gin.Context) { c.Redirect(http.StatusSeeOther, fmt.Sprintf("/watch/view/%d", url.WatchID)) } -func (web Web) createQuery(c *gin.Context) { +func (web Web) createFilterGroup(c *gin.Context) { watch_id, err := strconv.ParseUint(c.PostForm("w_id"), 10, 64) if err != nil { log.Print(err) c.HTML(http.StatusInternalServerError, "500", gin.H{}) return } - var query Query - errMap, err := bindAndValidateQuery(&query, c) + var group FilterGroup + errMap, err := bindAndValidateGroup(&group, c) if err != nil { c.HTML(http.StatusBadRequest, "500", errMap) return } - web.db.Create(&query) + web.db.Create(&group) c.Redirect(http.StatusSeeOther, fmt.Sprintf("/watch/view/%d", watch_id)) } @@ -105,39 +105,38 @@ func (web Web) createFilter(c *gin.Context) { return } web.db.Create(&filter) - c.Redirect(http.StatusSeeOther, fmt.Sprintf("/query/edit/%d", filter.QueryID)) + c.Redirect(http.StatusSeeOther, fmt.Sprintf("/group/edit/%d", filter.FilterGroupID)) } -func (web Web) editQuery(c *gin.Context) { - query_id, err := strconv.ParseUint(c.Param("id"), 10, 64) +func (web Web) editGroup(c *gin.Context) { + group_id, err := strconv.ParseUint(c.Param("id"), 10, 64) if err != nil { c.Redirect(http.StatusSeeOther, "/watch/new") return // TODO response } - var query Query - web.db.Preload("URL.Watch").Preload("Filters").Preload("URL").First(&query, query_id) + var group FilterGroup + web.db.Preload("URL.Watch").Preload("Filters").Preload("URL").First(&group, group_id) - c.HTML(http.StatusOK, "editQuery", gin.H{ - "Query": query, - "currentResult": getQueryResult(&query), + c.HTML(http.StatusOK, "editGroup", gin.H{ + "Group": group, + "currentResult": getGroupResult(&group), }) } -func (web Web) updateQuery(c *gin.Context) { - var queryUpdate QueryUpdate - errMap, err := bindAndValidateQueryUpdate(&queryUpdate, c) +func (web Web) updateGroup(c *gin.Context) { + var groupUpdate FilterGroupUpdate + errMap, err := bindAndValidateGroupUpdate(&groupUpdate, c) if err != nil { log.Print(err) c.HTML(http.StatusBadRequest, "500", errMap) return } - var query Query - web.db.First(&query, queryUpdate.ID) - query.Name = queryUpdate.Name - query.Type = queryUpdate.Type - query.Query = queryUpdate.Query - web.db.Save(&query) - c.Redirect(http.StatusSeeOther, fmt.Sprintf("/query/edit/%d", +query.ID)) + var group FilterGroup + web.db.First(&group, groupUpdate.ID) + group.Name = groupUpdate.Name + group.Type = groupUpdate.Type + web.db.Save(&group) + c.Redirect(http.StatusSeeOther, fmt.Sprintf("/group/edit/%d", +group.ID)) } func passiveBot(bot *tgbotapi.BotAPI) { @@ -183,7 +182,7 @@ func main() { } db, _ := gorm.Open(sqlite.Open(viper.GetString("database.dsn"))) - db.AutoMigrate(&Watch{}, &URL{}, &Query{}, &Filter{}) + db.AutoMigrate(&Watch{}, &URL{}, &FilterGroup{}, &Filter{}) //bot, _ := tgbotapi.NewBotAPI(viper.GetString("telegram.token")) @@ -205,7 +204,7 @@ func main() { templates.AddFromFiles("index", "templates/base.html", "templates/index.html") templates.AddFromFiles("newWatch", "templates/base.html", "templates/newWatch.html") templates.AddFromFiles("viewWatch", "templates/base.html", "templates/viewWatch.html") - templates.AddFromFiles("editQuery", "templates/base.html", "templates/editQuery.html") + templates.AddFromFiles("editGroup", "templates/base.html", "templates/editGroup.html") templates.AddFromFiles("500", "templates/base.html", "templates/500.html") router.HTMLRender = templates @@ -216,9 +215,9 @@ func main() { router.POST("/watch/delete", web.deleteWatch) router.GET("/watch/view/:id/", web.viewWatch) router.POST("/url/create/", web.createURL) - router.POST("/query/create/", web.createQuery) - router.GET("/query/edit/:id", web.editQuery) - router.POST("/query/update", web.updateQuery) + router.POST("/group/create/", web.createFilterGroup) + router.GET("/group/edit/:id", web.editGroup) + router.POST("/group/update", web.updateGroup) router.POST("/filter/create/", web.createFilter) router.Run("0.0.0.0:8080") diff --git a/models.go b/models.go index b625276..acde5de 100644 --- a/models.go +++ b/models.go @@ -13,29 +13,28 @@ type Watch struct { type URL struct { gorm.Model - WatchID uint `form:"url_watch_id" yaml:"url_watch_id" binding:"required"` - Watch *Watch `form:"watch" yaml:"watch" validate:"omitempty"` - Name string `form:"url_name" yaml:"url_name" binding:"required" validate:"min=1"` - URL string `form:"url" yaml:"url" binding:"required,url" validate:"min=1"` - Queries []Query + WatchID uint `form:"url_watch_id" yaml:"url_watch_id" binding:"required"` + Watch *Watch `form:"watch" yaml:"watch" validate:"omitempty"` + Name string `form:"url_name" yaml:"url_name" binding:"required" validate:"min=1"` + URL string `form:"url" yaml:"url" binding:"required,url" validate:"min=1"` + GroupFilters []FilterGroup } -type Query struct { +type FilterGroup struct { gorm.Model - URLID uint `form:"query_url_id" yaml:"query_url_id" binding:"required"` + URLID uint `form:"group_url_id" yaml:"group_url_id" binding:"required"` URL *URL - Name string `form:"query_name" yaml:"query_name" binding:"required" validate:"min=1"` - Type string `form:"query_type" yaml:"query_type" binding:"required" validate:"oneof=css xpath regex json"` - Query string `form:"query" yaml:"query" binding:"required"` + Name string `form:"group_name" yaml:"group_name" binding:"required" validate:"min=1"` + Type string `form:"group_type" yaml:"group_type" binding:"required" validate:"oneof=diff enum number bool"` Filters []Filter } type Filter struct { gorm.Model - QueryID uint `form:"filter_query_id" yaml:"filter_query_id" binding:"required"` - Query *Query - Name string `form:"filter_name" yaml:"filter_name" binding:"required" validate:"min=1"` - Type string `form:"filter_type" yaml:"filter_type" binding:"required" validate:"oneof=replace regex substring"` - From string `form:"from" yaml:"from" binding:"required"` - To string `form:"to" yaml:"to" binding:"required"` + FilterGroupID uint `form:"filter_group_id" yaml:"filter_group_id" binding:"required"` + FilterGroup *FilterGroup + Name string `form:"filter_name" yaml:"filter_name" binding:"required" validate:"min=1"` + Type string `form:"filter_type" yaml:"filter_type" binding:"required" validate:"oneof=replace regex substring"` + From string `form:"from" yaml:"from" binding:"required"` + To string `form:"to" yaml:"to" binding:"required"` } diff --git a/scraping.go b/scraping.go index bf00f2e..63b4cd0 100644 --- a/scraping.go +++ b/scraping.go @@ -2,7 +2,9 @@ package main import ( "bytes" + "io/ioutil" "log" + "net/http" "regexp" "strconv" "strings" @@ -11,43 +13,68 @@ import ( "golang.org/x/net/html" ) -func getQueryResult(query *Query) []string { - doc, err := htmlquery.LoadURL(query.URL.URL) +func getGroupResult(group *FilterGroup) []string { + resp, err := http.Get(group.URL.URL) if err != nil { - log.Print("Something went wrong loading loading", query.URL.URL) + log.Print("Something went wrong loading", group.URL.URL) return []string{} } - nodes, _ := htmlquery.QueryAll(doc, query.Query) - nodeStrings := make([]string, len(nodes)) - for i, node := range nodes { - var b bytes.Buffer - html.Render(&b, node) - nodeStrings[i] = html.UnescapeString(b.String()) + defer resp.Body.Close() + html, err := ioutil.ReadAll(resp.Body) + if err != nil { + log.Print("Something went wrong loading ", group.URL.URL) + return []string{} } - for _, filter := range query.Filters { - for i, nodeString := range nodeStrings { - nodeStrings[i] = getFilterResult(nodeString, &filter) + resultStrings := []string{string(html)} + newStrings := []string{} + for _, filter := range group.Filters { + for _, resultString := range resultStrings { + getFilterResult(resultString, &filter, &newStrings) } + resultStrings = newStrings + log.Println(resultStrings) } - return nodeStrings + return resultStrings } -func getFilterResult(s string, filter *Filter) string { +func getFilterResult(s string, filter *Filter, newStrings *[]string) { switch { + case filter.Type == "css": + { + //getFilterResultReplace(s, filter, newStrings) + } + case filter.Type == "xpath": + { + getFilterResultXPath(s, filter, newStrings) + } case filter.Type == "replace": { - return getFilterResultReplace(s, filter) + //getFilterResultReplace(s, filter, newStrings) } case filter.Type == "regex": { - return getFilterResultRegex(s, filter) + //getFilterResultRegex(s, filter, newStrings) } case filter.Type == "substring": { - return getFilterResultSubstring(s, filter) + //getFilterResultSubstring(s, filter, newStrings) } default: - return s + + } +} + +func getFilterResultXPath(s string, filter *Filter, newStrings *[]string) { + doc, err := htmlquery.Parse(strings.NewReader(s)) + if err != nil { + log.Print(err) + return + } + nodes, _ := htmlquery.QueryAll(doc, filter.From) + for _, node := range nodes { + var b bytes.Buffer + html.Render(&b, node) + *newStrings = append(*newStrings, html.UnescapeString(b.String())) } } diff --git a/templates/editQuery.html b/templates/editGroup.html similarity index 58% rename from templates/editQuery.html rename to templates/editGroup.html index ca6cb8b..211cc22 100644 --- a/templates/editQuery.html +++ b/templates/editGroup.html @@ -1,19 +1,16 @@ {{define "content"}}
-
{{ .Query.URL.Watch.Name }} - {{ .Query.Name }}
-
{{ .Query.URL.URL }}
+
{{ .Group.URL.Watch.Name }} - {{ .Group.Name }}
+
{{ .Group.URL.URL }}
- {{ .Query.Type }} -
-
- {{ .Query.Query }} + {{ .Group.Type }}
-
@@ -22,17 +19,22 @@
- + - {{ range .Query.Filters }} + {{ range .Group.Filters }} - + {{ end }} @@ -68,31 +70,28 @@ {{ end }} -
{{ .Name }} {{ .Type }} {{ .From }} {{ .To }} + + + + +
@@ -53,34 +52,28 @@
-
{{ end }} - - + + - diff --git a/util.go b/util.go index b89479d..8b5abc6 100644 --- a/util.go +++ b/util.go @@ -17,8 +17,8 @@ func bindAndValidateURL(url *URL, c *gin.Context) (map[string]string, error) { return validate(err), err } -func bindAndValidateQuery(query *Query, c *gin.Context) (map[string]string, error) { - err := c.ShouldBind(query) +func bindAndValidateGroup(group *FilterGroup, c *gin.Context) (map[string]string, error) { + err := c.ShouldBind(group) return validate(err), err } @@ -27,8 +27,8 @@ func bindAndValidateFilter(filter *Filter, c *gin.Context) (map[string]string, e return validate(err), err } -func bindAndValidateQueryUpdate(query *QueryUpdate, c *gin.Context) (map[string]string, error) { - err := c.ShouldBind(query) +func bindAndValidateGroupUpdate(group *FilterGroupUpdate, c *gin.Context) (map[string]string, error) { + err := c.ShouldBind(group) return validate(err), err }
- + - + + + + + - - - +