diff --git a/go.mod b/go.mod index ce4ab86..3712119 100644 --- a/go.mod +++ b/go.mod @@ -3,21 +3,25 @@ module broodjeaap.net/go-watch-and-tel go 1.18 require ( + github.com/antchfx/htmlquery v1.2.5 github.com/gin-contrib/multitemplate v0.0.0-20220705015713-e21a0ba39de3 github.com/gin-gonic/gin v1.8.1 github.com/go-telegram-bot-api/telegram-bot-api/v5 v5.5.1 github.com/spf13/viper v1.12.0 + golang.org/x/net v0.0.0-20220722155237-a158d28d115b gorm.io/driver/sqlite v1.3.6 gorm.io/gorm v1.23.8 ) require ( + github.com/antchfx/xpath v1.2.1 // indirect github.com/fsnotify/fsnotify v1.5.4 // indirect github.com/gin-contrib/sse v0.1.0 // indirect github.com/go-playground/locales v0.14.0 // indirect github.com/go-playground/universal-translator v0.18.0 // indirect github.com/go-playground/validator/v10 v10.11.0 // indirect github.com/goccy/go-json v0.9.10 // indirect + github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect github.com/hashicorp/hcl v1.0.0 // indirect github.com/jinzhu/inflection v1.0.0 // indirect github.com/jinzhu/now v1.1.5 // indirect @@ -38,7 +42,6 @@ require ( github.com/subosito/gotenv v1.3.0 // indirect github.com/ugorji/go/codec v1.2.7 // indirect golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d // indirect - golang.org/x/net v0.0.0-20220708220712-1185a9018129 // indirect golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8 // indirect golang.org/x/text v0.3.7 // indirect google.golang.org/protobuf v1.28.0 // indirect diff --git a/go.sum b/go.sum index f94d887..0eb07b4 100644 --- a/go.sum +++ b/go.sum @@ -38,6 +38,10 @@ cloud.google.com/go/storage v1.14.0/go.mod h1:GrKmX003DSIwi9o29oFT7YDnHYwZoctc3f dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= +github.com/antchfx/htmlquery v1.2.5 h1:1lXnx46/1wtv1E/kzmH8vrfMuUKYgkdDBA9pIdMJnk4= +github.com/antchfx/htmlquery v1.2.5/go.mod h1:2MCVBzYVafPBmKbrmwB9F5xdd+IEgRY61ci2oOsOQVw= +github.com/antchfx/xpath v1.2.1 h1:qhp4EW6aCOVr5XIkT+l6LJ9ck/JsUH/yyauNgTQkBF8= +github.com/antchfx/xpath v1.2.1/go.mod h1:i54GszH55fYfBmoZXapTHN8T8tkcHfRgLyVwwqzXNcs= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= @@ -86,6 +90,8 @@ github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfU github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20191227052852-215e87163ea7/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE= +github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= github.com/golang/mock v1.2.0/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= github.com/golang/mock v1.3.1/go.mod h1:sBzyDLLjw3U8JLTeZvSv8jJB+tU5PVekmnlKIyFUx0Y= @@ -296,6 +302,7 @@ golang.org/x/net v0.0.0-20200222125558-5a598a2470a0/go.mod h1:z5CRVTTTmAJ677TzLL golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200301022130-244492dfa37a/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= +golang.org/x/net v0.0.0-20200421231249-e086a090c8fd/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20200501053045-e0ff5e5a1de5/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20200506145744-7e3656a0809f/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20200513185701-a91f0712d120/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= @@ -309,8 +316,8 @@ golang.org/x/net v0.0.0-20201209123823-ac852fbbde11/go.mod h1:m0MpNAwzfU5UDzcl9v golang.org/x/net v0.0.0-20201224014010-6772e930b67b/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= -golang.org/x/net v0.0.0-20220708220712-1185a9018129 h1:vucSRfWwTsoXro7P+3Cjlr6flUMtzCwzlvkxEQtHHB0= -golang.org/x/net v0.0.0-20220708220712-1185a9018129/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b h1:PxfKdU9lEEDYjdIzOtC4qFWgkU2rGHdKlKowJSMN9h0= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= diff --git a/main.go b/main.go index d4c0558..647157d 100644 --- a/main.go +++ b/main.go @@ -67,7 +67,7 @@ func (web Web) viewWatch(c *gin.Context) { id := c.Param("id") var watch Watch - web.db.Model(&Watch{}).Preload("URLs.Queries").First(&watch, id) + web.db.Model(&Watch{}).Preload("URLs.Queries.Filters").First(&watch, id) c.HTML(http.StatusOK, "viewWatch", watch) } @@ -113,13 +113,13 @@ func (web Web) createQuery(c *gin.Context) { c.Redirect(http.StatusSeeOther, "/watch/new") return } - query := c.PostForm("query") - if query == "" { + typ := c.PostForm("type") + if typ == "" { c.Redirect(http.StatusSeeOther, "/watch/new") return } - typ := c.PostForm("type") - if typ == "" { + query := c.PostForm("query") + if query == "" { c.Redirect(http.StatusSeeOther, "/watch/new") return } @@ -134,6 +134,59 @@ func (web Web) createQuery(c *gin.Context) { c.Redirect(http.StatusSeeOther, fmt.Sprintf("/watch/view/%d", watch_id)) } +func (web Web) createFilter(c *gin.Context) { + query_id, err := strconv.ParseUint(c.PostForm("query_id"), 10, 64) + if err != nil { + log.Print(err) + c.Redirect(http.StatusSeeOther, "/watch/new") + return // TODO response + } + name := c.PostForm("name") + if name == "" { + log.Print(name) + c.Redirect(http.StatusSeeOther, "/watch/new") + return + } + typ := c.PostForm("type") + if typ == "" { + log.Print(typ) + c.Redirect(http.StatusSeeOther, "/watch/new") + return + } + from := c.PostForm("from") + if from == "" { + log.Print(from) + c.Redirect(http.StatusSeeOther, "/watch/new") + return + } + to := c.PostForm("to") + log.Print("To:", to) + filter_model := &Filter{ + QueryID: uint(query_id), + Name: name, + Type: typ, + From: from, + To: to, + } + web.db.Create(filter_model) + c.Redirect(http.StatusSeeOther, fmt.Sprintf("/query/edit/%d", query_id)) +} + +func (web Web) editQuery(c *gin.Context) { + query_id, err := strconv.ParseUint(c.Param("id"), 10, 64) + if err != nil { + c.Redirect(http.StatusSeeOther, "/watch/new") + return // TODO response + } + var query Query + web.db.Preload("URL.Watch").Preload("Filters").Preload("URL").First(&query, query_id) + + c.HTML(http.StatusOK, "editQuery", gin.H{ + "Query": query, + "currentResult": getQueryResult(&query), + }) +} + func passiveBot(bot *tgbotapi.BotAPI) { u := tgbotapi.NewUpdate(0) u.Timeout = 60 @@ -177,7 +230,7 @@ func main() { } db, _ := gorm.Open(sqlite.Open(viper.GetString("database.dsn"))) - db.AutoMigrate(&Watch{}, &URL{}, &Query{}) + db.AutoMigrate(&Watch{}, &URL{}, &Query{}, &Filter{}) //bot, _ := tgbotapi.NewBotAPI(viper.GetString("telegram.token")) @@ -199,6 +252,7 @@ func main() { templates.AddFromFiles("index", "templates/base.html", "templates/index.html") templates.AddFromFiles("newWatch", "templates/base.html", "templates/newWatch.html") templates.AddFromFiles("viewWatch", "templates/base.html", "templates/viewWatch.html") + templates.AddFromFiles("editQuery", "templates/base.html", "templates/editQuery.html") router.HTMLRender = templates router.GET("/", web.index) @@ -208,6 +262,8 @@ func main() { router.GET("/watch/view/:id/", web.viewWatch) router.POST("/url/create/", web.createURL) router.POST("/query/create/", web.createQuery) + router.GET("/query/edit/:id", web.editQuery) + router.POST("/filter/create/", web.createFilter) router.Run("0.0.0.0:8080") } diff --git a/models.go b/models.go index d1d7551..1ca9e9f 100644 --- a/models.go +++ b/models.go @@ -14,6 +14,7 @@ type Watch struct { type URL struct { gorm.Model WatchID uint + Watch Watch Name string URL string Queries []Query @@ -21,8 +22,20 @@ type URL struct { type Query struct { gorm.Model - URLID uint - Name string - Type string - Query string + URLID uint + URL URL + Name string + Type string + Query string + Filters []Filter +} + +type Filter struct { + gorm.Model + QueryID uint + Query Query + Name string + Type string + From string + To string } diff --git a/scraping.go b/scraping.go new file mode 100644 index 0000000..3a98d23 --- /dev/null +++ b/scraping.go @@ -0,0 +1,94 @@ +package main + +import ( + "bytes" + "log" + "regexp" + "strconv" + "strings" + + "github.com/antchfx/htmlquery" + "golang.org/x/net/html" +) + +func getQueryResult(query *Query) []string { + doc, err := htmlquery.LoadURL(query.URL.URL) + if err != nil { + log.Print("Something went wrong loading loading", query.URL.URL) + } + nodes, _ := htmlquery.QueryAll(doc, query.Query) + nodeStrings := make([]string, len(nodes)) + for i, node := range nodes { + var b bytes.Buffer + html.Render(&b, node) + nodeStrings[i] = b.String() + } + for _, filter := range query.Filters { + for i, nodeString := range nodeStrings { + nodeStrings[i] = getFilterResult(nodeString, &filter) + } + } + return nodeStrings +} + +func getFilterResult(s string, filter *Filter) string { + switch { + case filter.Type == "replace": + { + return getFilterResultReplace(s, filter) + } + case filter.Type == "regex": + { + return getFilterResultRegex(s, filter) + } + case filter.Type == "substring": + { + return getFilterResultSubstring(s, filter) + } + default: + return s + } +} + +func getFilterResultReplace(s string, filter *Filter) string { + return strings.ReplaceAll(s, filter.From, filter.To) +} + +func getFilterResultRegex(s string, filter *Filter) string { + regex, err := regexp.Compile(filter.From) + if err != nil { + return s + } + return regex.ReplaceAllString(s, filter.To) +} + +func getFilterResultSubstring(s string, filter *Filter) string { + substrings := strings.Split(filter.From, ",") + var sb strings.Builder + asRunes := []rune(s) + + for _, substring := range substrings { + if strings.Contains(substring, "-") { + from_to := strings.Split(substring, "-") + if len(from_to) != 2 { + return s + } + from, err := strconv.ParseInt(from_to[0], 10, 32) + if err != nil { + return s + } + to, err := strconv.ParseInt(from_to[1], 10, 32) + if err != nil { + return s + } + sb.WriteString(string(asRunes[from:to])) + } else { + pos, err := strconv.ParseInt(substring, 10, 32) + if err != nil { + return s + } + sb.WriteRune(asRunes[pos]) + } + } + return sb.String() +} diff --git a/templates/editQuery.html b/templates/editQuery.html new file mode 100644 index 0000000..256f8ab --- /dev/null +++ b/templates/editQuery.html @@ -0,0 +1,174 @@ +{{define "content"}} +
+ {{ . }}
+
+