git stash for recursive filter stuff that might be over complicated

This commit is contained in:
BroodjeAap 2022-08-25 17:09:42 +00:00
parent 972a4b0447
commit 62bd66f771
6 changed files with 271 additions and 350 deletions

151
main.go
View file

@ -1,6 +1,7 @@
package main
import (
"encoding/json"
"fmt"
"log"
"net/http"
@ -59,41 +60,64 @@ func (web Web) deleteWatch(c *gin.Context) {
c.Redirect(http.StatusSeeOther, "/")
}
// FilterDepth pairs a Filter with the level at which viewWatch queued it
// while flattening the filter tree for the "viewWatch" template.
type FilterDepth struct {
// Filter is a copy of the filter taken at the moment it was queued.
Filter Filter
// Depth is the queue level the filter was taken from; viewWatch starts
// counting at 0 and increments once per processed level.
Depth int
}
func (web Web) viewWatch(c *gin.Context) {
id := c.Param("id")
var watch Watch
web.db.Model(&Watch{}).Preload("URLs.GroupFilters.Filters").First(&watch, id)
c.HTML(http.StatusOK, "viewWatch", watch)
}
web.db.Model(&Watch{}).First(&watch, id)
func (web Web) createURL(c *gin.Context) {
var url URL
errMap, err := bindAndValidateURL(&url, c)
if err != nil {
log.Print(err)
c.HTML(http.StatusInternalServerError, "500", errMap)
return
}
web.db.Create(&url)
c.Redirect(http.StatusSeeOther, fmt.Sprintf("/watch/view/%d", url.WatchID))
}
var filters []Filter
web.db.Model(&Filter{}).Find(&filters)
func (web Web) createFilterGroup(c *gin.Context) {
watch_id, err := strconv.ParseUint(c.PostForm("w_id"), 10, 64)
if err != nil {
log.Print(err)
c.HTML(http.StatusInternalServerError, "500", gin.H{})
return
queuedFilters := []*Filter{}
filterMap := make(map[uint]*Filter)
for _, filter := range filters {
filterMap[filter.ID] = &filter
if filter.ParentID == nil {
queuedFilters = append(queuedFilters, &filter)
}
s, _ := json.MarshalIndent(filter, "", "\t")
fmt.Println(s)
}
var group FilterGroup
errMap, err := bindAndValidateGroup(&group, c)
if err != nil {
c.HTML(http.StatusBadRequest, "500", errMap)
return
for _, filter := range filterMap {
if filter.Parent != nil {
parent := filterMap[*filter.ParentID]
parent.Filters = append(parent.Filters, *filter)
}
}
web.db.Create(&group)
c.Redirect(http.StatusSeeOther, fmt.Sprintf("/watch/view/%d", watch_id))
nextFilters := []*Filter{}
bftFilters := []FilterDepth{}
depth := 0
for len(queuedFilters) > 0 {
for _, f1 := range queuedFilters {
bftFilters = append(bftFilters, FilterDepth{
Filter: *f1,
Depth: depth,
})
for _, f2 := range f1.Filters {
nextFilters = append(nextFilters, &f2)
}
}
log.Println(nextFilters)
queuedFilters = nextFilters
log.Println(queuedFilters)
nextFilters = []*Filter{}
log.Println(nextFilters)
depth += 1
}
c.HTML(http.StatusOK, "viewWatch", gin.H{
"Watch": watch,
"Filters": bftFilters,
"MaxDepth": depth,
})
}
func (web Web) createFilter(c *gin.Context) {
@ -105,7 +129,7 @@ func (web Web) createFilter(c *gin.Context) {
return
}
web.db.Create(&filter)
c.Redirect(http.StatusSeeOther, fmt.Sprintf("/group/edit/%d", filter.FilterGroupID))
c.Redirect(http.StatusSeeOther, "/group/edit")
}
func (web Web) updateFilter(c *gin.Context) {
@ -120,10 +144,10 @@ func (web Web) updateFilter(c *gin.Context) {
web.db.First(&filter, filterUpdate.ID)
filter.Name = filterUpdate.Name
filter.Type = filterUpdate.Type
filter.From = filterUpdate.From
filter.To = filterUpdate.To
filter.Var1 = filterUpdate.From
filter.Var2 = &filterUpdate.To
web.db.Save(&filter)
c.Redirect(http.StatusSeeOther, fmt.Sprintf("/group/edit/%d", +filter.FilterGroupID))
c.Redirect(http.StatusSeeOther, "/group/edit/")
}
func (web Web) deleteFilter(c *gin.Context) {
@ -138,37 +162,6 @@ func (web Web) deleteFilter(c *gin.Context) {
c.Redirect(http.StatusSeeOther, "/group/edit/"+group_id)
}
func (web Web) editGroup(c *gin.Context) {
group_id, err := strconv.ParseUint(c.Param("id"), 10, 64)
if err != nil {
c.Redirect(http.StatusSeeOther, "/watch/new")
return // TODO response
}
var group FilterGroup
web.db.Preload("URL.Watch").Preload("Filters").Preload("URL").First(&group, group_id)
c.HTML(http.StatusOK, "editGroup", gin.H{
"Group": group,
"currentResult": getGroupResult(&group),
})
}
func (web Web) updateGroup(c *gin.Context) {
var groupUpdate FilterGroupUpdate
errMap, err := bindAndValidateGroupUpdate(&groupUpdate, c)
if err != nil {
log.Print(err)
c.HTML(http.StatusBadRequest, "500", errMap)
return
}
var group FilterGroup
web.db.First(&group, groupUpdate.ID)
group.Name = groupUpdate.Name
group.Type = groupUpdate.Type
web.db.Save(&group)
c.Redirect(http.StatusSeeOther, fmt.Sprintf("/group/edit/%d", +group.ID))
}
func passiveBot(bot *tgbotapi.BotAPI) {
u := tgbotapi.NewUpdate(0)
u.Timeout = 60
@ -212,7 +205,35 @@ func main() {
}
db, _ := gorm.Open(sqlite.Open(viper.GetString("database.dsn")))
db.AutoMigrate(&Watch{}, &URL{}, &FilterGroup{}, &Filter{})
db.AutoMigrate(&Watch{}, &Filter{})
filters := []Filter{}
watch := Watch{
Name: "LG C2 42",
Interval: 60,
Filters: filters,
}
db.Create(&watch)
urlFilter := Filter{
WatchID: watch.ID,
ParentID: nil,
Parent: nil,
Name: "PriceWatch Fetch",
Type: "url",
Var1: "https://tweakers.net/pricewatch/1799060/lg-c2-42-inch-donkerzilveren-voet-zwart.html",
}
db.Create(&urlFilter)
xpathFilter := Filter{
WatchID: watch.ID,
Watch: watch,
ParentID: &urlFilter.ID,
Name: "price select",
Type: "xpath",
Var1: "//td[@class='shop-price']",
}
db.Create(&xpathFilter)
//bot, _ := tgbotapi.NewBotAPI(viper.GetString("telegram.token"))
@ -244,10 +265,6 @@ func main() {
router.POST("/watch/create", web.createWatch)
router.POST("/watch/delete", web.deleteWatch)
router.GET("/watch/view/:id/", web.viewWatch)
router.POST("/url/create/", web.createURL)
router.POST("/group/create/", web.createFilterGroup)
router.GET("/group/edit/:id", web.editGroup)
router.POST("/group/update", web.updateGroup)
router.POST("/filter/create/", web.createFilter)
router.POST("/filter/update/", web.updateFilter)
router.POST("/filter/delete/", web.deleteFilter)

View file

@ -8,33 +8,20 @@ type Watch struct {
gorm.Model
Name string `form:"watch_name" yaml:"watch_name" binding:"required" validate:"min=1"`
Interval int `form:"interval" yaml:"interval" binding:"required"`
URLs []URL
}
type URL struct {
gorm.Model
WatchID uint `form:"url_watch_id" yaml:"url_watch_id" binding:"required"`
Watch *Watch `form:"watch" yaml:"watch" validate:"omitempty"`
Name string `form:"url_name" yaml:"url_name" binding:"required" validate:"min=1"`
URL string `form:"url" yaml:"url" binding:"required,url" validate:"min=1"`
GroupFilters []FilterGroup
}
type FilterGroup struct {
gorm.Model
URLID uint `form:"group_url_id" yaml:"group_url_id" binding:"required"`
URL *URL
Name string `form:"group_name" yaml:"group_name" binding:"required" validate:"min=1"`
Type string `form:"group_type" yaml:"group_type" binding:"required" validate:"oneof=diff enum number bool"`
Filters []Filter
Filters []Filter
}
type Filter struct {
gorm.Model
FilterGroupID uint `form:"filter_group_id" yaml:"filter_group_id" binding:"required"`
FilterGroup *FilterGroup
Name string `form:"filter_name" yaml:"filter_name" binding:"required" validate:"min=1"`
Type string `form:"filter_type" yaml:"filter_type" binding:"required" validate:"oneof=xpath json css replace match substring"`
From string `form:"from" yaml:"from" binding:"required"`
To string `form:"to" yaml:"to" binding:"required"`
WatchID uint `form:"filter_watch_id" yaml:"filter_watch_id" binding:"required"`
Watch Watch
ParentID *uint `form:"parent_id" yaml:"parent_id"`
Parent *Filter `form:"parent_id" yaml:"parent_id"`
Name string `form:"filter_name" yaml:"filter_name" binding:"required" validate:"min=1"`
Type string `form:"filter_type" yaml:"filter_type" binding:"required" validate:"oneof=url xpath json css replace match substring"`
Var1 string `form:"var1" yaml:"var1" binding:"required"`
Var2 *string `form:"var2" yaml:"var2"`
Var3 *string `form:"var3" yaml:"var3"`
Filters []Filter `gorm:"-:all"`
results []string `gorm:"-:all"`
}

View file

@ -2,9 +2,7 @@ package main
import (
"bytes"
"io/ioutil"
"log"
"net/http"
"regexp"
"strconv"
"strings"
@ -15,170 +13,191 @@ import (
"golang.org/x/net/html"
)
func getGroupResult(group *FilterGroup) []string {
resp, err := http.Get(group.URL.URL)
if err != nil {
log.Print("Something went wrong loading", group.URL.URL)
return []string{}
func getFilterResults(filter *Filter) {
getFilterResult(filter)
for _, filter := range filter.Filters {
getFilterResults(&filter)
}
defer resp.Body.Close()
html, err := ioutil.ReadAll(resp.Body)
if err != nil {
log.Print("Something went wrong loading ", group.URL.URL)
return []string{}
}
resultStrings := []string{string(html)}
newStrings := []string{}
for _, filter := range group.Filters {
for _, resultString := range resultStrings {
getFilterResult(resultString, &filter, &newStrings)
}
resultStrings = newStrings
newStrings = nil
}
return resultStrings
}
func getFilterResult(s string, filter *Filter, newStrings *[]string) {
func getFilterResult(filter *Filter) {
switch {
case filter.Type == "xpath":
{
getFilterResultXPath(s, filter, newStrings)
getFilterResultXPath(filter)
}
case filter.Type == "json":
{
getFilterResultJSON(s, filter, newStrings)
getFilterResultJSON(filter)
}
case filter.Type == "css":
{
getFilterResultCSS(s, filter, newStrings)
getFilterResultCSS(filter)
}
case filter.Type == "replace":
{
getFilterResultReplace(s, filter, newStrings)
getFilterResultReplace(filter)
}
case filter.Type == "match":
{
getFilterResultMatch(s, filter, newStrings)
getFilterResultMatch(filter)
}
case filter.Type == "substring":
{
getFilterResultSubstring(s, filter, newStrings)
getFilterResultSubstring(filter)
}
default:
}
}
func getFilterResultXPath(s string, filter *Filter, newStrings *[]string) {
doc, err := htmlquery.Parse(strings.NewReader(s))
if err != nil {
log.Print(err)
func getFilterResultXPath(filter *Filter) {
if filter.Parent == nil {
log.Println("Filter", filter.Name, "called without parent for", filter.Type)
return
}
nodes, _ := htmlquery.QueryAll(doc, filter.From)
for _, node := range nodes {
var b bytes.Buffer
html.Render(&b, node)
*newStrings = append(*newStrings, html.UnescapeString(b.String()))
}
}
func getFilterResultJSON(s string, filter *Filter, newStrings *[]string) {
for _, result := range gjson.Get(s, filter.From).Array() {
*newStrings = append(*newStrings, result.String())
}
}
func getFilterResultCSS(s string, filter *Filter, newStrings *[]string) {
doc, err := html.Parse(strings.NewReader(s))
if err != nil {
log.Print(err)
return
}
sel, err := cascadia.Parse(filter.From)
if err != nil {
log.Print(err)
return
}
for _, node := range cascadia.QueryAll(doc, sel) {
var b bytes.Buffer
html.Render(&b, node)
*newStrings = append(*newStrings, html.UnescapeString(b.String()))
}
}
func getFilterResultReplace(s string, filter *Filter, newStrings *[]string) {
r, err := regexp.Compile(filter.From)
if err != nil {
log.Print(err)
return
}
*newStrings = append(*newStrings, r.ReplaceAllString(s, filter.To))
}
func getFilterResultMatch(s string, filter *Filter, newStrings *[]string) {
r, err := regexp.Compile(filter.From)
if err != nil {
log.Print(err)
return
}
for _, str := range r.FindAllString(s, -1) {
*newStrings = append(*newStrings, str)
}
}
func getFilterResultSubstring(s string, filter *Filter, newStrings *[]string) {
substrings := strings.Split(filter.From, ",")
var sb strings.Builder
asRunes := []rune(s)
for _, substring := range substrings {
if strings.Contains(substring, ":") {
from_to := strings.Split(substring, ":")
if len(from_to) != 2 {
return
}
fromStr := from_to[0]
var hasFrom bool = true
if fromStr == "" {
hasFrom = false
}
from64, err := strconv.ParseInt(fromStr, 10, 32)
var from = int(from64)
if hasFrom && err != nil {
return
} else if from < 0 {
from = len(asRunes) + from
}
toStr := from_to[1]
var hasTo bool = true
if toStr == "" {
hasTo = false
}
to64, err := strconv.ParseInt(toStr, 10, 32)
var to = int(to64)
if hasTo && err != nil {
return
} else if to < 0 {
to = len(asRunes) + to
}
if hasFrom && hasTo {
sb.WriteString(string(asRunes[from:to]))
} else if hasFrom {
sb.WriteString(string(asRunes[from:]))
} else if hasTo {
sb.WriteString(string(asRunes[:to]))
}
} else {
pos, err := strconv.ParseInt(substring, 10, 32)
if err != nil || pos < 0 {
return
}
sb.WriteRune(asRunes[pos])
for _, result := range filter.Parent.results {
doc, err := htmlquery.Parse(strings.NewReader(result))
if err != nil {
log.Print(err)
continue
}
nodes, _ := htmlquery.QueryAll(doc, filter.Var1)
for _, node := range nodes {
var b bytes.Buffer
html.Render(&b, node)
filter.results = append(filter.results, html.UnescapeString(b.String()))
}
}
*newStrings = append(*newStrings, sb.String())
}
// getFilterResultJSON evaluates the gjson path stored in filter.Var1 against
// each result of the parent filter and appends the string form of every
// element of the path's Array() to filter.results.
//
// A filter without a parent is logged and left untouched.
func getFilterResultJSON(filter *Filter) {
	parent := filter.Parent
	if parent == nil {
		log.Println("Filter", filter.Name, "called without parent for", filter.Type)
		return
	}

	// Snapshot the input slice once, exactly as a range expression would.
	inputs := parent.results
	for _, input := range inputs {
		hits := gjson.Get(input, filter.Var1).Array()
		for _, hit := range hits {
			filter.results = append(filter.results, hit.String())
		}
	}
}
// getFilterResultCSS applies the CSS selector in filter.Var1 to every result
// of the parent filter and appends the rendered, HTML-unescaped markup of each
// matching node to filter.results.
//
// A filter without a parent has no input; it is logged and left untouched.
// An invalid selector is logged once and produces no results.
func getFilterResultCSS(filter *Filter) {
	if filter.Parent == nil {
		log.Println("Filter", filter.Name, "called without parent for", filter.Type)
		return
	}

	// The selector does not depend on the input, so parse it a single time.
	sel, err := cascadia.Parse(filter.Var1)
	if err != nil {
		log.Print(err)
		return
	}

	// Read the parent's results, as the XPath and JSON filters do. Ranging
	// over filter.results would only ever see this filter's own (initially
	// empty) output, so nothing would be produced.
	for _, result := range filter.Parent.results {
		doc, err := html.Parse(strings.NewReader(result))
		if err != nil {
			log.Print(err)
			continue
		}
		for _, node := range cascadia.QueryAll(doc, sel) {
			var b bytes.Buffer
			if err := html.Render(&b, node); err != nil {
				// Do not store partially rendered markup.
				log.Print(err)
				continue
			}
			filter.results = append(filter.results, html.UnescapeString(b.String()))
		}
	}
}
// getFilterResultReplace runs a regular-expression replacement over every
// result of the parent filter and appends each rewritten string to
// filter.results.
//
// filter.Var1 is the pattern; filter.Var2 is the replacement text. A nil Var2
// is treated as the empty string, i.e. matches are deleted.
//
// A filter without a parent has no input; it is logged and left untouched.
// An invalid pattern is logged once and produces no results.
func getFilterResultReplace(filter *Filter) {
	if filter.Parent == nil {
		log.Println("Filter", filter.Name, "called without parent for", filter.Type)
		return
	}

	// The pattern is the same for every input, so compile it a single time.
	r, err := regexp.Compile(filter.Var1)
	if err != nil {
		log.Print(err)
		return
	}

	replacement := ""
	if filter.Var2 != nil {
		replacement = *filter.Var2
	}

	// Read the parent's results, as the XPath and JSON filters do. Ranging
	// over filter.results would only ever see this filter's own (initially
	// empty) output, so nothing would be produced.
	for _, result := range filter.Parent.results {
		filter.results = append(filter.results, r.ReplaceAllString(result, replacement))
	}
}
// getFilterResultMatch collects every match of the regular expression in
// filter.Var1 within each result of the parent filter and appends the matches,
// in order, to filter.results.
//
// A filter without a parent has no input; it is logged and left untouched.
// An invalid pattern is logged once and produces no results.
func getFilterResultMatch(filter *Filter) {
	if filter.Parent == nil {
		log.Println("Filter", filter.Name, "called without parent for", filter.Type)
		return
	}

	// The pattern is the same for every input, so compile it a single time.
	r, err := regexp.Compile(filter.Var1)
	if err != nil {
		log.Print(err)
		return
	}

	// Read the parent's results, as the XPath and JSON filters do. Ranging
	// over filter.results would only ever see this filter's own (initially
	// empty) output, so nothing would be produced.
	for _, result := range filter.Parent.results {
		filter.results = append(filter.results, r.FindAllString(result, -1)...)
	}
}
// substringSpan is one parsed element of a substring filter specification.
type substringSpan struct {
	from, to       int  // bounds as written; negative values count from the end
	hasFrom, hasTo bool // whether the respective bound was given
	single         bool // a lone index selecting the single rune at from
}

// parseSubstringSpans parses a comma-separated specification into spans.
// It reports false when any element is malformed.
func parseSubstringSpans(spec string) ([]substringSpan, bool) {
	var spans []substringSpan
	for _, part := range strings.Split(spec, ",") {
		if !strings.Contains(part, ":") {
			// A lone index must be a non-negative integer.
			pos, err := strconv.ParseInt(part, 10, 32)
			if err != nil || pos < 0 {
				return nil, false
			}
			spans = append(spans, substringSpan{from: int(pos), single: true})
			continue
		}

		bounds := strings.Split(part, ":")
		if len(bounds) != 2 {
			return nil, false
		}
		span := substringSpan{hasFrom: bounds[0] != "", hasTo: bounds[1] != ""}
		if span.hasFrom {
			from, err := strconv.ParseInt(bounds[0], 10, 32)
			if err != nil {
				return nil, false
			}
			span.from = int(from)
		}
		if span.hasTo {
			to, err := strconv.ParseInt(bounds[1], 10, 32)
			if err != nil {
				return nil, false
			}
			span.to = int(to)
		}
		spans = append(spans, span)
	}
	return spans, true
}

// apply returns the part of runes selected by the span. It reports false when
// the span does not fit inside runes.
func (s substringSpan) apply(runes []rune) (string, bool) {
	if s.single {
		if s.from >= len(runes) {
			return "", false
		}
		return string(runes[s.from]), true
	}
	if !s.hasFrom && !s.hasTo {
		// A bare ":" contributes nothing.
		return "", true
	}

	from, to := 0, len(runes)
	if s.hasFrom {
		from = s.from
		if from < 0 {
			from += len(runes)
		}
	}
	if s.hasTo {
		to = s.to
		if to < 0 {
			to += len(runes)
		}
	}
	if from < 0 || to > len(runes) || from > to {
		return "", false
	}
	return string(runes[from:to]), true
}

// getFilterResultSubstring builds one string per result of the parent filter
// by concatenating the rune ranges described in filter.Var1, and appends it to
// filter.results.
//
// filter.Var1 is a comma-separated list whose elements are either a single
// non-negative rune index ("3") or a range "from:to" in which either bound may
// be omitted and negative bounds count from the end of the input ("-5:",
// ":10", "2:-1"). Indices address runes, not bytes.
//
// A filter without a parent has no input; it is logged and left untouched.
// A malformed specification produces no results at all. An input that is too
// short for the specification is logged and skipped instead of panicking.
func getFilterResultSubstring(filter *Filter) {
	if filter.Parent == nil {
		log.Println("Filter", filter.Name, "called without parent for", filter.Type)
		return
	}

	// The specification is the same for every input, so parse it once.
	spans, ok := parseSubstringSpans(filter.Var1)
	if !ok {
		return
	}

	// Read the parent's results, as the XPath and JSON filters do. Ranging
	// over filter.results would only ever see this filter's own (initially
	// empty) output, so nothing would be produced.
	for _, result := range filter.Parent.results {
		runes := []rune(result)
		var sb strings.Builder
		fits := true
		for _, span := range spans {
			part, ok := span.apply(runes)
			if !ok {
				fits = false
				break
			}
			sb.WriteString(part)
		}
		if !fits {
			log.Println("Filter", filter.Name, "substring out of range for input of length", len(runes))
			continue
		}
		filter.results = append(filter.results, sb.String())
	}
}

View file

@ -57,11 +57,9 @@ func TestFilterXPath(t *testing.T) {
t.Run(testname, func(t *testing.T) {
want := []string{}
getFilterResultXPath(
HTML_STRING,
&Filter{
From: test.Query,
Var1: test.Query,
},
&want,
)
if !reflect.DeepEqual(test.Want, want) {
t.Errorf("Got %s, want %s", want, test.Want)
@ -86,11 +84,9 @@ func TestFilterJSON(t *testing.T) {
t.Run(testname, func(t *testing.T) {
want := []string{}
getFilterResultJSON(
JSON_STRING,
&Filter{
From: test.Query,
Var1: test.Query,
},
&want,
)
if !reflect.DeepEqual(test.Want, want) {
t.Errorf("Got %s, want %s", want, test.Want)
@ -116,11 +112,9 @@ func TestFilterCSS(t *testing.T) {
t.Run(testname, func(t *testing.T) {
want := []string{}
getFilterResultCSS(
HTML_STRING,
&Filter{
From: test.Query,
Var1: test.Query,
},
&want,
)
if !reflect.DeepEqual(test.Want, want) {
t.Errorf("Got %s, want %s", want, test.Want)
@ -149,11 +143,9 @@ func TestFilterReplace(t *testing.T) {
t.Run(testname, func(t *testing.T) {
want := []string{test.Want}
getFilterResultReplace(
test.Input,
&Filter{
From: test.Query,
Var1: test.Query,
},
&want,
)
if want[0] != test.Want {
t.Errorf("Got %s, want %s", want[0], test.Want)
@ -180,11 +172,9 @@ func TestFilterMatch(t *testing.T) {
t.Run(testname, func(t *testing.T) {
want := []string{}
getFilterResultMatch(
test.Input,
&Filter{
From: test.Query,
Var1: test.Query,
},
&want,
)
if !reflect.DeepEqual(test.Want, want) {
t.Errorf("Got %s, want %s", want, test.Want)
@ -229,11 +219,9 @@ func TestFilterSubstring(t *testing.T) {
t.Run(testname, func(t *testing.T) {
want := []string{test.Want}
getFilterResultSubstring(
test.Input,
&Filter{
From: test.Query,
Var1: test.Query,
},
&want,
)
if want[0] != test.Want {
t.Errorf("Got %s, want %s", want[0], test.Want)

View file

@ -2,103 +2,23 @@
<div class="container">
<div class="row">
<div class="col h1">
{{ .Name }}
{{ .Watch.Name }}
</div>
<div class="col h1">
{{ $.Interval }}
{{ .Watch.Interval }}
</div>
</div>
</div>
{{ range .URLs }}
<div class="card mb-5">
<div class="card-header text-center text-white bg-dark ">
<div class="h3">{{ .Name }}</div>
</div>
<div class="card-header bg-secondary text-white text-center">
<div>{{ .URL }}</div>
</div>
<div class="card-body">
{{ range .GroupFilters }}
<div class="card mb-2">
<div class="card-header">
<div class="row">
<div class="col h4 text-start" >{{ .Name }}</div>
<div class="col h5 text-end"><a class="btn btn-success btn-sm" href="/group/edit/{{ .ID }}">Edit</a></div>
</div>
<div class="text-center text-muted">
{{ .Type }}
</div>
</div>
<div class="card-body">
<input type="hidden" name="group_id" value="{{ .ID }}" >
<input type="hidden" name="w_id" value="{{ $.ID }}" >
<table class="table table-hover caption-top">
<tbody>
{{ if .Filters }}
{{ range .Filters }}
<tr>
<td>{{ .Name }}</td>
<td>{{ .Type }}</td>
<td>{{ .From }}</td>
<td>{{ .To }}</td>
<td></td>
</tr>
{{ end }}
{{ else }}
<tr>
<td class="text-center h3">No filters yet, click "Edit" to add</td>
</tr>
{{ end }}
</tbody>
</table>
</div>
</div>
{{ end }}
<form action="/group/create" method="post">
<input type="hidden" name="group_url_id" value="{{ .ID }}" >
<input type="hidden" name="w_id" value="{{ $.ID }}" >
<table class="table table-hover caption-top">
<tbody>
<tr>
<td>
<input type="text" class="form-control" name="group_name" placeholder="Group Name">
</td>
<td>
<select class="form-control" id="group_type" name="group_type">
<option value="diff">Difference</option>
<option value="enum">Enum</option>
<option value="number">Number</option>
<option value="bool">Boolean</option>
<!-- additions/changes should also be added to GroupFilter.Type oneof validator -->
</select>
</td>
<td>
<button class="btn btn-primary">Add Group</button>
</td>
</tr>
</tbody>
</table>
</form>
</div>
</div>
{{ end }}
<div class="card mb-5">
<div class="card-header text-center bg-light ">
<div class="h5">New URL</div>
</div>
<div class="card-body">
<form action="/url/create" method="post">
<div class="form-group mb-2">
<input type="text" class="form-control" name="url_name" id="urlName" placeholder="URL Name">
</div>
<div class="form-group mb-2">
<input type="url" class="form-control" name="url" id="url" placeholder="URL">
</div>
<input type="hidden" name="url_watch_id" value="{{ .ID }}" >
<input class="btn btn-primary" type="submit" value="Create URL">
</form>
</div>
</div>
<table class="table-sm">
{{ range .Filters }}
<tr>
<td>{{ .Depth }}</td>
<td>{{ .Filter.ID }}</td>
<td>{{ .Filter.ParentID }}</td>
<td>{{ .Filter.Name }}</td>
</tr>
{{ end }}
</table>
{{ end }}

10
util.go
View file

@ -12,16 +12,6 @@ func bindAndValidateWatch(watch *Watch, c *gin.Context) (map[string]string, erro
return validate(err), err
}
func bindAndValidateURL(url *URL, c *gin.Context) (map[string]string, error) {
err := c.ShouldBind(url)
return validate(err), err
}
func bindAndValidateGroup(group *FilterGroup, c *gin.Context) (map[string]string, error) {
err := c.ShouldBind(group)
return validate(err), err
}
func bindAndValidateFilter(filter *Filter, c *gin.Context) (map[string]string, error) {
err := c.ShouldBind(filter)
return validate(err), err