184 lines
4.1 KiB
Go
184 lines
4.1 KiB
Go
package main
|
|
|
|
import (
|
|
"bytes"
|
|
"io/ioutil"
|
|
"log"
|
|
"net/http"
|
|
"regexp"
|
|
"strconv"
|
|
"strings"
|
|
|
|
"github.com/andybalholm/cascadia"
|
|
"github.com/antchfx/htmlquery"
|
|
"github.com/tidwall/gjson"
|
|
"golang.org/x/net/html"
|
|
)
|
|
|
|
func getGroupResult(group *FilterGroup) []string {
|
|
resp, err := http.Get(group.URL.URL)
|
|
if err != nil {
|
|
log.Print("Something went wrong loading", group.URL.URL)
|
|
return []string{}
|
|
}
|
|
defer resp.Body.Close()
|
|
html, err := ioutil.ReadAll(resp.Body)
|
|
if err != nil {
|
|
log.Print("Something went wrong loading ", group.URL.URL)
|
|
return []string{}
|
|
}
|
|
resultStrings := []string{string(html)}
|
|
newStrings := []string{}
|
|
for _, filter := range group.Filters {
|
|
for _, resultString := range resultStrings {
|
|
getFilterResult(resultString, &filter, &newStrings)
|
|
}
|
|
resultStrings = newStrings
|
|
newStrings = nil
|
|
}
|
|
return resultStrings
|
|
}
|
|
|
|
func getFilterResult(s string, filter *Filter, newStrings *[]string) {
|
|
switch {
|
|
case filter.Type == "xpath":
|
|
{
|
|
getFilterResultXPath(s, filter, newStrings)
|
|
}
|
|
case filter.Type == "json":
|
|
{
|
|
getFilterResultJSON(s, filter, newStrings)
|
|
}
|
|
case filter.Type == "css":
|
|
{
|
|
getFilterResultCSS(s, filter, newStrings)
|
|
}
|
|
case filter.Type == "replace":
|
|
{
|
|
getFilterResultReplace(s, filter, newStrings)
|
|
}
|
|
case filter.Type == "match":
|
|
{
|
|
getFilterResultMatch(s, filter, newStrings)
|
|
}
|
|
case filter.Type == "substring":
|
|
{
|
|
getFilterResultSubstring(s, filter, newStrings)
|
|
}
|
|
default:
|
|
|
|
}
|
|
}
|
|
|
|
func getFilterResultXPath(s string, filter *Filter, newStrings *[]string) {
|
|
doc, err := htmlquery.Parse(strings.NewReader(s))
|
|
if err != nil {
|
|
log.Print(err)
|
|
return
|
|
}
|
|
nodes, _ := htmlquery.QueryAll(doc, filter.From)
|
|
for _, node := range nodes {
|
|
var b bytes.Buffer
|
|
html.Render(&b, node)
|
|
*newStrings = append(*newStrings, html.UnescapeString(b.String()))
|
|
}
|
|
}
|
|
|
|
func getFilterResultJSON(s string, filter *Filter, newStrings *[]string) {
|
|
|
|
for _, result := range gjson.Get(s, filter.From).Array() {
|
|
*newStrings = append(*newStrings, result.String())
|
|
}
|
|
}
|
|
|
|
func getFilterResultCSS(s string, filter *Filter, newStrings *[]string) {
|
|
doc, err := html.Parse(strings.NewReader(s))
|
|
if err != nil {
|
|
log.Print(err)
|
|
return
|
|
}
|
|
sel, err := cascadia.Parse(filter.From)
|
|
if err != nil {
|
|
log.Print(err)
|
|
return
|
|
}
|
|
for _, node := range cascadia.QueryAll(doc, sel) {
|
|
var b bytes.Buffer
|
|
html.Render(&b, node)
|
|
*newStrings = append(*newStrings, html.UnescapeString(b.String()))
|
|
}
|
|
}
|
|
|
|
func getFilterResultReplace(s string, filter *Filter, newStrings *[]string) {
|
|
r, err := regexp.Compile(filter.From)
|
|
if err != nil {
|
|
log.Print(err)
|
|
return
|
|
}
|
|
*newStrings = append(*newStrings, r.ReplaceAllString(s, filter.To))
|
|
}
|
|
|
|
func getFilterResultMatch(s string, filter *Filter, newStrings *[]string) {
|
|
r, err := regexp.Compile(filter.From)
|
|
if err != nil {
|
|
log.Print(err)
|
|
return
|
|
}
|
|
for _, str := range r.FindAllString(s, -1) {
|
|
|
|
*newStrings = append(*newStrings, str)
|
|
}
|
|
}
|
|
|
|
func getFilterResultSubstring(s string, filter *Filter, newStrings *[]string) {
|
|
substrings := strings.Split(filter.From, ",")
|
|
var sb strings.Builder
|
|
asRunes := []rune(s)
|
|
|
|
for _, substring := range substrings {
|
|
if strings.Contains(substring, ":") {
|
|
from_to := strings.Split(substring, ":")
|
|
if len(from_to) != 2 {
|
|
return
|
|
}
|
|
fromStr := from_to[0]
|
|
var hasFrom bool = true
|
|
if fromStr == "" {
|
|
hasFrom = false
|
|
}
|
|
from64, err := strconv.ParseInt(fromStr, 10, 32)
|
|
var from = int(from64)
|
|
if hasFrom && err != nil {
|
|
return
|
|
} else if from < 0 {
|
|
from = len(asRunes) + from
|
|
}
|
|
toStr := from_to[1]
|
|
var hasTo bool = true
|
|
if toStr == "" {
|
|
hasTo = false
|
|
}
|
|
to64, err := strconv.ParseInt(toStr, 10, 32)
|
|
var to = int(to64)
|
|
if hasTo && err != nil {
|
|
return
|
|
} else if to < 0 {
|
|
to = len(asRunes) + to
|
|
}
|
|
if hasFrom && hasTo {
|
|
sb.WriteString(string(asRunes[from:to]))
|
|
} else if hasFrom {
|
|
sb.WriteString(string(asRunes[from:]))
|
|
} else if hasTo {
|
|
sb.WriteString(string(asRunes[:to]))
|
|
}
|
|
} else {
|
|
pos, err := strconv.ParseInt(substring, 10, 32)
|
|
if err != nil || pos < 0 {
|
|
return
|
|
}
|
|
sb.WriteRune(asRunes[pos])
|
|
}
|
|
}
|
|
*newStrings = append(*newStrings, sb.String())
|
|
}
|