package main import ( "bytes" "io/ioutil" "log" "net/http" "regexp" "strconv" "strings" "github.com/antchfx/htmlquery" "golang.org/x/net/html" ) func getGroupResult(group *FilterGroup) []string { resp, err := http.Get(group.URL.URL) if err != nil { log.Print("Something went wrong loading", group.URL.URL) return []string{} } defer resp.Body.Close() html, err := ioutil.ReadAll(resp.Body) if err != nil { log.Print("Something went wrong loading ", group.URL.URL) return []string{} } resultStrings := []string{string(html)} newStrings := []string{} for _, filter := range group.Filters { for _, resultString := range resultStrings { getFilterResult(resultString, &filter, &newStrings) } resultStrings = newStrings log.Println(resultStrings) } return resultStrings } func getFilterResult(s string, filter *Filter, newStrings *[]string) { switch { case filter.Type == "css": { //getFilterResultReplace(s, filter, newStrings) } case filter.Type == "xpath": { getFilterResultXPath(s, filter, newStrings) } case filter.Type == "replace": { //getFilterResultReplace(s, filter, newStrings) } case filter.Type == "regex": { //getFilterResultRegex(s, filter, newStrings) } case filter.Type == "substring": { //getFilterResultSubstring(s, filter, newStrings) } default: } } func getFilterResultXPath(s string, filter *Filter, newStrings *[]string) { doc, err := htmlquery.Parse(strings.NewReader(s)) if err != nil { log.Print(err) return } nodes, _ := htmlquery.QueryAll(doc, filter.From) for _, node := range nodes { var b bytes.Buffer html.Render(&b, node) *newStrings = append(*newStrings, html.UnescapeString(b.String())) } } func getFilterResultReplace(s string, filter *Filter) string { return strings.ReplaceAll(s, filter.From, filter.To) } func getFilterResultRegex(s string, filter *Filter) string { regex, err := regexp.Compile(filter.From) if err != nil { return s } return regex.ReplaceAllString(s, filter.To) } func getFilterResultSubstring(s string, filter *Filter) string { substrings := strings.Split(filter.From, ",") var sb strings.Builder asRunes := []rune(s) for _, substring := range substrings { if strings.Contains(substring, ":") { from_to := strings.Split(substring, ":") if len(from_to) != 2 { return s } fromStr := from_to[0] var hasFrom bool = true if fromStr == "" { hasFrom = false } from64, err := strconv.ParseInt(fromStr, 10, 32) var from = int(from64) if hasFrom && err != nil { return s } else if from < 0 { from = len(asRunes) + from } toStr := from_to[1] var hasTo bool = true if toStr == "" { hasTo = false } to64, err := strconv.ParseInt(toStr, 10, 32) var to = int(to64) if hasTo && err != nil { return s } else if to < 0 { to = len(asRunes) + to } if hasFrom && hasTo { sb.WriteString(string(asRunes[from:to])) } else if hasFrom { sb.WriteString(string(asRunes[from:])) } else if hasTo { sb.WriteString(string(asRunes[:to])) } } else { pos, err := strconv.ParseInt(substring, 10, 32) if err != nil || pos < 0 { return s } sb.WriteRune(asRunes[pos]) } } return sb.String() }