added browserless support
This commit is contained in:
parent c0af47bef7
commit a74d6a0e4b
2 changed files with 51 additions and 18 deletions
scraping.go | 66

@@ -2,6 +2,7 @@ package main
 
 import (
     "bytes"
+    "encoding/json"
     "fmt"
     "html/template"
     "io/ioutil"
@@ -280,28 +281,59 @@ func getFilterResultURLs(filter *Filter, urlCache map[string]string, debug bool)
 }
 
 func getURLContent(filter *Filter, fetchURL string) (string, error) {
-    var httpClient *http.Client
-    if viper.IsSet("proxy.proxy_url") {
-        proxyUrl, err := url.Parse(viper.GetString("proxy.proxy_url"))
-        if err != nil {
-            log.Println("Could not parse proxy url, check config")
-            filter.log("Could not parse proxy url, check config")
-            return "", err
-        }
-        httpClient = &http.Client{Transport: &http.Transport{Proxy: http.ProxyURL(proxyUrl)}}
-    } else {
-        httpClient = &http.Client{}
-    }
-    resp, err := httpClient.Get(fetchURL)
-    if err != nil {
-        filter.log("Could not fetch url: ", fetchURL, " - ", err)
-        return "", err
-    }
-    body, err := ioutil.ReadAll(resp.Body)
-    if err != nil {
-        filter.log("Could not fetch url: ", fetchURL, " - ", err)
-        return "", err
-    }
+    var body []byte
+    if viper.IsSet("browserless.url") {
+        browserlessURL := viper.GetString("browserless.url")
+        data := struct {
+            URL string `json:"url"`
+        }{
+            URL: fetchURL,
+        }
+        jsn, err := json.Marshal(data)
+        if err != nil {
+            log.Println("Could not marshal url:", err)
+            filter.log("Could not marshal url:", err)
+            return "", err
+        }
+        resp, err := http.Post(browserlessURL, "application/json", bytes.NewBuffer(jsn))
+        if err != nil {
+            log.Println("Could not get browserless response content:", err)
+            filter.log("Could not get browserless response content:", err)
+            return "", err
+        }
+        body, err = ioutil.ReadAll(resp.Body)
+        if err != nil {
+            log.Println("Could not fetch url through browserless: ", fetchURL, " - ", err)
+            filter.log("Could not fetch url through browserless: ", fetchURL, " - ", err)
+            return "", err
+        }
+
+    } else {
+        var httpClient *http.Client
+        if viper.IsSet("proxy.proxy_url") {
+            proxyUrl, err := url.Parse(viper.GetString("proxy.proxy_url"))
+            if err != nil {
+                log.Println("Could not parse proxy url, check config")
+                filter.log("Could not parse proxy url, check config")
+                return "", err
+            }
+            httpClient = &http.Client{Transport: &http.Transport{Proxy: http.ProxyURL(proxyUrl)}}
+        } else {
+            httpClient = &http.Client{}
+        }
+        resp, err := httpClient.Get(fetchURL)
+        if err != nil {
+            log.Println("Could not fetch url: ", fetchURL, " - ", err)
+            filter.log("Could not fetch url: ", fetchURL, " - ", err)
+            return "", err
+        }
+        body, err = ioutil.ReadAll(resp.Body)
+        if err != nil {
+            log.Println("Could not fetch url: ", fetchURL, " - ", err)
+            filter.log("Could not fetch url: ", fetchURL, " - ", err)
+            return "", err
+        }
+    }
 
     return string(body), nil
 }
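For reference, below is a minimal standalone sketch of the request shape the new browserless branch sends: a JSON body of the form {"url": "<page to render>"} POSTed to the configured browserless.url, with the response body read back as the rendered HTML. The endpoint http://localhost:3000/content, the example target URL, and the assumption that browserless.url points at browserless's content endpoint are illustrative guesses for local testing, not something this commit defines.

package main

import (
    "bytes"
    "encoding/json"
    "fmt"
    "io/ioutil"
    "log"
    "net/http"
)

func main() {
    // Same JSON shape getURLContent marshals: {"url": "<page to render>"}.
    payload, err := json.Marshal(struct {
        URL string `json:"url"`
    }{URL: "https://example.com"})
    if err != nil {
        log.Fatal(err)
    }

    // POST the payload to the (assumed) local browserless instance; the
    // response body is the rendered HTML of the requested page.
    resp, err := http.Post("http://localhost:3000/content", "application/json", bytes.NewBuffer(payload))
    if err != nil {
        log.Fatal(err)
    }
    defer resp.Body.Close()

    body, err := ioutil.ReadAll(resp.Body)
    if err != nil {
        log.Fatal(err)
    }
    fmt.Printf("fetched %d bytes of rendered HTML\n", len(body))
}

This mirrors what the new getURLContent branch does with json.Marshal, http.Post, and ioutil.ReadAll once viper reports browserless.url as set; the proxy and plain-client paths are unchanged apart from moving into the else branch.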
todo.md | 3

@@ -1,7 +1,8 @@
 # Todo
 - comments
 - run/fix staticcheck
-- add browserless support ?
+- ~~add browserless support~~
+- readme
 - show startup warnings on page?
 - add compose templates for:
   - sqlite