added browserless support
This commit is contained in:
parent
c0af47bef7
commit
a74d6a0e4b
2 changed files with 51 additions and 18 deletions
66
scraping.go
66
scraping.go
|
@@ -2,6 +2,7 @@ package main
|
|||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"html/template"
|
||||
"io/ioutil"
|
||||
|
@@ -280,28 +281,59 @@ func getFilterResultURLs(filter *Filter, urlCache map[string]string, debug bool)
|
|||
}
|
||||
|
||||
func getURLContent(filter *Filter, fetchURL string) (string, error) {
|
||||
var httpClient *http.Client
|
||||
if viper.IsSet("proxy.proxy_url") {
|
||||
proxyUrl, err := url.Parse(viper.GetString("proxy.proxy_url"))
|
||||
var body []byte
|
||||
if viper.IsSet("browserless.url") {
|
||||
browserlessURL := viper.GetString("browserless.url")
|
||||
data := struct {
|
||||
URL string `json:"url"`
|
||||
}{
|
||||
URL: fetchURL,
|
||||
}
|
||||
jsn, err := json.Marshal(data)
|
||||
if err != nil {
|
||||
log.Println("Could not parse proxy url, check config")
|
||||
filter.log("Could not parse proxy url, check config")
|
||||
log.Println("Could not marshal url:", err)
|
||||
filter.log("Could not marshal url:", err)
|
||||
return "", err
|
||||
}
|
||||
resp, err := http.Post(browserlessURL, "application/json", bytes.NewBuffer(jsn))
|
||||
if err != nil {
|
||||
log.Println("Could not get browserless response content:", err)
|
||||
filter.log("Could not get browserless response content:", err)
|
||||
return "", err
|
||||
}
|
||||
body, err = ioutil.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
log.Println("Could not fetch url through browserless: ", fetchURL, " - ", err)
|
||||
filter.log("Could not fetch url through browserless: ", fetchURL, " - ", err)
|
||||
return "", err
|
||||
}
|
||||
httpClient = &http.Client{Transport: &http.Transport{Proxy: http.ProxyURL(proxyUrl)}}
|
||||
} else {
|
||||
httpClient = &http.Client{}
|
||||
}
|
||||
resp, err := httpClient.Get(fetchURL)
|
||||
if err != nil {
|
||||
filter.log("Could not fetch url: ", fetchURL, " - ", err)
|
||||
return "", err
|
||||
}
|
||||
body, err := ioutil.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
filter.log("Could not fetch url: ", fetchURL, " - ", err)
|
||||
return "", err
|
||||
var httpClient *http.Client
|
||||
if viper.IsSet("proxy.proxy_url") {
|
||||
proxyUrl, err := url.Parse(viper.GetString("proxy.proxy_url"))
|
||||
if err != nil {
|
||||
log.Println("Could not parse proxy url, check config")
|
||||
filter.log("Could not parse proxy url, check config")
|
||||
return "", err
|
||||
}
|
||||
httpClient = &http.Client{Transport: &http.Transport{Proxy: http.ProxyURL(proxyUrl)}}
|
||||
} else {
|
||||
httpClient = &http.Client{}
|
||||
}
|
||||
resp, err := httpClient.Get(fetchURL)
|
||||
if err != nil {
|
||||
log.Println("Could not fetch url: ", fetchURL, " - ", err)
|
||||
filter.log("Could not fetch url: ", fetchURL, " - ", err)
|
||||
return "", err
|
||||
}
|
||||
body, err = ioutil.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
log.Println("Could not fetch url: ", fetchURL, " - ", err)
|
||||
filter.log("Could not fetch url: ", fetchURL, " - ", err)
|
||||
return "", err
|
||||
}
|
||||
}
|
||||
|
||||
return string(body), nil
|
||||
}
|
||||
|
||||
|
|
3
todo.md
3
todo.md
|
@@ -1,7 +1,8 @@
|
|||
# Todo
|
||||
- comments
|
||||
- run/fix staticcheck
|
||||
- add browserless support ?
|
||||
- ~~add browserless support~~
|
||||
- readme
|
||||
- show startup warnings on page?
|
||||
- add compose templates for:
|
||||
- sqlite
|
||||
|
|
Loading…
Add table
Reference in a new issue