refactor to separate URL fetching into function
This commit is contained in:
parent
0c4431a73e
commit
de7bb7c57a
1 changed files with 28 additions and 41 deletions
69
scraping.go
69
scraping.go
|
@ -247,30 +247,10 @@ func getFilterResultURL(filter *Filter, urlCache map[string]string, debug bool)
|
||||||
filter.Results = append(filter.Results, val)
|
filter.Results = append(filter.Results, val)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
str, err := getURLContent(filter, fetchURL)
|
||||||
var httpClient *http.Client
|
|
||||||
if viper.IsSet("proxy.proxy_url") {
|
|
||||||
proxyUrl, err := url.Parse(viper.GetString("proxy.proxy_url"))
|
|
||||||
if err != nil {
|
|
||||||
log.Println("Could not parse proxy url, check config")
|
|
||||||
filter.log("Could not parse proxy url, check config")
|
|
||||||
return
|
|
||||||
}
|
|
||||||
httpClient = &http.Client{Transport: &http.Transport{Proxy: http.ProxyURL(proxyUrl)}}
|
|
||||||
} else {
|
|
||||||
httpClient = &http.Client{}
|
|
||||||
}
|
|
||||||
resp, err := httpClient.Get(fetchURL)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
filter.log("Could not fetch url: ", fetchURL, " - ", err)
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
body, err := ioutil.ReadAll(resp.Body)
|
|
||||||
if err != nil {
|
|
||||||
filter.log("Could not fetch url: ", fetchURL, " - ", err)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
str := string(body)
|
|
||||||
filter.Results = append(filter.Results, str)
|
filter.Results = append(filter.Results, str)
|
||||||
if debug {
|
if debug {
|
||||||
urlCache[fetchURL] = str
|
urlCache[fetchURL] = str
|
||||||
|
@ -287,29 +267,10 @@ func getFilterResultURLs(filter *Filter, urlCache map[string]string, debug bool)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
var httpClient *http.Client
|
str, err := getURLContent(filter, fetchURL)
|
||||||
if viper.IsSet("proxy.proxy_url") {
|
|
||||||
proxyUrl, err := url.Parse(viper.GetString("proxy.proxy_url"))
|
|
||||||
if err != nil {
|
|
||||||
log.Println("Could not parse proxy url, check config")
|
|
||||||
filter.log("Could not parse proxy url, check config")
|
|
||||||
return
|
|
||||||
}
|
|
||||||
httpClient = &http.Client{Transport: &http.Transport{Proxy: http.ProxyURL(proxyUrl)}}
|
|
||||||
} else {
|
|
||||||
httpClient = &http.Client{}
|
|
||||||
}
|
|
||||||
resp, err := httpClient.Get(fetchURL)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
filter.log("Could not fetch url: ", fetchURL, " - ", err)
|
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
body, err := ioutil.ReadAll(resp.Body)
|
|
||||||
if err != nil {
|
|
||||||
filter.log("Could not fetch url: ", fetchURL, " - ", err)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
str := string(body)
|
|
||||||
filter.Results = append(filter.Results, str)
|
filter.Results = append(filter.Results, str)
|
||||||
if debug {
|
if debug {
|
||||||
urlCache[fetchURL] = str
|
urlCache[fetchURL] = str
|
||||||
|
@ -318,6 +279,32 @@ func getFilterResultURLs(filter *Filter, urlCache map[string]string, debug bool)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func getURLContent(filter *Filter, fetchURL string) (string, error) {
|
||||||
|
var httpClient *http.Client
|
||||||
|
if viper.IsSet("proxy.proxy_url") {
|
||||||
|
proxyUrl, err := url.Parse(viper.GetString("proxy.proxy_url"))
|
||||||
|
if err != nil {
|
||||||
|
log.Println("Could not parse proxy url, check config")
|
||||||
|
filter.log("Could not parse proxy url, check config")
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
httpClient = &http.Client{Transport: &http.Transport{Proxy: http.ProxyURL(proxyUrl)}}
|
||||||
|
} else {
|
||||||
|
httpClient = &http.Client{}
|
||||||
|
}
|
||||||
|
resp, err := httpClient.Get(fetchURL)
|
||||||
|
if err != nil {
|
||||||
|
filter.log("Could not fetch url: ", fetchURL, " - ", err)
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
body, err := ioutil.ReadAll(resp.Body)
|
||||||
|
if err != nil {
|
||||||
|
filter.log("Could not fetch url: ", fetchURL, " - ", err)
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
return string(body), nil
|
||||||
|
}
|
||||||
|
|
||||||
func getFilterResultXPath(filter *Filter) {
|
func getFilterResultXPath(filter *Filter) {
|
||||||
for _, parent := range filter.Parents {
|
for _, parent := range filter.Parents {
|
||||||
for _, result := range parent.Results {
|
for _, result := range parent.Results {
|
||||||
|
|
Loading…
Add table
Reference in a new issue