split get url(s) into two functions one 'normal' and one browserless get

This commit is contained in:
BroodjeAap 2023-01-31 19:59:15 +00:00
parent a8ddee70fb
commit 6057506f7b
5 changed files with 225 additions and 48 deletions

View file

@ -3,6 +3,7 @@ package main
import ( import (
"bytes" "bytes"
"encoding/json" "encoding/json"
"errors"
"fmt" "fmt"
"html/template" "html/template"
"io/ioutil" "io/ioutil"
@ -126,6 +127,14 @@ func getFilterResult(filters []Filter, filter *Filter, watch *Watch, web *Web, d
{ {
getFilterResultURLs(filter, web.urlCache, debug) getFilterResultURLs(filter, web.urlCache, debug)
} }
case filter.Type == "bgurl":
{
getFilterResultBrowserlessURL(filter, web.urlCache, debug)
}
case filter.Type == "bgurls":
{
getFilterResultBrowserlessURLs(filter, web.urlCache, debug)
}
case filter.Type == "xpath": case filter.Type == "xpath":
{ {
getFilterResultXPath(filter) getFilterResultXPath(filter)
@ -250,6 +259,8 @@ func getFilterResultURL(filter *Filter, urlCache map[string]string, debug bool)
} }
str, err := getURLContent(filter, fetchURL) str, err := getURLContent(filter, fetchURL)
if err != nil { if err != nil {
log.Println("Could not fetch url: ", fetchURL, " - ", err)
filter.log("Could not fetch url: ", fetchURL, " - ", err)
return return
} }
filter.Results = append(filter.Results, str) filter.Results = append(filter.Results, str)
@ -270,6 +281,8 @@ func getFilterResultURLs(filter *Filter, urlCache map[string]string, debug bool)
str, err := getURLContent(filter, fetchURL) str, err := getURLContent(filter, fetchURL)
if err != nil { if err != nil {
log.Println("Could not fetch url: ", fetchURL, " - ", err)
filter.log("Could not fetch url: ", fetchURL, " - ", err)
continue continue
} }
filter.Results = append(filter.Results, str) filter.Results = append(filter.Results, str)
@ -281,62 +294,102 @@ func getFilterResultURLs(filter *Filter, urlCache map[string]string, debug bool)
} }
func getURLContent(filter *Filter, fetchURL string) (string, error) { func getURLContent(filter *Filter, fetchURL string) (string, error) {
var body []byte var httpClient *http.Client
if viper.IsSet("browserless.url") { if viper.IsSet("proxy.proxy_url") {
browserlessURL := viper.GetString("browserless.url") proxyUrl, err := url.Parse(viper.GetString("proxy.proxy_url"))
data := struct {
URL string `json:"url"`
}{
URL: fetchURL,
}
jsn, err := json.Marshal(data)
if err != nil { if err != nil {
log.Println("Could not marshal url:", err)
filter.log("Could not marshal url:", err)
return "", err
}
resp, err := http.Post(browserlessURL, "application/json", bytes.NewBuffer(jsn))
if err != nil {
log.Println("Could not get browserless response content:", err)
filter.log("Could not get browserless response content:", err)
return "", err
}
body, err = ioutil.ReadAll(resp.Body)
if err != nil {
log.Println("Could not fetch url through browserless: ", fetchURL, " - ", err)
filter.log("Could not fetch url through browserless: ", fetchURL, " - ", err)
return "", err return "", err
} }
httpClient = &http.Client{Transport: &http.Transport{Proxy: http.ProxyURL(proxyUrl)}}
} else { } else {
var httpClient *http.Client httpClient = &http.Client{}
if viper.IsSet("proxy.proxy_url") { }
proxyUrl, err := url.Parse(viper.GetString("proxy.proxy_url")) resp, err := httpClient.Get(fetchURL)
if err != nil { if err != nil {
log.Println("Could not parse proxy url, check config") return "", err
filter.log("Could not parse proxy url, check config") }
return "", err body, err := ioutil.ReadAll(resp.Body)
} if err != nil {
httpClient = &http.Client{Transport: &http.Transport{Proxy: http.ProxyURL(proxyUrl)}} return "", err
} else {
httpClient = &http.Client{}
}
resp, err := httpClient.Get(fetchURL)
if err != nil {
log.Println("Could not fetch url: ", fetchURL, " - ", err)
filter.log("Could not fetch url: ", fetchURL, " - ", err)
return "", err
}
body, err = ioutil.ReadAll(resp.Body)
if err != nil {
log.Println("Could not fetch url: ", fetchURL, " - ", err)
filter.log("Could not fetch url: ", fetchURL, " - ", err)
return "", err
}
} }
return string(body), nil return string(body), nil
} }
// getFilterResultBrowserlessURL fetches the URL held in filter.Var1 through
// getBrowserlessURLContent and appends the returned content to filter.Results.
//
// In debug mode a previously stored entry in urlCache is reused instead of
// fetching again, and freshly fetched content is stored back into urlCache.
// Cache keys are the URL prefixed with "b" (presumably to keep browserless
// entries apart from other entries in the same map; verify against the
// non-browserless fetchers).
//
// On a fetch error the failure is logged and filter.Results is left untouched.
func getFilterResultBrowserlessURL(filter *Filter, urlCache map[string]string, debug bool) {
	target := filter.Var1
	cacheKey := "b" + target

	// The cache is only consulted while debugging.
	if debug {
		if cached, ok := urlCache[cacheKey]; ok {
			filter.Results = append(filter.Results, cached)
			return
		}
	}

	content, fetchErr := getBrowserlessURLContent(filter, target)
	if fetchErr != nil {
		log.Println("Could not fetch url: ", target, " - ", fetchErr)
		filter.log("Could not fetch url: ", target, " - ", fetchErr)
		return
	}

	filter.Results = append(filter.Results, content)
	if debug {
		urlCache[cacheKey] = content
	}
}
// getFilterResultBrowserlessURLs treats every result of every parent filter
// as a URL, fetches each one through getBrowserlessURLContent, and appends the
// returned content to filter.Results in iteration order.
//
// In debug mode a previously stored entry in urlCache (keyed by "b" + URL) is
// reused instead of fetching again, and freshly fetched content is stored back
// into urlCache.
//
// A URL that fails to fetch is logged and skipped; the remaining URLs are
// still processed.
func getFilterResultBrowserlessURLs(filter *Filter, urlCache map[string]string, debug bool) {
	for _, parent := range filter.Parents {
		for _, target := range parent.Results {
			cacheKey := "b" + target

			// The cache is only consulted while debugging.
			if debug {
				if cached, ok := urlCache[cacheKey]; ok {
					filter.Results = append(filter.Results, cached)
					continue
				}
			}

			content, fetchErr := getBrowserlessURLContent(filter, target)
			if fetchErr != nil {
				log.Println("Could not fetch url: ", target, " - ", fetchErr)
				filter.log("Could not fetch url: ", target, " - ", fetchErr)
				continue
			}

			filter.Results = append(filter.Results, content)
			if debug {
				urlCache[cacheKey] = content
			}
		}
	}
}
// getBrowserlessURLContent fetches fetchURL through the browserless service
// configured under the "browserless.url" key and returns the response body as
// a string.
//
// The request is an HTTP POST to the configured endpoint with a JSON payload
// of the form {"url": fetchURL}.
//
// An error is returned when "browserless.url" is not set, when the payload
// cannot be marshalled, when the POST fails, or when the response body cannot
// be read. Marshal, request, and read failures are also written to the
// standard logger and to the filter's own log before being returned.
func getBrowserlessURLContent(filter *Filter, fetchURL string) (string, error) {
	if !viper.IsSet("browserless.url") {
		return "", errors.New("browserless.url not set")
	}
	browserlessURL := viper.GetString("browserless.url")

	data := struct {
		URL string `json:"url"`
	}{
		URL: fetchURL,
	}
	jsn, err := json.Marshal(data)
	if err != nil {
		log.Println("Could not marshal url:", err)
		filter.log("Could not marshal url:", err)
		return "", err
	}

	resp, err := http.Post(browserlessURL, "application/json", bytes.NewBuffer(jsn))
	if err != nil {
		log.Println("Could not get browserless response content:", err)
		filter.log("Could not get browserless response content:", err)
		return "", err
	}
	// The response body must be closed on every return path; otherwise the
	// underlying connection is never released and leaks on each call.
	defer resp.Body.Close()

	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		log.Println("Could not fetch url through browserless: ", fetchURL, " - ", err)
		filter.log("Could not fetch url through browserless: ", fetchURL, " - ", err)
		return "", err
	}
	return string(body), nil
}
func getFilterResultXPath(filter *Filter) { func getFilterResultXPath(filter *Filter) {
selectType := "node" selectType := "node"
if filter.Var2 != nil { if filter.Var2 != nil {

View file

@ -114,6 +114,65 @@ function onTypeChange(node) {
var3Div.appendChild(var3Input); var3Div.appendChild(var3Input);
break; break;
} }
case "bgurl": {
var var1Input = document.createElement("input");
var1Input.name = "var1";
var1Input.id = "var1Input";
var1Input.value = var1Value;
var1Input.classList.add("form-control");
var1Label.innerHTML = "URL";
var1Input.placeholder = "https://shopping.website.com";
var1Div.appendChild(var1Input);
var var2Input = document.createElement("input");
var2Input.name = "var2";
var2Input.id = "var2Input";
var2Input.value = var2Value;
var2Input.classList.add("form-control");
var2Input.disabled = true;
var2Input.placeholder = "";
var2Label.innerHTML = "-";
var2Div.appendChild(var2Input);
var var3Input = document.createElement("input");
var3Input.name = "var3";
var3Input.id = "var3Input";
var3Input.value = var3Value;
var3Input.classList.add("form-control");
var3Input.disabled = true;
var3Input.placeholder = "";
var3Label.innerHTML = "-";
var3Div.appendChild(var3Input);
break;
}
case "bgurls": {
var var1Input = document.createElement("input");
var1Input.name = "var1";
var1Input.id = "var1Input";
var1Input.value = var1Value;
var1Input.classList.add("form-control");
var1Label.innerHTML = "-";
var1Input.placeholder = "From parents";
var1Input.disabled = true;
var1Div.appendChild(var1Input);
var var2Input = document.createElement("input");
var2Input.name = "var2";
var2Input.id = "var2Input";
var2Input.value = var2Value;
var2Input.classList.add("form-control");
var2Input.disabled = true;
var2Input.placeholder = "";
var2Label.innerHTML = "-";
var2Div.appendChild(var2Input);
var var3Input = document.createElement("input");
var3Input.name = "var3";
var3Input.id = "var3Input";
var3Input.value = var3Value;
var3Input.classList.add("form-control");
var3Input.disabled = true;
var3Input.placeholder = "";
var3Label.innerHTML = "-";
var3Div.appendChild(var3Input);
break;
}
case "xpath": { case "xpath": {
var var1Input = document.createElement("input"); var var1Input = document.createElement("input");
var1Input.name = "var1"; var1Input.name = "var1";

View file

@ -90,6 +90,69 @@ function onTypeChange(node: DiagramNode | null = null){
var3Div.appendChild(var3Input); var3Div.appendChild(var3Input);
break; break;
} }
case "bgurl": {
let var1Input = document.createElement("input");
var1Input.name = "var1";
var1Input.id = "var1Input";
var1Input.value = var1Value;
var1Input.classList.add("form-control");
var1Label.innerHTML = "URL";
var1Input.placeholder = "https://shopping.website.com";
var1Div.appendChild(var1Input);
let var2Input = document.createElement("input");
var2Input.name = "var2";
var2Input.id = "var2Input";
var2Input.value = var2Value;
var2Input.classList.add("form-control");
var2Input.disabled = true;
var2Input.placeholder = ""
var2Label.innerHTML = "-";
var2Div.appendChild(var2Input);
let var3Input = document.createElement("input");
var3Input.name = "var3";
var3Input.id = "var3Input";
var3Input.value = var3Value;
var3Input.classList.add("form-control");
var3Input.disabled = true;
var3Input.placeholder = ""
var3Label.innerHTML = "-";
var3Div.appendChild(var3Input);
break;
}
case "bgurls": {
let var1Input = document.createElement("input");
var1Input.name = "var1";
var1Input.id = "var1Input";
var1Input.value = var1Value;
var1Input.classList.add("form-control")
var1Label.innerHTML = "-";
var1Input.placeholder = "From parents";
var1Input.disabled = true;
var1Div.appendChild(var1Input);
let var2Input = document.createElement("input");
var2Input.name = "var2";
var2Input.id = "var2Input";
var2Input.value = var2Value;
var2Input.classList.add("form-control")
var2Input.disabled = true;
var2Input.placeholder = ""
var2Label.innerHTML = "-";
var2Div.appendChild(var2Input);
let var3Input = document.createElement("input");
var3Input.name = "var3";
var3Input.id = "var3Input";
var3Input.value = var3Value;
var3Input.classList.add("form-control");
var3Input.disabled = true;
var3Input.placeholder = ""
var3Label.innerHTML = "-";
var3Div.appendChild(var3Input);
break;
}
case "xpath": { case "xpath": {
let var1Input = document.createElement("input"); let var1Input = document.createElement("input");
var1Input.name = "var1"; var1Input.name = "var1";

View file

@ -77,6 +77,8 @@ GoWatch Edit {{ .Watch.Name }}
<select id="typeInput" class="form-control" name="type"> <select id="typeInput" class="form-control" name="type">
<option value="gurl">Get URL</option> <option value="gurl">Get URL</option>
<option value="gurls">Get URLs</option> <option value="gurls">Get URLs</option>
<option value="bgurl">Browserless Get URL</option>
<option value="bgurls">Browserless Get URLs</option>
<option value="xpath" selected="true">XPath</option> <option value="xpath" selected="true">XPath</option>
<option value="css">CSS</option> <option value="css">CSS</option>
<option value="json">JSON</option> <option value="json">JSON</option>

View file

@ -6,7 +6,7 @@
- edit.ts - edit.ts
- diagram.ts - diagram.ts
- browserless function filters - browserless function filters
- split get url http.get and browserless get into 2 filters - ~~split get url http.get and browserless get into 2 filters~~
- refactor amazon template - refactor amazon template
- url path support - url path support
- refactor project structure - refactor project structure