split get url(s) into two functions: one 'normal' get and one browserless get

This commit is contained in:
BroodjeAap 2023-01-31 19:59:15 +00:00
parent a8ddee70fb
commit 6057506f7b
5 changed files with 225 additions and 48 deletions

View file

@ -3,6 +3,7 @@ package main
import (
"bytes"
"encoding/json"
"errors"
"fmt"
"html/template"
"io/ioutil"
@ -126,6 +127,14 @@ func getFilterResult(filters []Filter, filter *Filter, watch *Watch, web *Web, d
{
getFilterResultURLs(filter, web.urlCache, debug)
}
case filter.Type == "bgurl":
{
getFilterResultBrowserlessURL(filter, web.urlCache, debug)
}
case filter.Type == "bgurls":
{
getFilterResultBrowserlessURLs(filter, web.urlCache, debug)
}
case filter.Type == "xpath":
{
getFilterResultXPath(filter)
@ -250,6 +259,8 @@ func getFilterResultURL(filter *Filter, urlCache map[string]string, debug bool)
}
str, err := getURLContent(filter, fetchURL)
if err != nil {
log.Println("Could not fetch url: ", fetchURL, " - ", err)
filter.log("Could not fetch url: ", fetchURL, " - ", err)
return
}
filter.Results = append(filter.Results, str)
@ -270,6 +281,8 @@ func getFilterResultURLs(filter *Filter, urlCache map[string]string, debug bool)
str, err := getURLContent(filter, fetchURL)
if err != nil {
log.Println("Could not fetch url: ", fetchURL, " - ", err)
filter.log("Could not fetch url: ", fetchURL, " - ", err)
continue
}
filter.Results = append(filter.Results, str)
@ -281,62 +294,102 @@ func getFilterResultURLs(filter *Filter, urlCache map[string]string, debug bool)
}
// getURLContent fetches fetchURL and returns the response body as a string.
//
// NOTE(review): this span is a rendered diff whose +/- markers were stripped,
// so lines of the previous implementation (which POSTed the URL to the
// "browserless.url" endpoint) and of the new implementation (a plain GET,
// optionally through the proxy configured under "proxy.proxy_url") appear
// interleaved. It does not form a single coherent function body as shown;
// consult the actual file before changing any logic here.
//
// NOTE(review): none of the visible variants closes resp.Body after reading
// it - confirm against the real file and add a deferred Close if missing.
func getURLContent(filter *Filter, fetchURL string) (string, error) {
var body []byte
if viper.IsSet("browserless.url") {
browserlessURL := viper.GetString("browserless.url")
data := struct {
URL string `json:"url"`
}{
URL: fetchURL,
}
jsn, err := json.Marshal(data)
var httpClient *http.Client
if viper.IsSet("proxy.proxy_url") {
proxyUrl, err := url.Parse(viper.GetString("proxy.proxy_url"))
if err != nil {
log.Println("Could not marshal url:", err)
filter.log("Could not marshal url:", err)
return "", err
}
resp, err := http.Post(browserlessURL, "application/json", bytes.NewBuffer(jsn))
if err != nil {
log.Println("Could not get browserless response content:", err)
filter.log("Could not get browserless response content:", err)
return "", err
}
body, err = ioutil.ReadAll(resp.Body)
if err != nil {
log.Println("Could not fetch url through browserless: ", fetchURL, " - ", err)
filter.log("Could not fetch url through browserless: ", fetchURL, " - ", err)
return "", err
}
httpClient = &http.Client{Transport: &http.Transport{Proxy: http.ProxyURL(proxyUrl)}}
} else {
var httpClient *http.Client
if viper.IsSet("proxy.proxy_url") {
proxyUrl, err := url.Parse(viper.GetString("proxy.proxy_url"))
if err != nil {
log.Println("Could not parse proxy url, check config")
filter.log("Could not parse proxy url, check config")
return "", err
}
httpClient = &http.Client{Transport: &http.Transport{Proxy: http.ProxyURL(proxyUrl)}}
} else {
httpClient = &http.Client{}
}
resp, err := httpClient.Get(fetchURL)
if err != nil {
log.Println("Could not fetch url: ", fetchURL, " - ", err)
filter.log("Could not fetch url: ", fetchURL, " - ", err)
return "", err
}
body, err = ioutil.ReadAll(resp.Body)
if err != nil {
log.Println("Could not fetch url: ", fetchURL, " - ", err)
filter.log("Could not fetch url: ", fetchURL, " - ", err)
return "", err
}
httpClient = &http.Client{}
}
resp, err := httpClient.Get(fetchURL)
if err != nil {
return "", err
}
body, err := ioutil.ReadAll(resp.Body)
if err != nil {
return "", err
}
return string(body), nil
}
// getFilterResultBrowserlessURL fetches the URL stored in filter.Var1 via
// getBrowserlessURLContent and appends the fetched content to filter.Results.
//
// When debug is true, urlCache is consulted first and updated after a
// successful fetch. Entries are stored under the URL prefixed with "b".
// On a fetch error the failure is logged to the process log and the filter
// log, and no result is appended.
func getFilterResultBrowserlessURL(filter *Filter, urlCache map[string]string, debug bool) {
	target := filter.Var1
	cacheKey := "b" + target

	// Cached content is only served in debug mode.
	if cached, hit := urlCache[cacheKey]; hit && debug {
		filter.Results = append(filter.Results, cached)
		return
	}

	content, err := getBrowserlessURLContent(filter, target)
	if err != nil {
		log.Println("Could not fetch url: ", target, " - ", err)
		filter.log("Could not fetch url: ", target, " - ", err)
		return
	}

	filter.Results = append(filter.Results, content)
	if !debug {
		return
	}
	urlCache[cacheKey] = content
}
// getFilterResultBrowserlessURLs treats every result of every parent filter
// as a URL, fetches each one via getBrowserlessURLContent and appends the
// fetched content to filter.Results in iteration order.
//
// When debug is true, urlCache is consulted before each fetch and updated
// after each successful one. Entries are stored under the URL prefixed with
// "b". A URL that fails to fetch is logged to the process log and the filter
// log and then skipped; the remaining URLs are still processed.
func getFilterResultBrowserlessURLs(filter *Filter, urlCache map[string]string, debug bool) {
	// fetchOne resolves a single URL, either from the debug cache or through
	// Browserless, and records the outcome on the filter.
	fetchOne := func(target string) {
		cacheKey := "b" + target
		if cached, hit := urlCache[cacheKey]; hit && debug {
			filter.Results = append(filter.Results, cached)
			return
		}

		content, err := getBrowserlessURLContent(filter, target)
		if err != nil {
			log.Println("Could not fetch url: ", target, " - ", err)
			filter.log("Could not fetch url: ", target, " - ", err)
			return
		}

		filter.Results = append(filter.Results, content)
		if debug {
			urlCache[cacheKey] = content
		}
	}

	for _, parent := range filter.Parents {
		for _, parentResult := range parent.Results {
			fetchOne(parentResult)
		}
	}
}
// getBrowserlessURLContent fetches fetchURL through the service configured
// under the "browserless.url" key and returns the response body as a string.
//
// The target URL is sent as a JSON document of the form {"url": fetchURL} in
// a POST request to the configured endpoint.
//
// An error is returned when "browserless.url" is not set, when the payload
// cannot be marshalled, when the POST fails, or when the response body cannot
// be read. All but the first of these are also written to the process log and
// to the filter's own log. The HTTP status code of the response is not
// inspected: whatever body the endpoint returns is passed back to the caller.
func getBrowserlessURLContent(filter *Filter, fetchURL string) (string, error) {
	if !viper.IsSet("browserless.url") {
		return "", errors.New("browserless.url not set")
	}
	browserlessURL := viper.GetString("browserless.url")

	data := struct {
		URL string `json:"url"`
	}{
		URL: fetchURL,
	}
	jsn, err := json.Marshal(data)
	if err != nil {
		log.Println("Could not marshal url:", err)
		filter.log("Could not marshal url:", err)
		return "", err
	}

	resp, err := http.Post(browserlessURL, "application/json", bytes.NewBuffer(jsn))
	if err != nil {
		log.Println("Could not get browserless response content:", err)
		filter.log("Could not get browserless response content:", err)
		return "", err
	}
	// The body must be closed on every return path; otherwise the underlying
	// connection is leaked and cannot be reused by the HTTP transport.
	defer resp.Body.Close()

	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		log.Println("Could not fetch url through browserless: ", fetchURL, " - ", err)
		filter.log("Could not fetch url through browserless: ", fetchURL, " - ", err)
		return "", err
	}
	return string(body), nil
}
func getFilterResultXPath(filter *Filter) {
selectType := "node"
if filter.Var2 != nil {

View file

@ -114,6 +114,65 @@ function onTypeChange(node) {
var3Div.appendChild(var3Input);
break;
}
case "bgurl": {
var var1Input = document.createElement("input");
var1Input.name = "var1";
var1Input.id = "var1Input";
var1Input.value = var1Value;
var1Input.classList.add("form-control");
var1Label.innerHTML = "URL";
var1Input.placeholder = "https://shopping.website.com";
var1Div.appendChild(var1Input);
var var2Input = document.createElement("input");
var2Input.name = "var2";
var2Input.id = "var2Input";
var2Input.value = var2Value;
var2Input.classList.add("form-control");
var2Input.disabled = true;
var2Input.placeholder = "";
var2Label.innerHTML = "-";
var2Div.appendChild(var2Input);
var var3Input = document.createElement("input");
var3Input.name = "var3";
var3Input.id = "var3Input";
var3Input.value = var3Value;
var3Input.classList.add("form-control");
var3Input.disabled = true;
var3Input.placeholder = "";
var3Label.innerHTML = "-";
var3Div.appendChild(var3Input);
break;
}
case "bgurls": {
var var1Input = document.createElement("input");
var1Input.name = "var1";
var1Input.id = "var1Input";
var1Input.value = var1Value;
var1Input.classList.add("form-control");
var1Label.innerHTML = "-";
var1Input.placeholder = "From parents";
var1Input.disabled = true;
var1Div.appendChild(var1Input);
var var2Input = document.createElement("input");
var2Input.name = "var2";
var2Input.id = "var2Input";
var2Input.value = var2Value;
var2Input.classList.add("form-control");
var2Input.disabled = true;
var2Input.placeholder = "";
var2Label.innerHTML = "-";
var2Div.appendChild(var2Input);
var var3Input = document.createElement("input");
var3Input.name = "var3";
var3Input.id = "var3Input";
var3Input.value = var3Value;
var3Input.classList.add("form-control");
var3Input.disabled = true;
var3Input.placeholder = "";
var3Label.innerHTML = "-";
var3Div.appendChild(var3Input);
break;
}
case "xpath": {
var var1Input = document.createElement("input");
var1Input.name = "var1";

View file

@ -90,6 +90,69 @@ function onTypeChange(node: DiagramNode | null = null){
var3Div.appendChild(var3Input);
break;
}
case "bgurl": {
let var1Input = document.createElement("input");
var1Input.name = "var1";
var1Input.id = "var1Input";
var1Input.value = var1Value;
var1Input.classList.add("form-control");
var1Label.innerHTML = "URL";
var1Input.placeholder = "https://shopping.website.com";
var1Div.appendChild(var1Input);
let var2Input = document.createElement("input");
var2Input.name = "var2";
var2Input.id = "var2Input";
var2Input.value = var2Value;
var2Input.classList.add("form-control");
var2Input.disabled = true;
var2Input.placeholder = ""
var2Label.innerHTML = "-";
var2Div.appendChild(var2Input);
let var3Input = document.createElement("input");
var3Input.name = "var3";
var3Input.id = "var3Input";
var3Input.value = var3Value;
var3Input.classList.add("form-control");
var3Input.disabled = true;
var3Input.placeholder = ""
var3Label.innerHTML = "-";
var3Div.appendChild(var3Input);
break;
}
case "bgurls": {
let var1Input = document.createElement("input");
var1Input.name = "var1";
var1Input.id = "var1Input";
var1Input.value = var1Value;
var1Input.classList.add("form-control")
var1Label.innerHTML = "-";
var1Input.placeholder = "From parents";
var1Input.disabled = true;
var1Div.appendChild(var1Input);
let var2Input = document.createElement("input");
var2Input.name = "var2";
var2Input.id = "var2Input";
var2Input.value = var2Value;
var2Input.classList.add("form-control")
var2Input.disabled = true;
var2Input.placeholder = ""
var2Label.innerHTML = "-";
var2Div.appendChild(var2Input);
let var3Input = document.createElement("input");
var3Input.name = "var3";
var3Input.id = "var3Input";
var3Input.value = var3Value;
var3Input.classList.add("form-control");
var3Input.disabled = true;
var3Input.placeholder = ""
var3Label.innerHTML = "-";
var3Div.appendChild(var3Input);
break;
}
case "xpath": {
let var1Input = document.createElement("input");
var1Input.name = "var1";

View file

@ -77,6 +77,8 @@ GoWatch Edit {{ .Watch.Name }}
<select id="typeInput" class="form-control" name="type">
<option value="gurl">Get URL</option>
<option value="gurls">Get URLs</option>
<option value="bgurl">Browserless Get URL</option>
<option value="bgurls">Browserless Get URLs</option>
<option value="xpath" selected="true">XPath</option>
<option value="css">CSS</option>
<option value="json">JSON</option>

View file

@ -6,7 +6,7 @@
- edit.ts
- diagram.ts
- browserless function filters
- split get url http.get and browserless get into 2 filters
- ~~split get url http.get and browserless get into 2 filters~~
- refactor amazon template
- url path support
- refactor project structure