added subset filter
This commit is contained in:
parent
5419959533
commit
00fcadf38d
5 changed files with 217 additions and 5 deletions
106
web/scraping.go
106
web/scraping.go
|
@ -193,6 +193,10 @@ func getFilterResult(filters []Filter, filter *Filter, watch *Watch, web *Web, d
|
|||
{
|
||||
getFilterResultSubstring(filter)
|
||||
}
|
||||
case filter.Type == "subset":
|
||||
{
|
||||
getFilterResultSubset(filter)
|
||||
}
|
||||
case filter.Type == "contains":
|
||||
{
|
||||
getFilterResultContains(filter)
|
||||
|
@ -694,10 +698,11 @@ func getFilterResultSubstring(filter *Filter) {
|
|||
if hasFrom && err != nil {
|
||||
filter.Log("Could not parse left side of: '", substring, "'")
|
||||
return
|
||||
} else if from < 0 {
|
||||
from = len(asRunes) + from
|
||||
}
|
||||
if from < 0 {
|
||||
from = len(asRunes) + from
|
||||
}
|
||||
if from < 0 || from > len(asRunes) {
|
||||
filter.Log("Out of bounds:", from_to)
|
||||
continue
|
||||
}
|
||||
|
@ -712,13 +717,15 @@ func getFilterResultSubstring(filter *Filter) {
|
|||
if hasTo && err != nil {
|
||||
filter.Log("Could not parse right side of: '", substring, "'")
|
||||
return
|
||||
} else if to < 0 {
|
||||
to = len(asRunes) + to
|
||||
}
|
||||
if to < 0 {
|
||||
to = len(asRunes) + to
|
||||
}
|
||||
if to < 0 || to > len(asRunes) {
|
||||
filter.Log("Out of bounds:", from_to)
|
||||
continue
|
||||
}
|
||||
|
||||
if hasFrom && hasTo {
|
||||
_, err := sb.WriteString(string(asRunes[from:to]))
|
||||
if err != nil {
|
||||
|
@ -735,6 +742,10 @@ func getFilterResultSubstring(filter *Filter) {
|
|||
filter.Log("Could not parse: '", substring, "'")
|
||||
return
|
||||
}
|
||||
if pos < 0 || pos >= int64(len(asRunes)) {
|
||||
filter.Log("Out of bounds:", pos)
|
||||
continue
|
||||
}
|
||||
sb.WriteRune(asRunes[pos])
|
||||
}
|
||||
}
|
||||
|
@ -743,6 +754,91 @@ func getFilterResultSubstring(filter *Filter) {
|
|||
}
|
||||
}
|
||||
|
||||
// getFilterResultSubset performs a subset selection on all the results of its parents
|
||||
func getFilterResultSubset(filter *Filter) {
|
||||
numResults := 0
|
||||
for _, parent := range filter.Parents {
|
||||
numResults += len(parent.Results)
|
||||
}
|
||||
|
||||
results := make([]string, 0, numResults)
|
||||
|
||||
for _, parent := range filter.Parents {
|
||||
for _, result := range parent.Results {
|
||||
results = append(results, result)
|
||||
}
|
||||
}
|
||||
|
||||
substrings := strings.Split(filter.Var1, ",")
|
||||
for _, substring := range substrings {
|
||||
if strings.Contains(substring, ":") {
|
||||
from_to := strings.Split(substring, ":")
|
||||
if len(from_to) != 2 {
|
||||
filter.Log("Missing value in range: '", substring, "'")
|
||||
return
|
||||
}
|
||||
fromStr := from_to[0]
|
||||
var hasFrom bool = true
|
||||
if fromStr == "" {
|
||||
hasFrom = false
|
||||
}
|
||||
from64, err := strconv.ParseInt(fromStr, 10, 32)
|
||||
var from = int(from64)
|
||||
if hasFrom && err != nil {
|
||||
filter.Log("Could not parse left side of: '", substring, "'")
|
||||
return
|
||||
}
|
||||
if from < 0 {
|
||||
from = len(results) + from
|
||||
}
|
||||
if from < 0 || from > len(results) {
|
||||
filter.Log("Out of bounds:", from_to)
|
||||
continue
|
||||
}
|
||||
|
||||
toStr := from_to[1]
|
||||
var hasTo bool = true
|
||||
if toStr == "" {
|
||||
hasTo = false
|
||||
}
|
||||
to64, err := strconv.ParseInt(toStr, 10, 32)
|
||||
var to = int(to64)
|
||||
if hasTo && err != nil {
|
||||
filter.Log("Could not parse right side of: '", substring, "'")
|
||||
return
|
||||
}
|
||||
if to < 0 {
|
||||
to = len(results) + to
|
||||
}
|
||||
if to < 0 || to > len(results) {
|
||||
filter.Log("Out of bounds:", from_to)
|
||||
continue
|
||||
}
|
||||
if hasFrom && hasTo {
|
||||
filter.Results = append(filter.Results, results[from:to]...)
|
||||
if err != nil {
|
||||
filter.Log("Could not substring: ", err)
|
||||
}
|
||||
} else if hasFrom {
|
||||
filter.Results = append(filter.Results, results[from:]...)
|
||||
} else if hasTo {
|
||||
filter.Results = append(filter.Results, results[:to]...)
|
||||
}
|
||||
} else {
|
||||
pos, err := strconv.ParseInt(substring, 10, 32)
|
||||
if err != nil || pos < 0 {
|
||||
filter.Log("Could not parse: '", substring, "'")
|
||||
return
|
||||
}
|
||||
if pos < 0 || pos >= int64(numResults) {
|
||||
filter.Log("Out of bounds:", pos)
|
||||
continue
|
||||
}
|
||||
filter.Results = append(filter.Results, results[pos])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// getFilterResultContains performs a regex contains on all the results of its parents
|
||||
func getFilterResultContains(filter *Filter) {
|
||||
r, err := regexp.Compile(filter.Var1)
|
||||
|
@ -1318,7 +1414,7 @@ func getFilterResultDisableSchedules(filter *Filter, web *Web, debug bool) {
|
|||
return
|
||||
}
|
||||
|
||||
web.db.Model(&Filter{}).Where("watch_id = ?", filter.WatchID).Update("Var2", "no")
|
||||
web.db.Model(&Filter{}).Where("watch_id = ? AND type = 'cron'", filter.WatchID).Update("Var2", "no")
|
||||
}
|
||||
|
||||
// getFilterResultEcho is a debug filter type, used to bootstrap some tests
|
||||
|
|
|
@ -436,8 +436,11 @@ func TestFilterSubstringOutOfBounds(t *testing.T) {
|
|||
Input string
|
||||
Query string
|
||||
}{
|
||||
{"01234", "0:8"},
|
||||
{"01234", ":-6"},
|
||||
{"01234", "-6:"},
|
||||
{"01234", "-1"},
|
||||
{"01234", "6"},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
|
@ -459,6 +462,79 @@ func TestFilterSubstringOutOfBounds(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestFilterSubset(t *testing.T) {
|
||||
var tests = []struct {
|
||||
Input []string
|
||||
Query string
|
||||
Want []string
|
||||
}{
|
||||
{[]string{"zero", "one", "two", "three", "four", "five", "six"}, "0", []string{"zero"}},
|
||||
{[]string{"zero", "one", "two", "three", "four", "five", "six"}, "6", []string{"six"}},
|
||||
{[]string{"zero", "one", "two", "three", "four", "five", "six"}, "-1", []string{}},
|
||||
{[]string{"zero", "one", "two", "three", "four", "five", "six"}, "7", []string{}},
|
||||
|
||||
{[]string{"zero", "one", "two", "three", "four", "five", "six"}, "0,3,6", []string{"zero", "three", "six"}},
|
||||
{[]string{"zero", "one", "two", "three", "four", "five", "six"}, "0,2:5,6", []string{"zero", "two", "three", "four", "six"}},
|
||||
|
||||
{[]string{"zero", "one", "two", "three", "four", "five", "six"}, "3:6", []string{"three", "four", "five"}},
|
||||
|
||||
{[]string{"zero", "one", "two", "three", "four", "five", "six"}, "0:7", []string{"zero", "one", "two", "three", "four", "five", "six"}},
|
||||
{[]string{"zero", "one", "two", "three", "four", "five", "six"}, ":7", []string{"zero", "one", "two", "three", "four", "five", "six"}},
|
||||
{[]string{"zero", "one", "two", "three", "four", "five", "six"}, "0:", []string{"zero", "one", "two", "three", "four", "five", "six"}},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
testname := fmt.Sprintf("%s %s", test.Input, test.Query)
|
||||
t.Run(testname, func(t *testing.T) {
|
||||
filter := Filter{
|
||||
Parents: []*Filter{
|
||||
{Results: test.Input},
|
||||
},
|
||||
Var1: test.Query,
|
||||
}
|
||||
getFilterResultSubset(
|
||||
&filter,
|
||||
)
|
||||
if !DeepEqualStringSlice(filter.Results, test.Want) {
|
||||
t.Errorf("Got %s, want %s", filter.Results, test.Want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestFilterSubsetOutOfBounds(t *testing.T) {
|
||||
var tests = []struct {
|
||||
Input []string
|
||||
Query string
|
||||
}{
|
||||
{[]string{"zero", "one", "two", "three", "four", "five", "six"}, "9"},
|
||||
{[]string{"zero", "one", "two", "three", "four", "five", "six"}, "-10"},
|
||||
{[]string{"zero", "one", "two", "three", "four", "five", "six"}, "-10:9"},
|
||||
{[]string{"zero", "one", "two", "three", "four", "five", "six"}, "-10:"},
|
||||
{[]string{"zero", "one", "two", "three", "four", "five", "six"}, ":9"},
|
||||
|
||||
{[]string{"zero", "one", "two", "three", "four", "five", "six"}, "0,1,2,8,4"},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
testname := fmt.Sprintf("%s %s", test.Input, test.Query)
|
||||
t.Run(testname, func(t *testing.T) {
|
||||
filter := Filter{
|
||||
Parents: []*Filter{
|
||||
{Results: test.Input},
|
||||
},
|
||||
Var1: test.Query,
|
||||
}
|
||||
getFilterResultSubset(
|
||||
&filter,
|
||||
)
|
||||
if len(filter.Logs) == 0 {
|
||||
t.Errorf("No log message, expected one for OoB")
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestFilterContains(t *testing.T) {
|
||||
var tests = []struct {
|
||||
Input []string
|
||||
|
|
|
@ -245,6 +245,25 @@ function onTypeChange(node) {
|
|||
var2Div.appendChild(var2Input);
|
||||
break;
|
||||
}
|
||||
case "subset": {
|
||||
var var1Input = document.createElement("input");
|
||||
var1Input.name = "var1";
|
||||
var1Input.id = "var1Input";
|
||||
var1Input.value = var1Value;
|
||||
var1Input.classList.add("form-control");
|
||||
var1Label.innerHTML = "Subset";
|
||||
var1Input.placeholder = ":20,25-40,45,47,49,-20:";
|
||||
var1Div.appendChild(var1Input);
|
||||
var var2Input = document.createElement("input");
|
||||
var2Input.name = "var2";
|
||||
var2Input.id = "var2Input";
|
||||
var2Input.value = var2Value;
|
||||
var2Input.classList.add("form-control");
|
||||
var2Input.disabled = true;
|
||||
var2Label.innerHTML = "-";
|
||||
var2Div.appendChild(var2Input);
|
||||
break;
|
||||
}
|
||||
case "contains": {
|
||||
var var1Input = document.createElement("input");
|
||||
var1Input.name = "var1";
|
||||
|
|
|
@ -224,6 +224,26 @@ function onTypeChange(node: DiagramNode | null = null){
|
|||
var2Div.appendChild(var2Input);
|
||||
break;
|
||||
}
|
||||
case "subset": {
|
||||
let var1Input = document.createElement("input");
|
||||
var1Input.name = "var1";
|
||||
var1Input.id = "var1Input";
|
||||
var1Input.value = var1Value;
|
||||
var1Input.classList.add("form-control")
|
||||
var1Label.innerHTML = "Subset";
|
||||
var1Input.placeholder = ":20,25-40,45,47,49,-20:";
|
||||
var1Div.appendChild(var1Input);
|
||||
|
||||
let var2Input = document.createElement("input");
|
||||
var2Input.name = "var2";
|
||||
var2Input.id = "var2Input";
|
||||
var2Input.value = var2Value;
|
||||
var2Input.classList.add("form-control")
|
||||
var2Input.disabled = true;
|
||||
var2Label.innerHTML = "-";
|
||||
var2Div.appendChild(var2Input);
|
||||
break;
|
||||
}
|
||||
case "contains": {
|
||||
let var1Input = document.createElement("input");
|
||||
var1Input.name = "var1";
|
||||
|
|
|
@ -83,6 +83,7 @@ GoWatch Edit {{ .Watch.Name }}
|
|||
<option value="replace">Replace</option>
|
||||
<option value="match">Match</option>
|
||||
<option value="substring">Substring</option>
|
||||
<option value="subset">Subset</option>
|
||||
<option value="contains">Contains</option>
|
||||
<option value="unique">Unique</option>
|
||||
<option value="math">Math</option>
|
||||
|
|
Loading…
Add table
Reference in a new issue