diff --git a/scraping_test.go b/scraping_test.go index 3dc566d..c232ce7 100644 --- a/scraping_test.go +++ b/scraping_test.go @@ -18,7 +18,7 @@ const HTML_STRING = `
Name | @@ -46,12 +46,14 @@ const JSON_STRING = `{ ] }` -func TestFilterXPath(t *testing.T) { +func TestFilterXPathNode(t *testing.T) { + var2 := "node" var tests = []struct { Query string Want []string }{ {"//title", []string{"100 | `, `200 | `, `300 | `, `400 | `}}, {"//td[@class='price']", []string{`100 | `, `200 | `, `300 | `, `400 | `}}, {"//table[@id='product-table']//tr//td[2]", []string{`10 | `, `20 | `, `30 | `, `40 | `}}, @@ -66,6 +68,7 @@ func TestFilterXPath(t *testing.T) { {Results: []string{HTML_STRING}}, }, Var1: test.Query, + Var2: &var2, } getFilterResultXPath( &filter, @@ -77,6 +80,74 @@ func TestFilterXPath(t *testing.T) { } } +func TestFilterXPathInnerHTML(t *testing.T) { + var2 := "inner" + var tests = []struct { + Query string + Want []string + }{ + {"//title", []string{"title"}}, + {"//table[@id='product-table']/caption", []string{`product-table-caption`}}, + {"//table[@id='product-table']//tr//td[last()]", []string{`100`, `200`, `300`, `400`}}, + {"//td[@class='price']", []string{`100`, `200`, `300`, `400`}}, + {"//table[@id='product-table']//tr//td[2]", []string{`10`, `20`, `30`, `40`}}, + {"//td[@class='stock']", []string{`10`, `20`, `30`, `40`}}, + } + + for _, test := range tests { + testname := test.Query + t.Run(testname, func(t *testing.T) { + filter := Filter{ + Parents: []*Filter{ + {Results: []string{HTML_STRING}}, + }, + Var1: test.Query, + Var2: &var2, + } + getFilterResultXPath( + &filter, + ) + if !reflect.DeepEqual(test.Want, filter.Results) { + t.Errorf("Got %s, want %s", filter.Results, test.Want) + } + }) + } +} + +func TestFilterXPathAttributes(t *testing.T) { + var2 := "attr" + var tests = []struct { + Query string + Want []string + }{ + {"//title", []string{}}, + {"//table[@id='product-table']/caption", []string{`class="h3"`, `id="table-caption"`, `data="data"`}}, + {"//table[@id='product-table']//tr//td[last()]", []string{`class="price"`, `class="price"`, `class="price"`, `class="price"`}}, + {"//td[@class='price']", []string{`class="price"`, `class="price"`, `class="price"`, `class="price"`}}, + {"//table[@id='product-table']//tr//td[2]", []string{`class="stock"`, `class="stock"`, `class="stock"`, `class="stock"`}}, + {"//td[@class='stock']", []string{`class="stock"`, `class="stock"`, `class="stock"`, `class="stock"`}}, + } + + for _, test := range tests { + testname := test.Query + t.Run(testname, func(t *testing.T) { + filter := Filter{ + Parents: []*Filter{ + {Results: []string{HTML_STRING}}, + }, + Var1: test.Query, + Var2: &var2, + } + getFilterResultXPath( + &filter, + ) + if len(test.Want) != 0 && len(filter.Results) != 0 && !reflect.DeepEqual(test.Want, filter.Results) { + t.Errorf("Got %s, want %s", filter.Results, test.Want) + } + }) + } +} + func TestFilterJSON(t *testing.T) { var tests = []struct { Query string @@ -107,7 +178,8 @@ func TestFilterJSON(t *testing.T) { } } -func TestFilterCSS(t *testing.T) { +func TestFilterCSSNode(t *testing.T) { + var2 := "node" var tests = []struct { Query string Want []string @@ -127,6 +199,7 @@ func TestFilterCSS(t *testing.T) { {Results: []string{HTML_STRING}}, }, Var1: test.Query, + Var2: &var2, } getFilterResultCSS( &filter, @@ -138,6 +211,73 @@ func TestFilterCSS(t *testing.T) { } } +func TestFilterCSSInnerHTML(t *testing.T) { + var2 := "inner" + var tests = []struct { + Query string + Want []string + }{ + {"title", []string{"title"}}, + {".product-table tr td:last-child", []string{`100`, `200`, `300`, `400`}}, + {".price", []string{`100`, `200`, `300`, `400`}}, + {".product-table tr td:nth-child(2)", []string{`10`, `20`, `30`, `40`}}, + {".stock", []string{`10`, `20`, `30`, `40`}}, + } + + for _, test := range tests { + testname := test.Query + t.Run(testname, func(t *testing.T) { + filter := Filter{ + Parents: []*Filter{ + {Results: []string{HTML_STRING}}, + }, + Var1: test.Query, + Var2: &var2, + } + getFilterResultCSS( + &filter, + ) + if !reflect.DeepEqual(test.Want, filter.Results) { + t.Errorf("Got %s, want %s", filter.Results, test.Want) + } + }) + } +} + +func TestFilterCSSAttributes(t *testing.T) { + var2 := "attr" + var tests = []struct { + Query string + Want []string + }{ + {"title", []string{}}, + {"#table-caption", []string{`class="h3"`, `id="table-caption"`, `data="data"`}}, + {".product-table tr td:last-child", []string{`class="price"`, `class="price"`, `class="price"`, `class="price"`}}, + {".price", []string{`class="price"`, `class="price"`, `class="price"`, `class="price"`}}, + {".product-table tr td:nth-child(2)", []string{`class="stock"`, `class="stock"`, `class="stock"`, `class="stock"`}}, + {".stock", []string{`class="stock"`, `class="stock"`, `class="stock"`, `class="stock"`}}, + } + + for _, test := range tests { + testname := test.Query + t.Run(testname, func(t *testing.T) { + filter := Filter{ + Parents: []*Filter{ + {Results: []string{HTML_STRING}}, + }, + Var1: test.Query, + Var2: &var2, + } + getFilterResultCSS( + &filter, + ) + if len(test.Want) != 0 && len(filter.Results) != 0 && !reflect.DeepEqual(test.Want, filter.Results) { + t.Errorf("Got %s, want %s", filter.Results, test.Want) + } + }) + } +} + func TestFilterReplace(t *testing.T) { var tests = []struct { Input string
---|