added tests for node/inner/attr options of xpath/css filters
This commit is contained in:
parent
4cbe9ffbb0
commit
f27fd360b4
1 changed file with 143 additions and 3 deletions
146
scraping_test.go
146
scraping_test.go
|
@ -18,7 +18,7 @@ const HTML_STRING = `<html>
|
||||||
</head>
|
</head>
|
||||||
<body>
|
<body>
|
||||||
<table class="product-table" id="product-table">
|
<table class="product-table" id="product-table">
|
||||||
<caption>product-table-caption</caption>
|
<caption class="h3" id="table-caption" data="data">product-table-caption</caption>
|
||||||
<thead>
|
<thead>
|
||||||
<tr>
|
<tr>
|
||||||
<th>Name</th>
|
<th>Name</th>
|
||||||
|
@ -46,12 +46,14 @@ const JSON_STRING = `{
|
||||||
]
|
]
|
||||||
}`
|
}`
|
||||||
|
|
||||||
func TestFilterXPath(t *testing.T) {
|
func TestFilterXPathNode(t *testing.T) {
|
||||||
|
var2 := "node"
|
||||||
var tests = []struct {
|
var tests = []struct {
|
||||||
Query string
|
Query string
|
||||||
Want []string
|
Want []string
|
||||||
}{
|
}{
|
||||||
{"//title", []string{"<title>title</title>"}},
|
{"//title", []string{"<title>title</title>"}},
|
||||||
|
{"//table[@id='product-table']/caption", []string{`<caption class="h3" id="table-caption" data="data">product-table-caption</caption>`}},
|
||||||
{"//table[@id='product-table']//tr//td[last()]", []string{`<td class="price">100</td>`, `<td class="price">200</td>`, `<td class="price">300</td>`, `<td class="price">400</td>`}},
|
{"//table[@id='product-table']//tr//td[last()]", []string{`<td class="price">100</td>`, `<td class="price">200</td>`, `<td class="price">300</td>`, `<td class="price">400</td>`}},
|
||||||
{"//td[@class='price']", []string{`<td class="price">100</td>`, `<td class="price">200</td>`, `<td class="price">300</td>`, `<td class="price">400</td>`}},
|
{"//td[@class='price']", []string{`<td class="price">100</td>`, `<td class="price">200</td>`, `<td class="price">300</td>`, `<td class="price">400</td>`}},
|
||||||
{"//table[@id='product-table']//tr//td[2]", []string{`<td class="stock">10</td>`, `<td class="stock">20</td>`, `<td class="stock">30</td>`, `<td class="stock">40</td>`}},
|
{"//table[@id='product-table']//tr//td[2]", []string{`<td class="stock">10</td>`, `<td class="stock">20</td>`, `<td class="stock">30</td>`, `<td class="stock">40</td>`}},
|
||||||
|
@ -66,6 +68,7 @@ func TestFilterXPath(t *testing.T) {
|
||||||
{Results: []string{HTML_STRING}},
|
{Results: []string{HTML_STRING}},
|
||||||
},
|
},
|
||||||
Var1: test.Query,
|
Var1: test.Query,
|
||||||
|
Var2: &var2,
|
||||||
}
|
}
|
||||||
getFilterResultXPath(
|
getFilterResultXPath(
|
||||||
&filter,
|
&filter,
|
||||||
|
@ -77,6 +80,74 @@ func TestFilterXPath(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestFilterXPathInnerHTML(t *testing.T) {
|
||||||
|
var2 := "inner"
|
||||||
|
var tests = []struct {
|
||||||
|
Query string
|
||||||
|
Want []string
|
||||||
|
}{
|
||||||
|
{"//title", []string{"title"}},
|
||||||
|
{"//table[@id='product-table']/caption", []string{`product-table-caption`}},
|
||||||
|
{"//table[@id='product-table']//tr//td[last()]", []string{`100`, `200`, `300`, `400`}},
|
||||||
|
{"//td[@class='price']", []string{`100`, `200`, `300`, `400`}},
|
||||||
|
{"//table[@id='product-table']//tr//td[2]", []string{`10`, `20`, `30`, `40`}},
|
||||||
|
{"//td[@class='stock']", []string{`10`, `20`, `30`, `40`}},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, test := range tests {
|
||||||
|
testname := test.Query
|
||||||
|
t.Run(testname, func(t *testing.T) {
|
||||||
|
filter := Filter{
|
||||||
|
Parents: []*Filter{
|
||||||
|
{Results: []string{HTML_STRING}},
|
||||||
|
},
|
||||||
|
Var1: test.Query,
|
||||||
|
Var2: &var2,
|
||||||
|
}
|
||||||
|
getFilterResultXPath(
|
||||||
|
&filter,
|
||||||
|
)
|
||||||
|
if !reflect.DeepEqual(test.Want, filter.Results) {
|
||||||
|
t.Errorf("Got %s, want %s", filter.Results, test.Want)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFilterXPathAttributes(t *testing.T) {
|
||||||
|
var2 := "attr"
|
||||||
|
var tests = []struct {
|
||||||
|
Query string
|
||||||
|
Want []string
|
||||||
|
}{
|
||||||
|
{"//title", []string{}},
|
||||||
|
{"//table[@id='product-table']/caption", []string{`class="h3"`, `id="table-caption"`, `data="data"`}},
|
||||||
|
{"//table[@id='product-table']//tr//td[last()]", []string{`class="price"`, `class="price"`, `class="price"`, `class="price"`}},
|
||||||
|
{"//td[@class='price']", []string{`class="price"`, `class="price"`, `class="price"`, `class="price"`}},
|
||||||
|
{"//table[@id='product-table']//tr//td[2]", []string{`class="stock"`, `class="stock"`, `class="stock"`, `class="stock"`}},
|
||||||
|
{"//td[@class='stock']", []string{`class="stock"`, `class="stock"`, `class="stock"`, `class="stock"`}},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, test := range tests {
|
||||||
|
testname := test.Query
|
||||||
|
t.Run(testname, func(t *testing.T) {
|
||||||
|
filter := Filter{
|
||||||
|
Parents: []*Filter{
|
||||||
|
{Results: []string{HTML_STRING}},
|
||||||
|
},
|
||||||
|
Var1: test.Query,
|
||||||
|
Var2: &var2,
|
||||||
|
}
|
||||||
|
getFilterResultXPath(
|
||||||
|
&filter,
|
||||||
|
)
|
||||||
|
if len(test.Want) != 0 && len(filter.Results) != 0 && !reflect.DeepEqual(test.Want, filter.Results) {
|
||||||
|
t.Errorf("Got %s, want %s", filter.Results, test.Want)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestFilterJSON(t *testing.T) {
|
func TestFilterJSON(t *testing.T) {
|
||||||
var tests = []struct {
|
var tests = []struct {
|
||||||
Query string
|
Query string
|
||||||
|
@ -107,7 +178,8 @@ func TestFilterJSON(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestFilterCSS(t *testing.T) {
|
func TestFilterCSSNode(t *testing.T) {
|
||||||
|
var2 := "node"
|
||||||
var tests = []struct {
|
var tests = []struct {
|
||||||
Query string
|
Query string
|
||||||
Want []string
|
Want []string
|
||||||
|
@ -127,6 +199,7 @@ func TestFilterCSS(t *testing.T) {
|
||||||
{Results: []string{HTML_STRING}},
|
{Results: []string{HTML_STRING}},
|
||||||
},
|
},
|
||||||
Var1: test.Query,
|
Var1: test.Query,
|
||||||
|
Var2: &var2,
|
||||||
}
|
}
|
||||||
getFilterResultCSS(
|
getFilterResultCSS(
|
||||||
&filter,
|
&filter,
|
||||||
|
@ -138,6 +211,73 @@ func TestFilterCSS(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestFilterCSSInnerHTML(t *testing.T) {
|
||||||
|
var2 := "inner"
|
||||||
|
var tests = []struct {
|
||||||
|
Query string
|
||||||
|
Want []string
|
||||||
|
}{
|
||||||
|
{"title", []string{"title"}},
|
||||||
|
{".product-table tr td:last-child", []string{`100`, `200`, `300`, `400`}},
|
||||||
|
{".price", []string{`100`, `200`, `300`, `400`}},
|
||||||
|
{".product-table tr td:nth-child(2)", []string{`10`, `20`, `30`, `40`}},
|
||||||
|
{".stock", []string{`10`, `20`, `30`, `40`}},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, test := range tests {
|
||||||
|
testname := test.Query
|
||||||
|
t.Run(testname, func(t *testing.T) {
|
||||||
|
filter := Filter{
|
||||||
|
Parents: []*Filter{
|
||||||
|
{Results: []string{HTML_STRING}},
|
||||||
|
},
|
||||||
|
Var1: test.Query,
|
||||||
|
Var2: &var2,
|
||||||
|
}
|
||||||
|
getFilterResultCSS(
|
||||||
|
&filter,
|
||||||
|
)
|
||||||
|
if !reflect.DeepEqual(test.Want, filter.Results) {
|
||||||
|
t.Errorf("Got %s, want %s", filter.Results, test.Want)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFilterCSSAttributes(t *testing.T) {
|
||||||
|
var2 := "attr"
|
||||||
|
var tests = []struct {
|
||||||
|
Query string
|
||||||
|
Want []string
|
||||||
|
}{
|
||||||
|
{"title", []string{}},
|
||||||
|
{"#table-caption", []string{`class="h3"`, `id="table-caption"`, `data="data"`}},
|
||||||
|
{".product-table tr td:last-child", []string{`class="price"`, `class="price"`, `class="price"`, `class="price"`}},
|
||||||
|
{".price", []string{`class="price"`, `class="price"`, `class="price"`, `class="price"`}},
|
||||||
|
{".product-table tr td:nth-child(2)", []string{`class="stock"`, `class="stock"`, `class="stock"`, `class="stock"`}},
|
||||||
|
{".stock", []string{`class="stock"`, `class="stock"`, `class="stock"`, `class="stock"`}},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, test := range tests {
|
||||||
|
testname := test.Query
|
||||||
|
t.Run(testname, func(t *testing.T) {
|
||||||
|
filter := Filter{
|
||||||
|
Parents: []*Filter{
|
||||||
|
{Results: []string{HTML_STRING}},
|
||||||
|
},
|
||||||
|
Var1: test.Query,
|
||||||
|
Var2: &var2,
|
||||||
|
}
|
||||||
|
getFilterResultCSS(
|
||||||
|
&filter,
|
||||||
|
)
|
||||||
|
if len(test.Want) != 0 && len(filter.Results) != 0 && !reflect.DeepEqual(test.Want, filter.Results) {
|
||||||
|
t.Errorf("Got %s, want %s", filter.Results, test.Want)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestFilterReplace(t *testing.T) {
|
func TestFilterReplace(t *testing.T) {
|
||||||
var tests = []struct {
|
var tests = []struct {
|
||||||
Input string
|
Input string
|
||||||
|
|
Loading…
Add table
Reference in a new issue