|
@@ -29,9 +29,12 @@ def response(resp):
|
29
|
29
|
dom = html.fromstring(resp.content)
|
30
|
30
|
# ads xpath //div[@id="results"]/div[@id="sponsored"]//div[@class="result"]
|
31
|
31
|
# not ads: div[@class="result"] are the direct children of div[@id="results"]
|
32
|
|
- for result in dom.xpath('//div[@id="results"]/div[@class="result"]'):
|
|
32
|
+ for result in dom.xpath('//div[@class="result"]'):
|
33
|
33
|
link = result.xpath('.//h3/a')[0]
|
34
|
34
|
url = link.attrib.get('href')
|
|
35
|
+ if url.startswith('http://www.google.')\
|
|
36
|
+ or url.startswith('https://www.google.'):
|
|
37
|
+ continue
|
35
|
38
|
title = link.text_content()
|
36
|
39
|
|
37
|
40
|
content = ''
|