|
@@ -31,8 +31,6 @@ if xpath_results is a string element, then it's already done
|
31
|
31
|
def extract_text(xpath_results):
|
32
|
32
|
if type(xpath_results) == list:
|
33
|
33
|
# it's list of result : concat everything using recursive call
|
34
|
|
- if not xpath_results:
|
35
|
|
- raise Exception('Empty url resultset')
|
36
|
34
|
result = ''
|
37
|
35
|
for e in xpath_results:
|
38
|
36
|
result = result + extract_text(e)
|
|
@@ -48,6 +46,8 @@ def extract_text(xpath_results):
|
48
|
46
|
|
49
|
47
|
|
50
|
48
|
def extract_url(xpath_results, search_url):
|
|
49
|
+ if xpath_results == []:
|
|
50
|
+ raise Exception('Empty url resultset')
|
51
|
51
|
url = extract_text(xpath_results)
|
52
|
52
|
|
53
|
53
|
if url.startswith('//'):
|
|
@@ -103,8 +103,8 @@ def response(resp):
|
103
|
103
|
if results_xpath:
|
104
|
104
|
for result in dom.xpath(results_xpath):
|
105
|
105
|
url = extract_url(result.xpath(url_xpath), search_url)
|
106
|
|
- title = extract_text(result.xpath(title_xpath)[0])
|
107
|
|
- content = extract_text(result.xpath(content_xpath)[0])
|
|
106
|
+ title = extract_text(result.xpath(title_xpath))
|
|
107
|
+ content = extract_text(result.xpath(content_xpath))
|
108
|
108
|
results.append({'url': url, 'title': title, 'content': content})
|
109
|
109
|
else:
|
110
|
110
|
for url, title, content in zip(
|