|
@@ -59,16 +59,14 @@ def response(resp):
|
59
|
59
|
url = link.attrib.get('href')
|
60
|
60
|
title = extract_text(link)
|
61
|
61
|
contentXPath = result.xpath('.//div[@class="sn_txt"]/div//span[@class="sn_snip"]')
|
62
|
|
- if contentXPath is not None:
|
63
|
|
- content = escape(extract_text(contentXPath))
|
|
62
|
+ content = escape(extract_text(contentXPath))
|
64
|
63
|
|
65
|
64
|
# parse publishedDate
|
66
|
65
|
publishedDateXPath = result.xpath('.//div[@class="sn_txt"]/div'
|
67
|
66
|
'//span[contains(@class,"sn_ST")]'
|
68
|
67
|
'//span[contains(@class,"sn_tm")]')
|
69
|
68
|
|
70
|
|
- if publishedDateXPath is not None:
|
71
|
|
- publishedDate = escape(extract_text(publishedDateXPath))
|
|
69
|
+ publishedDate = escape(extract_text(publishedDateXPath))
|
72
|
70
|
|
73
|
71
|
if re.match("^[0-9]+ minute(s|) ago$", publishedDate):
|
74
|
72
|
timeNumbers = re.findall(r'\d+', publishedDate)
|