|
|
|
|
24
|
url = urljoin(base_url, link.attrib.get('href'))
|
24
|
url = urljoin(base_url, link.attrib.get('href'))
|
25
|
title_links = result.xpath('.//span[@class="details"]//a[contains(@class, "t")]')
|
25
|
title_links = result.xpath('.//span[@class="details"]//a[contains(@class, "t")]')
|
26
|
title = ''.join(title_links[0].xpath('.//text()'))
|
26
|
title = ''.join(title_links[0].xpath('.//text()'))
|
27
|
- content = html.tostring(link)+'<br />'+link.attrib.get('title')
|
|
|
|
|
27
|
+ content = html.tostring(link)+'<br />'+link.attrib.get('title', '')
|
28
|
results.append({'url': url, 'title': title, 'content': content})
|
28
|
results.append({'url': url, 'title': title, 'content': content})
|
29
|
return results
|
29
|
return results
|