|  | @@ -1,6 +1,6 @@
 | 
	
		
			
			| 1 | 1 |  from urllib import quote
 | 
	
		
			
			| 2 | 2 |  from lxml import html
 | 
	
		
			
			| 3 |  | -from urlparse import urljoin
 | 
	
		
			
			|  | 3 | +from urlparse import urljoin, urlparse
 | 
	
		
			
			| 4 | 4 |  from cgi import escape
 | 
	
		
			
			| 5 | 5 |  
 | 
	
		
			
			| 6 | 6 |  base_url = 'https://startpage.com/'
 | 
	
	
		
			
			|  | @@ -22,6 +22,10 @@ def response(resp):
 | 
	
		
			
			| 22 | 22 |      for result in dom.xpath('//div[@class="result"]'):
 | 
	
		
			
			| 23 | 23 |          link = result.xpath('.//h3/a')[0]
 | 
	
		
			
			| 24 | 24 |          url = urljoin(base_url, link.attrib.get('href'))
 | 
	
		
			
			|  | 25 | +        parsed_url = urlparse(url)
 | 
	
		
			
			|  | 26 | +        # TODO: better Google link detection (a substring match on netloc is imprecise)
 | 
	
		
			
			|  | 27 | +        if parsed_url.netloc.find('google.com') >= 0:
 | 
	
		
			
			|  | 28 | +            continue
 | 
	
		
			
			| 25 | 29 |          title = ' '.join(link.xpath('.//text()'))
 | 
	
		
			
			| 26 | 30 |          content = escape(' '.join(result.xpath('.//p[@class="desc"]//text()')))
 | 
	
		
			
			| 27 | 31 |          results.append({'url': url, 'title': title, 'content': content})
 |