| 
				
			 | 
			
			
				@@ -1,6 +1,6 @@ 
			 | 
		
	
		
			
			| 
				1
			 | 
			
				1
			 | 
			
			
				 from urllib import quote 
			 | 
		
	
		
			
			| 
				2
			 | 
			
				2
			 | 
			
			
				 from lxml import html 
			 | 
		
	
		
			
			| 
				3
			 | 
			
				
			 | 
			
			
				-from urlparse import urljoin 
			 | 
		
	
		
			
			| 
				
			 | 
			
				3
			 | 
			
			
				+from urlparse import urljoin, urlparse 
			 | 
		
	
		
			
			| 
				4
			 | 
			
				4
			 | 
			
			
				 from cgi import escape 
			 | 
		
	
		
			
			| 
				5
			 | 
			
				5
			 | 
			
			
				  
			 | 
		
	
		
			
			| 
				6
			 | 
			
				6
			 | 
			
			
				 base_url = 'https://startpage.com/' 
			 | 
		
	
	
		
			
			| 
				
			 | 
			
			
				@@ -22,6 +22,10 @@ def response(resp): 
			 | 
		
	
		
			
			| 
				22
			 | 
			
				22
			 | 
			
			
				     for result in dom.xpath('//div[@class="result"]'): 
			 | 
		
	
		
			
			| 
				23
			 | 
			
				23
			 | 
			
			
				         link = result.xpath('.//h3/a')[0] 
			 | 
		
	
		
			
			| 
				24
			 | 
			
				24
			 | 
			
			
				         url = urljoin(base_url, link.attrib.get('href')) 
			 | 
		
	
		
			
			| 
				
			 | 
			
				25
			 | 
			
			
				+        parsed_url = urlparse(url) 
			 | 
		
	
		
			
			| 
				
			 | 
			
				26
			 | 
			
			
				+        # TODO better google link detection 
			 | 
		
	
		
			
			| 
				
			 | 
			
				27
			 | 
			
			
				+        if parsed_url.netloc.find('google.com') >= 0: 
			 | 
		
	
		
			
			| 
				
			 | 
			
				28
			 | 
			
			
				+            continue 
			 | 
		
	
		
			
			| 
				25
			 | 
			
				29
			 | 
			
			
				         title = ' '.join(link.xpath('.//text()')) 
			 | 
		
	
		
			
			| 
				26
			 | 
			
				30
			 | 
			
			
				         content = escape(' '.join(result.xpath('.//p[@class="desc"]//text()'))) 
			 | 
		
	
		
			
			| 
				27
			 | 
			
				31
			 | 
			
			
				         results.append({'url': url, 'title': title, 'content': content}) 
			 |