| 
				
			 | 
			
			
				@@ -1,29 +1,61 @@ 
			 | 
		
	
		
			
			| 
				1
			 | 
			
				
			 | 
			
			
				-from json import loads 
			 | 
		
	
		
			
			| 
				2
			 | 
			
				1
			 | 
			
			
				 from urllib import urlencode 
			 | 
		
	
		
			
			| 
				
			 | 
			
				2
			 | 
			
			
				+from lxml.html import fromstring 
			 | 
		
	
		
			
			| 
				3
			 | 
			
				3
			 | 
			
			
				 from searx.utils import html_to_text 
			 | 
		
	
		
			
			| 
				4
			 | 
			
				4
			 | 
			
			
				  
			 | 
		
	
		
			
			| 
				5
			 | 
			
				
			 | 
			
			
				-url = 'https://duckduckgo.com/' 
			 | 
		
	
		
			
			| 
				6
			 | 
			
				
			 | 
			
			
				-search_url = url + 'd.js?{query}&p=1&s={offset}' 
			 | 
		
	
		
			
			| 
				
			 | 
			
				5
			 | 
			
			
				+url = 'https://duckduckgo.com/html?{query}&s={offset}' 
			 | 
		
	
		
			
			| 
				7
			 | 
			
				6
			 | 
			
			
				 locale = 'us-en' 
			 | 
		
	
		
			
			| 
				8
			 | 
			
				7
			 | 
			
			
				  
			 | 
		
	
		
			
			| 
				9
			 | 
			
				
			 | 
			
			
				-paging = True 
			 | 
		
	
		
			
			| 
				10
			 | 
			
				
			 | 
			
			
				- 
			 | 
		
	
		
			
			| 
				11
			 | 
			
				
			 | 
			
			
				- 
			 | 
		
	
		
			
			| 
				12
			 | 
			
				8
			 | 
			
			
				 def request(query, params): 
			 | 
		
	
		
			
			| 
				13
			 | 
			
				9
			 | 
			
			
				     offset = (params['pageno'] - 1) * 30 
			 | 
		
	
		
			
			| 
				14
			 | 
			
				10
			 | 
			
			
				     q = urlencode({'q': query, 
			 | 
		
	
		
			
			| 
				15
			 | 
			
				11
			 | 
			
			
				                    'l': locale}) 
			 | 
		
	
		
			
			| 
				16
			 | 
			
				
			 | 
			
			
				-    params['url'] = search_url.format(query=q, offset=offset) 
			 | 
		
	
		
			
			| 
				
			 | 
			
				12
			 | 
			
			
				+    params['url'] = url.format(query=q, offset=offset) 
			 | 
		
	
		
			
			| 
				17
			 | 
			
				13
			 | 
			
			
				     return params 
			 | 
		
	
		
			
			| 
				18
			 | 
			
				14
			 | 
			
			
				  
			 | 
		
	
		
			
			| 
				19
			 | 
			
				15
			 | 
			
			
				  
			 | 
		
	
		
			
			| 
				20
			 | 
			
				16
			 | 
			
			
				 def response(resp): 
			 | 
		
	
		
			
			| 
				
			 | 
			
				17
			 | 
			
			
				+    result_xpath = '//div[@class="results_links results_links_deep web-result"]' 
			 | 
		
	
		
			
			| 
				
			 | 
			
				18
			 | 
			
			
				+    url_xpath = './/a[@class="large"]/@href' 
			 | 
		
	
		
			
			| 
				
			 | 
			
				19
			 | 
			
			
				+    title_xpath = './/a[@class="large"]//text()' 
			 | 
		
	
		
			
			| 
				
			 | 
			
				20
			 | 
			
			
				+    content_xpath = './/div[@class="snippet"]//text()' 
			 | 
		
	
		
			
			| 
				21
			 | 
			
				21
			 | 
			
			
				     results = [] 
			 | 
		
	
		
			
			| 
				22
			 | 
			
				
			 | 
			
			
				-    search_res = loads(resp.text[resp.text.find('[{'):-2])[:-1] 
			 | 
		
	
		
			
			| 
				23
			 | 
			
				
			 | 
			
			
				-    for r in search_res: 
			 | 
		
	
		
			
			| 
				24
			 | 
			
				
			 | 
			
			
				-        if not r.get('t'): 
			 | 
		
	
		
			
			| 
				
			 | 
			
				22
			 | 
			
			
				+ 
			 | 
		
	
		
			
			| 
				
			 | 
			
				23
			 | 
			
			
				+    doc = fromstring(resp.text) 
			 | 
		
	
		
			
			| 
				
			 | 
			
				24
			 | 
			
			
				+ 
			 | 
		
	
		
			
			| 
				
			 | 
			
				25
			 | 
			
			
				+    for r in doc.xpath(result_xpath): 
			 | 
		
	
		
			
			| 
				
			 | 
			
				26
			 | 
			
			
				+        res_url = r.xpath(url_xpath)[-1] 
			 | 
		
	
		
			
			| 
				
			 | 
			
				27
			 | 
			
			
				+        if not res_url: 
			 | 
		
	
		
			
			| 
				25
			 | 
			
				28
			 | 
			
			
				             continue 
			 | 
		
	
		
			
			| 
				26
			 | 
			
				
			 | 
			
			
				-        results.append({'title': r['t'], 
			 | 
		
	
		
			
			| 
				27
			 | 
			
				
			 | 
			
			
				-                       'content': html_to_text(r['a']), 
			 | 
		
	
		
			
			| 
				28
			 | 
			
				
			 | 
			
			
				-                       'url': r['u']}) 
			 | 
		
	
		
			
			| 
				
			 | 
			
				29
			 | 
			
			
				+        title = html_to_text(''.join(r.xpath(title_xpath))) 
			 | 
		
	
		
			
			| 
				
			 | 
			
				30
			 | 
			
			
				+        content = html_to_text(''.join(r.xpath(content_xpath))) 
			 | 
		
	
		
			
			| 
				
			 | 
			
				31
			 | 
			
			
				+        results.append({'title': title, 
			 | 
		
	
		
			
			| 
				
			 | 
			
				32
			 | 
			
			
				+                        'content': content, 
			 | 
		
	
		
			
			| 
				
			 | 
			
				33
			 | 
			
			
				+                        'url': res_url}) 
			 | 
		
	
		
			
			| 
				
			 | 
			
				34
			 | 
			
			
				+ 
			 | 
		
	
		
			
			| 
				29
			 | 
			
				35
			 | 
			
			
				     return results 
			 | 
		
	
		
			
			| 
				
			 | 
			
				36
			 | 
			
			
				+ 
			 | 
		
	
		
			
			| 
				
			 | 
			
				37
			 | 
			
			
				+ 
			 | 
		
	
		
			
			| 
				
			 | 
			
				38
			 | 
			
			
				+#from json import loads 
			 | 
		
	
		
			
			| 
				
			 | 
			
				39
			 | 
			
			
				+#search_url = url + 'd.js?{query}&p=1&s={offset}' 
			 | 
		
	
		
			
			| 
				
			 | 
			
				40
			 | 
			
			
				+# 
			 | 
		
	
		
			
			| 
				
			 | 
			
				41
			 | 
			
			
				+#paging = True 
			 | 
		
	
		
			
			| 
				
			 | 
			
				42
			 | 
			
			
				+# 
			 | 
		
	
		
			
			| 
				
			 | 
			
				43
			 | 
			
			
				+# 
			 | 
		
	
		
			
			| 
				
			 | 
			
				44
			 | 
			
			
				+#def request(query, params): 
			 | 
		
	
		
			
			| 
				
			 | 
			
				45
			 | 
			
			
				+#    offset = (params['pageno'] - 1) * 30 
			 | 
		
	
		
			
			| 
				
			 | 
			
				46
			 | 
			
			
				+#    q = urlencode({'q': query, 
			 | 
		
	
		
			
			| 
				
			 | 
			
				47
			 | 
			
			
				+#                   'l': locale}) 
			 | 
		
	
		
			
			| 
				
			 | 
			
				48
			 | 
			
			
				+#    params['url'] = search_url.format(query=q, offset=offset) 
			 | 
		
	
		
			
			| 
				
			 | 
			
				49
			 | 
			
			
				+#    return params 
			 | 
		
	
		
			
			| 
				
			 | 
			
				50
			 | 
			
			
				+# 
			 | 
		
	
		
			
			| 
				
			 | 
			
				51
			 | 
			
			
				+# 
			 | 
		
	
		
			
			| 
				
			 | 
			
				52
			 | 
			
			
				+#def response(resp): 
			 | 
		
	
		
			
			| 
				
			 | 
			
				53
			 | 
			
			
				+#    results = [] 
			 | 
		
	
		
			
			| 
				
			 | 
			
				54
			 | 
			
			
				+#    search_res = loads(resp.text[resp.text.find('[{'):-2])[:-1] 
			 | 
		
	
		
			
			| 
				
			 | 
			
				55
			 | 
			
			
				+#    for r in search_res: 
			 | 
		
	
		
			
			| 
				
			 | 
			
				56
			 | 
			
			
				+#        if not r.get('t'): 
			 | 
		
	
		
			
			| 
				
			 | 
			
				57
			 | 
			
			
				+#            continue 
			 | 
		
	
		
			
			| 
				
			 | 
			
				58
			 | 
			
			
				+#        results.append({'title': r['t'], 
			 | 
		
	
		
			
			| 
				
			 | 
			
				59
			 | 
			
			
				+#                       'content': html_to_text(r['a']), 
			 | 
		
	
		
			
			| 
				
			 | 
			
				60
			 | 
			
			
				+#                       'url': r['u']}) 
			 | 
		
	
		
			
			| 
				
			 | 
			
				61
			 | 
			
			
				+#    return results 
			 |