|  | @@ -1,24 +1,48 @@
 | 
	
		
			
			|  | 1 | +## DuckDuckGo (Web)
 | 
	
		
			
			|  | 2 | +# 
 | 
	
		
			
			|  | 3 | +# @website     https://duckduckgo.com/
 | 
	
		
			
			|  | 4 | +# @provide-api yes (https://duckduckgo.com/api), but it does not return all results of the search site
 | 
	
		
			
			|  | 5 | +# 
 | 
	
		
			
			|  | 6 | +# @using-api   no
 | 
	
		
			
			|  | 7 | +# @results     HTML (using search portal)
 | 
	
		
			
			|  | 8 | +# @stable      no (HTML can change)
 | 
	
		
			
			|  | 9 | +# @parse       url, title, content
 | 
	
		
			
			|  | 10 | +#
 | 
	
		
			
			|  | 11 | +# @todo        rewrite to use the API
 | 
	
		
			
			|  | 12 | +# @todo        language support
 | 
	
		
			
			|  | 13 | +
 | 
	
		
			
			| 1 | 14 |  from urllib import urlencode
 | 
	
		
			
			| 2 | 15 |  from lxml.html import fromstring
 | 
	
		
			
			| 3 | 16 |  from searx.utils import html_to_text
 | 
	
		
			
			| 4 | 17 |  
 | 
	
		
			
			| 5 |  | -url = 'https://duckduckgo.com/html?{query}&s={offset}'
 | 
	
		
			
			|  | 18 | +# engine dependent config
 | 
	
		
			
			|  | 19 | +categories = ['general']
 | 
	
		
			
			|  | 20 | +paging = True
 | 
	
		
			
			| 6 | 21 |  locale = 'us-en'
 | 
	
		
			
			| 7 | 22 |  
 | 
	
		
			
			|  | 23 | +# search-url
 | 
	
		
			
			|  | 24 | +url = 'https://duckduckgo.com/html?{query}&s={offset}'
 | 
	
		
			
			|  | 25 | +
 | 
	
		
			
			|  | 26 | +# specific xpath variables
 | 
	
		
			
			|  | 27 | +result_xpath = '//div[@class="results_links results_links_deep web-result"]'  # noqa
 | 
	
		
			
			|  | 28 | +url_xpath = './/a[@class="large"]/@href'
 | 
	
		
			
			|  | 29 | +title_xpath = './/a[@class="large"]//text()'
 | 
	
		
			
			|  | 30 | +content_xpath = './/div[@class="snippet"]//text()'
 | 
	
		
			
			| 8 | 31 |  
 | 
	
		
			
			|  | 32 | +
 | 
	
		
			
			|  | 33 | +# do search-request
 | 
	
		
			
			| 9 | 34 |  def request(query, params):
 | 
	
		
			
			| 10 | 35 |      offset = (params['pageno'] - 1) * 30
 | 
	
		
			
			| 11 |  | -    q = urlencode({'q': query,
 | 
	
		
			
			| 12 |  | -                   'l': locale})
 | 
	
		
			
			| 13 |  | -    params['url'] = url.format(query=q, offset=offset)
 | 
	
		
			
			|  | 36 | +
 | 
	
		
			
			|  | 37 | +    params['url'] = url.format(
 | 
	
		
			
			|  | 38 | +        query=urlencode({'q': query, 'l': locale}),
 | 
	
		
			
			|  | 39 | +        offset=offset)
 | 
	
		
			
			|  | 40 | +
 | 
	
		
			
			| 14 | 41 |      return params
 | 
	
		
			
			| 15 | 42 |  
 | 
	
		
			
			| 16 | 43 |  
 | 
	
		
			
			|  | 44 | +# get response from search-request
 | 
	
		
			
			| 17 | 45 |  def response(resp):
 | 
	
		
			
			| 18 |  | -    result_xpath = '//div[@class="results_links results_links_deep web-result"]'  # noqa
 | 
	
		
			
			| 19 |  | -    url_xpath = './/a[@class="large"]/@href'
 | 
	
		
			
			| 20 |  | -    title_xpath = './/a[@class="large"]//text()'
 | 
	
		
			
			| 21 |  | -    content_xpath = './/div[@class="snippet"]//text()'
 | 
	
		
			
			| 22 | 46 |      results = []
 | 
	
		
			
			| 23 | 47 |  
 | 
	
		
			
			| 24 | 48 |      doc = fromstring(resp.text)
 | 
	
	
		
			
			|  | @@ -28,38 +52,17 @@ def response(resp):
 | 
	
		
			
			| 28 | 52 |              res_url = r.xpath(url_xpath)[-1]
 | 
	
		
			
			| 29 | 53 |          except:
 | 
	
		
			
			| 30 | 54 |              continue
 | 
	
		
			
			|  | 55 | +
 | 
	
		
			
			| 31 | 56 |          if not res_url:
 | 
	
		
			
			| 32 | 57 |              continue
 | 
	
		
			
			|  | 58 | +
 | 
	
		
			
			| 33 | 59 |          title = html_to_text(''.join(r.xpath(title_xpath)))
 | 
	
		
			
			| 34 | 60 |          content = html_to_text(''.join(r.xpath(content_xpath)))
 | 
	
		
			
			|  | 61 | +
 | 
	
		
			
			|  | 62 | +        # append result
 | 
	
		
			
			| 35 | 63 |          results.append({'title': title,
 | 
	
		
			
			| 36 | 64 |                          'content': content,
 | 
	
		
			
			| 37 | 65 |                          'url': res_url})
 | 
	
		
			
			| 38 | 66 |  
 | 
	
		
			
			|  | 67 | +    # return results
 | 
	
		
			
			| 39 | 68 |      return results
 | 
	
		
			
			| 40 |  | -
 | 
	
		
			
			| 41 |  | -
 | 
	
		
			
			| 42 |  | -#from json import loads
 | 
	
		
			
			| 43 |  | -#search_url = url + 'd.js?{query}&p=1&s={offset}'
 | 
	
		
			
			| 44 |  | -#
 | 
	
		
			
			| 45 |  | -#paging = True
 | 
	
		
			
			| 46 |  | -#
 | 
	
		
			
			| 47 |  | -#
 | 
	
		
			
			| 48 |  | -#def request(query, params):
 | 
	
		
			
			| 49 |  | -#    offset = (params['pageno'] - 1) * 30
 | 
	
		
			
			| 50 |  | -#    q = urlencode({'q': query,
 | 
	
		
			
			| 51 |  | -#                   'l': locale})
 | 
	
		
			
			| 52 |  | -#    params['url'] = search_url.format(query=q, offset=offset)
 | 
	
		
			
			| 53 |  | -#    return params
 | 
	
		
			
			| 54 |  | -#
 | 
	
		
			
			| 55 |  | -#
 | 
	
		
			
			| 56 |  | -#def response(resp):
 | 
	
		
			
			| 57 |  | -#    results = []
 | 
	
		
			
			| 58 |  | -#    search_res = loads(resp.text[resp.text.find('[{'):-2])[:-1]
 | 
	
		
			
			| 59 |  | -#    for r in search_res:
 | 
	
		
			
			| 60 |  | -#        if not r.get('t'):
 | 
	
		
			
			| 61 |  | -#            continue
 | 
	
		
			
			| 62 |  | -#        results.append({'title': r['t'],
 | 
	
		
			
			| 63 |  | -#                       'content': html_to_text(r['a']),
 | 
	
		
			
			| 64 |  | -#                       'url': r['u']})
 | 
	
		
			
			| 65 |  | -#    return results
 |