Kaynağa Gözat

[mod] ddg engine mods

Adam Tauber 11 yıl önce
ebeveyn
işleme
3854703d95
1 değiştirilmiş dosya ile 45 ekleme ve 13 silme
  1. searx/engines/duckduckgo.py (+45, -13)

searx/engines/duckduckgo.py (+45, -13) Dosyayı Görüntüle

@@ -1,29 +1,61 @@
1
-from json import loads
2 1
 from urllib import urlencode
2
+from lxml.html import fromstring
3 3
 from searx.utils import html_to_text
4 4
 
5
-url = 'https://duckduckgo.com/'
6
-search_url = url + 'd.js?{query}&p=1&s={offset}'
5
+url = 'https://duckduckgo.com/html?{query}&s={offset}'
7 6
 locale = 'us-en'
8 7
 
9
-paging = True
10
-
11
-
12 8
 def request(query, params):
13 9
     offset = (params['pageno'] - 1) * 30
14 10
     q = urlencode({'q': query,
15 11
                    'l': locale})
16
-    params['url'] = search_url.format(query=q, offset=offset)
12
+    params['url'] = url.format(query=q, offset=offset)
17 13
     return params
18 14
 
19 15
 
20 16
 def response(resp):
17
+    result_xpath = '//div[@class="results_links results_links_deep web-result"]'
18
+    url_xpath = './/a[@class="large"]/@href'
19
+    title_xpath = './/a[@class="large"]//text()'
20
+    content_xpath = './/div[@class="snippet"]//text()'
21 21
     results = []
22
-    search_res = loads(resp.text[resp.text.find('[{'):-2])[:-1]
23
-    for r in search_res:
24
-        if not r.get('t'):
22
+
23
+    doc = fromstring(resp.text)
24
+
25
+    for r in doc.xpath(result_xpath):
26
+        res_url = r.xpath(url_xpath)[-1]
27
+        if not res_url:
25 28
             continue
26
-        results.append({'title': r['t'],
27
-                       'content': html_to_text(r['a']),
28
-                       'url': r['u']})
29
+        title = html_to_text(''.join(r.xpath(title_xpath)))
30
+        content = html_to_text(''.join(r.xpath(content_xpath)))
31
+        results.append({'title': title,
32
+                        'content': content,
33
+                        'url': res_url})
34
+
29 35
     return results
36
+
37
+
38
+#from json import loads
39
+#search_url = url + 'd.js?{query}&p=1&s={offset}'
40
+#
41
+#paging = True
42
+#
43
+#
44
+#def request(query, params):
45
+#    offset = (params['pageno'] - 1) * 30
46
+#    q = urlencode({'q': query,
47
+#                   'l': locale})
48
+#    params['url'] = search_url.format(query=q, offset=offset)
49
+#    return params
50
+#
51
+#
52
+#def response(resp):
53
+#    results = []
54
+#    search_res = loads(resp.text[resp.text.find('[{'):-2])[:-1]
55
+#    for r in search_res:
56
+#        if not r.get('t'):
57
+#            continue
58
+#        results.append({'title': r['t'],
59
+#                       'content': html_to_text(r['a']),
60
+#                       'url': r['u']})
61
+#    return results