Przeglądaj źródła

[enh] yandex engine added

Adam Tauber 9 lat temu
rodzic
commit
fafc564874
2 zmienionych plików z 60 dodań i 0 usunięć
  1. 55
    0
      searx/engines/yandex.py
  2. 5
    0
      searx/settings.yml

+ 55
- 0
searx/engines/yandex.py Wyświetl plik

@@ -0,0 +1,55 @@
1
+"""
2
+ Yandex (Web)
3
+
4
+ @website     https://yandex.ru/
5
+ @provide-api ?
6
+ @using-api   no
7
+ @results     HTML (using search portal)
8
+ @stable      no (HTML can change)
9
+ @parse       url, title, content
10
+"""
11
+
12
+from urllib import urlencode
13
+from lxml import html
14
+from searx.search import logger
15
+
16
+logger = logger.getChild('yandex engine')
17
+
18
+# engine dependent config
19
+categories = ['general']
20
+paging = True
21
+language_support = True  # TODO: request() does not send any language parameter yet
22
+
23
+# search-url
24
+base_url = 'https://yandex.ru/'
25
+search_url = 'search/?{query}&p={page}'
26
+
27
+results_xpath = '//div[@class="serp-item serp-item_plain_yes clearfix i-bem"]'
28
+url_xpath = './/h2/a/@href'
29
+title_xpath = './/h2/a//text()'
30
+content_xpath = './/div[@class="serp-item__text"]//text()'
31
+
32
+
33
+def request(query, params):
34
+    params['url'] = base_url + search_url.format(page=params['pageno']-1,
35
+                                                 query=urlencode({'text': query}))
36
+    return params
37
+
38
+
39
+# get response from search-request
40
+def response(resp):
41
+    dom = html.fromstring(resp.text)
42
+    results = []
43
+
44
+    for result in dom.xpath(results_xpath):
45
+        try:
46
+            res = {'url': result.xpath(url_xpath)[0],
47
+                   'title': ''.join(result.xpath(title_xpath)),
48
+                   'content': ''.join(result.xpath(content_xpath))}
49
+        except:
50
+            logger.exception('yandex parse crash')
51
+            continue
52
+
53
+        results.append(res)
54
+
55
+    return results

+ 5
- 0
searx/settings.yml Wyświetl plik

@@ -274,6 +274,11 @@ engines:
274 274
     engine : yahoo
275 275
     shortcut : yh
276 276
 
277
+  - name : yandex
278
+    engine : yandex
279
+    shortcut : ya
280
+    disabled : True
281
+
277 282
   - name : yahoo news
278 283
     engine : yahoo_news
279 284
     shortcut : yhn