Przeglądaj źródła

Merge pull request #91 from pointhi/engines

fix yacy-engine and add comments
Adam Tauber 10 lat temu
rodzic
commit
29afa9b4a1
2 zmienione pliki z 74 dodaniami i 18 usunięciami
  1. 67
    18
      searx/engines/yacy.py
  2. 7
    0
      searx/settings.yml

+ 67
- 18
searx/engines/yacy.py Wyświetl plik

@@ -1,40 +1,89 @@
1
+## Yacy (Web, Images, Videos, Music, Files)
2
+# 
3
+# @website     http://yacy.net
4
+# @provide-api yes (http://www.yacy-websuche.de/wiki/index.php/Dev:APIyacysearch)
5
+# 
6
+# @using-api   yes
7
+# @results     JSON
8
+# @stable      yes
9
+# @parse       (general)    url, title, content, publishedDate
10
+# @parse       (images)     url, title, img_src
11
+#
12
+# @todo        parse video, audio and file results
13
+
1 14
 from json import loads
2 15
 from urllib import urlencode
16
+from dateutil import parser
17
+
18
+# engine dependent config
19
+categories = ['general', 'images'] #TODO , 'music', 'videos', 'files'
20
+paging = True
21
+language_support = True
22
+number_of_results = 5
23
+
24
+# search-url
25
+base_url = 'http://localhost:8090'
26
+search_url = '/yacysearch.json?{query}&startRecord={offset}&maximumRecords={limit}&contentdom={search_type}&resource=global'
3 27
 
4
-url = 'http://localhost:8090'
5
-search_url = '/yacysearch.json?{query}&maximumRecords=10'
28
+# yacy specific type-definitions
29
+search_types = {'general': 'text',
30
+                'images': 'image',
31
+                'files': 'app',               
32
+                'music': 'audio',
33
+                'videos': 'video'}
6 34
 
7 35
 
36
+# do search-request
8 37
 def request(query, params):
9
-    params['url'] = url + search_url.format(query=urlencode({'query': query}))
38
+    offset = (params['pageno'] - 1) * number_of_results
39
+    search_type = search_types.get(params['category'], '0')
40
+
41
+    params['url'] = base_url + search_url.format(query=urlencode({'query': query}),
42
+                                                 offset=offset,
43
+                                                 limit=number_of_results,
44
+                                                 search_type=search_type)
45
+
46
+    # add language tag if specified
47
+    if params['language'] != 'all':
48
+        params['url'] += '&lr=lang_' + params['language'].split('_')[0]
49
+
10 50
     return params
11 51
 
12 52
 
53
+# get response from search-request
13 54
 def response(resp):
55
+    results = []
56
+
14 57
     raw_search_results = loads(resp.text)
15 58
 
59
+    # return empty array if there are no results
16 60
     if not raw_search_results:
17 61
         return []
18 62
 
19 63
     search_results = raw_search_results.get('channels', {})[0].get('items', [])
20 64
 
21
-    results = []
22
-
23
-    for result in search_results:
24
-        tmp_result = {}
25
-        tmp_result['title'] = result['title']
26
-        tmp_result['url'] = result['link']
27
-        tmp_result['content'] = ''
28
-
29
-        if result['description']:
30
-            tmp_result['content'] += result['description'] + "<br/>"
65
+    if resp.search_params['category'] == 'general':
66
+        # parse general results
67
+        for result in search_results:
68
+            publishedDate = parser.parse(result['pubDate'])
31 69
 
32
-        if result['pubDate']:
33
-            tmp_result['content'] += result['pubDate'] + "<br/>"
70
+            # append result
71
+            results.append({'url': result['link'],
72
+                        'title': result['title'],
73
+                        'content': result['description'],
74
+                        'publishedDate': publishedDate})
34 75
 
35
-        if result['size'] != '-1':
36
-            tmp_result['content'] += result['sizename']
76
+    elif resp.search_params['category'] == 'images':
77
+        # parse image results
78
+        for result in search_results:
79
+            # append result
80
+            results.append({'url': result['url'],
81
+                        'title': result['title'],
82
+                        'content': '',
83
+                        'img_src': result['image'],
84
+                        'template': 'images.html'})
37 85
 
38
-        results.append(tmp_result)
86
+    #TODO parse video, audio and file results
39 87
 
88
+    # return results
40 89
     return results

+ 7
- 0
searx/settings.yml Wyświetl plik

@@ -143,6 +143,13 @@ engines:
143 143
     locale : en-US
144 144
     shortcut : vm
145 145
 
146
+#  - name : yacy
147
+#    engine : yacy
148
+#    shortcut : ya
149
+#    base_url : 'http://localhost:8090'
150
+#    number_of_results : 5
151
+#    timeout: 3.0
152
+
146 153
 locales:
147 154
     en : English
148 155
     de : Deutsch