浏览代码

Merge pull request #91 from pointhi/engines

fix yacy-engine and add comments
Adam Tauber 10 年前
父节点
当前提交
29afa9b4a1
共有 2 个文件被更改,包括 74 次插入和 18 次删除
  1. searx/engines/yacy.py (+67 -18)
  2. searx/settings.yml (+7 -0)

+ 67
- 18
searx/engines/yacy.py 查看文件

1
+## Yacy (Web, Images, Videos, Music, Files)
2
+# 
3
+# @website     http://yacy.net
4
+# @provide-api yes (http://www.yacy-websuche.de/wiki/index.php/Dev:APIyacysearch)
5
+# 
6
+# @using-api   yes
7
+# @results     JSON
8
+# @stable      yes
9
+# @parse       (general)    url, title, content, publishedDate
10
+# @parse       (images)     url, title, img_src
11
+#
12
+# @todo        parse video, audio and file results
13
+
1
 from json import loads
14
 from json import loads
2
 from urllib import urlencode
15
 from urllib import urlencode
16
+from dateutil import parser
17
+
18
+# engine dependent config
19
+categories = ['general', 'images'] #TODO , 'music', 'videos', 'files'
20
+paging = True
21
+language_support = True
22
+number_of_results = 5
23
+
24
+# search-url
25
+base_url = 'http://localhost:8090'
26
+search_url = '/yacysearch.json?{query}&startRecord={offset}&maximumRecords={limit}&contentdom={search_type}&resource=global'
3
 
27
 
4
-url = 'http://localhost:8090'
5
-search_url = '/yacysearch.json?{query}&maximumRecords=10'
28
+# yacy specific type-definitions
29
+search_types = {'general': 'text',
30
+                'images': 'image',
31
+                'files': 'app',               
32
+                'music': 'audio',
33
+                'videos': 'video'}
6
 
34
 
7
 
35
 
36
+# do search-request
8
 def request(query, params):
37
 def request(query, params):
9
-    params['url'] = url + search_url.format(query=urlencode({'query': query}))
38
+    offset = (params['pageno'] - 1) * number_of_results
39
+    search_type = search_types.get(params['category'], '0')
40
+
41
+    params['url'] = base_url + search_url.format(query=urlencode({'query': query}),
42
+                                                 offset=offset,
43
+                                                 limit=number_of_results,
44
+                                                 search_type=search_type)
45
+
46
+    # add language tag if specified
47
+    if params['language'] != 'all':
48
+        params['url'] += '&lr=lang_' + params['language'].split('_')[0]
49
+
10
     return params
50
     return params
11
 
51
 
12
 
52
 
53
+# get response from search-request
13
 def response(resp):
54
 def response(resp):
55
+    results = []
56
+
14
     raw_search_results = loads(resp.text)
57
     raw_search_results = loads(resp.text)
15
 
58
 
59
+    # return empty array if there are no results
16
     if not raw_search_results:
60
     if not raw_search_results:
17
         return []
61
         return []
18
 
62
 
19
     search_results = raw_search_results.get('channels', {})[0].get('items', [])
63
     search_results = raw_search_results.get('channels', {})[0].get('items', [])
20
 
64
 
21
-    results = []
22
-
23
-    for result in search_results:
24
-        tmp_result = {}
25
-        tmp_result['title'] = result['title']
26
-        tmp_result['url'] = result['link']
27
-        tmp_result['content'] = ''
28
-
29
-        if result['description']:
30
-            tmp_result['content'] += result['description'] + "<br/>"
65
+    if resp.search_params['category'] == 'general':
66
+        # parse general results
67
+        for result in search_results:
68
+            publishedDate = parser.parse(result['pubDate'])
31
 
69
 
32
-        if result['pubDate']:
33
-            tmp_result['content'] += result['pubDate'] + "<br/>"
70
+            # append result
71
+            results.append({'url': result['link'],
72
+                        'title': result['title'],
73
+                        'content': result['description'],
74
+                        'publishedDate': publishedDate})
34
 
75
 
35
-        if result['size'] != '-1':
36
-            tmp_result['content'] += result['sizename']
76
+    elif resp.search_params['category'] == 'images':
77
+        # parse image results
78
+        for result in search_results:
79
+            # append result
80
+            results.append({'url': result['url'],
81
+                        'title': result['title'],
82
+                        'content': '',
83
+                        'img_src': result['image'],
84
+                        'template': 'images.html'})
37
 
85
 
38
-        results.append(tmp_result)
86
+    #TODO parse video, audio and file results
39
 
87
 
88
+    # return results
40
     return results
89
     return results

+ 7
- 0
searx/settings.yml 查看文件

143
     locale : en-US
143
     locale : en-US
144
     shortcut : vm
144
     shortcut : vm
145
 
145
 
146
+#  - name : yacy
147
+#    engine : yacy
148
+#    shortcut : ya
149
+#    base_url : 'http://localhost:8090'
150
+#    number_of_results : 5
151
+#    timeout: 3.0
152
+
146
 locales:
153
 locales:
147
     en : English
154
     en : English
148
     de : Deutsch
155
     de : Deutsch