瀏覽代碼

Merge branch 'master' into languages

Adam Tauber 8 年之前
父節點
當前提交
8bff42f049
共有 7 個檔案被更改,包括 104 行新增、15 行删除
  1. 1
    0
      AUTHORS.rst
  2. 5
    10
      searx/engines/__init__.py
  3. 3
    3
      searx/engines/google_news.py
  4. 57
    0
      searx/engines/searx_engine.py
  5. 32
    0
      searx/settings.yml
  6. 1
    0
      searx/settings_robot.yml
  7. 5
    2
      searx/webapp.py

+ 1
- 0
AUTHORS.rst 查看文件

@@ -59,3 +59,4 @@ generally made searx better:
59 59
 - Thomas Renard @threnard
60 60
 - Pydo `<https://github.com/pydo>`_
61 61
 - Athemis `<https://github.com/Athemis>`_
62
+- Stefan Antoni `<http://stefan.antoni.io>`_

+ 5
- 10
searx/engines/__init__.py 查看文件

@@ -34,7 +34,8 @@ engine_dir = dirname(realpath(__file__))
34 34
 engines = {}
35 35
 
36 36
 categories = {'general': []}
37
-_initialized = False
37
+
38
+languages = loads(open(engine_dir + '/../data/engines_languages.json').read())
38 39
 
39 40
 engine_shortcuts = {}
40 41
 engine_default_args = {'paging': False,
@@ -214,13 +215,7 @@ def get_engines_stats():
214 215
     ]
215 216
 
216 217
 
217
-if 'engines' not in settings or not settings['engines']:
218
-    logger.error('No engines found. Edit your settings.yml')
219
-    exit(2)
220
-
221
-languages = loads(open(engine_dir + '/../data/engines_languages.json').read())
222
-
223
-for engine_data in settings['engines']:
224
-    engine = load_engine(engine_data)
225
-    if engine is not None:
218
+def initialize_engines(engine_list):
219
+    for engine_data in engine_list:
220
+        engine = load_engine(engine_data)
226 221
         engines[engine.name] = engine

+ 3
- 3
searx/engines/google_news.py 查看文件

@@ -72,9 +72,9 @@ def response(resp):
72 72
             'content': ''.join(result.xpath('.//div[@class="st"]//text()')),
73 73
         }
74 74
 
75
-        img = result.xpath('.//img/@src')[0]
76
-        if img and not img.startswith('data'):
77
-            r['img_src'] = img
75
+        imgs = result.xpath('.//img/@src')
76
+        if len(imgs) and not imgs[0].startswith('data'):
77
+            r['img_src'] = imgs[0]
78 78
 
79 79
         results.append(r)
80 80
 

+ 57
- 0
searx/engines/searx_engine.py 查看文件

@@ -0,0 +1,57 @@
1
+"""
2
+ Searx (all)
3
+
4
+ @website     https://github.com/asciimoo/searx
5
+ @provide-api yes (https://asciimoo.github.io/searx/dev/search_api.html)
6
+
7
+ @using-api   yes
8
+ @results     JSON
9
+ @stable      yes (using api)
10
+ @parse       url, title, content
11
+"""
12
+
13
+from json import loads
14
+from searx.engines import categories as searx_categories
15
+
16
+
17
+categories = searx_categories.keys()
18
+
19
+# search-url
20
+instance_urls = []
21
+instance_index = 0
22
+
23
+
24
+# do search-request
25
+def request(query, params):
26
+    global instance_index
27
+    params['url'] = instance_urls[instance_index % len(instance_urls)]
28
+    params['method'] = 'POST'
29
+
30
+    instance_index += 1
31
+
32
+    params['data'] = {
33
+        'q': query,
34
+        'pageno': params['pageno'],
35
+        'language': params['language'],
36
+        'time_range': params['time_range'],
37
+        'category': params['category'],
38
+        'format': 'json'
39
+    }
40
+
41
+    return params
42
+
43
+
44
+# get response from search-request
45
+def response(resp):
46
+
47
+    response_json = loads(resp.text)
48
+    results = response_json['results']
49
+
50
+    for i in ('answers', 'infoboxes'):
51
+        results.extend(response_json[i])
52
+
53
+    results.extend({'suggestion': s} for s in response_json['suggestions'])
54
+
55
+    results.append({'number_of_results': response_json['number_of_results']})
56
+
57
+    return results

+ 32
- 0
searx/settings.yml 查看文件

@@ -13,6 +13,7 @@ server:
13 13
     secret_key : "ultrasecretkey" # change this!
14 14
     base_url : False # Set custom base_url. Possible values: False or "https://your.custom.host/location/"
15 15
     image_proxy : False # Proxying image results through searx
16
+    http_protocol_version : "1.0"  # 1.0 and 1.1 are supported
16 17
 
17 18
 ui:
18 19
     themes_path : "" # Custom ui themes path - leave it blank if you didn't change
@@ -91,6 +92,17 @@ engines:
91 92
     disabled : True
92 93
     shortcut : bb
93 94
 
95
+  - name : ccc-tv
96
+    engine : xpath
97
+    paging : False
98
+    search_url : https://media.ccc.de/search/?q={query}
99
+    url_xpath : //div[@class="caption"]/h3/a/@href
100
+    title_xpath : //div[@class="caption"]/h3/a/text()
101
+    content_xpath : //div[@class="caption"]/h4/@title
102
+    categories : videos
103
+    disabled : True
104
+    shortcut : c3tv
105
+
94 106
   - name : crossref
95 107
     engine : json_engine
96 108
     paging : True
@@ -154,6 +166,18 @@ engines:
154 166
     shortcut : ddg
155 167
     disabled : True
156 168
 
169
+  - name : etymonline
170
+    engine : xpath
171
+    paging : True
172
+    search_url : http://etymonline.com/?search={query}&p={pageno}
173
+    url_xpath : //dt/a[1]/@href
174
+    title_xpath : //dt
175
+    content_xpath : //dd
176
+    suggestion_xpath : //a[@class="crossreference"]
177
+    first_page_num : 0
178
+    shortcut : et
179
+    disabled : True
180
+
157 181
 # api-key required: http://www.faroo.com/hp/api/api.html#key
158 182
 #  - name : faroo
159 183
 #    engine : faroo
@@ -430,6 +454,14 @@ engines:
430 454
     shortcut : scc
431 455
     disabled : True
432 456
 
457
+#  - name : searx
458
+#    engine : searx_engine
459
+#    shortcut : se
460
+#    instance_urls :
461
+#        - http://127.0.0.1:8888/
462
+#        - ...
463
+#    disabled : True
464
+
433 465
   - name : spotify
434 466
     engine : spotify
435 467
     shortcut : stf

+ 1
- 0
searx/settings_robot.yml 查看文件

@@ -13,6 +13,7 @@ server:
13 13
     secret_key : "ultrasecretkey" # change this!
14 14
     base_url : False
15 15
     image_proxy : False
16
+    http_protocol_version : "1.0"
16 17
 
17 18
 ui:
18 19
     themes_path : ""

+ 5
- 2
searx/webapp.py 查看文件

@@ -53,7 +53,7 @@ from flask_babel import Babel, gettext, format_date, format_decimal
53 53
 from flask.json import jsonify
54 54
 from searx import settings, searx_dir, searx_debug
55 55
 from searx.engines import (
56
-    categories, engines, get_engines_stats, engine_shortcuts
56
+    categories, engines, engine_shortcuts, get_engines_stats, initialize_engines
57 57
 )
58 58
 from searx.utils import (
59 59
     UnicodeWriter, highlight_content, html_to_text, get_themes,
@@ -81,7 +81,7 @@ except ImportError:
81 81
 
82 82
 # serve pages with the HTTP version from settings (server.http_protocol_version, default 1.0)
83 83
 from werkzeug.serving import WSGIRequestHandler
84
-WSGIRequestHandler.protocol_version = "HTTP/1.1"
84
+WSGIRequestHandler.protocol_version = "HTTP/{}".format(settings['server'].get('http_protocol_version', '1.0'))
85 85
 
86 86
 static_path, templates_path, themes =\
87 87
     get_themes(settings['ui']['themes_path']
@@ -769,6 +769,9 @@ def page_not_found(e):
769 769
 
770 770
 
771 771
 def run():
772
+    if not searx_debug or os.environ.get("WERKZEUG_RUN_MAIN") == "true":
773
+        initialize_engines(settings['engines'])
774
+
772 775
     app.run(
773 776
         debug=searx_debug,
774 777
         use_debugger=searx_debug,