浏览代码

[enh] validate input and raise an exception inside search.py. The exception message is output in json and rss format.

Alexandre Flament 8 年前
父节点
当前提交
15eef0ebdb
共有 4 个文件被更改,包括 133 次插入38 次删除
  1. 32
    0
      searx/exceptions.py
  2. 43
    23
      searx/search.py
  3. 6
    0
      searx/templates/__common__/opensearch_response_rss.xml
  4. 52
    15
      searx/webapp.py

+ 32
- 0
searx/exceptions.py 查看文件

1
+'''
2
+searx is free software: you can redistribute it and/or modify
3
+it under the terms of the GNU Affero General Public License as published by
4
+the Free Software Foundation, either version 3 of the License, or
5
+(at your option) any later version.
6
+
7
+searx is distributed in the hope that it will be useful,
8
+but WITHOUT ANY WARRANTY; without even the implied warranty of
9
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10
+GNU Affero General Public License for more details.
11
+
12
+You should have received a copy of the GNU Affero General Public License
13
+along with searx. If not, see < http://www.gnu.org/licenses/ >.
14
+
15
+(C) 2017- by Alexandre Flament, <alex@al-f.net>
16
+'''
17
+
18
+
19
class SearxException(Exception):
    """Base class for the exceptions raised by searx itself."""


class SearxParameterException(SearxException):
    """Raised when a request parameter is empty or holds an invalid value.

    Attributes:
        message: the human-readable description, identical to ``str(exc)``.
        parameter_name: name of the offending parameter.
        parameter_value: the rejected value, exactly as it was passed in.
    """

    def __init__(self, name, value):
        """Build the error message from the parameter name and value.

        :param name: name of the request parameter (a string).
        :param value: the rejected value; ``''`` or ``None`` means the
            parameter was empty. May be a non-string (e.g. an integer that
            is out of range).
        """
        if value == '' or value is None:
            message = 'Empty ' + name + ' parameter'
        else:
            # %-formatting rather than "+": concatenation raises TypeError
            # when the value is not a string (e.g. an out-of-range integer),
            # which would hide the real validation error.
            message = 'Invalid value "%s" for parameter %s' % (value, name)
        super(SearxParameterException, self).__init__(message)
        # BaseException has no ``message`` attribute on Python 3; set it
        # explicitly so code reading ``e.message`` works on both versions.
        self.message = message
        self.parameter_name = name
        self.parameter_value = value

+ 43
- 23
searx/search.py 查看文件

31
 from searx.results import ResultContainer
31
 from searx.results import ResultContainer
32
 from searx import logger
32
 from searx import logger
33
 from searx.plugins import plugins
33
 from searx.plugins import plugins
34
+from searx.languages import language_codes
35
+from searx.exceptions import SearxParameterException
34
 
36
 
35
 logger = logger.getChild('search')
37
 logger = logger.getChild('search')
36
 
38
 
37
 number_of_searches = 0
39
 number_of_searches = 0
38
 
40
 
41
# Lower-cased first field of every language_codes entry, plus the literal
# 'all'; used as the set of values accepted for the search language.
language_code_set = {entry[0].lower() for entry in language_codes}
language_code_set.add('all')
43
+
39
 
44
 
40
 def send_http_request(engine, request_params, start_time, timeout_limit):
45
 def send_http_request(engine, request_params, start_time, timeout_limit):
41
     # for page_load_time stats
46
     # for page_load_time stats
182
 
187
 
183
 
188
 
184
 def get_search_query_from_webapp(preferences, form):
189
 def get_search_query_from_webapp(preferences, form):
185
-    query = None
186
-    query_engines = []
187
-    query_categories = []
188
-    query_pageno = 1
189
-    query_lang = 'all'
190
-    query_time_range = None
190
+    # no text for the query ?
191
+    if not form.get('q'):
192
+        raise SearxParameterException('q', '')
191
 
193
 
192
     # set blocked engines
194
     # set blocked engines
193
     disabled_engines = preferences.engines.get_disabled()
195
     disabled_engines = preferences.engines.get_disabled()
194
 
196
 
195
-    # set specific language if set
196
-    query_lang = preferences.get_value('language')
197
-
198
-    # safesearch
199
-    query_safesearch = preferences.get_value('safesearch')
200
-
201
-    # TODO better exceptions
202
-    if not form.get('q'):
203
-        raise Exception('noquery')
204
-
205
-    # set pagenumber
206
-    pageno_param = form.get('pageno', '1')
207
-    if not pageno_param.isdigit() or int(pageno_param) < 1:
208
-        pageno_param = 1
209
-
210
-    query_pageno = int(pageno_param)
211
-
212
     # parse query, if tags are set, which change
197
     # parse query, if tags are set, which change
213
     # the serch engine or search-language
198
     # the serch engine or search-language
214
     raw_text_query = RawTextQuery(form['q'], disabled_engines)
199
     raw_text_query = RawTextQuery(form['q'], disabled_engines)
217
     # set query
202
     # set query
218
     query = raw_text_query.getSearchQuery()
203
     query = raw_text_query.getSearchQuery()
219
 
204
 
205
+    # get and check page number
206
+    pageno_param = form.get('pageno', '1')
207
+    if not pageno_param.isdigit() or int(pageno_param) < 1:
208
+        raise SearxParameterException('pageno', pageno_param)
209
+    query_pageno = int(pageno_param)
210
+
211
+    # get language
220
     # set specific language if set on request, query or preferences
212
     # set specific language if set on request, query or preferences
221
     # TODO support search with multible languages
213
     # TODO support search with multible languages
222
     if len(raw_text_query.languages):
214
     if len(raw_text_query.languages):
226
     else:
218
     else:
227
         query_lang = preferences.get_value('language')
219
         query_lang = preferences.get_value('language')
228
 
220
 
221
+    # check language
222
+    if query_lang not in language_code_set:
223
+        raise SearxParameterException('language', query_lang)
224
+
225
+    # get safesearch
226
+    if 'safesearch' in form:
227
+        query_safesearch = form.get('safesearch')
228
+        # first check safesearch
229
+        if not query_safesearch.isdigit():
230
+            raise SearxParameterException('safesearch', query_safesearch)
231
+        query_safesearch = int(query_safesearch)
232
+    else:
233
+        query_safesearch = preferences.get_value('safesearch')
234
+
235
+    # safesearch : second check
236
+    if query_safesearch < 0 or query_safesearch > 2:
237
+        raise SearxParameterException('safesearch', query_safesearch)
238
+
239
+    # get time_range
229
     query_time_range = form.get('time_range')
240
     query_time_range = form.get('time_range')
230
 
241
 
242
+    # check time_range
243
+    if not(query_time_range is None)\
244
+       and not (query_time_range in ['', 'day', 'week', 'month', 'year']):
245
+        raise SearxParameterException('time_range', query_time_range)
246
+
247
+    # query_engines
231
     query_engines = raw_text_query.engines
248
     query_engines = raw_text_query.engines
232
 
249
 
250
+    # query_categories
251
+    query_categories = []
252
+
233
     # if engines are calculated from query,
253
     # if engines are calculated from query,
234
     # set categories by using that informations
254
     # set categories by using that informations
235
     if query_engines and raw_text_query.specific:
255
     if query_engines and raw_text_query.specific:

+ 6
- 0
searx/templates/__common__/opensearch_response_rss.xml 查看文件

11
     <opensearch:itemsPerPage>{{ number_of_results }}</opensearch:itemsPerPage>
11
     <opensearch:itemsPerPage>{{ number_of_results }}</opensearch:itemsPerPage>
12
     <atom:link rel="search" type="application/opensearchdescription+xml" href="{{ base_url }}opensearch.xml"/>
12
     <atom:link rel="search" type="application/opensearchdescription+xml" href="{{ base_url }}opensearch.xml"/>
13
     <opensearch:Query role="request" searchTerms="{{ q|e }}" startPage="1" />
13
     <opensearch:Query role="request" searchTerms="{{ q|e }}" startPage="1" />
14
+    {% if error_message %}
15
+    <item>
16
+      <title>Error</title>
17
+      <description>{{ error_message|e }}</description>
18
+    </item>
19
+    {% endif %}
14
     {% for r in results %}
20
     {% for r in results %}
15
     <item>
21
     <item>
16
       <title>{{ r.title }}</title>
22
       <title>{{ r.title }}</title>

+ 52
- 15
searx/webapp.py 查看文件

52
 from flask_babel import Babel, gettext, format_date, format_decimal
52
 from flask_babel import Babel, gettext, format_date, format_decimal
53
 from flask.json import jsonify
53
 from flask.json import jsonify
54
 from searx import settings, searx_dir, searx_debug
54
 from searx import settings, searx_dir, searx_debug
55
+from searx.exceptions import SearxException, SearxParameterException
55
 from searx.engines import (
56
 from searx.engines import (
56
     categories, engines, engine_shortcuts, get_engines_stats, initialize_engines
57
     categories, engines, engine_shortcuts, get_engines_stats, initialize_engines
57
 )
58
 )
400
             request.user_plugins.append(plugin)
401
             request.user_plugins.append(plugin)
401
 
402
 
402
 
403
 
404
def index_error(output_format, error_message):
    """Return the error response matching the requested output format.

    :param output_format: one of 'json', 'csv', 'rss'; any other value is
        handled as html.
    :param error_message: text reported to the client. It is emitted in the
        json and rss outputs only; csv returns an empty body and html shows
        the generic 'search error' text instead.
    :returns: a Response for json / csv / rss, or the rendered index page
        for html. The HTTP status code is left to the caller.
    """
    if output_format == 'json':
        payload = json.dumps({'error': error_message})
        return Response(payload, mimetype='application/json')

    if output_format == 'csv':
        # The csv output carries no error text: an empty attachment is sent.
        csv_response = Response('', mimetype='application/csv')
        csv_response.headers.add('Content-Disposition',
                                 'attachment;Filename=searx.csv')
        return csv_response

    if output_format == 'rss':
        # Echo the query back when one was submitted.
        if 'q' in request.form:
            search_terms = request.form['q']
        else:
            search_terms = ''
        rss_body = render(
            'opensearch_response_rss.xml',
            results=[],
            q=search_terms,
            number_of_results=0,
            base_url=get_base_url(),
            error_message=error_message
        )
        return Response(rss_body, mimetype='text/xml')

    # html: record the generic error on the request and show the index page
    request.errors.append(gettext('search error'))
    return render('index.html')
429
+
430
+
403
 @app.route('/search', methods=['GET', 'POST'])
431
 @app.route('/search', methods=['GET', 'POST'])
404
 @app.route('/', methods=['GET', 'POST'])
432
 @app.route('/', methods=['GET', 'POST'])
405
 def index():
433
 def index():
408
     Supported outputs: html, json, csv, rss.
436
     Supported outputs: html, json, csv, rss.
409
     """
437
     """
410
 
438
 
439
+    # output_format
440
+    output_format = request.form.get('format', 'html')
441
+    if output_format not in ['html', 'csv', 'json', 'rss']:
442
+        output_format = 'html'
443
+
444
+    # check if there is query
411
     if request.form.get('q') is None:
445
     if request.form.get('q') is None:
412
-        return render(
413
-            'index.html',
414
-        )
446
+        if output_format == 'html':
447
+            return render(
448
+                'index.html',
449
+            )
450
+        else:
451
+            return index_error(output_format, 'No query'), 400
415
 
452
 
416
     # search
453
     # search
417
     search_query = None
454
     search_query = None
421
         # search = Search(search_query) #  without plugins
458
         # search = Search(search_query) #  without plugins
422
         search = SearchWithPlugins(search_query, request)
459
         search = SearchWithPlugins(search_query, request)
423
         result_container = search.search()
460
         result_container = search.search()
424
-    except:
425
-        request.errors.append(gettext('search error'))
461
+    except Exception as e:
462
+        # log exception
426
         logger.exception('search error')
463
         logger.exception('search error')
427
-        return render(
428
-            'index.html',
429
-        )
430
 
464
 
465
+        # is it an invalid input parameter or something else ?
466
+        if (issubclass(e.__class__, SearxParameterException)):
467
+            return index_error(output_format, e.message), 400
468
+        else:
469
+            return index_error(output_format, gettext('search error')), 500
470
+
471
+    # results
431
     results = result_container.get_ordered_results()
472
     results = result_container.get_ordered_results()
473
+    number_of_results = result_container.results_number()
474
+    if number_of_results < result_container.results_length():
475
+        number_of_results = 0
432
 
476
 
433
     # UI
477
     # UI
434
     advanced_search = request.form.get('advanced_search', None)
478
     advanced_search = request.form.get('advanced_search', None)
435
-    output_format = request.form.get('format', 'html')
436
-    if output_format not in ['html', 'csv', 'json', 'rss']:
437
-        output_format = 'html'
438
 
479
 
439
     # output
480
     # output
440
     for result in results:
481
     for result in results:
470
                 else:
511
                 else:
471
                     result['publishedDate'] = format_date(result['publishedDate'])
512
                     result['publishedDate'] = format_date(result['publishedDate'])
472
 
513
 
473
-    number_of_results = result_container.results_number()
474
-    if number_of_results < result_container.results_length():
475
-        number_of_results = 0
476
-
477
     if output_format == 'json':
514
     if output_format == 'json':
478
         return Response(json.dumps({'query': search_query.query,
515
         return Response(json.dumps({'query': search_query.query,
479
                                     'number_of_results': number_of_results,
516
                                     'number_of_results': number_of_results,