Browse Source

[enh] validate input and raise an exception inside search.py. The exception message is output in json and rss format.

Alexandre Flament 8 years ago
parent
commit
15eef0ebdb
4 changed files with 133 additions and 38 deletions
  1. 32
    0
      searx/exceptions.py
  2. 43
    23
      searx/search.py
  3. 6
    0
      searx/templates/__common__/opensearch_response_rss.xml
  4. 52
    15
      searx/webapp.py

+ 32
- 0
searx/exceptions.py View File

@@ -0,0 +1,32 @@
1
+'''
2
+searx is free software: you can redistribute it and/or modify
3
+it under the terms of the GNU Affero General Public License as published by
4
+the Free Software Foundation, either version 3 of the License, or
5
+(at your option) any later version.
6
+
7
+searx is distributed in the hope that it will be useful,
8
+but WITHOUT ANY WARRANTY; without even the implied warranty of
9
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10
+GNU Affero General Public License for more details.
11
+
12
+You should have received a copy of the GNU Affero General Public License
13
+along with searx. If not, see < http://www.gnu.org/licenses/ >.
14
+
15
+(C) 2017- by Alexandre Flament, <alex@al-f.net>
16
+'''
17
+
18
+
19
+class SearxException(Exception):
20
+    pass
21
+
22
+
23
+class SearxParameterException(SearxException):
24
+
25
+    def __init__(self, name, value):
26
+        if value == '' or value is None:
27
+            message = 'Empty ' + name + ' parameter'
28
+        else:
29
+            message = 'Invalid value "' + value + '" for parameter ' + name
30
+        super(SearxParameterException, self).__init__(message)
31
+        self.parameter_name = name
32
+        self.parameter_value = value

+ 43
- 23
searx/search.py View File

@@ -31,11 +31,16 @@ from searx.query import RawTextQuery, SearchQuery
31 31
 from searx.results import ResultContainer
32 32
 from searx import logger
33 33
 from searx.plugins import plugins
34
+from searx.languages import language_codes
35
+from searx.exceptions import SearxParameterException
34 36
 
35 37
 logger = logger.getChild('search')
36 38
 
37 39
 number_of_searches = 0
38 40
 
41
+language_code_set = set(l[0].lower() for l in language_codes)
42
+language_code_set.add('all')
43
+
39 44
 
40 45
 def send_http_request(engine, request_params, start_time, timeout_limit):
41 46
     # for page_load_time stats
@@ -182,33 +187,13 @@ def default_request_params():
182 187
 
183 188
 
184 189
 def get_search_query_from_webapp(preferences, form):
185
-    query = None
186
-    query_engines = []
187
-    query_categories = []
188
-    query_pageno = 1
189
-    query_lang = 'all'
190
-    query_time_range = None
190
+    # no text for the query ?
191
+    if not form.get('q'):
192
+        raise SearxParameterException('q', '')
191 193
 
192 194
     # set blocked engines
193 195
     disabled_engines = preferences.engines.get_disabled()
194 196
 
195
-    # set specific language if set
196
-    query_lang = preferences.get_value('language')
197
-
198
-    # safesearch
199
-    query_safesearch = preferences.get_value('safesearch')
200
-
201
-    # TODO better exceptions
202
-    if not form.get('q'):
203
-        raise Exception('noquery')
204
-
205
-    # set pagenumber
206
-    pageno_param = form.get('pageno', '1')
207
-    if not pageno_param.isdigit() or int(pageno_param) < 1:
208
-        pageno_param = 1
209
-
210
-    query_pageno = int(pageno_param)
211
-
212 197
     # parse query, if tags are set, which change
213 198
     # the search engine or search-language
214 199
     raw_text_query = RawTextQuery(form['q'], disabled_engines)
@@ -217,6 +202,13 @@ def get_search_query_from_webapp(preferences, form):
217 202
     # set query
218 203
     query = raw_text_query.getSearchQuery()
219 204
 
205
+    # get and check page number
206
+    pageno_param = form.get('pageno', '1')
207
+    if not pageno_param.isdigit() or int(pageno_param) < 1:
208
+        raise SearxParameterException('pageno', pageno_param)
209
+    query_pageno = int(pageno_param)
210
+
211
+    # get language
220 212
     # set specific language if set on request, query or preferences
221 213
     # TODO support search with multiple languages
222 214
     if len(raw_text_query.languages):
@@ -226,10 +218,38 @@ def get_search_query_from_webapp(preferences, form):
226 218
     else:
227 219
         query_lang = preferences.get_value('language')
228 220
 
221
+    # check language
222
+    if query_lang not in language_code_set:
223
+        raise SearxParameterException('language', query_lang)
224
+
225
+    # get safesearch
226
+    if 'safesearch' in form:
227
+        query_safesearch = form.get('safesearch')
228
+        # first check safesearch
229
+        if not query_safesearch.isdigit():
230
+            raise SearxParameterException('safesearch', query_safesearch)
231
+        query_safesearch = int(query_safesearch)
232
+    else:
233
+        query_safesearch = preferences.get_value('safesearch')
234
+
235
+    # safesearch : second check
236
+    if query_safesearch < 0 or query_safesearch > 2:
237
+        raise SearxParameterException('safesearch', query_safesearch)
238
+
239
+    # get time_range
229 240
     query_time_range = form.get('time_range')
230 241
 
242
+    # check time_range
243
+    if not(query_time_range is None)\
244
+       and not (query_time_range in ['', 'day', 'week', 'month', 'year']):
245
+        raise SearxParameterException('time_range', query_time_range)
246
+
247
+    # query_engines
231 248
     query_engines = raw_text_query.engines
232 249
 
250
+    # query_categories
251
+    query_categories = []
252
+
233 253
     # if engines are calculated from query,
234 254
     # set categories by using that information
235 255
     if query_engines and raw_text_query.specific:

+ 6
- 0
searx/templates/__common__/opensearch_response_rss.xml View File

@@ -11,6 +11,12 @@
11 11
     <opensearch:itemsPerPage>{{ number_of_results }}</opensearch:itemsPerPage>
12 12
     <atom:link rel="search" type="application/opensearchdescription+xml" href="{{ base_url }}opensearch.xml"/>
13 13
     <opensearch:Query role="request" searchTerms="{{ q|e }}" startPage="1" />
14
+    {% if error_message %}
15
+    <item>
16
+      <title>Error</title>
17
+      <description>{{ error_message|e }}</description>
18
+    </item>
19
+    {% endif %}
14 20
     {% for r in results %}
15 21
     <item>
16 22
       <title>{{ r.title }}</title>

+ 52
- 15
searx/webapp.py View File

@@ -52,6 +52,7 @@ from flask import (
52 52
 from flask_babel import Babel, gettext, format_date, format_decimal
53 53
 from flask.json import jsonify
54 54
 from searx import settings, searx_dir, searx_debug
55
+from searx.exceptions import SearxException, SearxParameterException
55 56
 from searx.engines import (
56 57
     categories, engines, engine_shortcuts, get_engines_stats, initialize_engines
57 58
 )
@@ -400,6 +401,33 @@ def pre_request():
400 401
             request.user_plugins.append(plugin)
401 402
 
402 403
 
404
+def index_error(output_format, error_message):
405
+    if output_format == 'json':
406
+        return Response(json.dumps({'error': error_message}),
407
+                        mimetype='application/json')
408
+    elif output_format == 'csv':
409
+        response = Response('', mimetype='application/csv')
410
+        cont_disp = 'attachment;Filename=searx.csv'
411
+        response.headers.add('Content-Disposition', cont_disp)
412
+        return response
413
+    elif output_format == 'rss':
414
+        response_rss = render(
415
+            'opensearch_response_rss.xml',
416
+            results=[],
417
+            q=request.form['q'] if 'q' in request.form else '',
418
+            number_of_results=0,
419
+            base_url=get_base_url(),
420
+            error_message=error_message
421
+        )
422
+        return Response(response_rss, mimetype='text/xml')
423
+    else:
424
+        # html
425
+        request.errors.append(gettext('search error'))
426
+        return render(
427
+            'index.html',
428
+        )
429
+
430
+
403 431
 @app.route('/search', methods=['GET', 'POST'])
404 432
 @app.route('/', methods=['GET', 'POST'])
405 433
 def index():
@@ -408,10 +436,19 @@ def index():
408 436
     Supported outputs: html, json, csv, rss.
409 437
     """
410 438
 
439
+    # output_format
440
+    output_format = request.form.get('format', 'html')
441
+    if output_format not in ['html', 'csv', 'json', 'rss']:
442
+        output_format = 'html'
443
+
444
+    # check if there is query
411 445
     if request.form.get('q') is None:
412
-        return render(
413
-            'index.html',
414
-        )
446
+        if output_format == 'html':
447
+            return render(
448
+                'index.html',
449
+            )
450
+        else:
451
+            return index_error(output_format, 'No query'), 400
415 452
 
416 453
     # search
417 454
     search_query = None
@@ -421,20 +458,24 @@ def index():
421 458
         # search = Search(search_query) #  without plugins
422 459
         search = SearchWithPlugins(search_query, request)
423 460
         result_container = search.search()
424
-    except:
425
-        request.errors.append(gettext('search error'))
461
+    except Exception as e:
462
+        # log exception
426 463
         logger.exception('search error')
427
-        return render(
428
-            'index.html',
429
-        )
430 464
 
465
+        # is it an invalid input parameter or something else ?
466
+        if (issubclass(e.__class__, SearxParameterException)):
467
+            return index_error(output_format, e.message), 400
468
+        else:
469
+            return index_error(output_format, gettext('search error')), 500
470
+
471
+    # results
431 472
     results = result_container.get_ordered_results()
473
+    number_of_results = result_container.results_number()
474
+    if number_of_results < result_container.results_length():
475
+        number_of_results = 0
432 476
 
433 477
     # UI
434 478
     advanced_search = request.form.get('advanced_search', None)
435
-    output_format = request.form.get('format', 'html')
436
-    if output_format not in ['html', 'csv', 'json', 'rss']:
437
-        output_format = 'html'
438 479
 
439 480
     # output
440 481
     for result in results:
@@ -470,10 +511,6 @@ def index():
470 511
                 else:
471 512
                     result['publishedDate'] = format_date(result['publishedDate'])
472 513
 
473
-    number_of_results = result_container.results_number()
474
-    if number_of_results < result_container.results_length():
475
-        number_of_results = 0
476
-
477 514
     if output_format == 'json':
478 515
         return Response(json.dumps({'query': search_query.query,
479 516
                                     'number_of_results': number_of_results,