Ver código fonte

make search language handling less strict

languages.py can change, so users may request a language that is no longer
on the list, even though it is still recognized by a few engines.

Also made `no` and `nb` equivalent, because they seem to return the same
results, though most engines will only support one or the other.
marc 8 anos atrás
pai
commit
fd65c12921

+ 1
- 1
searx/data/engines_languages.json
Diferenças do arquivo suprimidas por serem muito extensas
Ver arquivo


+ 2
- 0
searx/engines/bing.py Ver arquivo

@@ -94,6 +94,8 @@ def _fetch_supported_languages(resp):
94 94
     options = dom.xpath('//div[@id="limit-languages"]//input')
95 95
     for option in options:
96 96
         code = option.xpath('./@id')[0].replace('_', '-')
97
+        if code == 'nb':
98
+            code = 'no'
97 99
         supported_languages.append(code)
98 100
 
99 101
     return supported_languages

+ 4
- 0
searx/engines/qwant.py Ver arquivo

@@ -47,6 +47,8 @@ def request(query, params):
47 47
 
48 48
     # add language tag if specified
49 49
     if params['language'] != 'all':
50
+        if params['language'] == 'no' or params['language'].startswith('no-'):
51
+            params['language'] = params['language'].replace('no', 'nb', 1)
50 52
         if params['language'].find('-') < 0:
51 53
             # tries to get a country code from language
52 54
             for lang in supported_languages:
@@ -118,6 +120,8 @@ def _fetch_supported_languages(resp):
118 120
 
119 121
     supported_languages = []
120 122
     for lang in regions_json['languages'].values():
123
+        if lang['code'] == 'nb':
124
+            lang['code'] = 'no'
121 125
         for country in lang['countries']:
122 126
             supported_languages.append(lang['code'] + '-' + country)
123 127
 

+ 2
- 0
searx/engines/swisscows.py Ver arquivo

@@ -120,6 +120,8 @@ def _fetch_supported_languages(resp):
120 120
     options = dom.xpath('//div[@id="regions-popup"]//ul/li/a')
121 121
     for option in options:
122 122
         code = option.xpath('./@data-val')[0]
123
+        if code.startswith('nb-'):
124
+            code = code.replace('nb', 'no', 1)
123 125
         supported_languages.append(code)
124 126
 
125 127
     return supported_languages

+ 1
- 0
searx/languages.py Ver arquivo

@@ -57,6 +57,7 @@ language_codes = (
57 57
     (u"nl", u"Nederlands", u"", u"Dutch"),
58 58
     (u"nl-BE", u"Nederlands", u"België", u"Dutch"),
59 59
     (u"nl-NL", u"Nederlands", u"Nederland", u"Dutch"),
60
+    (u"no-NO", u"Norsk", u"", u"Norwegian"),
60 61
     (u"pl-PL", u"Polski", u"", u"Polish"),
61 62
     (u"pt", u"Português", u"", u"Portuguese"),
62 63
     (u"pt-BR", u"Português", u"Brasil", u"Portuguese"),

+ 2
- 0
searx/preferences.py Ver arquivo

@@ -107,6 +107,8 @@ class SearchLanguageSetting(EnumStringSetting):
107 107
                 pass
108 108
             elif lang in self.choices:
109 109
                 data = lang
110
+            elif data == 'nb-NO':
111
+                data = 'no-NO'
110 112
             elif data == 'ar-XA':
111 113
                 data = 'ar-SA'
112 114
             else:

+ 3
- 3
searx/query.py Ver arquivo

@@ -24,7 +24,7 @@ from searx.engines import (
24 24
 import string
25 25
 import re
26 26
 
27
-VALID_LANGUAGE_CODE = re.compile(r'^[a-z]{2,3}(\-[A-Z]{2})?$')
27
+VALID_LANGUAGE_CODE = re.compile(r'^[a-z]{2,3}(-[a-zA-Z]{2})?$')
28 28
 
29 29
 
30 30
 class RawTextQuery(object):
@@ -68,7 +68,7 @@ class RawTextQuery(object):
68 68
 
69 69
             # this force a language
70 70
             if query_part[0] == ':':
71
-                lang = query_part[1:].lower()
71
+                lang = query_part[1:].lower().replace('_', '-')
72 72
 
73 73
                 # user may set a valid, yet not selectable language
74 74
                 if VALID_LANGUAGE_CODE.match(lang):
@@ -86,7 +86,7 @@ class RawTextQuery(object):
86 86
                        or lang_id.startswith(lang)\
87 87
                        or lang == lang_name\
88 88
                        or lang == english_name\
89
-                       or lang.replace('_', ' ') == country:
89
+                       or lang.replace('-', ' ') == country:
90 90
                         parse_next = True
91 91
                         self.languages.append(lang_id)
92 92
                         # to ensure best match (first match is not necessarily the best one)

+ 2
- 11
searx/search.py Ver arquivo

@@ -27,20 +27,16 @@ from searx.engines import (
27 27
 )
28 28
 from searx.answerers import ask
29 29
 from searx.utils import gen_useragent
30
-from searx.query import RawTextQuery, SearchQuery
30
+from searx.query import RawTextQuery, SearchQuery, VALID_LANGUAGE_CODE
31 31
 from searx.results import ResultContainer
32 32
 from searx import logger
33 33
 from searx.plugins import plugins
34
-from searx.languages import language_codes
35 34
 from searx.exceptions import SearxParameterException
36 35
 
37 36
 logger = logger.getChild('search')
38 37
 
39 38
 number_of_searches = 0
40 39
 
41
-language_code_set = set(l[0].lower() for l in language_codes)
42
-language_code_set.add('all')
43
-
44 40
 
45 41
 def send_http_request(engine, request_params, start_time, timeout_limit):
46 42
     # for page_load_time stats
@@ -219,7 +215,7 @@ def get_search_query_from_webapp(preferences, form):
219 215
         query_lang = preferences.get_value('language')
220 216
 
221 217
     # check language
222
-    if query_lang.lower() not in language_code_set:
218
+    if not VALID_LANGUAGE_CODE.match(query_lang):
223 219
         raise SearxParameterException('language', query_lang)
224 220
 
225 221
     # get safesearch
@@ -371,11 +367,6 @@ class Search(object):
371 367
             if search_query.pageno > 1 and not engine.paging:
372 368
                 continue
373 369
 
374
-            # if search-language is set and engine does not
375
-            # provide language-support, skip
376
-            if search_query.lang != 'all' and not engine.language_support:
377
-                continue
378
-
379 370
             # if time_range is not supported, skip
380 371
             if search_query.time_range and not engine.time_range_support:
381 372
                 continue