Browse Source

make search language handling less strict

The list in languages.py can change, so users may query with a language that is
no longer on the list, even though it is still recognized by a few engines.

Also made `no` and `nb` equivalent because they seem to return the same results,
though most engines will only support one or the other.
marc 8 years ago
parent
commit
fd65c12921

+ 1
- 1
searx/data/engines_languages.json
File diff suppressed because it is too large
View File


+ 2
- 0
searx/engines/bing.py View File

94
     options = dom.xpath('//div[@id="limit-languages"]//input')
94
     options = dom.xpath('//div[@id="limit-languages"]//input')
95
     for option in options:
95
     for option in options:
96
         code = option.xpath('./@id')[0].replace('_', '-')
96
         code = option.xpath('./@id')[0].replace('_', '-')
97
+        if code == 'nb':
98
+            code = 'no'
97
         supported_languages.append(code)
99
         supported_languages.append(code)
98
 
100
 
99
     return supported_languages
101
     return supported_languages

+ 4
- 0
searx/engines/qwant.py View File

47
 
47
 
48
     # add language tag if specified
48
     # add language tag if specified
49
     if params['language'] != 'all':
49
     if params['language'] != 'all':
50
+        if params['language'] == 'no' or params['language'].startswith('no-'):
51
+            params['language'] = params['language'].replace('no', 'nb', 1)
50
         if params['language'].find('-') < 0:
52
         if params['language'].find('-') < 0:
51
             # tries to get a country code from language
53
             # tries to get a country code from language
52
             for lang in supported_languages:
54
             for lang in supported_languages:
118
 
120
 
119
     supported_languages = []
121
     supported_languages = []
120
     for lang in regions_json['languages'].values():
122
     for lang in regions_json['languages'].values():
123
+        if lang['code'] == 'nb':
124
+            lang['code'] = 'no'
121
         for country in lang['countries']:
125
         for country in lang['countries']:
122
             supported_languages.append(lang['code'] + '-' + country)
126
             supported_languages.append(lang['code'] + '-' + country)
123
 
127
 

+ 2
- 0
searx/engines/swisscows.py View File

120
     options = dom.xpath('//div[@id="regions-popup"]//ul/li/a')
120
     options = dom.xpath('//div[@id="regions-popup"]//ul/li/a')
121
     for option in options:
121
     for option in options:
122
         code = option.xpath('./@data-val')[0]
122
         code = option.xpath('./@data-val')[0]
123
+        if code.startswith('nb-'):
124
+            code = code.replace('nb', 'no', 1)
123
         supported_languages.append(code)
125
         supported_languages.append(code)
124
 
126
 
125
     return supported_languages
127
     return supported_languages

+ 1
- 0
searx/languages.py View File

57
     (u"nl", u"Nederlands", u"", u"Dutch"),
57
     (u"nl", u"Nederlands", u"", u"Dutch"),
58
     (u"nl-BE", u"Nederlands", u"België", u"Dutch"),
58
     (u"nl-BE", u"Nederlands", u"België", u"Dutch"),
59
     (u"nl-NL", u"Nederlands", u"Nederland", u"Dutch"),
59
     (u"nl-NL", u"Nederlands", u"Nederland", u"Dutch"),
60
+    (u"no-NO", u"Norsk", u"", u"Norwegian"),
60
     (u"pl-PL", u"Polski", u"", u"Polish"),
61
     (u"pl-PL", u"Polski", u"", u"Polish"),
61
     (u"pt", u"Português", u"", u"Portuguese"),
62
     (u"pt", u"Português", u"", u"Portuguese"),
62
     (u"pt-BR", u"Português", u"Brasil", u"Portuguese"),
63
     (u"pt-BR", u"Português", u"Brasil", u"Portuguese"),

+ 2
- 0
searx/preferences.py View File

107
                 pass
107
                 pass
108
             elif lang in self.choices:
108
             elif lang in self.choices:
109
                 data = lang
109
                 data = lang
110
+            elif data == 'nb-NO':
111
+                data = 'no-NO'
110
             elif data == 'ar-XA':
112
             elif data == 'ar-XA':
111
                 data = 'ar-SA'
113
                 data = 'ar-SA'
112
             else:
114
             else:

+ 3
- 3
searx/query.py View File

24
 import string
24
 import string
25
 import re
25
 import re
26
 
26
 
27
-VALID_LANGUAGE_CODE = re.compile(r'^[a-z]{2,3}(\-[A-Z]{2})?$')
27
+VALID_LANGUAGE_CODE = re.compile(r'^[a-z]{2,3}(-[a-zA-Z]{2})?$')
28
 
28
 
29
 
29
 
30
 class RawTextQuery(object):
30
 class RawTextQuery(object):
68
 
68
 
69
             # this force a language
69
             # this force a language
70
             if query_part[0] == ':':
70
             if query_part[0] == ':':
71
-                lang = query_part[1:].lower()
71
+                lang = query_part[1:].lower().replace('_', '-')
72
 
72
 
73
                 # user may set a valid, yet not selectable language
73
                 # user may set a valid, yet not selectable language
74
                 if VALID_LANGUAGE_CODE.match(lang):
74
                 if VALID_LANGUAGE_CODE.match(lang):
86
                        or lang_id.startswith(lang)\
86
                        or lang_id.startswith(lang)\
87
                        or lang == lang_name\
87
                        or lang == lang_name\
88
                        or lang == english_name\
88
                        or lang == english_name\
89
-                       or lang.replace('_', ' ') == country:
89
+                       or lang.replace('-', ' ') == country:
90
                         parse_next = True
90
                         parse_next = True
91
                         self.languages.append(lang_id)
91
                         self.languages.append(lang_id)
92
                         # to ensure best match (first match is not necessarily the best one)
92
                         # to ensure best match (first match is not necessarily the best one)

+ 2
- 11
searx/search.py View File

27
 )
27
 )
28
 from searx.answerers import ask
28
 from searx.answerers import ask
29
 from searx.utils import gen_useragent
29
 from searx.utils import gen_useragent
30
-from searx.query import RawTextQuery, SearchQuery
30
+from searx.query import RawTextQuery, SearchQuery, VALID_LANGUAGE_CODE
31
 from searx.results import ResultContainer
31
 from searx.results import ResultContainer
32
 from searx import logger
32
 from searx import logger
33
 from searx.plugins import plugins
33
 from searx.plugins import plugins
34
-from searx.languages import language_codes
35
 from searx.exceptions import SearxParameterException
34
 from searx.exceptions import SearxParameterException
36
 
35
 
37
 logger = logger.getChild('search')
36
 logger = logger.getChild('search')
38
 
37
 
39
 number_of_searches = 0
38
 number_of_searches = 0
40
 
39
 
41
-language_code_set = set(l[0].lower() for l in language_codes)
42
-language_code_set.add('all')
43
-
44
 
40
 
45
 def send_http_request(engine, request_params, start_time, timeout_limit):
41
 def send_http_request(engine, request_params, start_time, timeout_limit):
46
     # for page_load_time stats
42
     # for page_load_time stats
219
         query_lang = preferences.get_value('language')
215
         query_lang = preferences.get_value('language')
220
 
216
 
221
     # check language
217
     # check language
222
-    if query_lang.lower() not in language_code_set:
218
+    if not VALID_LANGUAGE_CODE.match(query_lang):
223
         raise SearxParameterException('language', query_lang)
219
         raise SearxParameterException('language', query_lang)
224
 
220
 
225
     # get safesearch
221
     # get safesearch
371
             if search_query.pageno > 1 and not engine.paging:
367
             if search_query.pageno > 1 and not engine.paging:
372
                 continue
368
                 continue
373
 
369
 
374
-            # if search-language is set and engine does not
375
-            # provide language-support, skip
376
-            if search_query.lang != 'all' and not engine.language_support:
377
-                continue
378
-
379
             # if time_range is not supported, skip
370
             # if time_range is not supported, skip
380
             if search_query.time_range and not engine.time_range_support:
371
             if search_query.time_range and not engine.time_range_support:
381
                 continue
372
                 continue