Sfoglia il codice sorgente

Merge pull request #748 from a01200356/languages

[mod] Allow users to search in most engine supported languages
Adam Tauber 8 anni fa
parent
commit
9743bde25e
54 file modificati con 983 aggiunte e 153 eliminazioni
  1. 1
    2
      AUTHORS.rst
  2. 3
    3
      searx/autocomplete.py
  3. 1
    0
      searx/data/engines_languages.json
  4. 14
    0
      searx/engines/__init__.py
  5. 3
    2
      searx/engines/archlinux.py
  6. 14
    1
      searx/engines/bing.py
  7. 2
    1
      searx/engines/bing_images.py
  8. 2
    1
      searx/engines/bing_news.py
  9. 22
    0
      searx/engines/dailymotion.py
  10. 33
    5
      searx/engines/duckduckgo.py
  11. 2
    1
      searx/engines/duckduckgo_definitions.py
  12. 19
    1
      searx/engines/gigablast.py
  13. 19
    1
      searx/engines/google.py
  14. 3
    1
      searx/engines/google_news.py
  15. 1
    1
      searx/engines/mediawiki.py
  16. 2
    2
      searx/engines/photon.py
  17. 1
    1
      searx/engines/qwant.py
  18. 1
    1
      searx/engines/startpage.py
  19. 10
    4
      searx/engines/subtitleseeker.py
  20. 19
    2
      searx/engines/swisscows.py
  21. 1
    1
      searx/engines/twitter.py
  22. 4
    2
      searx/engines/wikidata.py
  23. 27
    2
      searx/engines/wikipedia.py
  24. 1
    1
      searx/engines/yacy.py
  25. 21
    1
      searx/engines/yahoo.py
  26. 1
    1
      searx/engines/yahoo_news.py
  27. 4
    2
      searx/engines/yandex.py
  28. 1
    1
      searx/engines/youtube_api.py
  29. 129
    76
      searx/languages.py
  30. 21
    2
      searx/preferences.py
  31. 7
    4
      searx/query.py
  32. 5
    1
      searx/search.py
  33. 5
    0
      searx/static/plugins/js/search_on_category_select.js
  34. 3
    3
      searx/templates/courgette/preferences.html
  35. 3
    3
      searx/templates/legacy/preferences.html
  36. 1
    0
      searx/templates/oscar/advanced.html
  37. 12
    0
      searx/templates/oscar/languages.html
  38. 5
    6
      searx/templates/oscar/preferences.html
  39. 3
    3
      searx/templates/pix-art/preferences.html
  40. 8
    4
      searx/webapp.py
  41. 3
    3
      tests/robot/test_basic.robot
  42. 32
    0
      tests/unit/engines/test_bing.py
  43. 37
    0
      tests/unit/engines/test_dailymotion.py
  44. 26
    1
      tests/unit/engines/test_duckduckgo.py
  45. 4
    0
      tests/unit/engines/test_duckduckgo_definitions.py
  46. 31
    0
      tests/unit/engines/test_gigablast.py
  47. 58
    1
      tests/unit/engines/test_google.py
  48. 1
    1
      tests/unit/engines/test_qwant.py
  49. 6
    1
      tests/unit/engines/test_subtitleseeker.py
  50. 28
    1
      tests/unit/engines/test_swisscows.py
  51. 100
    1
      tests/unit/engines/test_wikipedia.py
  52. 30
    0
      tests/unit/engines/test_yahoo.py
  53. 22
    1
      tests/unit/test_preferences.py
  54. 171
    0
      utils/fetch_languages.py

+ 1
- 2
AUTHORS.rst Vedi File

43
 - Kang-min Liu
43
 - Kang-min Liu
44
 - Kirill Isakov
44
 - Kirill Isakov
45
 - Guilhem Bonnefille
45
 - Guilhem Bonnefille
46
-- Marc Abonce Seguin
46
+- Marc Abonce Seguin @a01200356
47
 - @jibe-b
47
 - @jibe-b
48
 - Christian Pietsch @pietsch
48
 - Christian Pietsch @pietsch
49
 - @Maxqia
49
 - @Maxqia
55
 - Ammar Najjar @ammarnajjar
55
 - Ammar Najjar @ammarnajjar
56
 - @stepshal
56
 - @stepshal
57
 - François Revol @mmuman
57
 - François Revol @mmuman
58
-- marc @a01200356
59
 - Harry Wood @harry-wood
58
 - Harry Wood @harry-wood
60
 - Thomas Renard @threnard
59
 - Thomas Renard @threnard
61
 - Pydo `<https://github.com/pydo>`_
60
 - Pydo `<https://github.com/pydo>`_

+ 3
- 3
searx/autocomplete.py Vedi File

81
             engine_query = full_query.getSearchQuery()[1:]
81
             engine_query = full_query.getSearchQuery()[1:]
82
 
82
 
83
             for lc in language_codes:
83
             for lc in language_codes:
84
-                lang_id, lang_name, country = map(str.lower, lc)
84
+                lang_id, lang_name, country, english_name = map(str.lower, lc)
85
 
85
 
86
                 # check if query starts with language-id
86
                 # check if query starts with language-id
87
                 if lang_id.startswith(engine_query):
87
                 if lang_id.startswith(engine_query):
88
                     if len(engine_query) <= 2:
88
                     if len(engine_query) <= 2:
89
-                        results.append(':{lang_id}'.format(lang_id=lang_id.split('_')[0]))
89
+                        results.append(':{lang_id}'.format(lang_id=lang_id.split('-')[0]))
90
                     else:
90
                     else:
91
                         results.append(':{lang_id}'.format(lang_id=lang_id))
91
                         results.append(':{lang_id}'.format(lang_id=lang_id))
92
 
92
 
93
                 # check if query starts with language name
93
                 # check if query starts with language name
94
-                if lang_name.startswith(engine_query):
94
+                if lang_name.startswith(engine_query) or english_name.startswith(engine_query):
95
                     results.append(':{lang_name}'.format(lang_name=lang_name))
95
                     results.append(':{lang_name}'.format(lang_name=lang_name))
96
 
96
 
97
                 # check if query starts with country
97
                 # check if query starts with country

+ 1
- 0
searx/data/engines_languages.json
File diff suppressed because it is too large
Vedi File


+ 14
- 0
searx/engines/__init__.py Vedi File

20
 import sys
20
 import sys
21
 from flask_babel import gettext
21
 from flask_babel import gettext
22
 from operator import itemgetter
22
 from operator import itemgetter
23
+from json import loads
24
+from requests import get
23
 from searx import settings
25
 from searx import settings
24
 from searx import logger
26
 from searx import logger
25
 from searx.utils import load_module
27
 from searx.utils import load_module
33
 
35
 
34
 categories = {'general': []}
36
 categories = {'general': []}
35
 
37
 
38
+languages = loads(open(engine_dir + '/../data/engines_languages.json').read())
39
+
36
 engine_shortcuts = {}
40
 engine_shortcuts = {}
37
 engine_default_args = {'paging': False,
41
 engine_default_args = {'paging': False,
38
                        'categories': ['general'],
42
                        'categories': ['general'],
39
                        'language_support': True,
43
                        'language_support': True,
44
+                       'supported_languages': [],
40
                        'safesearch': False,
45
                        'safesearch': False,
41
                        'timeout': settings['outgoing']['request_timeout'],
46
                        'timeout': settings['outgoing']['request_timeout'],
42
                        'shortcut': '-',
47
                        'shortcut': '-',
85
                          .format(engine.name, engine_attr))
90
                          .format(engine.name, engine_attr))
86
             sys.exit(1)
91
             sys.exit(1)
87
 
92
 
93
+    # assign supported languages from json file
94
+    if engine_data['name'] in languages:
95
+        setattr(engine, 'supported_languages', languages[engine_data['name']])
96
+
97
+    # assign language fetching method if auxiliary method exists
98
+    if hasattr(engine, '_fetch_supported_languages'):
99
+        setattr(engine, 'fetch_supported_languages',
100
+                lambda: engine._fetch_supported_languages(get(engine.supported_languages_url)))
101
+
88
     engine.stats = {
102
     engine.stats = {
89
         'result_count': 0,
103
         'result_count': 0,
90
         'search_count': 0,
104
         'search_count': 0,

+ 3
- 2
searx/engines/archlinux.py Vedi File

29
 
29
 
30
 # cut 'en' from 'en_US', 'de' from 'de_CH', and so on
30
 # cut 'en' from 'en_US', 'de' from 'de_CH', and so on
31
 def locale_to_lang_code(locale):
31
 def locale_to_lang_code(locale):
32
-    if locale.find('_') >= 0:
33
-        locale = locale.split('_')[0]
32
+    if locale.find('-') >= 0:
33
+        locale = locale.split('-')[0]
34
     return locale
34
     return locale
35
 
35
 
36
 
36
 
95
     'uk': 'Українська',
95
     'uk': 'Українська',
96
     'zh': '简体中文'
96
     'zh': '简体中文'
97
 }
97
 }
98
+supported_languages = dict(lang_urls, **main_langs)
98
 
99
 
99
 
100
 
100
 # do search-request
101
 # do search-request

+ 14
- 1
searx/engines/bing.py Vedi File

21
 categories = ['general']
21
 categories = ['general']
22
 paging = True
22
 paging = True
23
 language_support = True
23
 language_support = True
24
+supported_languages_url = 'https://www.bing.com/account/general'
24
 
25
 
25
 # search-url
26
 # search-url
26
 base_url = 'https://www.bing.com/'
27
 base_url = 'https://www.bing.com/'
32
     offset = (params['pageno'] - 1) * 10 + 1
33
     offset = (params['pageno'] - 1) * 10 + 1
33
 
34
 
34
     if params['language'] != 'all':
35
     if params['language'] != 'all':
35
-        query = u'language:{} {}'.format(params['language'].split('_')[0].upper(),
36
+        query = u'language:{} {}'.format(params['language'].split('-')[0].upper(),
36
                                          query.decode('utf-8')).encode('utf-8')
37
                                          query.decode('utf-8')).encode('utf-8')
37
 
38
 
38
     search_path = search_string.format(
39
     search_path = search_string.format(
81
 
82
 
82
     # return results
83
     # return results
83
     return results
84
     return results
85
+
86
+
87
+# get supported languages from their site
88
+def _fetch_supported_languages(resp):
89
+    supported_languages = []
90
+    dom = html.fromstring(resp.text)
91
+    options = dom.xpath('//div[@id="limit-languages"]//input')
92
+    for option in options:
93
+        code = option.xpath('./@id')[0].replace('_', '-')
94
+        supported_languages.append(code)
95
+
96
+    return supported_languages

+ 2
- 1
searx/engines/bing_images.py Vedi File

19
 from lxml import html
19
 from lxml import html
20
 from json import loads
20
 from json import loads
21
 import re
21
 import re
22
+from searx.engines.bing import _fetch_supported_languages, supported_languages_url
22
 
23
 
23
 # engine dependent config
24
 # engine dependent config
24
 categories = ['images']
25
 categories = ['images']
53
     if params['language'] == 'all':
54
     if params['language'] == 'all':
54
         language = 'en-US'
55
         language = 'en-US'
55
     else:
56
     else:
56
-        language = params['language'].replace('_', '-')
57
+        language = params['language']
57
 
58
 
58
     search_path = search_string.format(
59
     search_path = search_string.format(
59
         query=urlencode({'q': query}),
60
         query=urlencode({'q': query}),

+ 2
- 1
searx/engines/bing_news.py Vedi File

17
 from dateutil import parser
17
 from dateutil import parser
18
 from lxml import etree
18
 from lxml import etree
19
 from searx.utils import list_get
19
 from searx.utils import list_get
20
+from searx.engines.bing import _fetch_supported_languages, supported_languages_url
20
 
21
 
21
 # engine dependent config
22
 # engine dependent config
22
 categories = ['news']
23
 categories = ['news']
74
     if params['language'] == 'all':
75
     if params['language'] == 'all':
75
         language = 'en-US'
76
         language = 'en-US'
76
     else:
77
     else:
77
-        language = params['language'].replace('_', '-')
78
+        language = params['language']
78
 
79
 
79
     params['url'] = _get_url(query, language, offset, params['time_range'])
80
     params['url'] = _get_url(query, language, offset, params['time_range'])
80
 
81
 

+ 22
- 0
searx/engines/dailymotion.py Vedi File

15
 from urllib import urlencode
15
 from urllib import urlencode
16
 from json import loads
16
 from json import loads
17
 from datetime import datetime
17
 from datetime import datetime
18
+from requests import get
18
 
19
 
19
 # engine dependent config
20
 # engine dependent config
20
 categories = ['videos']
21
 categories = ['videos']
27
 embedded_url = '<iframe frameborder="0" width="540" height="304" ' +\
28
 embedded_url = '<iframe frameborder="0" width="540" height="304" ' +\
28
     'data-src="//www.dailymotion.com/embed/video/{videoid}" allowfullscreen></iframe>'
29
     'data-src="//www.dailymotion.com/embed/video/{videoid}" allowfullscreen></iframe>'
29
 
30
 
31
+supported_languages_url = 'https://api.dailymotion.com/languages'
32
+
30
 
33
 
31
 # do search-request
34
 # do search-request
32
 def request(query, params):
35
 def request(query, params):
74
 
77
 
75
     # return results
78
     # return results
76
     return results
79
     return results
80
+
81
+
82
+# get supported languages from their site
83
+def _fetch_supported_languages(resp):
84
+    supported_languages = {}
85
+
86
+    response_json = loads(resp.text)
87
+
88
+    for language in response_json['list']:
89
+        supported_languages[language['code']] = {}
90
+
91
+        name = language['native_name']
92
+        if name:
93
+            supported_languages[language['code']]['name'] = name
94
+        english_name = language['name']
95
+        if english_name:
96
+            supported_languages[language['code']]['english_name'] = english_name
97
+
98
+    return supported_languages

+ 33
- 5
searx/engines/duckduckgo.py Vedi File

15
 
15
 
16
 from urllib import urlencode
16
 from urllib import urlencode
17
 from lxml.html import fromstring
17
 from lxml.html import fromstring
18
+from requests import get
19
+from json import loads
18
 from searx.engines.xpath import extract_text
20
 from searx.engines.xpath import extract_text
19
-from searx.languages import language_codes
20
 
21
 
21
 # engine dependent config
22
 # engine dependent config
22
 categories = ['general']
23
 categories = ['general']
23
 paging = True
24
 paging = True
24
 language_support = True
25
 language_support = True
26
+supported_languages_url = 'https://duckduckgo.com/d2030.js'
25
 time_range_support = True
27
 time_range_support = True
26
 
28
 
27
 # search-url
29
 # search-url
46
 
48
 
47
     offset = (params['pageno'] - 1) * 30
49
     offset = (params['pageno'] - 1) * 30
48
 
50
 
51
+    # custom fixes for languages
49
     if params['language'] == 'all':
52
     if params['language'] == 'all':
50
         locale = None
53
         locale = None
54
+    elif params['language'][:2] == 'ja':
55
+        locale = 'jp-jp'
56
+    elif params['language'][:2] == 'sl':
57
+        locale = 'sl-sl'
58
+    elif params['language'] == 'zh-TW':
59
+        locale = 'tw-tzh'
60
+    elif params['language'] == 'zh-HK':
61
+        locale = 'hk-tzh'
62
+    elif params['language'][-2:] == 'SA':
63
+        locale = 'xa-' + params['language'].split('-')[0]
64
+    elif params['language'][-2:] == 'GB':
65
+        locale = 'uk-' + params['language'].split('-')[0]
51
     else:
66
     else:
52
-        locale = params['language'].split('_')
67
+        locale = params['language'].split('-')
53
         if len(locale) == 2:
68
         if len(locale) == 2:
54
             # country code goes first
69
             # country code goes first
55
             locale = locale[1].lower() + '-' + locale[0].lower()
70
             locale = locale[1].lower() + '-' + locale[0].lower()
56
         else:
71
         else:
57
             # tries to get a country code from language
72
             # tries to get a country code from language
58
             locale = locale[0].lower()
73
             locale = locale[0].lower()
59
-            lang_codes = [x[0] for x in language_codes]
60
-            for lc in lang_codes:
61
-                lc = lc.split('_')
74
+            for lc in supported_languages:
75
+                lc = lc.split('-')
62
                 if locale == lc[0]:
76
                 if locale == lc[0]:
63
                     locale = lc[1].lower() + '-' + lc[0].lower()
77
                     locale = lc[1].lower() + '-' + lc[0].lower()
64
                     break
78
                     break
102
 
116
 
103
     # return results
117
     # return results
104
     return results
118
     return results
119
+
120
+
121
+# get supported languages from their site
122
+def _fetch_supported_languages(resp):
123
+
124
+    # response is a js file with regions as an embedded object
125
+    response_page = resp.text
126
+    response_page = response_page[response_page.find('regions:{') + 8:]
127
+    response_page = response_page[:response_page.find('}') + 1]
128
+
129
+    regions_json = loads(response_page)
130
+    supported_languages = map((lambda x: x[3:] + '-' + x[:2].upper()), regions_json.keys())
131
+
132
+    return supported_languages

+ 2
- 1
searx/engines/duckduckgo_definitions.py Vedi File

4
 from lxml import html
4
 from lxml import html
5
 from searx.utils import html_to_text
5
 from searx.utils import html_to_text
6
 from searx.engines.xpath import extract_text
6
 from searx.engines.xpath import extract_text
7
+from searx.engines.duckduckgo import _fetch_supported_languages, supported_languages_url
7
 
8
 
8
 url = 'https://api.duckduckgo.com/'\
9
 url = 'https://api.duckduckgo.com/'\
9
     + '?{query}&format=json&pretty=0&no_redirect=1&d=1'
10
     + '?{query}&format=json&pretty=0&no_redirect=1&d=1'
23
 
24
 
24
 def request(query, params):
25
 def request(query, params):
25
     params['url'] = url.format(query=urlencode({'q': query}))
26
     params['url'] = url.format(query=urlencode({'q': query}))
26
-    params['headers']['Accept-Language'] = params['language']
27
+    params['headers']['Accept-Language'] = params['language'].split('-')[0]
27
     return params
28
     return params
28
 
29
 
29
 
30
 

+ 19
- 1
searx/engines/gigablast.py Vedi File

14
 from random import randint
14
 from random import randint
15
 from time import time
15
 from time import time
16
 from urllib import urlencode
16
 from urllib import urlencode
17
+from lxml.html import fromstring
17
 
18
 
18
 # engine dependent config
19
 # engine dependent config
19
 categories = ['general']
20
 categories = ['general']
40
 title_xpath = './/title'
41
 title_xpath = './/title'
41
 content_xpath = './/sum'
42
 content_xpath = './/sum'
42
 
43
 
44
+supported_languages_url = 'https://gigablast.com/search?&rxikd=1'
45
+
43
 
46
 
44
 # do search-request
47
 # do search-request
45
 def request(query, params):
48
 def request(query, params):
48
     if params['language'] == 'all':
51
     if params['language'] == 'all':
49
         language = 'xx'
52
         language = 'xx'
50
     else:
53
     else:
51
-        language = params['language'][0:2]
54
+        language = params['language'].replace('-', '_').lower()
55
+        if language.split('-')[0] != 'zh':
56
+            language = language.split('-')[0]
52
 
57
 
53
     if params['safesearch'] >= 1:
58
     if params['safesearch'] >= 1:
54
         safesearch = 1
59
         safesearch = 1
82
 
87
 
83
     # return results
88
     # return results
84
     return results
89
     return results
90
+
91
+
92
+# get supported languages from their site
93
+def _fetch_supported_languages(resp):
94
+    supported_languages = []
95
+    dom = fromstring(resp.text)
96
+    links = dom.xpath('//span[@id="menu2"]/a')
97
+    for link in links:
98
+        code = link.xpath('./@href')[0][-2:]
99
+        if code != 'xx' and code not in supported_languages:
100
+            supported_languages.append(code)
101
+
102
+    return supported_languages

+ 19
- 1
searx/engines/google.py Vedi File

103
 maps_path = '/maps'
103
 maps_path = '/maps'
104
 redirect_path = '/url'
104
 redirect_path = '/url'
105
 images_path = '/images'
105
 images_path = '/images'
106
+supported_languages_url = 'https://www.google.com/preferences?#languages'
106
 
107
 
107
 # specific xpath variables
108
 # specific xpath variables
108
 results_xpath = '//div[@class="g"]'
109
 results_xpath = '//div[@class="g"]'
167
         language = 'en'
168
         language = 'en'
168
         country = 'US'
169
         country = 'US'
169
         url_lang = ''
170
         url_lang = ''
171
+    elif params['language'][:2] == 'jv':
172
+        language = 'jw'
173
+        country = 'ID'
174
+        url_lang = 'lang_jw'
170
     else:
175
     else:
171
-        language_array = params['language'].lower().split('_')
176
+        language_array = params['language'].lower().split('-')
172
         if len(language_array) == 2:
177
         if len(language_array) == 2:
173
             country = language_array[1]
178
             country = language_array[1]
174
         else:
179
         else:
355
         retval = retval + '<tr><th>' + a.get('label') + '</th><td>' + value + '</td></tr>'
360
         retval = retval + '<tr><th>' + a.get('label') + '</th><td>' + value + '</td></tr>'
356
     retval = retval + '</table>'
361
     retval = retval + '</table>'
357
     return retval
362
     return retval
363
+
364
+
365
+# get supported languages from their site
366
+def _fetch_supported_languages(resp):
367
+    supported_languages = {}
368
+    dom = html.fromstring(resp.text)
369
+    options = dom.xpath('//table//td/font/label/span')
370
+    for option in options:
371
+        code = option.xpath('./@id')[0][1:]
372
+        name = option.text.title()
373
+        supported_languages[code] = {"name": name}
374
+
375
+    return supported_languages

+ 3
- 1
searx/engines/google_news.py Vedi File

12
 
12
 
13
 from lxml import html
13
 from lxml import html
14
 from urllib import urlencode
14
 from urllib import urlencode
15
+from json import loads
16
+from searx.engines.google import _fetch_supported_languages, supported_languages_url
15
 
17
 
16
 # search-url
18
 # search-url
17
 categories = ['news']
19
 categories = ['news']
50
                                       search_options=urlencode(search_options))
52
                                       search_options=urlencode(search_options))
51
 
53
 
52
     if params['language'] != 'all':
54
     if params['language'] != 'all':
53
-        language_array = params['language'].lower().split('_')
55
+        language_array = params['language'].lower().split('-')
54
         params['url'] += '&lr=lang_' + language_array[0]
56
         params['url'] += '&lr=lang_' + language_array[0]
55
 
57
 
56
     return params
58
     return params

+ 1
- 1
searx/engines/mediawiki.py Vedi File

46
     if params['language'] == 'all':
46
     if params['language'] == 'all':
47
         language = 'en'
47
         language = 'en'
48
     else:
48
     else:
49
-        language = params['language'].split('_')[0]
49
+        language = params['language'].split('-')[0]
50
 
50
 
51
     # format_string [('https://', 'language', '', None), ('.wikipedia.org/', None, None, None)]
51
     # format_string [('https://', 'language', '', None), ('.wikipedia.org/', None, None, None)]
52
     if any(x[1] == 'language' for x in format_strings):
52
     if any(x[1] == 'language' for x in format_strings):

+ 2
- 2
searx/engines/photon.py Vedi File

26
 result_base_url = 'https://openstreetmap.org/{osm_type}/{osm_id}'
26
 result_base_url = 'https://openstreetmap.org/{osm_type}/{osm_id}'
27
 
27
 
28
 # list of supported languages
28
 # list of supported languages
29
-allowed_languages = ['de', 'en', 'fr', 'it']
29
+supported_languages = ['de', 'en', 'fr', 'it']
30
 
30
 
31
 
31
 
32
 # do search-request
32
 # do search-request
37
 
37
 
38
     if params['language'] != 'all':
38
     if params['language'] != 'all':
39
         language = params['language'].split('_')[0]
39
         language = params['language'].split('_')[0]
40
-        if language in allowed_languages:
40
+        if language in supported_languages:
41
             params['url'] = params['url'] + "&lang=" + language
41
             params['url'] = params['url'] + "&lang=" + language
42
 
42
 
43
     # using searx User-Agent
43
     # using searx User-Agent

+ 1
- 1
searx/engines/qwant.py Vedi File

46
 
46
 
47
     # add language tag if specified
47
     # add language tag if specified
48
     if params['language'] != 'all':
48
     if params['language'] != 'all':
49
-        params['url'] += '&locale=' + params['language'].lower()
49
+        params['url'] += '&locale=' + params['language'].replace('-', '_').lower()
50
 
50
 
51
     return params
51
     return params
52
 
52
 

+ 1
- 1
searx/engines/startpage.py Vedi File

47
 
47
 
48
     # set language if specified
48
     # set language if specified
49
     if params['language'] != 'all':
49
     if params['language'] != 'all':
50
-        params['data']['with_language'] = ('lang_' + params['language'].split('_')[0])
50
+        params['data']['with_language'] = ('lang_' + params['language'].split('-')[0])
51
 
51
 
52
     return params
52
     return params
53
 
53
 

+ 10
- 4
searx/engines/subtitleseeker.py Vedi File

22
 
22
 
23
 # search-url
23
 # search-url
24
 url = 'http://www.subtitleseeker.com/'
24
 url = 'http://www.subtitleseeker.com/'
25
-search_url = url + 'search/TITLES/{query}&p={pageno}'
25
+search_url = url + 'search/TITLES/{query}?p={pageno}'
26
 
26
 
27
 # specific xpath variables
27
 # specific xpath variables
28
 results_xpath = '//div[@class="boxRows"]'
28
 results_xpath = '//div[@class="boxRows"]'
43
 
43
 
44
     search_lang = ""
44
     search_lang = ""
45
 
45
 
46
-    if resp.search_params['language'] != 'all':
47
-        search_lang = [lc[1]
46
+    # dirty fix for languages named differently on their site
47
+    if resp.search_params['language'][:2] == 'fa':
48
+        search_lang = 'Farsi'
49
+    elif resp.search_params['language'] == 'pt-BR':
50
+        search_lang = 'Brazilian'
51
+    elif resp.search_params['language'] != 'all':
52
+        search_lang = [lc[3]
48
                        for lc in language_codes
53
                        for lc in language_codes
49
-                       if lc[0][:2] == resp.search_params['language'].split('_')[0]][0]
54
+                       if lc[0].split('-')[0] == resp.search_params['language'].split('-')[0]]
55
+        search_lang = search_lang[0].split(' (')[0]
50
 
56
 
51
     # parse results
57
     # parse results
52
     for result in dom.xpath(results_xpath):
58
     for result in dom.xpath(results_xpath):

+ 19
- 2
searx/engines/swisscows.py Vedi File

13
 from json import loads
13
 from json import loads
14
 from urllib import urlencode, unquote
14
 from urllib import urlencode, unquote
15
 import re
15
 import re
16
+from lxml.html import fromstring
16
 
17
 
17
 # engine dependent config
18
 # engine dependent config
18
 categories = ['general', 'images']
19
 categories = ['general', 'images']
23
 base_url = 'https://swisscows.ch/'
24
 base_url = 'https://swisscows.ch/'
24
 search_string = '?{query}&page={page}'
25
 search_string = '?{query}&page={page}'
25
 
26
 
27
+supported_languages_url = base_url
28
+
26
 # regex
29
 # regex
27
 regex_json = re.compile(r'initialData: {"Request":(.|\n)*},\s*environment')
30
 regex_json = re.compile(r'initialData: {"Request":(.|\n)*},\s*environment')
28
 regex_json_remove_start = re.compile(r'^initialData:\s*')
31
 regex_json_remove_start = re.compile(r'^initialData:\s*')
35
     if params['language'] == 'all':
38
     if params['language'] == 'all':
36
         ui_language = 'browser'
39
         ui_language = 'browser'
37
         region = 'browser'
40
         region = 'browser'
41
+    elif params['language'].split('-')[0] == 'no':
42
+        region = 'nb-NO'
38
     else:
43
     else:
39
-        region = params['language'].replace('_', '-')
40
-        ui_language = params['language'].split('_')[0]
44
+        region = params['language']
45
+        ui_language = params['language'].split('-')[0]
41
 
46
 
42
     search_path = search_string.format(
47
     search_path = search_string.format(
43
         query=urlencode({'query': query,
48
         query=urlencode({'query': query,
106
 
111
 
107
     # return results
112
     # return results
108
     return results
113
     return results
114
+
115
+
116
+# get supported languages from their site
117
+def _fetch_supported_languages(resp):
118
+    supported_languages = []
119
+    dom = fromstring(resp.text)
120
+    options = dom.xpath('//div[@id="regions-popup"]//ul/li/a')
121
+    for option in options:
122
+        code = option.xpath('./@data-val')[0]
123
+        supported_languages.append(code)
124
+
125
+    return supported_languages

+ 1
- 1
searx/engines/twitter.py Vedi File

40
 
40
 
41
     # set language if specified
41
     # set language if specified
42
     if params['language'] != 'all':
42
     if params['language'] != 'all':
43
-        params['cookies']['lang'] = params['language'].split('_')[0]
43
+        params['cookies']['lang'] = params['language'].split('-')[0]
44
     else:
44
     else:
45
         params['cookies']['lang'] = 'en'
45
         params['cookies']['lang'] = 'en'
46
 
46
 

+ 4
- 2
searx/engines/wikidata.py Vedi File

14
 from searx import logger
14
 from searx import logger
15
 from searx.poolrequests import get
15
 from searx.poolrequests import get
16
 from searx.engines.xpath import extract_text
16
 from searx.engines.xpath import extract_text
17
+from searx.utils import format_date_by_locale
18
+from searx.engines.wikipedia import _fetch_supported_languages, supported_languages_url
17
 
19
 
18
 from json import loads
20
 from json import loads
19
 from lxml.html import fromstring
21
 from lxml.html import fromstring
55
 
57
 
56
 
58
 
57
 def request(query, params):
59
 def request(query, params):
58
-    language = params['language'].split('_')[0]
60
+    language = params['language'].split('-')[0]
59
     if language == 'all':
61
     if language == 'all':
60
         language = 'en'
62
         language = 'en'
61
 
63
 
70
     html = fromstring(resp.content)
72
     html = fromstring(resp.content)
71
     wikidata_ids = html.xpath(wikidata_ids_xpath)
73
     wikidata_ids = html.xpath(wikidata_ids_xpath)
72
 
74
 
73
-    language = resp.search_params['language'].split('_')[0]
75
+    language = resp.search_params['language'].split('-')[0]
74
     if language == 'all':
76
     if language == 'all':
75
         language = 'en'
77
         language = 'en'
76
 
78
 

+ 27
- 2
searx/engines/wikipedia.py Vedi File

12
 
12
 
13
 from json import loads
13
 from json import loads
14
 from urllib import urlencode, quote
14
 from urllib import urlencode, quote
15
+from lxml.html import fromstring
16
+
15
 
17
 
16
 # search-url
18
 # search-url
17
 base_url = 'https://{language}.wikipedia.org/'
19
 base_url = 'https://{language}.wikipedia.org/'
24
     '&explaintext'\
26
     '&explaintext'\
25
     '&pithumbsize=300'\
27
     '&pithumbsize=300'\
26
     '&redirects'
28
     '&redirects'
29
+supported_languages_url = 'https://meta.wikimedia.org/wiki/List_of_Wikipedias'
27
 
30
 
28
 
31
 
29
 # set language in base_url
32
 # set language in base_url
30
 def url_lang(lang):
33
 def url_lang(lang):
31
-    if lang == 'all':
34
+    lang = lang.split('-')[0]
35
+    if lang == 'all' or lang not in supported_languages:
32
         language = 'en'
36
         language = 'en'
33
     else:
37
     else:
34
-        language = lang.split('_')[0]
38
+        language = lang
35
 
39
 
36
     return base_url.format(language=language)
40
     return base_url.format(language=language)
37
 
41
 
111
                     'urls': [{'title': 'Wikipedia', 'url': wikipedia_link}]})
115
                     'urls': [{'title': 'Wikipedia', 'url': wikipedia_link}]})
112
 
116
 
113
     return results
117
     return results
118
+
119
+
120
+# get supported languages from their site
121
+def _fetch_supported_languages(resp):
122
+    supported_languages = {}
123
+    dom = fromstring(resp.text)
124
+    tables = dom.xpath('//table[contains(@class,"sortable")]')
125
+    for table in tables:
126
+        # exclude header row
127
+        trs = table.xpath('.//tr')[1:]
128
+        for tr in trs:
129
+            td = tr.xpath('./td')
130
+            code = td[3].xpath('./a')[0].text
131
+            name = td[2].xpath('./a')[0].text
132
+            english_name = td[1].xpath('./a')[0].text
133
+            articles = int(td[4].xpath('./a/b')[0].text.replace(',', ''))
134
+            # exclude languages with too few articles
135
+            if articles >= 100000:
136
+                supported_languages[code] = {"name": name, "english_name": english_name, "articles": articles}
137
+
138
+    return supported_languages

+ 1
- 1
searx/engines/yacy.py Vedi File

53
 
53
 
54
     # add language tag if specified
54
     # add language tag if specified
55
     if params['language'] != 'all':
55
     if params['language'] != 'all':
56
-        params['url'] += '&lr=lang_' + params['language'].split('_')[0]
56
+        params['url'] += '&lr=lang_' + params['language'].split('-')[0]
57
 
57
 
58
     return params
58
     return params
59
 
59
 

+ 21
- 1
searx/engines/yahoo.py Vedi File

27
 search_url = 'search?{query}&b={offset}&fl=1&vl=lang_{lang}'
27
 search_url = 'search?{query}&b={offset}&fl=1&vl=lang_{lang}'
28
 search_url_with_time = 'search?{query}&b={offset}&fl=1&vl=lang_{lang}&age={age}&btf={btf}&fr2=time'
28
 search_url_with_time = 'search?{query}&b={offset}&fl=1&vl=lang_{lang}&age={age}&btf={btf}&fr2=time'
29
 
29
 
30
+supported_languages_url = 'https://search.yahoo.com/web/advanced'
31
+
30
 # specific xpath variables
32
 # specific xpath variables
31
 results_xpath = "//div[contains(concat(' ', normalize-space(@class), ' '), ' Sr ')]"
33
 results_xpath = "//div[contains(concat(' ', normalize-space(@class), ' '), ' Sr ')]"
32
 url_xpath = './/h3/a/@href'
34
 url_xpath = './/h3/a/@href'
72
 def _get_language(params):
74
 def _get_language(params):
73
     if params['language'] == 'all':
75
     if params['language'] == 'all':
74
         return 'en'
76
         return 'en'
75
-    return params['language'].split('_')[0]
77
+    elif params['language'][:2] == 'zh':
78
+        if params['language'] == 'zh' or params['language'] == 'zh-CH':
79
+            return 'szh'
80
+        else:
81
+            return 'tzh'
82
+    else:
83
+        return params['language'].split('-')[0]
76
 
84
 
77
 
85
 
78
 # do search-request
86
 # do search-request
132
 
140
 
133
     # return results
141
     # return results
134
     return results
142
     return results
143
+
144
+
145
+# get supported languages from their site
146
+def _fetch_supported_languages(resp):
147
+    supported_languages = []
148
+    dom = html.fromstring(resp.text)
149
+    options = dom.xpath('//div[@id="yschlang"]/span/label/input')
150
+    for option in options:
151
+        code = option.xpath('./@value')[0][5:].replace('_', '-')
152
+        supported_languages.append(code)
153
+
154
+    return supported_languages

+ 1
- 1
searx/engines/yahoo_news.py Vedi File

12
 from urllib import urlencode
12
 from urllib import urlencode
13
 from lxml import html
13
 from lxml import html
14
 from searx.engines.xpath import extract_text, extract_url
14
 from searx.engines.xpath import extract_text, extract_url
15
-from searx.engines.yahoo import parse_url
15
+from searx.engines.yahoo import parse_url, _fetch_supported_languages, supported_languages_url
16
 from datetime import datetime, timedelta
16
 from datetime import datetime, timedelta
17
 import re
17
 import re
18
 from dateutil import parser
18
 from dateutil import parser

+ 4
- 2
searx/engines/yandex.py Vedi File

22
 
22
 
23
 default_tld = 'com'
23
 default_tld = 'com'
24
 language_map = {'ru': 'ru',
24
 language_map = {'ru': 'ru',
25
-                'ua': 'uk',
25
+                'ua': 'ua',
26
+                'be': 'by',
27
+                'kk': 'kz',
26
                 'tr': 'com.tr'}
28
                 'tr': 'com.tr'}
27
 
29
 
28
 # search-url
30
 # search-url
36
 
38
 
37
 
39
 
38
 def request(query, params):
40
 def request(query, params):
39
-    lang = params['language'].split('_')[0]
41
+    lang = params['language'].split('-')[0]
40
     host = base_url.format(tld=language_map.get(lang) or default_tld)
42
     host = base_url.format(tld=language_map.get(lang) or default_tld)
41
     params['url'] = host + search_url.format(page=params['pageno'] - 1,
43
     params['url'] = host + search_url.format(page=params['pageno'] - 1,
42
                                              query=urlencode({'text': query}))
44
                                              query=urlencode({'text': query}))

+ 1
- 1
searx/engines/youtube_api.py Vedi File

36
 
36
 
37
     # add language tag if specified
37
     # add language tag if specified
38
     if params['language'] != 'all':
38
     if params['language'] != 'all':
39
-        params['url'] += '&relevanceLanguage=' + params['language'].split('_')[0]
39
+        params['url'] += '&relevanceLanguage=' + params['language'].split('-')[0]
40
 
40
 
41
     return params
41
     return params
42
 
42
 

+ 129
- 76
searx/languages.py Vedi File

1
-'''
2
-searx is free software: you can redistribute it and/or modify
3
-it under the terms of the GNU Affero General Public License as published by
4
-the Free Software Foundation, either version 3 of the License, or
5
-(at your option) any later version.
6
-
7
-searx is distributed in the hope that it will be useful,
8
-but WITHOUT ANY WARRANTY; without even the implied warranty of
9
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10
-GNU Affero General Public License for more details.
11
-
12
-You should have received a copy of the GNU Affero General Public License
13
-along with searx. If not, see < http://www.gnu.org/licenses/ >.
14
-
15
-(C) 2013- by Adam Tauber, <asciimoo@gmail.com>
16
-'''
17
-
1
+# -*- coding: utf-8 -*-
18
 # list of language codes
2
 # list of language codes
3
+# this file is generated automatically by utils/update_search_languages.py
4
+
19
 language_codes = (
5
 language_codes = (
20
-    ("ar_XA", "Arabic", "Arabia"),
21
-    ("bg_BG", "Bulgarian", "Bulgaria"),
22
-    ("cs_CZ", "Czech", "Czech Republic"),
23
-    ("da_DK", "Danish", "Denmark"),
24
-    ("de_AT", "German", "Austria"),
25
-    ("de_CH", "German", "Switzerland"),
26
-    ("de_DE", "German", "Germany"),
27
-    ("el_GR", "Greek", "Greece"),
28
-    ("en_AU", "English", "Australia"),
29
-    ("en_CA", "English", "Canada"),
30
-    ("en_GB", "English", "United Kingdom"),
31
-    ("en_ID", "English", "Indonesia"),
32
-    ("en_IE", "English", "Ireland"),
33
-    ("en_IN", "English", "India"),
34
-    ("en_MY", "English", "Malaysia"),
35
-    ("en_NZ", "English", "New Zealand"),
36
-    ("en_PH", "English", "Philippines"),
37
-    ("en_SG", "English", "Singapore"),
38
-    ("en_US", "English", "United States"),
39
-    ("en_XA", "English", "Arabia"),
40
-    ("en_ZA", "English", "South Africa"),
41
-    ("es_AR", "Spanish", "Argentina"),
42
-    ("es_CL", "Spanish", "Chile"),
43
-    ("es_ES", "Spanish", "Spain"),
44
-    ("es_MX", "Spanish", "Mexico"),
45
-    ("es_US", "Spanish", "United States"),
46
-    ("es_XL", "Spanish", "Latin America"),
47
-    ("et_EE", "Estonian", "Estonia"),
48
-    ("fi_FI", "Finnish", "Finland"),
49
-    ("fr_BE", "French", "Belgium"),
50
-    ("fr_CA", "French", "Canada"),
51
-    ("fr_CH", "French", "Switzerland"),
52
-    ("fr_FR", "French", "France"),
53
-    ("he_IL", "Hebrew", "Israel"),
54
-    ("hr_HR", "Croatian", "Croatia"),
55
-    ("hu_HU", "Hungarian", "Hungary"),
56
-    ("it_IT", "Italian", "Italy"),
57
-    ("ja_JP", "Japanese", "Japan"),
58
-    ("ko_KR", "Korean", "Korea"),
59
-    ("lt_LT", "Lithuanian", "Lithuania"),
60
-    ("lv_LV", "Latvian", "Latvia"),
61
-    ("nb_NO", "Norwegian", "Norway"),
62
-    ("nl_BE", "Dutch", "Belgium"),
63
-    ("nl_NL", "Dutch", "Netherlands"),
64
-    ("oc_OC", "Occitan", "Occitan"),
65
-    ("pl_PL", "Polish", "Poland"),
66
-    ("pt_BR", "Portuguese", "Brazil"),
67
-    ("pt_PT", "Portuguese", "Portugal"),
68
-    ("ro_RO", "Romanian", "Romania"),
69
-    ("ru_RU", "Russian", "Russia"),
70
-    ("sk_SK", "Slovak", "Slovak Republic"),
71
-    ("sl_SL", "Slovenian", "Slovenia"),
72
-    ("sv_SE", "Swedish", "Sweden"),
73
-    ("th_TH", "Thai", "Thailand"),
74
-    ("tr_TR", "Turkish", "Turkey"),
75
-    ("uk_UA", "Ukrainian", "Ukraine"),
76
-    ("zh_CN", "Chinese", "China"),
77
-    ("zh_HK", "Chinese", "Hong Kong SAR"),
78
-    ("zh_TW", "Chinese", "Taiwan"))
6
+    (u"af", u"Afrikaans", u"", u""),
7
+    (u"am", u"አማርኛ", u"", u"Amharic"),
8
+    (u"ar-SA", u"العربية", u"المملكة العربية السعودية", u"Arabic"),
9
+    (u"az", u"Azərbaycanca", u"", u"Azerbaijani"),
10
+    (u"be", u"Беларуская", u"", u"Belarusian"),
11
+    (u"bg-BG", u"Български", u"България", u"Bulgarian"),
12
+    (u"bn", u"বাংলা", u"", u"Bengali"),
13
+    (u"br", u"Brezhoneg", u"", u"Breton"),
14
+    (u"bs", u"Bosnian", u"", u"Bosnian"),
15
+    (u"ca", u"Català", u"", u"Catalan"),
16
+    (u"ca-CT", u"Català", u"", u"Catalan"),
17
+    (u"ca-ES", u"Català", u"Espanya", u"Catalan"),
18
+    (u"ce", u"Нохчийн", u"", u"Chechen"),
19
+    (u"ceb", u"Sinugboanong Binisaya", u"", u"Cebuano"),
20
+    (u"cs-CZ", u"Čeština", u"Česko", u"Czech"),
21
+    (u"cy", u"Cymraeg", u"", u"Welsh"),
22
+    (u"da-DK", u"Dansk", u"Danmark", u"Danish"),
23
+    (u"de", u"Deutsch", u"", u"German"),
24
+    (u"de-AT", u"Deutsch", u"Österreich", u"German"),
25
+    (u"de-CH", u"Deutsch", u"Schweiz", u"German"),
26
+    (u"de-DE", u"Deutsch", u"Deutschland", u"German"),
27
+    (u"el-GR", u"Ελληνικά", u"Ελλάδα", u"Greek"),
28
+    (u"en", u"English", u"", u"English"),
29
+    (u"en-AU", u"English", u"Australia", u"English"),
30
+    (u"en-CA", u"English", u"Canada", u"English"),
31
+    (u"en-GB", u"English", u"United Kingdom", u"English"),
32
+    (u"en-ID", u"English", u"Indonesia", u"English"),
33
+    (u"en-IE", u"English", u"Ireland", u"English"),
34
+    (u"en-IN", u"English", u"India", u"English"),
35
+    (u"en-MY", u"English", u"Malaysia", u"English"),
36
+    (u"en-NZ", u"English", u"New Zealand", u"English"),
37
+    (u"en-PH", u"English", u"Philippines", u"English"),
38
+    (u"en-SG", u"English", u"Singapore", u"English"),
39
+    (u"en-US", u"English", u"United States", u"English"),
40
+    (u"en-ZA", u"English", u"South Africa", u"English"),
41
+    (u"eo", u"Esperanto", u"", u"Esperanto"),
42
+    (u"es", u"Español", u"", u"Spanish"),
43
+    (u"es-AR", u"Español", u"Argentina", u"Spanish"),
44
+    (u"es-CL", u"Español", u"Chile", u"Spanish"),
45
+    (u"es-CO", u"Español", u"Colombia", u"Spanish"),
46
+    (u"es-ES", u"Español", u"España", u"Spanish"),
47
+    (u"es-MX", u"Español", u"México", u"Spanish"),
48
+    (u"es-PE", u"Español", u"Perú", u"Spanish"),
49
+    (u"es-US", u"Español", u"Estados Unidos", u"Spanish"),
50
+    (u"et-EE", u"Eesti", u"Eesti", u"Estonian"),
51
+    (u"eu", u"Euskara", u"", u"Basque"),
52
+    (u"fa", u"فارسی", u"", u"Persian"),
53
+    (u"fi-FI", u"Suomi", u"Suomi", u"Finnish"),
54
+    (u"fr", u"Français", u"", u"French"),
55
+    (u"fr-BE", u"Français", u"Belgique", u"French"),
56
+    (u"fr-CA", u"Français", u"Canada", u"French"),
57
+    (u"fr-CH", u"Français", u"Suisse", u"French"),
58
+    (u"fr-FR", u"Français", u"France", u"French"),
59
+    (u"ga", u"Gaeilge", u"", u"Irish"),
60
+    (u"gl", u"Galego", u"", u"Galician"),
61
+    (u"gu", u"ગુજરાતી", u"", u"Gujarati"),
62
+    (u"he-IL", u"עברית", u"ישראל", u"Hebrew"),
63
+    (u"hi", u"हिन्दी", u"", u"Hindi"),
64
+    (u"hr-HR", u"Hrvatski", u"Hrvatska", u"Croatian"),
65
+    (u"hu-HU", u"Magyar", u"Magyarország", u"Hungarian"),
66
+    (u"hy", u"Հայերեն", u"", u"Armenian"),
67
+    (u"id-ID", u"Bahasa Indonesia", u"Indonesia", u"Indonesian"),
68
+    (u"is", u"Íslenska", u"", u""),
69
+    (u"it", u"Italiano", u"", u"Italian"),
70
+    (u"it-CH", u"Italiano", u"Svizzera", u"Italian"),
71
+    (u"it-IT", u"Italiano", u"Italia", u"Italian"),
72
+    (u"iw", u"עברית", u"", u""),
73
+    (u"ja-JP", u"日本語", u"日本", u"Japanese"),
74
+    (u"ka", u"ქართული", u"", u"Georgian"),
75
+    (u"kk", u"Қазақша", u"", u"Kazakh"),
76
+    (u"kn", u"ಕನ್ನಡ", u"", u"Kannada"),
77
+    (u"ko-KR", u"한국어", u"대한민국", u"Korean"),
78
+    (u"la", u"Latina", u"", u"Latin"),
79
+    (u"lt-LT", u"Lietuvių", u"Lietuva", u"Lithuanian"),
80
+    (u"lv-LV", u"Latviešu", u"Latvijas Republika", u""),
81
+    (u"mi", u"Reo Māori", u"", u"Maori"),
82
+    (u"min", u"Minangkabau", u"", u"Minangkabau"),
83
+    (u"mk", u"Македонски", u"", u"Macedonian"),
84
+    (u"mn", u"Монгол", u"", u"Mongolian"),
85
+    (u"mr", u"मराठी", u"", u"Marathi"),
86
+    (u"ms-MY", u"Bahasa Melayu", u"Malaysia", u"Malay"),
87
+    (u"mt", u"Malti", u"", u"Maltese"),
88
+    (u"nb-NO", u"Norwegian Bokmål", u"Norge", u"Norwegian Bokmål"),
89
+    (u"nl", u"Nederlands", u"", u"Dutch"),
90
+    (u"nl-BE", u"Nederlands", u"België", u"Dutch"),
91
+    (u"nl-NL", u"Nederlands", u"Nederland", u"Dutch"),
92
+    (u"nn", u"Nynorsk", u"", u"Norwegian"),
93
+    (u"no-NO", u"Norsk", u"Norge", u"Norwegian"),
94
+    (u"oc", u"Occitan", u"", u"Occitan"),
95
+    (u"or", u"Oriya", u"", u"Oriya"),
96
+    (u"pa", u"ਪੰਜਾਬੀ", u"", u"Panjabi"),
97
+    (u"pl-PL", u"Polski", u"Rzeczpospolita Polska", u"Polish"),
98
+    (u"ps", u"Pushto", u"", u"Pushto"),
99
+    (u"pt", u"Português", u"", u"Portuguese"),
100
+    (u"pt-BR", u"Português", u"Brasil", u"Portuguese"),
101
+    (u"pt-PT", u"Português", u"Portugal", u"Portuguese"),
102
+    (u"ro-RO", u"Română", u"România", u"Romanian"),
103
+    (u"ru-RU", u"Русский", u"Россия", u"Russian"),
104
+    (u"rw", u"Ikinyarwanda", u"", u"Kinyarwanda"),
105
+    (u"sh", u"Srpskohrvatski / Српскохрватски", u"", u"Serbo-Croatian"),
106
+    (u"sk-SK", u"Slovenčina", u"Slovenská republika", u"Slovak"),
107
+    (u"sl", u"Slovenščina", u"", u"Slovenian"),
108
+    (u"sr", u"Српски / Srpski", u"", u"Serbian"),
109
+    (u"sv-SE", u"Svenska", u"Sverige", u"Swedish"),
110
+    (u"sw", u"Kiswahili", u"", u""),
111
+    (u"ta", u"தமிழ்", u"", u"Tamil"),
112
+    (u"th-TH", u"ไทย", u"ไทย", u"Thai"),
113
+    (u"ti", u"ትግርኛ", u"", u"Tigrinya"),
114
+    (u"tl-PH", u"Filipino", u"Pilipinas", u""),
115
+    (u"tr-TR", u"Türkçe", u"Türkiye", u"Turkish"),
116
+    (u"tt", u"Татарча", u"", u"Tatar"),
117
+    (u"uk-UA", u"Українська", u"Україна", u"Ukrainian"),
118
+    (u"ur", u"اردو", u"", u"Urdu"),
119
+    (u"uz", u"O‘zbek", u"", u"Uzbek"),
120
+    (u"ve", u"Venda", u"", u"Venda"),
121
+    (u"vi-VN", u"Tiếng Việt", u"Công Hòa Xã Hội Chủ Nghĩa Việt Nam", u"Vietnamese"),
122
+    (u"vo", u"Volapük", u"", u"Volapük"),
123
+    (u"wa", u"Walon", u"", u"Walloon"),
124
+    (u"war", u"Winaray", u"", u"Waray-Waray"),
125
+    (u"xh", u"Xhosa", u"", u"Xhosa"),
126
+    (u"zh", u"中文", u"", u"Chinese"),
127
+    (u"zh-CN", u"中文", u"中国", u"Chinese"),
128
+    (u"zh-HK", u"中文", u"香港", u"Chinese"),
129
+    (u"zh-TW", u"中文", u"台湾", u"Chinese"),
130
+    (u"zu", u"Isi-Zulu", u"", u"Zulu")
131
+)

+ 21
- 2
searx/preferences.py Vedi File

95
         resp.set_cookie(name, ','.join(self.value), max_age=COOKIE_MAX_AGE)
95
         resp.set_cookie(name, ','.join(self.value), max_age=COOKIE_MAX_AGE)
96
 
96
 
97
 
97
 
98
+class SearchLanguageSetting(EnumStringSetting):
99
+    """Available choices may change, so user's value may not be in choices anymore"""
100
+
101
+    def parse(self, data):
102
+        if data not in self.choices and data != self.value:
103
+            # hack to give some backwards compatibility with old language cookies
104
+            data = str(data).replace('_', '-')
105
+            lang = data.split('-')[0]
106
+            if data in self.choices:
107
+                pass
108
+            elif lang in self.choices:
109
+                data = lang
110
+            elif data == 'ar-XA':
111
+                data = 'ar-SA'
112
+            else:
113
+                data = self.value
114
+        self.value = data
115
+
116
+
98
 class MapSetting(Setting):
117
 class MapSetting(Setting):
99
     """Setting of a value that has to be translated in order to be storable"""
118
     """Setting of a value that has to be translated in order to be storable"""
100
 
119
 
216
         super(Preferences, self).__init__()
235
         super(Preferences, self).__init__()
217
 
236
 
218
         self.key_value_settings = {'categories': MultipleChoiceSetting(['general'], choices=categories),
237
         self.key_value_settings = {'categories': MultipleChoiceSetting(['general'], choices=categories),
219
-                                   'language': EnumStringSetting(settings['search']['language'],
220
-                                                                 choices=LANGUAGE_CODES),
238
+                                   'language': SearchLanguageSetting(settings['search']['language'],
239
+                                                                     choices=LANGUAGE_CODES),
221
                                    'locale': EnumStringSetting(settings['ui']['default_locale'],
240
                                    'locale': EnumStringSetting(settings['ui']['default_locale'],
222
                                                                choices=settings['locales'].keys() + ['']),
241
                                                                choices=settings['locales'].keys() + ['']),
223
                                    'autocomplete': EnumStringSetting(settings['search']['autocomplete'],
242
                                    'autocomplete': EnumStringSetting(settings['search']['autocomplete'],

+ 7
- 4
searx/query.py Vedi File

71
                 # check if any language-code is equal with
71
                 # check if any language-code is equal with
72
                 # declared language-codes
72
                 # declared language-codes
73
                 for lc in language_codes:
73
                 for lc in language_codes:
74
-                    lang_id, lang_name, country = map(str.lower, lc)
74
+                    lang_id, lang_name, country, english_name = map(unicode.lower, lc)
75
 
75
 
76
                     # if correct language-code is found
76
                     # if correct language-code is found
77
                     # set it as new search-language
77
                     # set it as new search-language
78
                     if lang == lang_id\
78
                     if lang == lang_id\
79
                        or lang_id.startswith(lang)\
79
                        or lang_id.startswith(lang)\
80
                        or lang == lang_name\
80
                        or lang == lang_name\
81
+                       or lang == english_name\
81
                        or lang.replace('_', ' ') == country:
82
                        or lang.replace('_', ' ') == country:
82
                         parse_next = True
83
                         parse_next = True
83
-                        self.languages.append(lang)
84
-                        break
84
+                        self.languages.append(lang_id)
85
+                        # to ensure best match (first match is not necessarily the best one)
86
+                        if lang == lang_id:
87
+                            break
85
 
88
 
86
             # this forces an engine or category
89
             # this forces an engine or category
87
             if query_part[0] == '!' or query_part[0] == '?':
90
             if query_part[0] == '!' or query_part[0] == '?':
88
-                prefix = query_part[1:].replace('_', ' ')
91
+                prefix = query_part[1:].replace('-', ' ')
89
 
92
 
90
                 # check if prefix is equal with engine shortcut
93
                 # check if prefix is equal with engine shortcut
91
                 if prefix in engine_shortcuts:
94
                 if prefix in engine_shortcuts:

+ 5
- 1
searx/search.py Vedi File

211
     # set query
211
     # set query
212
     query = raw_text_query.getSearchQuery()
212
     query = raw_text_query.getSearchQuery()
213
 
213
 
214
-    # get last selected language in query, if possible
214
+    # set specific language if set on request, query or preferences
215
     # TODO support search with multible languages
215
     # TODO support search with multible languages
216
     if len(raw_text_query.languages):
216
     if len(raw_text_query.languages):
217
         query_lang = raw_text_query.languages[-1]
217
         query_lang = raw_text_query.languages[-1]
218
+    elif 'language' in form:
219
+        query_lang = form.get('language')
220
+    else:
221
+        query_lang = preferences.get_value('language')
218
 
222
 
219
     query_time_range = form.get('time_range')
223
     query_time_range = form.get('time_range')
220
 
224
 

+ 5
- 0
searx/static/plugins/js/search_on_category_select.js Vedi File

15
                 $('#search_form').submit();
15
                 $('#search_form').submit();
16
             }
16
             }
17
         });
17
         });
18
+        $('#language').change(function(e) {
19
+            if($('#q').val()) {
20
+                $('#search_form').submit();
21
+            }
22
+        });
18
     }
23
     }
19
 });
24
 });

+ 3
- 3
searx/templates/courgette/preferences.html Vedi File

13
         <legend>{{ _('Search language') }}</legend>
13
         <legend>{{ _('Search language') }}</legend>
14
         <p>
14
         <p>
15
             <select name='language'>
15
             <select name='language'>
16
-                <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Automatic') }}</option>
17
-                {% for lang_id,lang_name,country_name in language_codes | sort(attribute=1) %}
18
-                <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{{ lang_name }} ({{ country_name }}) - {{ lang_id }}</option>
16
+                <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option>
17
+                {% for lang_id,lang_name,country_name,english_name in language_codes | sort(attribute=1) %}
18
+                <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{{ lang_name }} {% if country_name %}({{ country_name }}) {% endif %}- {{ lang_id }}</option>
19
                 {% endfor %}
19
                 {% endfor %}
20
             </select>
20
             </select>
21
         </p>
21
         </p>

+ 3
- 3
searx/templates/legacy/preferences.html Vedi File

14
         <legend>{{ _('Search language') }}</legend>
14
         <legend>{{ _('Search language') }}</legend>
15
         <p>
15
         <p>
16
         <select name='language'>
16
         <select name='language'>
17
-            <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Automatic') }}</option>
18
-            {% for lang_id,lang_name,country_name in language_codes | sort(attribute=1) %}
19
-            <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{{ lang_name }} ({{ country_name }}) - {{ lang_id }}</option>
17
+            <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option>
18
+            {% for lang_id,lang_name,country_name,english_name in language_codes | sort(attribute=1) %}
19
+            <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{{ lang_name }} {% if country_name %}({{ country_name }}) {% endif %}- {{ lang_id }}</option>
20
             {% endfor %}
20
             {% endfor %}
21
         </select>
21
         </select>
22
         </p>
22
         </p>

+ 1
- 0
searx/templates/oscar/advanced.html Vedi File

6
 <div id="advanced-search-container">
6
 <div id="advanced-search-container">
7
     {% include 'oscar/categories.html' %}
7
     {% include 'oscar/categories.html' %}
8
     {% include 'oscar/time-range.html' %}
8
     {% include 'oscar/time-range.html' %}
9
+    {% include 'oscar/languages.html' %}
9
 </div>
10
 </div>

+ 12
- 0
searx/templates/oscar/languages.html Vedi File

1
+{% if preferences %}
2
+<select class="form-control" name='language'>
3
+{% else %}
4
+<select class="time_range" id='language' name='language'>
5
+{% endif %}
6
+	<option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option>
7
+		{% for lang_id,lang_name,country_name,english_name in language_codes | sort(attribute=1) %}
8
+		<option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>
9
+			{{ lang_name }} {% if country_name %}({{ country_name }}) {% endif %}- {{ lang_id }}
10
+		</option>
11
+		{% endfor %}
12
+</select>

+ 5
- 6
searx/templates/oscar/preferences.html Vedi File

40
                     {% set language_label = _('Search language') %}
40
                     {% set language_label = _('Search language') %}
41
                     {% set language_info = _('What language do you prefer for search?') %}
41
                     {% set language_info = _('What language do you prefer for search?') %}
42
                     {{ preferences_item_header(language_info, language_label, rtl) }}
42
                     {{ preferences_item_header(language_info, language_label, rtl) }}
43
-                        <select class="form-control" name='language'>
44
-                            <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Automatic') }}</option>
45
-                            {% for lang_id,lang_name,country_name in language_codes | sort(attribute=1) %}
46
-                            <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{{ lang_name }} ({{ country_name }}) - {{ lang_id }}</option>
47
-                            {% endfor %}
48
-                        </select>
43
+						{% include 'oscar/languages.html' %}
49
                     {{ preferences_item_footer(language_info, language_label, rtl) }}
44
                     {{ preferences_item_footer(language_info, language_label, rtl) }}
50
 
45
 
51
                     {% set locale_label = _('Interface language') %}
46
                     {% set locale_label = _('Interface language') %}
153
 				    <th>{{ _("Allow") }}</th>
148
 				    <th>{{ _("Allow") }}</th>
154
 				    <th>{{ _("Engine name") }}</th>
149
 				    <th>{{ _("Engine name") }}</th>
155
 				    <th>{{ _("Shortcut") }}</th>
150
 				    <th>{{ _("Shortcut") }}</th>
151
+				    <th>{{ _("Language support") }}</th>
156
 				    <th>{{ _("SafeSearch") }}</th>
152
 				    <th>{{ _("SafeSearch") }}</th>
157
 				    <th>{{ _("Time range") }}</th>
153
 				    <th>{{ _("Time range") }}</th>
158
 				    <th>{{ _("Avg. time") }}</th>
154
 				    <th>{{ _("Avg. time") }}</th>
161
 				    <th>{{ _("Max time") }}</th>
157
 				    <th>{{ _("Max time") }}</th>
162
 				    <th>{{ _("Avg. time") }}</th>
158
 				    <th>{{ _("Avg. time") }}</th>
163
 				    <th>{{ _("SafeSearch") }}</th>
159
 				    <th>{{ _("SafeSearch") }}</th>
160
+				    <th>{{ _("Language support") }}</th>
164
 				    <th>{{ _("Shortcut") }}</th>
161
 				    <th>{{ _("Shortcut") }}</th>
165
 				    <th>{{ _("Engine name") }}</th>
162
 				    <th>{{ _("Engine name") }}</th>
166
 				    <th>{{ _("Allow") }}</th>
163
 				    <th>{{ _("Allow") }}</th>
175
                                     </td>
172
                                     </td>
176
                                     <th>{{ search_engine.name }}</th>
173
                                     <th>{{ search_engine.name }}</th>
177
 				    <td>{{ shortcuts[search_engine.name] }}</td>
174
 				    <td>{{ shortcuts[search_engine.name] }}</td>
175
+				    <td><input type="checkbox" {{ "checked" if current_language == 'all' or current_language in search_engine.supported_languages or current_language.split('-')[0] in search_engine.supported_languages else ""}} readonly="readonly" disabled="disabled"></td>
178
 				    <td><input type="checkbox" {{ "checked" if search_engine.safesearch==True else ""}} readonly="readonly" disabled="disabled"></td>
176
 				    <td><input type="checkbox" {{ "checked" if search_engine.safesearch==True else ""}} readonly="readonly" disabled="disabled"></td>
179
 				    <td><input type="checkbox" {{ "checked" if search_engine.time_range_support==True else ""}} readonly="readonly" disabled="disabled"></td>
177
 				    <td><input type="checkbox" {{ "checked" if search_engine.time_range_support==True else ""}} readonly="readonly" disabled="disabled"></td>
180
 				    <td class="{{ 'danger' if stats[search_engine.name]['warn_time'] else '' }}">{{ 'N/A' if stats[search_engine.name].time==None else stats[search_engine.name].time }}</td>
178
 				    <td class="{{ 'danger' if stats[search_engine.name]['warn_time'] else '' }}">{{ 'N/A' if stats[search_engine.name].time==None else stats[search_engine.name].time }}</td>
183
 				    <td class="{{ 'danger' if stats[search_engine.name]['warn_timeout'] else '' }}">{{ search_engine.timeout }}</td>
181
 				    <td class="{{ 'danger' if stats[search_engine.name]['warn_timeout'] else '' }}">{{ search_engine.timeout }}</td>
184
 				    <td class="{{ 'danger' if stats[search_engine.name]['warn_time'] else '' }}">{{ 'N/A' if stats[search_engine.name].time==None else stats[search_engine.name].time }}</td>
182
 				    <td class="{{ 'danger' if stats[search_engine.name]['warn_time'] else '' }}">{{ 'N/A' if stats[search_engine.name].time==None else stats[search_engine.name].time }}</td>
185
 				    <td><input type="checkbox" {{ "checked" if search_engine.safesearch==True else ""}} readonly="readonly" disabled="disabled"></td>
183
 				    <td><input type="checkbox" {{ "checked" if search_engine.safesearch==True else ""}} readonly="readonly" disabled="disabled"></td>
184
+				    <td><input type="checkbox" {{ "checked" if current_language == 'all' or current_language in search_engine.supported_languages or current_language.split('-')[0] in search_engine.supported_languages else ""}} readonly="readonly" disabled="disabled"></td>
186
 				    <td>{{ shortcuts[search_engine.name] }}</td>
185
 				    <td>{{ shortcuts[search_engine.name] }}</td>
187
                                     <th>{{ search_engine.name }}</th>
186
                                     <th>{{ search_engine.name }}</th>
188
                                     <td class="onoff-checkbox">
187
                                     <td class="onoff-checkbox">

+ 3
- 3
searx/templates/pix-art/preferences.html Vedi File

9
         <legend>{{ _('Search language') }}</legend>
9
         <legend>{{ _('Search language') }}</legend>
10
         <p>
10
         <p>
11
         <select name='language'>
11
         <select name='language'>
12
-            <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Automatic') }}</option>
13
-            {% for lang_id,lang_name,country_name in language_codes | sort(attribute=1) %}
14
-            <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{{ lang_name }} ({{ country_name }}) - {{ lang_id }}</option>
12
+            <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option>
13
+            {% for lang_id,lang_name,country_name,english_name in language_codes | sort(attribute=1) %}
14
+            <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{{ lang_name }} {% if country_name %}({{ country_name }}) {% endif %}- {{ lang_id }}</option>
15
             {% endfor %}
15
             {% endfor %}
16
         </select>
16
         </select>
17
         </p>
17
         </p>

+ 8
- 4
searx/webapp.py Vedi File

330
 
330
 
331
     kwargs['safesearch'] = str(request.preferences.get_value('safesearch'))
331
     kwargs['safesearch'] = str(request.preferences.get_value('safesearch'))
332
 
332
 
333
+    kwargs['language_codes'] = language_codes
334
+    if 'current_language' not in kwargs:
335
+        kwargs['current_language'] = request.preferences.get_value('language')
336
+
333
     # override url_for function in templates
337
     # override url_for function in templates
334
     kwargs['url_for'] = url_for_theme
338
     kwargs['url_for'] = url_for_theme
335
 
339
 
510
         answers=result_container.answers,
514
         answers=result_container.answers,
511
         infoboxes=result_container.infoboxes,
515
         infoboxes=result_container.infoboxes,
512
         paging=result_container.paging,
516
         paging=result_container.paging,
517
+        current_language=search_query.lang,
513
         base_url=get_base_url(),
518
         base_url=get_base_url(),
514
         theme=get_current_theme_name(),
519
         theme=get_current_theme_name(),
515
         favicons=global_favicons[themes.index(get_current_theme_name())]
520
         favicons=global_favicons[themes.index(get_current_theme_name())]
552
         if not language or language == 'all':
557
         if not language or language == 'all':
553
             language = 'en'
558
             language = 'en'
554
         else:
559
         else:
555
-            language = language.split('_')[0]
560
+            language = language.split('-')[0]
556
         # run autocompletion
561
         # run autocompletion
557
         raw_results.extend(completer(raw_text_query.getSearchQuery(), language))
562
         raw_results.extend(completer(raw_text_query.getSearchQuery(), language))
558
 
563
 
615
     return render('preferences.html',
620
     return render('preferences.html',
616
                   locales=settings['locales'],
621
                   locales=settings['locales'],
617
                   current_locale=get_locale(),
622
                   current_locale=get_locale(),
618
-                  current_language=lang,
619
                   image_proxy=image_proxy,
623
                   image_proxy=image_proxy,
620
-                  language_codes=language_codes,
621
                   engines_by_category=categories,
624
                   engines_by_category=categories,
622
                   stats=stats,
625
                   stats=stats,
623
                   answerers=[{'info': a.self_info(), 'keywords': a.keywords} for a in answerers],
626
                   answerers=[{'info': a.self_info(), 'keywords': a.keywords} for a in answerers],
627
                   themes=themes,
630
                   themes=themes,
628
                   plugins=plugins,
631
                   plugins=plugins,
629
                   allowed_plugins=allowed_plugins,
632
                   allowed_plugins=allowed_plugins,
630
-                  theme=get_current_theme_name())
633
+                  theme=get_current_theme_name(),
634
+                  preferences=True)
631
 
635
 
632
 
636
 
633
 @app.route('/image_proxy', methods=['GET'])
637
 @app.route('/image_proxy', methods=['GET'])

+ 3
- 3
tests/robot/test_basic.robot Vedi File

101
     Page Should Contain  about
101
     Page Should Contain  about
102
     Page Should Contain  preferences
102
     Page Should Contain  preferences
103
     Go To  http://localhost:11111/preferences
103
     Go To  http://localhost:11111/preferences
104
-    List Selection Should Be  language  Automatic
105
-    Select From List  language  Turkish (Turkey) - tr_TR
104
+    List Selection Should Be  language  Default language
105
+    Select From List  language  Türkçe (Türkiye) - tr-TR
106
     Submit Preferences
106
     Submit Preferences
107
     Go To  http://localhost:11111/preferences
107
     Go To  http://localhost:11111/preferences
108
-    List Selection Should Be  language  Turkish (Turkey) - tr_TR
108
+    List Selection Should Be  language  Türkçe (Türkiye) - tr-TR
109
 
109
 
110
 Change autocomplete
110
 Change autocomplete
111
     Page Should Contain  about
111
     Page Should Contain  about

+ 32
- 0
tests/unit/engines/test_bing.py Vedi File

86
         self.assertEqual(results[0]['title'], 'This should be the title')
86
         self.assertEqual(results[0]['title'], 'This should be the title')
87
         self.assertEqual(results[0]['url'], 'http://this.should.be.the.link/')
87
         self.assertEqual(results[0]['url'], 'http://this.should.be.the.link/')
88
         self.assertEqual(results[0]['content'], 'This should be the content.')
88
         self.assertEqual(results[0]['content'], 'This should be the content.')
89
+
90
+    def test_fetch_supported_languages(self):
91
+        html = """<html></html>"""
92
+        response = mock.Mock(text=html)
93
+        results = bing._fetch_supported_languages(response)
94
+        self.assertEqual(type(results), list)
95
+        self.assertEqual(len(results), 0)
96
+
97
+        html = """
98
+        <html>
99
+            <body>
100
+                <form>
101
+                    <div id="limit-languages">
102
+                        <div>
103
+                            <div><input id="es" value="es"></input></div>
104
+                        </div>
105
+                        <div>
106
+                            <div><input id="pt_BR" value="pt_BR"></input></div>
107
+                            <div><input id="pt_PT" value="pt_PT"></input></div>
108
+                        </div>
109
+                    </div>
110
+                </form>
111
+            </body>
112
+        </html>
113
+        """
114
+        response = mock.Mock(text=html)
115
+        languages = bing._fetch_supported_languages(response)
116
+        self.assertEqual(type(languages), list)
117
+        self.assertEqual(len(languages), 3)
118
+        self.assertIn('es', languages)
119
+        self.assertIn('pt-BR', languages)
120
+        self.assertIn('pt-PT', languages)

+ 37
- 0
tests/unit/engines/test_dailymotion.py Vedi File

1
+# -*- coding: utf-8 -*-
1
 from collections import defaultdict
2
 from collections import defaultdict
2
 import mock
3
 import mock
3
 from searx.engines import dailymotion
4
 from searx.engines import dailymotion
72
         results = dailymotion.response(response)
73
         results = dailymotion.response(response)
73
         self.assertEqual(type(results), list)
74
         self.assertEqual(type(results), list)
74
         self.assertEqual(len(results), 0)
75
         self.assertEqual(len(results), 0)
76
+
77
+    def test_fetch_supported_languages(self):
78
+        json = r"""
79
+        {"list":[{"code":"af","name":"Afrikaans","native_name":"Afrikaans",
80
+                  "localized_name":"Afrikaans","display_name":"Afrikaans"},
81
+                 {"code":"ar","name":"Arabic","native_name":"\u0627\u0644\u0639\u0631\u0628\u064a\u0629",
82
+                  "localized_name":"Arabic","display_name":"Arabic"},
83
+                 {"code":"la","name":"Latin","native_name":null,
84
+                  "localized_name":"Latin","display_name":"Latin"}
85
+        ]}
86
+        """
87
+        response = mock.Mock(text=json)
88
+        languages = dailymotion._fetch_supported_languages(response)
89
+        self.assertEqual(type(languages), dict)
90
+        self.assertEqual(len(languages), 3)
91
+        self.assertIn('af', languages)
92
+        self.assertIn('ar', languages)
93
+        self.assertIn('la', languages)
94
+
95
+        self.assertEqual(type(languages['af']), dict)
96
+        self.assertEqual(type(languages['ar']), dict)
97
+        self.assertEqual(type(languages['la']), dict)
98
+
99
+        self.assertIn('name', languages['af'])
100
+        self.assertIn('name', languages['ar'])
101
+        self.assertNotIn('name', languages['la'])
102
+
103
+        self.assertIn('english_name', languages['af'])
104
+        self.assertIn('english_name', languages['ar'])
105
+        self.assertIn('english_name', languages['la'])
106
+
107
+        self.assertEqual(languages['af']['name'], 'Afrikaans')
108
+        self.assertEqual(languages['af']['english_name'], 'Afrikaans')
109
+        self.assertEqual(languages['ar']['name'], u'العربية')
110
+        self.assertEqual(languages['ar']['english_name'], 'Arabic')
111
+        self.assertEqual(languages['la']['english_name'], 'Latin')

+ 26
- 1
tests/unit/engines/test_duckduckgo.py Vedi File

11
         query = 'test_query'
11
         query = 'test_query'
12
         dicto = defaultdict(dict)
12
         dicto = defaultdict(dict)
13
         dicto['pageno'] = 1
13
         dicto['pageno'] = 1
14
-        dicto['language'] = 'de_CH'
14
+        dicto['language'] = 'de-CH'
15
         dicto['time_range'] = ''
15
         dicto['time_range'] = ''
16
         params = duckduckgo.request(query, dicto)
16
         params = duckduckgo.request(query, dicto)
17
         self.assertIn('url', params)
17
         self.assertIn('url', params)
19
         self.assertIn('duckduckgo.com', params['url'])
19
         self.assertIn('duckduckgo.com', params['url'])
20
         self.assertIn('ch-de', params['url'])
20
         self.assertIn('ch-de', params['url'])
21
 
21
 
22
+        # when ddg uses non standard code
23
+        dicto['language'] = 'en-GB'
24
+        params = duckduckgo.request(query, dicto)
25
+        self.assertIn('uk-en', params['url'])
26
+
27
+        # no country given
28
+        duckduckgo.supported_languages = ['de-CH', 'en-US']
29
+        dicto['language'] = 'de'
30
+        params = duckduckgo.request(query, dicto)
31
+        self.assertIn('ch-de', params['url'])
32
+
22
     def test_no_url_in_request_year_time_range(self):
33
     def test_no_url_in_request_year_time_range(self):
23
         dicto = defaultdict(dict)
34
         dicto = defaultdict(dict)
24
         query = 'test_query'
35
         query = 'test_query'
73
         self.assertEqual(results[0]['title'], 'This is the title')
84
         self.assertEqual(results[0]['title'], 'This is the title')
74
         self.assertEqual(results[0]['url'], u'http://this.should.be.the.link/ű')
85
         self.assertEqual(results[0]['url'], u'http://this.should.be.the.link/ű')
75
         self.assertEqual(results[0]['content'], 'This should be the content.')
86
         self.assertEqual(results[0]['content'], 'This should be the content.')
87
+
88
+    def test_fetch_supported_languages(self):
89
+        js = """some code...regions:{
90
+        "wt-wt":"All Results","ar-es":"Argentina","au-en":"Australia","at-de":"Austria","be-fr":"Belgium (fr)"
91
+        }some more code..."""
92
+        response = mock.Mock(text=js)
93
+        languages = duckduckgo._fetch_supported_languages(response)
94
+        self.assertEqual(type(languages), list)
95
+        self.assertEqual(len(languages), 5)
96
+        self.assertIn('wt-WT', languages)
97
+        self.assertIn('es-AR', languages)
98
+        self.assertIn('en-AU', languages)
99
+        self.assertIn('de-AT', languages)
100
+        self.assertIn('fr-BE', languages)

+ 4
- 0
tests/unit/engines/test_duckduckgo_definitions.py Vedi File

21
         query = 'test_query'
21
         query = 'test_query'
22
         dicto = defaultdict(dict)
22
         dicto = defaultdict(dict)
23
         dicto['pageno'] = 1
23
         dicto['pageno'] = 1
24
+        dicto['language'] = 'es'
24
         params = duckduckgo_definitions.request(query, dicto)
25
         params = duckduckgo_definitions.request(query, dicto)
25
         self.assertIn('url', params)
26
         self.assertIn('url', params)
26
         self.assertIn(query, params['url'])
27
         self.assertIn(query, params['url'])
27
         self.assertIn('duckduckgo.com', params['url'])
28
         self.assertIn('duckduckgo.com', params['url'])
29
+        self.assertIn('headers', params)
30
+        self.assertIn('Accept-Language', params['headers'])
31
+        self.assertIn('es', params['headers']['Accept-Language'])
28
 
32
 
29
     def test_response(self):
33
     def test_response(self):
30
         self.assertRaises(AttributeError, duckduckgo_definitions.response, None)
34
         self.assertRaises(AttributeError, duckduckgo_definitions.response, None)

+ 31
- 0
tests/unit/engines/test_gigablast.py Vedi File

15
         self.assertTrue('url' in params)
15
         self.assertTrue('url' in params)
16
         self.assertTrue(query in params['url'])
16
         self.assertTrue(query in params['url'])
17
         self.assertTrue('gigablast.com' in params['url'])
17
         self.assertTrue('gigablast.com' in params['url'])
18
+        self.assertTrue('xx' in params['url'])
19
+
20
+        dicto['language'] = 'en-US'
21
+        params = gigablast.request(query, dicto)
22
+        self.assertTrue('en' in params['url'])
23
+        self.assertFalse('en-US' in params['url'])
18
 
24
 
19
     def test_response(self):
25
     def test_response(self):
20
         self.assertRaises(AttributeError, gigablast.response, None)
26
         self.assertRaises(AttributeError, gigablast.response, None)
83
         self.assertEqual(results[0]['title'], 'South by Southwest 2016')
89
         self.assertEqual(results[0]['title'], 'South by Southwest 2016')
84
         self.assertEqual(results[0]['url'], 'www.sxsw.com')
90
         self.assertEqual(results[0]['url'], 'www.sxsw.com')
85
         self.assertEqual(results[0]['content'], 'This should be the content.')
91
         self.assertEqual(results[0]['content'], 'This should be the content.')
92
+
93
+    def test_fetch_supported_languages(self):
94
+        html = """<html></html>"""
95
+        response = mock.Mock(text=html)
96
+        results = gigablast._fetch_supported_languages(response)
97
+        self.assertEqual(type(results), list)
98
+        self.assertEqual(len(results), 0)
99
+
100
+        html = """
101
+        <html>
102
+            <body>
103
+                <span id="menu2">
104
+                    <a href="/search?&rxikd=1&qlang=xx"></a>
105
+                    <a href="/search?&rxikd=1&qlang=en"></a>
106
+                    <a href="/search?&rxikd=1&qlang=fr"></a>
107
+                </span>
108
+            </body>
109
+        </html>
110
+        """
111
+        response = mock.Mock(text=html)
112
+        languages = gigablast._fetch_supported_languages(response)
113
+        self.assertEqual(type(languages), list)
114
+        self.assertEqual(len(languages), 2)
115
+        self.assertIn('en', languages)
116
+        self.assertIn('fr', languages)

+ 58
- 1
tests/unit/engines/test_google.py Vedi File

18
         query = 'test_query'
18
         query = 'test_query'
19
         dicto = defaultdict(dict)
19
         dicto = defaultdict(dict)
20
         dicto['pageno'] = 1
20
         dicto['pageno'] = 1
21
-        dicto['language'] = 'fr_FR'
21
+        dicto['language'] = 'fr-FR'
22
         dicto['time_range'] = ''
22
         dicto['time_range'] = ''
23
         params = google.request(query, dicto)
23
         params = google.request(query, dicto)
24
         self.assertIn('url', params)
24
         self.assertIn('url', params)
177
         self.assertEqual(results[0]['title'], '')
177
         self.assertEqual(results[0]['title'], '')
178
         self.assertEqual(results[0]['content'], '')
178
         self.assertEqual(results[0]['content'], '')
179
         self.assertEqual(results[0]['img_src'], 'https://this.is.the.image/image.jpg')
179
         self.assertEqual(results[0]['img_src'], 'https://this.is.the.image/image.jpg')
180
+
181
+    def test_fetch_supported_languages(self):
182
+        html = """<html></html>"""
183
+        response = mock.Mock(text=html)
184
+        languages = google._fetch_supported_languages(response)
185
+        self.assertEqual(type(languages), dict)
186
+        self.assertEqual(len(languages), 0)
187
+
188
+        html = u"""
189
+        <html>
190
+            <body>
191
+                <table>
192
+                    <tbody>
193
+                        <tr>
194
+                            <td>
195
+                                <font>
196
+                                    <label>
197
+                                        <span id="ten">English</span>
198
+                                    </label>
199
+                                </font>
200
+                            </td>
201
+                            <td>
202
+                                <font>
203
+                                    <label>
204
+                                        <span id="tzh-CN">中文 (简体)</span>
205
+                                    </label>
206
+                                    <label>
207
+                                        <span id="tzh-TW">中文 (繁體)</span>
208
+                                    </label>
209
+                                </font>
210
+                            </td>
211
+                        </tr>
212
+                    </tbody>
213
+                </table>
214
+            </body>
215
+        </html>
216
+        """
217
+        response = mock.Mock(text=html)
218
+        languages = google._fetch_supported_languages(response)
219
+        self.assertEqual(type(languages), dict)
220
+        self.assertEqual(len(languages), 3)
221
+
222
+        self.assertIn('en', languages)
223
+        self.assertIn('zh-CN', languages)
224
+        self.assertIn('zh-TW', languages)
225
+
226
+        self.assertEquals(type(languages['en']), dict)
227
+        self.assertEquals(type(languages['zh-CN']), dict)
228
+        self.assertEquals(type(languages['zh-TW']), dict)
229
+
230
+        self.assertIn('name', languages['en'])
231
+        self.assertIn('name', languages['zh-CN'])
232
+        self.assertIn('name', languages['zh-TW'])
233
+
234
+        self.assertEquals(languages['en']['name'], 'English')
235
+        self.assertEquals(languages['zh-CN']['name'], u'中文 (简体)')
236
+        self.assertEquals(languages['zh-TW']['name'], u'中文 (繁體)')

+ 1
- 1
tests/unit/engines/test_qwant.py Vedi File

10
         query = 'test_query'
10
         query = 'test_query'
11
         dicto = defaultdict(dict)
11
         dicto = defaultdict(dict)
12
         dicto['pageno'] = 0
12
         dicto['pageno'] = 0
13
-        dicto['language'] = 'fr_FR'
13
+        dicto['language'] = 'fr-FR'
14
         qwant.categories = ['']
14
         qwant.categories = ['']
15
         params = qwant.request(query, dicto)
15
         params = qwant.request(query, dicto)
16
         self.assertIn('url', params)
16
         self.assertIn('url', params)

+ 6
- 1
tests/unit/engines/test_subtitleseeker.py Vedi File

10
         query = 'test_query'
10
         query = 'test_query'
11
         dicto = defaultdict(dict)
11
         dicto = defaultdict(dict)
12
         dicto['pageno'] = 1
12
         dicto['pageno'] = 1
13
+        dicto['language'] = 'fr-FR'
13
         params = subtitleseeker.request(query, dicto)
14
         params = subtitleseeker.request(query, dicto)
14
         self.assertTrue('url' in params)
15
         self.assertTrue('url' in params)
15
         self.assertTrue(query in params['url'])
16
         self.assertTrue(query in params['url'])
17
 
18
 
18
     def test_response(self):
19
     def test_response(self):
19
         dicto = defaultdict(dict)
20
         dicto = defaultdict(dict)
20
-        dicto['language'] = 'fr_FR'
21
+        dicto['language'] = 'fr-FR'
21
         response = mock.Mock(search_params=dicto)
22
         response = mock.Mock(search_params=dicto)
22
 
23
 
23
         self.assertRaises(AttributeError, subtitleseeker.response, None)
24
         self.assertRaises(AttributeError, subtitleseeker.response, None)
68
         self.assertIn('1039 Subs', results[0]['content'])
69
         self.assertIn('1039 Subs', results[0]['content'])
69
         self.assertIn('Alternative Title', results[0]['content'])
70
         self.assertIn('Alternative Title', results[0]['content'])
70
 
71
 
72
+        dicto['language'] = 'pt-BR'
73
+        results = subtitleseeker.response(response)
74
+        self.assertEqual(results[0]['url'], 'http://this.is.the.url/Brazilian/')
75
+
71
         html = """
76
         html = """
72
         <div class="boxRows">
77
         <div class="boxRows">
73
             <div class="boxRowsInner" style="width:600px;">
78
             <div class="boxRowsInner" style="width:600px;">

+ 28
- 1
tests/unit/engines/test_swisscows.py Vedi File

10
         query = 'test_query'
10
         query = 'test_query'
11
         dicto = defaultdict(dict)
11
         dicto = defaultdict(dict)
12
         dicto['pageno'] = 1
12
         dicto['pageno'] = 1
13
-        dicto['language'] = 'de_DE'
13
+        dicto['language'] = 'de-DE'
14
         params = swisscows.request(query, dicto)
14
         params = swisscows.request(query, dicto)
15
         self.assertTrue('url' in params)
15
         self.assertTrue('url' in params)
16
         self.assertTrue(query in params['url'])
16
         self.assertTrue(query in params['url'])
126
         self.assertEqual(results[2]['url'], 'http://de.wikipedia.org/wiki/Datei:This should.svg')
126
         self.assertEqual(results[2]['url'], 'http://de.wikipedia.org/wiki/Datei:This should.svg')
127
         self.assertEqual(results[2]['img_src'], 'http://ts2.mm.This/should.png')
127
         self.assertEqual(results[2]['img_src'], 'http://ts2.mm.This/should.png')
128
         self.assertEqual(results[2]['template'], 'images.html')
128
         self.assertEqual(results[2]['template'], 'images.html')
129
+
130
+    def test_fetch_supported_languages(self):
131
+        html = """<html></html>"""
132
+        response = mock.Mock(text=html)
133
+        languages = swisscows._fetch_supported_languages(response)
134
+        self.assertEqual(type(languages), list)
135
+        self.assertEqual(len(languages), 0)
136
+
137
+        html = """
138
+        <html>
139
+            <div id="regions-popup">
140
+                <div>
141
+                    <ul>
142
+                        <li><a data-val="browser"></a></li>
143
+                        <li><a data-val="de-CH"></a></li>
144
+                        <li><a data-val="fr-CH"></a></li>
145
+                    </ul>
146
+                </div>
147
+            </div>
148
+        </html>
149
+        """
150
+        response = mock.Mock(text=html)
151
+        languages = swisscows._fetch_supported_languages(response)
152
+        self.assertEqual(type(languages), list)
153
+        self.assertEqual(len(languages), 3)
154
+        self.assertIn('de-CH', languages)
155
+        self.assertIn('fr-CH', languages)

+ 100
- 1
tests/unit/engines/test_wikipedia.py Vedi File

8
 class TestWikipediaEngine(SearxTestCase):
8
 class TestWikipediaEngine(SearxTestCase):
9
 
9
 
10
     def test_request(self):
10
     def test_request(self):
11
+        wikipedia.supported_languages = ['fr', 'en']
12
+
11
         query = 'test_query'
13
         query = 'test_query'
12
         dicto = defaultdict(dict)
14
         dicto = defaultdict(dict)
13
-        dicto['language'] = 'fr_FR'
15
+        dicto['language'] = 'fr-FR'
14
         params = wikipedia.request(query, dicto)
16
         params = wikipedia.request(query, dicto)
15
         self.assertIn('url', params)
17
         self.assertIn('url', params)
16
         self.assertIn(query, params['url'])
18
         self.assertIn(query, params['url'])
27
         params = wikipedia.request(query, dicto)
29
         params = wikipedia.request(query, dicto)
28
         self.assertIn('en', params['url'])
30
         self.assertIn('en', params['url'])
29
 
31
 
32
+        dicto['language'] = 'xx'
33
+        params = wikipedia.request(query, dicto)
34
+        self.assertIn('en', params['url'])
35
+
30
     def test_response(self):
36
     def test_response(self):
31
         dicto = defaultdict(dict)
37
         dicto = defaultdict(dict)
32
         dicto['language'] = 'fr'
38
         dicto['language'] = 'fr'
158
         self.assertEqual(len(results), 2)
164
         self.assertEqual(len(results), 2)
159
         self.assertEqual(results[1]['infobox'], u'披頭四樂隊')
165
         self.assertEqual(results[1]['infobox'], u'披頭四樂隊')
160
         self.assertIn(u'披头士乐队...', results[1]['content'])
166
         self.assertIn(u'披头士乐队...', results[1]['content'])
167
+
168
+    def test_fetch_supported_languages(self):
169
+        html = u"""<html></html>"""
170
+        response = mock.Mock(text=html)
171
+        languages = wikipedia._fetch_supported_languages(response)
172
+        self.assertEqual(type(languages), dict)
173
+        self.assertEqual(len(languages), 0)
174
+
175
+        html = u"""
176
+        <html>
177
+            <body>
178
+                <div>
179
+                    <div>
180
+                        <h3>Table header</h3>
181
+                        <table class="sortable jquery-tablesorter">
182
+                            <thead>
183
+                                <tr>
184
+                                    <th>N</th>
185
+                                    <th>Language</th>
186
+                                    <th>Language (local)</th>
187
+                                    <th>Wiki</th>
188
+                                    <th>Articles</th>
189
+                                </tr>
190
+                            </thead>
191
+                            <tbody>
192
+                                <tr>
193
+                                    <td>2</td>
194
+                                    <td><a>Swedish</a></td>
195
+                                    <td><a>Svenska</a></td>
196
+                                    <td><a>sv</a></td>
197
+                                    <td><a><b>3000000</b></a></td>
198
+                                </tr>
199
+                                <tr>
200
+                                    <td>3</td>
201
+                                    <td><a>Cebuano</a></td>
202
+                                    <td><a>Sinugboanong Binisaya</a></td>
203
+                                    <td><a>ceb</a></td>
204
+                                    <td><a><b>3000000</b></a></td>
205
+                                </tr>
206
+                            </tbody>
207
+                        </table>
208
+                        <h3>Table header</h3>
209
+                        <table class="sortable jquery-tablesorter">
210
+                            <thead>
211
+                                <tr>
212
+                                    <th>N</th>
213
+                                    <th>Language</th>
214
+                                    <th>Language (local)</th>
215
+                                    <th>Wiki</th>
216
+                                    <th>Articles</th>
217
+                                </tr>
218
+                            </thead>
219
+                            <tbody>
220
+                                <tr>
221
+                                    <td>2</td>
222
+                                    <td><a>Norwegian (Bokmål)</a></td>
223
+                                    <td><a>Norsk (Bokmål)</a></td>
224
+                                    <td><a>no</a></td>
225
+                                    <td><a><b>100000</b></a></td>
226
+                                </tr>
227
+                            </tbody>
228
+                        </table>
229
+                    </div>
230
+                </div>
231
+            </body>
232
+        </html>
233
+        """
234
+        response = mock.Mock(text=html)
235
+        languages = wikipedia._fetch_supported_languages(response)
236
+        self.assertEqual(type(languages), dict)
237
+        self.assertEqual(len(languages), 3)
238
+
239
+        self.assertIn('sv', languages)
240
+        self.assertIn('ceb', languages)
241
+        self.assertIn('no', languages)
242
+
243
+        self.assertEqual(type(languages['sv']), dict)
244
+        self.assertEqual(type(languages['ceb']), dict)
245
+        self.assertEqual(type(languages['no']), dict)
246
+
247
+        self.assertIn('name', languages['sv'])
248
+        self.assertIn('english_name', languages['sv'])
249
+        self.assertIn('articles', languages['sv'])
250
+
251
+        self.assertEqual(languages['sv']['name'], 'Svenska')
252
+        self.assertEqual(languages['sv']['english_name'], 'Swedish')
253
+        self.assertEqual(languages['sv']['articles'], 3000000)
254
+        self.assertEqual(languages['ceb']['name'], 'Sinugboanong Binisaya')
255
+        self.assertEqual(languages['ceb']['english_name'], 'Cebuano')
256
+        self.assertEqual(languages['ceb']['articles'], 3000000)
257
+        self.assertEqual(languages['no']['name'], u'Norsk (Bokmål)')
258
+        self.assertEqual(languages['no']['english_name'], u'Norwegian (Bokmål)')
259
+        self.assertEqual(languages['no']['articles'], 100000)

+ 30
- 0
tests/unit/engines/test_yahoo.py Vedi File

147
         results = yahoo.response(response)
147
         results = yahoo.response(response)
148
         self.assertEqual(type(results), list)
148
         self.assertEqual(type(results), list)
149
         self.assertEqual(len(results), 0)
149
         self.assertEqual(len(results), 0)
150
+
151
+    def test_fetch_supported_languages(self):
152
+        html = """<html></html>"""
153
+        response = mock.Mock(text=html)
154
+        results = yahoo._fetch_supported_languages(response)
155
+        self.assertEqual(type(results), list)
156
+        self.assertEqual(len(results), 0)
157
+
158
+        html = """
159
+        <html>
160
+            <div>
161
+                <div id="yschlang">
162
+                    <span>
163
+                        <label><input value="lang_ar"></input></label>
164
+                    </span>
165
+                    <span>
166
+                        <label><input value="lang_zh_chs"></input></label>
167
+                        <label><input value="lang_zh_cht"></input></label>
168
+                    </span>
169
+                </div>
170
+            </div>
171
+        </html>
172
+        """
173
+        response = mock.Mock(text=html)
174
+        languages = yahoo._fetch_supported_languages(response)
175
+        self.assertEqual(type(languages), list)
176
+        self.assertEqual(len(languages), 3)
177
+        self.assertIn('ar', languages)
178
+        self.assertIn('zh-chs', languages)
179
+        self.assertIn('zh-cht', languages)

+ 22
- 1
tests/unit/test_preferences.py Vedi File

1
-from searx.preferences import (EnumStringSetting, MapSetting, MissingArgumentException,
1
+from searx.preferences import (EnumStringSetting, MapSetting, MissingArgumentException, SearchLanguageSetting,
2
                                MultipleChoiceSetting, PluginsSetting, ValidationException)
2
                                MultipleChoiceSetting, PluginsSetting, ValidationException)
3
 from searx.testing import SearxTestCase
3
 from searx.testing import SearxTestCase
4
 
4
 
88
         setting.parse('2')
88
         setting.parse('2')
89
         self.assertEquals(setting.get_value(), ['2'])
89
         self.assertEquals(setting.get_value(), ['2'])
90
 
90
 
91
+    # search language settings
92
+    def test_lang_setting_valid_choice(self):
93
+        setting = SearchLanguageSetting('all', choices=['all', 'de', 'en'])
94
+        setting.parse('de')
95
+        self.assertEquals(setting.get_value(), 'de')
96
+
97
+    def test_lang_setting_invalid_choice(self):
98
+        setting = SearchLanguageSetting('all', choices=['all', 'de', 'en'])
99
+        setting.parse('xx')
100
+        self.assertEquals(setting.get_value(), 'all')
101
+
102
+    def test_lang_setting_old_cookie_choice(self):
103
+        setting = SearchLanguageSetting('all', choices=['all', 'es', 'es-ES'])
104
+        setting.parse('es_XA')
105
+        self.assertEquals(setting.get_value(), 'es')
106
+
107
+    def test_lang_setting_old_cookie_format(self):
108
+        setting = SearchLanguageSetting('all', choices=['all', 'es', 'es-ES'])
109
+        setting.parse('es_ES')
110
+        self.assertEquals(setting.get_value(), 'es-ES')
111
+
91
     # plugins settings
112
     # plugins settings
92
     def test_plugins_setting_all_default_enabled(self):
113
     def test_plugins_setting_all_default_enabled(self):
93
         plugin1 = PluginStub('plugin1', True)
114
         plugin1 = PluginStub('plugin1', True)

+ 171
- 0
utils/fetch_languages.py Vedi File

1
+# -*- coding: utf-8 -*-
2
+
3
+# This script generates languages.py by merging each engine's supported languages.
4
+#
5
+# The country names are obtained from http://api.geonames.org which requires registering as a user.
6
+#
7
+# Output files (engines_languages.json and languages.py)
8
+# are written in current directory to avoid overwriting in case something goes wrong.
9
+
10
+from requests import get
11
+from urllib import urlencode
12
+from lxml.html import fromstring
13
+from json import loads, dumps
14
+import io
15
+from sys import path
16
+path.append('../searx')  # noqa
17
+from searx.engines import engines
18
+
19
# Geonames API, queried for country names.
geonames_user = ''  # ADD USER NAME HERE
country_names_url = 'http://api.geonames.org/countryInfoJSON?{parameters}'

# Names of the generated files.
engines_languages_file = 'engines_languages.json'
languages_file = 'languages.py'

# Both dictionaries are filled in while the script runs.
engines_languages = dict()
languages = dict()
29
+
30
+
31
# Filter out invalid codes and dialects.
def valid_code(lang_code):
    """Return False for codes that must be dropped, True otherwise.

    A code is dropped when it starts with 'xx', is one of a few known-bad
    codes, ends with a rejected country tag, or is_dialect() flags it.
    """
    # sl-SL is technically not invalid, but still a mistake
    rejected_codes = ('sl-SL', 'wt-WT', 'jw')
    rejected_countries = ('UK', 'XA', 'XL')

    if lang_code[:2] == 'xx':
        return False
    if lang_code in rejected_codes:
        return False
    if lang_code[-2:] in rejected_countries:
        return False

    return not is_dialect(lang_code)
44
+
45
+
46
# Language codes with any additional tags other than language and country.
def is_dialect(lang_code):
    """Return True when `lang_code` is more than a plain 'lang' or 'lang-CC' code.

    That is the case when it has more than two '-'-separated tags, when the
    first tag is longer than three characters, or when the second of exactly
    two tags is longer than two characters.
    """
    tags = lang_code.split('-')

    if len(tags) > 2:
        return True
    if len(tags[0]) > 3:
        return True

    return len(tags) == 2 and len(tags[1]) > 2
55
+
56
+
57
# Get country name in specified language.
def get_country_name(locale):
    """Look up the country name for a 'lang-COUNTRY' locale via the Geonames API.

    Returns '' when no Geonames user is configured, when `locale` does not
    consist of exactly two '-'-separated parts, or when the response does not
    contain exactly one match. Otherwise returns the match's 'countryName'
    (or '' if that key is missing).
    """
    # Test for emptiness rather than identity: `is ''` only works by accident
    # of string interning.
    if not geonames_user:
        return ''

    parts = locale.split('-')
    if len(parts) != 2:
        return ''
    language, country = parts

    query = urlencode({'lang': language,
                       'country': country,
                       'username': geonames_user})
    response = get(country_names_url.format(parameters=query))
    payload = loads(response.text)
    matches = payload.get('geonames', None)
    if matches is None or len(matches) != 1:
        # single-argument form prints the same on Python 2 and 3
        print("No country name found for " + language + "-" + country)
        return ''

    return matches[0].get('countryName', '')
77
+
78
+
79
# Fetch supported languages for each engine and write a json file with those.
def fetch_supported_languages():
    """Collect each engine's supported languages into `engines_languages`.

    Engines without a `fetch_supported_languages` attribute are skipped. An
    engine whose fetch raises is reported on stdout and left out. The result
    is then dumped to `engines_languages_file` as UTF-8 JSON.
    """
    for engine_name in engines:
        if not hasattr(engines[engine_name], 'fetch_supported_languages'):
            continue
        try:
            fetched = engines[engine_name].fetch_supported_languages()
            engines_languages[engine_name] = fetched
        except Exception as e:
            # best effort: one failing engine must not abort the whole run
            print(e)

    # write json file
    with io.open(engines_languages_file, "w", encoding="utf-8") as f:
        serialized = dumps(engines_languages, ensure_ascii=False, encoding="utf-8")
        f.write(unicode(serialized))
91
+
92
+
93
# Join all language lists.
# Iterate all languages supported by each engine.
def join_language_lists():
    """Merge every engine's supported languages into the global `languages`.

    Wikipedia's entries are taken first, then every other engine adds the
    valid locales that are still missing or have no name yet. Afterwards each
    locale inherits missing 'name' / 'english_name' values from its bare
    language entry, locales that still have no name are removed, and a
    country name is looked up for locales that carry a country tag.
    """
    # include wikipedia first for more accurate language names;
    # its fetch may have failed, in which case there is nothing to seed with
    wikipedia_languages = engines_languages.get('wikipedia', {})
    if isinstance(wikipedia_languages, dict):
        for code, lang in wikipedia_languages.items():
            if valid_code(code):
                languages[code] = lang

    for engine_name in engines_languages:
        engine_languages = engines_languages[engine_name]
        # only dict-shaped results carry per-language details
        has_details = isinstance(engine_languages, dict)
        for locale in engine_languages:
            if not valid_code(locale):
                continue

            # if language is not on list or if it has no name yet
            if locale not in languages or not languages[locale].get('name'):
                languages[locale] = engine_languages[locale] if has_details else {}

    # get locales that have no name or country yet
    # (iterate over a snapshot: entries are deleted inside the loop)
    for locale in list(languages):
        base = languages.get(locale.split('-')[0], {})

        # try to get language names
        if not languages[locale].get('name'):
            name = base.get('name', None)
            if not name:
                # filter out locales with no name
                del languages[locale]
                continue
            languages[locale]['name'] = name

        # try to get language name in english
        if not languages[locale].get('english_name'):
            languages[locale]['english_name'] = base.get('english_name', '')

        # try to get country name
        if locale.find('-') > 0 and not languages[locale].get('country'):
            languages[locale]['country'] = get_country_name(locale) or ''
132
+
133
+
134
# Remove countryless language if language is featured in only one country.
def filter_single_country_languages():
    """Drop the bare 'lang' entry from `languages` when exactly one 'lang-CC' exists.

    Country variants are counted per language over the whole dictionary, so
    every language is evaluated (including the one that sorts last) and a
    language that has no bare entry is simply left alone.
    """
    country_counts = {}
    for code in languages:
        lang, separator, _ = code.partition('-')
        if separator:
            country_counts[lang] = country_counts.get(lang, 0) + 1

    for lang, count in country_counts.items():
        if count == 1:
            # the bare entry may not exist; nothing to remove in that case
            languages.pop(lang, None)
147
+
148
# Write languages.py.
def write_languages_file():
    """Write `languages` to `languages_file` as a `language_codes` tuple.

    Each entry is (code, name, country, english_name). For name and
    english_name, anything from the first ' (' onwards is cut off. The file
    is written as UTF-8 and is closed even if writing fails.
    """
    header = (u'# -*- coding: utf-8 -*-\n'
              u'# list of language codes\n'
              u'# this file is generated automatically by utils/fetch_languages.py\n'
              u'\nlanguage_codes = (')

    entries = []
    for code in sorted(languages):
        details = languages[code]
        entries.append(u'\n    (u"' + code + u'"'
                       + u', u"' + details['name'].split(' (')[0] + u'"'
                       + u', u"' + details.get('country', '') + u'"'
                       + u', u"' + details.get('english_name', '').split(' (')[0] + u'")')

    # joining leaves no trailing comma to strip, so an empty list stays valid
    body = u','.join(entries)
    if len(entries) == 1:
        # a one-element tuple needs its trailing comma to remain a tuple
        body += u','

    with io.open(languages_file, 'w', encoding='utf-8') as new_file:
        new_file.write(header + body + u'\n)\n')
165
+
166
+
167
if __name__ == "__main__":
    # Each step works on the module-level dictionaries filled by the previous one.
    for step in (fetch_supported_languages,
                 join_language_lists,
                 filter_single_country_languages,
                 write_languages_file):
        step()