Browse Source

Merge pull request #748 from a01200356/languages

[mod] Allow users to search in most engine supported languages
Adam Tauber 8 years ago
parent
commit
9743bde25e
54 changed files with 983 additions and 153 deletions
  1. 1
    2
      AUTHORS.rst
  2. 3
    3
      searx/autocomplete.py
  3. 1
    0
      searx/data/engines_languages.json
  4. 14
    0
      searx/engines/__init__.py
  5. 3
    2
      searx/engines/archlinux.py
  6. 14
    1
      searx/engines/bing.py
  7. 2
    1
      searx/engines/bing_images.py
  8. 2
    1
      searx/engines/bing_news.py
  9. 22
    0
      searx/engines/dailymotion.py
  10. 33
    5
      searx/engines/duckduckgo.py
  11. 2
    1
      searx/engines/duckduckgo_definitions.py
  12. 19
    1
      searx/engines/gigablast.py
  13. 19
    1
      searx/engines/google.py
  14. 3
    1
      searx/engines/google_news.py
  15. 1
    1
      searx/engines/mediawiki.py
  16. 2
    2
      searx/engines/photon.py
  17. 1
    1
      searx/engines/qwant.py
  18. 1
    1
      searx/engines/startpage.py
  19. 10
    4
      searx/engines/subtitleseeker.py
  20. 19
    2
      searx/engines/swisscows.py
  21. 1
    1
      searx/engines/twitter.py
  22. 4
    2
      searx/engines/wikidata.py
  23. 27
    2
      searx/engines/wikipedia.py
  24. 1
    1
      searx/engines/yacy.py
  25. 21
    1
      searx/engines/yahoo.py
  26. 1
    1
      searx/engines/yahoo_news.py
  27. 4
    2
      searx/engines/yandex.py
  28. 1
    1
      searx/engines/youtube_api.py
  29. 129
    76
      searx/languages.py
  30. 21
    2
      searx/preferences.py
  31. 7
    4
      searx/query.py
  32. 5
    1
      searx/search.py
  33. 5
    0
      searx/static/plugins/js/search_on_category_select.js
  34. 3
    3
      searx/templates/courgette/preferences.html
  35. 3
    3
      searx/templates/legacy/preferences.html
  36. 1
    0
      searx/templates/oscar/advanced.html
  37. 12
    0
      searx/templates/oscar/languages.html
  38. 5
    6
      searx/templates/oscar/preferences.html
  39. 3
    3
      searx/templates/pix-art/preferences.html
  40. 8
    4
      searx/webapp.py
  41. 3
    3
      tests/robot/test_basic.robot
  42. 32
    0
      tests/unit/engines/test_bing.py
  43. 37
    0
      tests/unit/engines/test_dailymotion.py
  44. 26
    1
      tests/unit/engines/test_duckduckgo.py
  45. 4
    0
      tests/unit/engines/test_duckduckgo_definitions.py
  46. 31
    0
      tests/unit/engines/test_gigablast.py
  47. 58
    1
      tests/unit/engines/test_google.py
  48. 1
    1
      tests/unit/engines/test_qwant.py
  49. 6
    1
      tests/unit/engines/test_subtitleseeker.py
  50. 28
    1
      tests/unit/engines/test_swisscows.py
  51. 100
    1
      tests/unit/engines/test_wikipedia.py
  52. 30
    0
      tests/unit/engines/test_yahoo.py
  53. 22
    1
      tests/unit/test_preferences.py
  54. 171
    0
      utils/fetch_languages.py

+ 1
- 2
AUTHORS.rst View File

@@ -43,7 +43,7 @@ generally made searx better:
43 43
 - Kang-min Liu
44 44
 - Kirill Isakov
45 45
 - Guilhem Bonnefille
46
-- Marc Abonce Seguin
46
+- Marc Abonce Seguin @a01200356
47 47
 - @jibe-b
48 48
 - Christian Pietsch @pietsch
49 49
 - @Maxqia
@@ -55,7 +55,6 @@ generally made searx better:
55 55
 - Ammar Najjar @ammarnajjar
56 56
 - @stepshal
57 57
 - François Revol @mmuman
58
-- marc @a01200356
59 58
 - Harry Wood @harry-wood
60 59
 - Thomas Renard @threnard
61 60
 - Pydo `<https://github.com/pydo>`_

+ 3
- 3
searx/autocomplete.py View File

@@ -81,17 +81,17 @@ def searx_bang(full_query):
81 81
             engine_query = full_query.getSearchQuery()[1:]
82 82
 
83 83
             for lc in language_codes:
84
-                lang_id, lang_name, country = map(str.lower, lc)
84
+                lang_id, lang_name, country, english_name = map(str.lower, lc)
85 85
 
86 86
                 # check if query starts with language-id
87 87
                 if lang_id.startswith(engine_query):
88 88
                     if len(engine_query) <= 2:
89
-                        results.append(':{lang_id}'.format(lang_id=lang_id.split('_')[0]))
89
+                        results.append(':{lang_id}'.format(lang_id=lang_id.split('-')[0]))
90 90
                     else:
91 91
                         results.append(':{lang_id}'.format(lang_id=lang_id))
92 92
 
93 93
                 # check if query starts with language name
94
-                if lang_name.startswith(engine_query):
94
+                if lang_name.startswith(engine_query) or english_name.startswith(engine_query):
95 95
                     results.append(':{lang_name}'.format(lang_name=lang_name))
96 96
 
97 97
                 # check if query starts with country

+ 1
- 0
searx/data/engines_languages.json
File diff suppressed because it is too large
View File


+ 14
- 0
searx/engines/__init__.py View File

@@ -20,6 +20,8 @@ from os.path import realpath, dirname
20 20
 import sys
21 21
 from flask_babel import gettext
22 22
 from operator import itemgetter
23
+from json import loads
24
+from requests import get
23 25
 from searx import settings
24 26
 from searx import logger
25 27
 from searx.utils import load_module
@@ -33,10 +35,13 @@ engines = {}
33 35
 
34 36
 categories = {'general': []}
35 37
 
38
+languages = loads(open(engine_dir + '/../data/engines_languages.json').read())
39
+
36 40
 engine_shortcuts = {}
37 41
 engine_default_args = {'paging': False,
38 42
                        'categories': ['general'],
39 43
                        'language_support': True,
44
+                       'supported_languages': [],
40 45
                        'safesearch': False,
41 46
                        'timeout': settings['outgoing']['request_timeout'],
42 47
                        'shortcut': '-',
@@ -85,6 +90,15 @@ def load_engine(engine_data):
85 90
                          .format(engine.name, engine_attr))
86 91
             sys.exit(1)
87 92
 
93
+    # assign supported languages from json file
94
+    if engine_data['name'] in languages:
95
+        setattr(engine, 'supported_languages', languages[engine_data['name']])
96
+
97
+    # assign language fetching method if auxiliary method exists
98
+    if hasattr(engine, '_fetch_supported_languages'):
99
+        setattr(engine, 'fetch_supported_languages',
100
+                lambda: engine._fetch_supported_languages(get(engine.supported_languages_url)))
101
+
88 102
     engine.stats = {
89 103
         'result_count': 0,
90 104
         'search_count': 0,

+ 3
- 2
searx/engines/archlinux.py View File

@@ -29,8 +29,8 @@ xpath_link = './/div[@class="mw-search-result-heading"]/a'
29 29
 
30 30
 # cut 'en' from 'en-US', 'de' from 'de-CH', and so on
31 31
 def locale_to_lang_code(locale):
32
-    if locale.find('_') >= 0:
33
-        locale = locale.split('_')[0]
32
+    if locale.find('-') >= 0:
33
+        locale = locale.split('-')[0]
34 34
     return locale
35 35
 
36 36
 
@@ -95,6 +95,7 @@ main_langs = {
95 95
     'uk': 'Українська',
96 96
     'zh': '简体中文'
97 97
 }
98
+supported_languages = dict(lang_urls, **main_langs)
98 99
 
99 100
 
100 101
 # do search-request

+ 14
- 1
searx/engines/bing.py View File

@@ -21,6 +21,7 @@ from searx.engines.xpath import extract_text
21 21
 categories = ['general']
22 22
 paging = True
23 23
 language_support = True
24
+supported_languages_url = 'https://www.bing.com/account/general'
24 25
 
25 26
 # search-url
26 27
 base_url = 'https://www.bing.com/'
@@ -32,7 +33,7 @@ def request(query, params):
32 33
     offset = (params['pageno'] - 1) * 10 + 1
33 34
 
34 35
     if params['language'] != 'all':
35
-        query = u'language:{} {}'.format(params['language'].split('_')[0].upper(),
36
+        query = u'language:{} {}'.format(params['language'].split('-')[0].upper(),
36 37
                                          query.decode('utf-8')).encode('utf-8')
37 38
 
38 39
     search_path = search_string.format(
@@ -81,3 +82,15 @@ def response(resp):
81 82
 
82 83
     # return results
83 84
     return results
85
+
86
+
87
+# get supported languages from their site
88
+def _fetch_supported_languages(resp):
89
+    supported_languages = []
90
+    dom = html.fromstring(resp.text)
91
+    options = dom.xpath('//div[@id="limit-languages"]//input')
92
+    for option in options:
93
+        code = option.xpath('./@id')[0].replace('_', '-')
94
+        supported_languages.append(code)
95
+
96
+    return supported_languages

+ 2
- 1
searx/engines/bing_images.py View File

@@ -19,6 +19,7 @@ from urllib import urlencode
19 19
 from lxml import html
20 20
 from json import loads
21 21
 import re
22
+from searx.engines.bing import _fetch_supported_languages, supported_languages_url
22 23
 
23 24
 # engine dependent config
24 25
 categories = ['images']
@@ -53,7 +54,7 @@ def request(query, params):
53 54
     if params['language'] == 'all':
54 55
         language = 'en-US'
55 56
     else:
56
-        language = params['language'].replace('_', '-')
57
+        language = params['language']
57 58
 
58 59
     search_path = search_string.format(
59 60
         query=urlencode({'q': query}),

+ 2
- 1
searx/engines/bing_news.py View File

@@ -17,6 +17,7 @@ from datetime import datetime
17 17
 from dateutil import parser
18 18
 from lxml import etree
19 19
 from searx.utils import list_get
20
+from searx.engines.bing import _fetch_supported_languages, supported_languages_url
20 21
 
21 22
 # engine dependent config
22 23
 categories = ['news']
@@ -74,7 +75,7 @@ def request(query, params):
74 75
     if params['language'] == 'all':
75 76
         language = 'en-US'
76 77
     else:
77
-        language = params['language'].replace('_', '-')
78
+        language = params['language']
78 79
 
79 80
     params['url'] = _get_url(query, language, offset, params['time_range'])
80 81
 

+ 22
- 0
searx/engines/dailymotion.py View File

@@ -15,6 +15,7 @@
15 15
 from urllib import urlencode
16 16
 from json import loads
17 17
 from datetime import datetime
18
+from requests import get
18 19
 
19 20
 # engine dependent config
20 21
 categories = ['videos']
@@ -27,6 +28,8 @@ search_url = 'https://api.dailymotion.com/videos?fields=created_time,title,descr
27 28
 embedded_url = '<iframe frameborder="0" width="540" height="304" ' +\
28 29
     'data-src="//www.dailymotion.com/embed/video/{videoid}" allowfullscreen></iframe>'
29 30
 
31
+supported_languages_url = 'https://api.dailymotion.com/languages'
32
+
30 33
 
31 34
 # do search-request
32 35
 def request(query, params):
@@ -74,3 +77,22 @@ def response(resp):
74 77
 
75 78
     # return results
76 79
     return results
80
+
81
+
82
+# get supported languages from their site
83
+def _fetch_supported_languages(resp):
84
+    supported_languages = {}
85
+
86
+    response_json = loads(resp.text)
87
+
88
+    for language in response_json['list']:
89
+        supported_languages[language['code']] = {}
90
+
91
+        name = language['native_name']
92
+        if name:
93
+            supported_languages[language['code']]['name'] = name
94
+        english_name = language['name']
95
+        if english_name:
96
+            supported_languages[language['code']]['english_name'] = english_name
97
+
98
+    return supported_languages

+ 33
- 5
searx/engines/duckduckgo.py View File

@@ -15,13 +15,15 @@
15 15
 
16 16
 from urllib import urlencode
17 17
 from lxml.html import fromstring
18
+from requests import get
19
+from json import loads
18 20
 from searx.engines.xpath import extract_text
19
-from searx.languages import language_codes
20 21
 
21 22
 # engine dependent config
22 23
 categories = ['general']
23 24
 paging = True
24 25
 language_support = True
26
+supported_languages_url = 'https://duckduckgo.com/d2030.js'
25 27
 time_range_support = True
26 28
 
27 29
 # search-url
@@ -46,19 +48,31 @@ def request(query, params):
46 48
 
47 49
     offset = (params['pageno'] - 1) * 30
48 50
 
51
+    # custom fixes for languages
49 52
     if params['language'] == 'all':
50 53
         locale = None
54
+    elif params['language'][:2] == 'ja':
55
+        locale = 'jp-jp'
56
+    elif params['language'][:2] == 'sl':
57
+        locale = 'sl-sl'
58
+    elif params['language'] == 'zh-TW':
59
+        locale = 'tw-tzh'
60
+    elif params['language'] == 'zh-HK':
61
+        locale = 'hk-tzh'
62
+    elif params['language'][-2:] == 'SA':
63
+        locale = 'xa-' + params['language'].split('-')[0]
64
+    elif params['language'][-2:] == 'GB':
65
+        locale = 'uk-' + params['language'].split('-')[0]
51 66
     else:
52
-        locale = params['language'].split('_')
67
+        locale = params['language'].split('-')
53 68
         if len(locale) == 2:
54 69
             # country code goes first
55 70
             locale = locale[1].lower() + '-' + locale[0].lower()
56 71
         else:
57 72
             # tries to get a country code from language
58 73
             locale = locale[0].lower()
59
-            lang_codes = [x[0] for x in language_codes]
60
-            for lc in lang_codes:
61
-                lc = lc.split('_')
74
+            for lc in supported_languages:
75
+                lc = lc.split('-')
62 76
                 if locale == lc[0]:
63 77
                     locale = lc[1].lower() + '-' + lc[0].lower()
64 78
                     break
@@ -102,3 +116,17 @@ def response(resp):
102 116
 
103 117
     # return results
104 118
     return results
119
+
120
+
121
+# get supported languages from their site
122
+def _fetch_supported_languages(resp):
123
+
124
+    # response is a js file with regions as an embedded object
125
+    response_page = resp.text
126
+    response_page = response_page[response_page.find('regions:{') + 8:]
127
+    response_page = response_page[:response_page.find('}') + 1]
128
+
129
+    regions_json = loads(response_page)
130
+    supported_languages = map((lambda x: x[3:] + '-' + x[:2].upper()), regions_json.keys())
131
+
132
+    return supported_languages

+ 2
- 1
searx/engines/duckduckgo_definitions.py View File

@@ -4,6 +4,7 @@ from re import compile, sub
4 4
 from lxml import html
5 5
 from searx.utils import html_to_text
6 6
 from searx.engines.xpath import extract_text
7
+from searx.engines.duckduckgo import _fetch_supported_languages, supported_languages_url
7 8
 
8 9
 url = 'https://api.duckduckgo.com/'\
9 10
     + '?{query}&format=json&pretty=0&no_redirect=1&d=1'
@@ -23,7 +24,7 @@ def result_to_text(url, text, htmlResult):
23 24
 
24 25
 def request(query, params):
25 26
     params['url'] = url.format(query=urlencode({'q': query}))
26
-    params['headers']['Accept-Language'] = params['language']
27
+    params['headers']['Accept-Language'] = params['language'].split('-')[0]
27 28
     return params
28 29
 
29 30
 

+ 19
- 1
searx/engines/gigablast.py View File

@@ -14,6 +14,7 @@ from json import loads
14 14
 from random import randint
15 15
 from time import time
16 16
 from urllib import urlencode
17
+from lxml.html import fromstring
17 18
 
18 19
 # engine dependent config
19 20
 categories = ['general']
@@ -40,6 +41,8 @@ url_xpath = './/url'
40 41
 title_xpath = './/title'
41 42
 content_xpath = './/sum'
42 43
 
44
+supported_languages_url = 'https://gigablast.com/search?&rxikd=1'
45
+
43 46
 
44 47
 # do search-request
45 48
 def request(query, params):
@@ -48,7 +51,9 @@ def request(query, params):
48 51
     if params['language'] == 'all':
49 52
         language = 'xx'
50 53
     else:
51
-        language = params['language'][0:2]
54
+        language = params['language'].replace('-', '_').lower()
55
+        if language.split('-')[0] != 'zh':
56
+            language = language.split('-')[0]
52 57
 
53 58
     if params['safesearch'] >= 1:
54 59
         safesearch = 1
@@ -82,3 +87,16 @@ def response(resp):
82 87
 
83 88
     # return results
84 89
     return results
90
+
91
+
92
+# get supported languages from their site
93
+def _fetch_supported_languages(resp):
94
+    supported_languages = []
95
+    dom = fromstring(resp.text)
96
+    links = dom.xpath('//span[@id="menu2"]/a')
97
+    for link in links:
98
+        code = link.xpath('./@href')[0][-2:]
99
+        if code != 'xx' and code not in supported_languages:
100
+            supported_languages.append(code)
101
+
102
+    return supported_languages

+ 19
- 1
searx/engines/google.py View File

@@ -103,6 +103,7 @@ map_hostname_start = 'maps.google.'
103 103
 maps_path = '/maps'
104 104
 redirect_path = '/url'
105 105
 images_path = '/images'
106
+supported_languages_url = 'https://www.google.com/preferences?#languages'
106 107
 
107 108
 # specific xpath variables
108 109
 results_xpath = '//div[@class="g"]'
@@ -167,8 +168,12 @@ def request(query, params):
167 168
         language = 'en'
168 169
         country = 'US'
169 170
         url_lang = ''
171
+    elif params['language'][:2] == 'jv':
172
+        language = 'jw'
173
+        country = 'ID'
174
+        url_lang = 'lang_jw'
170 175
     else:
171
-        language_array = params['language'].lower().split('_')
176
+        language_array = params['language'].lower().split('-')
172 177
         if len(language_array) == 2:
173 178
             country = language_array[1]
174 179
         else:
@@ -355,3 +360,16 @@ def attributes_to_html(attributes):
355 360
         retval = retval + '<tr><th>' + a.get('label') + '</th><td>' + value + '</td></tr>'
356 361
     retval = retval + '</table>'
357 362
     return retval
363
+
364
+
365
+# get supported languages from their site
366
+def _fetch_supported_languages(resp):
367
+    supported_languages = {}
368
+    dom = html.fromstring(resp.text)
369
+    options = dom.xpath('//table//td/font/label/span')
370
+    for option in options:
371
+        code = option.xpath('./@id')[0][1:]
372
+        name = option.text.title()
373
+        supported_languages[code] = {"name": name}
374
+
375
+    return supported_languages

+ 3
- 1
searx/engines/google_news.py View File

@@ -12,6 +12,8 @@
12 12
 
13 13
 from lxml import html
14 14
 from urllib import urlencode
15
+from json import loads
16
+from searx.engines.google import _fetch_supported_languages, supported_languages_url
15 17
 
16 18
 # search-url
17 19
 categories = ['news']
@@ -50,7 +52,7 @@ def request(query, params):
50 52
                                       search_options=urlencode(search_options))
51 53
 
52 54
     if params['language'] != 'all':
53
-        language_array = params['language'].lower().split('_')
55
+        language_array = params['language'].lower().split('-')
54 56
         params['url'] += '&lr=lang_' + language_array[0]
55 57
 
56 58
     return params

+ 1
- 1
searx/engines/mediawiki.py View File

@@ -46,7 +46,7 @@ def request(query, params):
46 46
     if params['language'] == 'all':
47 47
         language = 'en'
48 48
     else:
49
-        language = params['language'].split('_')[0]
49
+        language = params['language'].split('-')[0]
50 50
 
51 51
     # format_string [('https://', 'language', '', None), ('.wikipedia.org/', None, None, None)]
52 52
     if any(x[1] == 'language' for x in format_strings):

+ 2
- 2
searx/engines/photon.py View File

@@ -26,7 +26,7 @@ search_string = 'api/?{query}&limit={limit}'
26 26
 result_base_url = 'https://openstreetmap.org/{osm_type}/{osm_id}'
27 27
 
28 28
 # list of supported languages
29
-allowed_languages = ['de', 'en', 'fr', 'it']
29
+supported_languages = ['de', 'en', 'fr', 'it']
30 30
 
31 31
 
32 32
 # do search-request
@@ -37,7 +37,7 @@ def request(query, params):
37 37
 
38 38
     if params['language'] != 'all':
39 39
         language = params['language'].split('_')[0]
40
-        if language in allowed_languages:
40
+        if language in supported_languages:
41 41
             params['url'] = params['url'] + "&lang=" + language
42 42
 
43 43
     # using searx User-Agent

+ 1
- 1
searx/engines/qwant.py View File

@@ -46,7 +46,7 @@ def request(query, params):
46 46
 
47 47
     # add language tag if specified
48 48
     if params['language'] != 'all':
49
-        params['url'] += '&locale=' + params['language'].lower()
49
+        params['url'] += '&locale=' + params['language'].replace('-', '_').lower()
50 50
 
51 51
     return params
52 52
 

+ 1
- 1
searx/engines/startpage.py View File

@@ -47,7 +47,7 @@ def request(query, params):
47 47
 
48 48
     # set language if specified
49 49
     if params['language'] != 'all':
50
-        params['data']['with_language'] = ('lang_' + params['language'].split('_')[0])
50
+        params['data']['with_language'] = ('lang_' + params['language'].split('-')[0])
51 51
 
52 52
     return params
53 53
 

+ 10
- 4
searx/engines/subtitleseeker.py View File

@@ -22,7 +22,7 @@ language = ""
22 22
 
23 23
 # search-url
24 24
 url = 'http://www.subtitleseeker.com/'
25
-search_url = url + 'search/TITLES/{query}&p={pageno}'
25
+search_url = url + 'search/TITLES/{query}?p={pageno}'
26 26
 
27 27
 # specific xpath variables
28 28
 results_xpath = '//div[@class="boxRows"]'
@@ -43,10 +43,16 @@ def response(resp):
43 43
 
44 44
     search_lang = ""
45 45
 
46
-    if resp.search_params['language'] != 'all':
47
-        search_lang = [lc[1]
46
+    # dirty fix for languages named differently on their site
47
+    if resp.search_params['language'][:2] == 'fa':
48
+        search_lang = 'Farsi'
49
+    elif resp.search_params['language'] == 'pt-BR':
50
+        search_lang = 'Brazilian'
51
+    elif resp.search_params['language'] != 'all':
52
+        search_lang = [lc[3]
48 53
                        for lc in language_codes
49
-                       if lc[0][:2] == resp.search_params['language'].split('_')[0]][0]
54
+                       if lc[0].split('-')[0] == resp.search_params['language'].split('-')[0]]
55
+        search_lang = search_lang[0].split(' (')[0]
50 56
 
51 57
     # parse results
52 58
     for result in dom.xpath(results_xpath):

+ 19
- 2
searx/engines/swisscows.py View File

@@ -13,6 +13,7 @@
13 13
 from json import loads
14 14
 from urllib import urlencode, unquote
15 15
 import re
16
+from lxml.html import fromstring
16 17
 
17 18
 # engine dependent config
18 19
 categories = ['general', 'images']
@@ -23,6 +24,8 @@ language_support = True
23 24
 base_url = 'https://swisscows.ch/'
24 25
 search_string = '?{query}&page={page}'
25 26
 
27
+supported_languages_url = base_url
28
+
26 29
 # regex
27 30
 regex_json = re.compile(r'initialData: {"Request":(.|\n)*},\s*environment')
28 31
 regex_json_remove_start = re.compile(r'^initialData:\s*')
@@ -35,9 +38,11 @@ def request(query, params):
35 38
     if params['language'] == 'all':
36 39
         ui_language = 'browser'
37 40
         region = 'browser'
41
+    elif params['language'].split('-')[0] == 'no':
42
+        region = 'nb-NO'
38 43
     else:
39
-        region = params['language'].replace('_', '-')
40
-        ui_language = params['language'].split('_')[0]
44
+        region = params['language']
45
+        ui_language = params['language'].split('-')[0]
41 46
 
42 47
     search_path = search_string.format(
43 48
         query=urlencode({'query': query,
@@ -106,3 +111,15 @@ def response(resp):
106 111
 
107 112
     # return results
108 113
     return results
114
+
115
+
116
+# get supported languages from their site
117
+def _fetch_supported_languages(resp):
118
+    supported_languages = []
119
+    dom = fromstring(resp.text)
120
+    options = dom.xpath('//div[@id="regions-popup"]//ul/li/a')
121
+    for option in options:
122
+        code = option.xpath('./@data-val')[0]
123
+        supported_languages.append(code)
124
+
125
+    return supported_languages

+ 1
- 1
searx/engines/twitter.py View File

@@ -40,7 +40,7 @@ def request(query, params):
40 40
 
41 41
     # set language if specified
42 42
     if params['language'] != 'all':
43
-        params['cookies']['lang'] = params['language'].split('_')[0]
43
+        params['cookies']['lang'] = params['language'].split('-')[0]
44 44
     else:
45 45
         params['cookies']['lang'] = 'en'
46 46
 

+ 4
- 2
searx/engines/wikidata.py View File

@@ -14,6 +14,8 @@
14 14
 from searx import logger
15 15
 from searx.poolrequests import get
16 16
 from searx.engines.xpath import extract_text
17
+from searx.utils import format_date_by_locale
18
+from searx.engines.wikipedia import _fetch_supported_languages, supported_languages_url
17 19
 
18 20
 from json import loads
19 21
 from lxml.html import fromstring
@@ -55,7 +57,7 @@ calendar_name_xpath = './/sup[contains(@class,"wb-calendar-name")]'
55 57
 
56 58
 
57 59
 def request(query, params):
58
-    language = params['language'].split('_')[0]
60
+    language = params['language'].split('-')[0]
59 61
     if language == 'all':
60 62
         language = 'en'
61 63
 
@@ -70,7 +72,7 @@ def response(resp):
70 72
     html = fromstring(resp.content)
71 73
     wikidata_ids = html.xpath(wikidata_ids_xpath)
72 74
 
73
-    language = resp.search_params['language'].split('_')[0]
75
+    language = resp.search_params['language'].split('-')[0]
74 76
     if language == 'all':
75 77
         language = 'en'
76 78
 

+ 27
- 2
searx/engines/wikipedia.py View File

@@ -12,6 +12,8 @@
12 12
 
13 13
 from json import loads
14 14
 from urllib import urlencode, quote
15
+from lxml.html import fromstring
16
+
15 17
 
16 18
 # search-url
17 19
 base_url = 'https://{language}.wikipedia.org/'
@@ -24,14 +26,16 @@ search_postfix = 'w/api.php?'\
24 26
     '&explaintext'\
25 27
     '&pithumbsize=300'\
26 28
     '&redirects'
29
+supported_languages_url = 'https://meta.wikimedia.org/wiki/List_of_Wikipedias'
27 30
 
28 31
 
29 32
 # set language in base_url
30 33
 def url_lang(lang):
31
-    if lang == 'all':
34
+    lang = lang.split('-')[0]
35
+    if lang == 'all' or lang not in supported_languages:
32 36
         language = 'en'
33 37
     else:
34
-        language = lang.split('_')[0]
38
+        language = lang
35 39
 
36 40
     return base_url.format(language=language)
37 41
 
@@ -111,3 +115,24 @@ def response(resp):
111 115
                     'urls': [{'title': 'Wikipedia', 'url': wikipedia_link}]})
112 116
 
113 117
     return results
118
+
119
+
120
+# get supported languages from their site
121
+def _fetch_supported_languages(resp):
122
+    supported_languages = {}
123
+    dom = fromstring(resp.text)
124
+    tables = dom.xpath('//table[contains(@class,"sortable")]')
125
+    for table in tables:
126
+        # exclude header row
127
+        trs = table.xpath('.//tr')[1:]
128
+        for tr in trs:
129
+            td = tr.xpath('./td')
130
+            code = td[3].xpath('./a')[0].text
131
+            name = td[2].xpath('./a')[0].text
132
+            english_name = td[1].xpath('./a')[0].text
133
+            articles = int(td[4].xpath('./a/b')[0].text.replace(',', ''))
134
+            # exclude languages with too few articles
135
+            if articles >= 100000:
136
+                supported_languages[code] = {"name": name, "english_name": english_name, "articles": articles}
137
+
138
+    return supported_languages

+ 1
- 1
searx/engines/yacy.py View File

@@ -53,7 +53,7 @@ def request(query, params):
53 53
 
54 54
     # add language tag if specified
55 55
     if params['language'] != 'all':
56
-        params['url'] += '&lr=lang_' + params['language'].split('_')[0]
56
+        params['url'] += '&lr=lang_' + params['language'].split('-')[0]
57 57
 
58 58
     return params
59 59
 

+ 21
- 1
searx/engines/yahoo.py View File

@@ -27,6 +27,8 @@ base_url = 'https://search.yahoo.com/'
27 27
 search_url = 'search?{query}&b={offset}&fl=1&vl=lang_{lang}'
28 28
 search_url_with_time = 'search?{query}&b={offset}&fl=1&vl=lang_{lang}&age={age}&btf={btf}&fr2=time'
29 29
 
30
+supported_languages_url = 'https://search.yahoo.com/web/advanced'
31
+
30 32
 # specific xpath variables
31 33
 results_xpath = "//div[contains(concat(' ', normalize-space(@class), ' '), ' Sr ')]"
32 34
 url_xpath = './/h3/a/@href'
@@ -72,7 +74,13 @@ def _get_url(query, offset, language, time_range):
72 74
 def _get_language(params):
73 75
     if params['language'] == 'all':
74 76
         return 'en'
75
-    return params['language'].split('_')[0]
77
+    elif params['language'][:2] == 'zh':
78
+        if params['language'] == 'zh' or params['language'] == 'zh-CH':
79
+            return 'szh'
80
+        else:
81
+            return 'tzh'
82
+    else:
83
+        return params['language'].split('-')[0]
76 84
 
77 85
 
78 86
 # do search-request
@@ -132,3 +140,15 @@ def response(resp):
132 140
 
133 141
     # return results
134 142
     return results
143
+
144
+
145
+# get supported languages from their site
146
+def _fetch_supported_languages(resp):
147
+    supported_languages = []
148
+    dom = html.fromstring(resp.text)
149
+    options = dom.xpath('//div[@id="yschlang"]/span/label/input')
150
+    for option in options:
151
+        code = option.xpath('./@value')[0][5:].replace('_', '-')
152
+        supported_languages.append(code)
153
+
154
+    return supported_languages

+ 1
- 1
searx/engines/yahoo_news.py View File

@@ -12,7 +12,7 @@
12 12
 from urllib import urlencode
13 13
 from lxml import html
14 14
 from searx.engines.xpath import extract_text, extract_url
15
-from searx.engines.yahoo import parse_url
15
+from searx.engines.yahoo import parse_url, _fetch_supported_languages, supported_languages_url
16 16
 from datetime import datetime, timedelta
17 17
 import re
18 18
 from dateutil import parser

+ 4
- 2
searx/engines/yandex.py View File

@@ -22,7 +22,9 @@ language_support = True  # TODO
22 22
 
23 23
 default_tld = 'com'
24 24
 language_map = {'ru': 'ru',
25
-                'ua': 'uk',
25
+                'ua': 'ua',
26
+                'be': 'by',
27
+                'kk': 'kz',
26 28
                 'tr': 'com.tr'}
27 29
 
28 30
 # search-url
@@ -36,7 +38,7 @@ content_xpath = './/div[@class="text-container typo typo_text_m typo_line_m orga
36 38
 
37 39
 
38 40
 def request(query, params):
39
-    lang = params['language'].split('_')[0]
41
+    lang = params['language'].split('-')[0]
40 42
     host = base_url.format(tld=language_map.get(lang) or default_tld)
41 43
     params['url'] = host + search_url.format(page=params['pageno'] - 1,
42 44
                                              query=urlencode({'text': query}))

+ 1
- 1
searx/engines/youtube_api.py View File

@@ -36,7 +36,7 @@ def request(query, params):
36 36
 
37 37
     # add language tag if specified
38 38
     if params['language'] != 'all':
39
-        params['url'] += '&relevanceLanguage=' + params['language'].split('_')[0]
39
+        params['url'] += '&relevanceLanguage=' + params['language'].split('-')[0]
40 40
 
41 41
     return params
42 42
 

+ 129
- 76
searx/languages.py View File

@@ -1,78 +1,131 @@
1
-'''
2
-searx is free software: you can redistribute it and/or modify
3
-it under the terms of the GNU Affero General Public License as published by
4
-the Free Software Foundation, either version 3 of the License, or
5
-(at your option) any later version.
6
-
7
-searx is distributed in the hope that it will be useful,
8
-but WITHOUT ANY WARRANTY; without even the implied warranty of
9
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10
-GNU Affero General Public License for more details.
11
-
12
-You should have received a copy of the GNU Affero General Public License
13
-along with searx. If not, see < http://www.gnu.org/licenses/ >.
14
-
15
-(C) 2013- by Adam Tauber, <asciimoo@gmail.com>
16
-'''
17
-
1
+# -*- coding: utf-8 -*-
18 2
 # list of language codes
3
+# this file is generated automatically by utils/update_search_languages.py
4
+
19 5
 language_codes = (
20
-    ("ar_XA", "Arabic", "Arabia"),
21
-    ("bg_BG", "Bulgarian", "Bulgaria"),
22
-    ("cs_CZ", "Czech", "Czech Republic"),
23
-    ("da_DK", "Danish", "Denmark"),
24
-    ("de_AT", "German", "Austria"),
25
-    ("de_CH", "German", "Switzerland"),
26
-    ("de_DE", "German", "Germany"),
27
-    ("el_GR", "Greek", "Greece"),
28
-    ("en_AU", "English", "Australia"),
29
-    ("en_CA", "English", "Canada"),
30
-    ("en_GB", "English", "United Kingdom"),
31
-    ("en_ID", "English", "Indonesia"),
32
-    ("en_IE", "English", "Ireland"),
33
-    ("en_IN", "English", "India"),
34
-    ("en_MY", "English", "Malaysia"),
35
-    ("en_NZ", "English", "New Zealand"),
36
-    ("en_PH", "English", "Philippines"),
37
-    ("en_SG", "English", "Singapore"),
38
-    ("en_US", "English", "United States"),
39
-    ("en_XA", "English", "Arabia"),
40
-    ("en_ZA", "English", "South Africa"),
41
-    ("es_AR", "Spanish", "Argentina"),
42
-    ("es_CL", "Spanish", "Chile"),
43
-    ("es_ES", "Spanish", "Spain"),
44
-    ("es_MX", "Spanish", "Mexico"),
45
-    ("es_US", "Spanish", "United States"),
46
-    ("es_XL", "Spanish", "Latin America"),
47
-    ("et_EE", "Estonian", "Estonia"),
48
-    ("fi_FI", "Finnish", "Finland"),
49
-    ("fr_BE", "French", "Belgium"),
50
-    ("fr_CA", "French", "Canada"),
51
-    ("fr_CH", "French", "Switzerland"),
52
-    ("fr_FR", "French", "France"),
53
-    ("he_IL", "Hebrew", "Israel"),
54
-    ("hr_HR", "Croatian", "Croatia"),
55
-    ("hu_HU", "Hungarian", "Hungary"),
56
-    ("it_IT", "Italian", "Italy"),
57
-    ("ja_JP", "Japanese", "Japan"),
58
-    ("ko_KR", "Korean", "Korea"),
59
-    ("lt_LT", "Lithuanian", "Lithuania"),
60
-    ("lv_LV", "Latvian", "Latvia"),
61
-    ("nb_NO", "Norwegian", "Norway"),
62
-    ("nl_BE", "Dutch", "Belgium"),
63
-    ("nl_NL", "Dutch", "Netherlands"),
64
-    ("oc_OC", "Occitan", "Occitan"),
65
-    ("pl_PL", "Polish", "Poland"),
66
-    ("pt_BR", "Portuguese", "Brazil"),
67
-    ("pt_PT", "Portuguese", "Portugal"),
68
-    ("ro_RO", "Romanian", "Romania"),
69
-    ("ru_RU", "Russian", "Russia"),
70
-    ("sk_SK", "Slovak", "Slovak Republic"),
71
-    ("sl_SL", "Slovenian", "Slovenia"),
72
-    ("sv_SE", "Swedish", "Sweden"),
73
-    ("th_TH", "Thai", "Thailand"),
74
-    ("tr_TR", "Turkish", "Turkey"),
75
-    ("uk_UA", "Ukrainian", "Ukraine"),
76
-    ("zh_CN", "Chinese", "China"),
77
-    ("zh_HK", "Chinese", "Hong Kong SAR"),
78
-    ("zh_TW", "Chinese", "Taiwan"))
6
+    (u"af", u"Afrikaans", u"", u""),
7
+    (u"am", u"አማርኛ", u"", u"Amharic"),
8
+    (u"ar-SA", u"العربية", u"المملكة العربية السعودية", u"Arabic"),
9
+    (u"az", u"Azərbaycanca", u"", u"Azerbaijani"),
10
+    (u"be", u"Беларуская", u"", u"Belarusian"),
11
+    (u"bg-BG", u"Български", u"България", u"Bulgarian"),
12
+    (u"bn", u"বাংলা", u"", u"Bengali"),
13
+    (u"br", u"Brezhoneg", u"", u"Breton"),
14
+    (u"bs", u"Bosnian", u"", u"Bosnian"),
15
+    (u"ca", u"Català", u"", u"Catalan"),
16
+    (u"ca-CT", u"Català", u"", u"Catalan"),
17
+    (u"ca-ES", u"Català", u"Espanya", u"Catalan"),
18
+    (u"ce", u"Нохчийн", u"", u"Chechen"),
19
+    (u"ceb", u"Sinugboanong Binisaya", u"", u"Cebuano"),
20
+    (u"cs-CZ", u"Čeština", u"Česko", u"Czech"),
21
+    (u"cy", u"Cymraeg", u"", u"Welsh"),
22
+    (u"da-DK", u"Dansk", u"Danmark", u"Danish"),
23
+    (u"de", u"Deutsch", u"", u"German"),
24
+    (u"de-AT", u"Deutsch", u"Österreich", u"German"),
25
+    (u"de-CH", u"Deutsch", u"Schweiz", u"German"),
26
+    (u"de-DE", u"Deutsch", u"Deutschland", u"German"),
27
+    (u"el-GR", u"Ελληνικά", u"Ελλάδα", u"Greek"),
28
+    (u"en", u"English", u"", u"English"),
29
+    (u"en-AU", u"English", u"Australia", u"English"),
30
+    (u"en-CA", u"English", u"Canada", u"English"),
31
+    (u"en-GB", u"English", u"United Kingdom", u"English"),
32
+    (u"en-ID", u"English", u"Indonesia", u"English"),
33
+    (u"en-IE", u"English", u"Ireland", u"English"),
34
+    (u"en-IN", u"English", u"India", u"English"),
35
+    (u"en-MY", u"English", u"Malaysia", u"English"),
36
+    (u"en-NZ", u"English", u"New Zealand", u"English"),
37
+    (u"en-PH", u"English", u"Philippines", u"English"),
38
+    (u"en-SG", u"English", u"Singapore", u"English"),
39
+    (u"en-US", u"English", u"United States", u"English"),
40
+    (u"en-ZA", u"English", u"South Africa", u"English"),
41
+    (u"eo", u"Esperanto", u"", u"Esperanto"),
42
+    (u"es", u"Español", u"", u"Spanish"),
43
+    (u"es-AR", u"Español", u"Argentina", u"Spanish"),
44
+    (u"es-CL", u"Español", u"Chile", u"Spanish"),
45
+    (u"es-CO", u"Español", u"Colombia", u"Spanish"),
46
+    (u"es-ES", u"Español", u"España", u"Spanish"),
47
+    (u"es-MX", u"Español", u"México", u"Spanish"),
48
+    (u"es-PE", u"Español", u"Perú", u"Spanish"),
49
+    (u"es-US", u"Español", u"Estados Unidos", u"Spanish"),
50
+    (u"et-EE", u"Eesti", u"Eesti", u"Estonian"),
51
+    (u"eu", u"Euskara", u"", u"Basque"),
52
+    (u"fa", u"فارسی", u"", u"Persian"),
53
+    (u"fi-FI", u"Suomi", u"Suomi", u"Finnish"),
54
+    (u"fr", u"Français", u"", u"French"),
55
+    (u"fr-BE", u"Français", u"Belgique", u"French"),
56
+    (u"fr-CA", u"Français", u"Canada", u"French"),
57
+    (u"fr-CH", u"Français", u"Suisse", u"French"),
58
+    (u"fr-FR", u"Français", u"France", u"French"),
59
+    (u"ga", u"Gaeilge", u"", u"Irish"),
60
+    (u"gl", u"Galego", u"", u"Galician"),
61
+    (u"gu", u"ગુજરાતી", u"", u"Gujarati"),
62
+    (u"he-IL", u"עברית", u"ישראל", u"Hebrew"),
63
+    (u"hi", u"हिन्दी", u"", u"Hindi"),
64
+    (u"hr-HR", u"Hrvatski", u"Hrvatska", u"Croatian"),
65
+    (u"hu-HU", u"Magyar", u"Magyarország", u"Hungarian"),
66
+    (u"hy", u"Հայերեն", u"", u"Armenian"),
67
+    (u"id-ID", u"Bahasa Indonesia", u"Indonesia", u"Indonesian"),
68
+    (u"is", u"Íslenska", u"", u""),
69
+    (u"it", u"Italiano", u"", u"Italian"),
70
+    (u"it-CH", u"Italiano", u"Svizzera", u"Italian"),
71
+    (u"it-IT", u"Italiano", u"Italia", u"Italian"),
72
+    (u"iw", u"עברית", u"", u""),
73
+    (u"ja-JP", u"日本語", u"日本", u"Japanese"),
74
+    (u"ka", u"ქართული", u"", u"Georgian"),
75
+    (u"kk", u"Қазақша", u"", u"Kazakh"),
76
+    (u"kn", u"ಕನ್ನಡ", u"", u"Kannada"),
77
+    (u"ko-KR", u"한국어", u"대한민국", u"Korean"),
78
+    (u"la", u"Latina", u"", u"Latin"),
79
+    (u"lt-LT", u"Lietuvių", u"Lietuva", u"Lithuanian"),
80
+    (u"lv-LV", u"Latviešu", u"Latvijas Republika", u""),
81
+    (u"mi", u"Reo Māori", u"", u"Maori"),
82
+    (u"min", u"Minangkabau", u"", u"Minangkabau"),
83
+    (u"mk", u"Македонски", u"", u"Macedonian"),
84
+    (u"mn", u"Монгол", u"", u"Mongolian"),
85
+    (u"mr", u"मराठी", u"", u"Marathi"),
86
+    (u"ms-MY", u"Bahasa Melayu", u"Malaysia", u"Malay"),
87
+    (u"mt", u"Malti", u"", u"Maltese"),
88
+    (u"nb-NO", u"Norwegian Bokmål", u"Norge", u"Norwegian Bokmål"),
89
+    (u"nl", u"Nederlands", u"", u"Dutch"),
90
+    (u"nl-BE", u"Nederlands", u"België", u"Dutch"),
91
+    (u"nl-NL", u"Nederlands", u"Nederland", u"Dutch"),
92
+    (u"nn", u"Nynorsk", u"", u"Norwegian"),
93
+    (u"no-NO", u"Norsk", u"Norge", u"Norwegian"),
94
+    (u"oc", u"Occitan", u"", u"Occitan"),
95
+    (u"or", u"Oriya", u"", u"Oriya"),
96
+    (u"pa", u"ਪੰਜਾਬੀ", u"", u"Panjabi"),
97
+    (u"pl-PL", u"Polski", u"Rzeczpospolita Polska", u"Polish"),
98
+    (u"ps", u"Pushto", u"", u"Pushto"),
99
+    (u"pt", u"Português", u"", u"Portuguese"),
100
+    (u"pt-BR", u"Português", u"Brasil", u"Portuguese"),
101
+    (u"pt-PT", u"Português", u"Portugal", u"Portuguese"),
102
+    (u"ro-RO", u"Română", u"România", u"Romanian"),
103
+    (u"ru-RU", u"Русский", u"Россия", u"Russian"),
104
+    (u"rw", u"Ikinyarwanda", u"", u"Kinyarwanda"),
105
+    (u"sh", u"Srpskohrvatski / Српскохрватски", u"", u"Serbo-Croatian"),
106
+    (u"sk-SK", u"Slovenčina", u"Slovenská republika", u"Slovak"),
107
+    (u"sl", u"Slovenščina", u"", u"Slovenian"),
108
+    (u"sr", u"Српски / Srpski", u"", u"Serbian"),
109
+    (u"sv-SE", u"Svenska", u"Sverige", u"Swedish"),
110
+    (u"sw", u"Kiswahili", u"", u""),
111
+    (u"ta", u"தமிழ்", u"", u"Tamil"),
112
+    (u"th-TH", u"ไทย", u"ไทย", u"Thai"),
113
+    (u"ti", u"ትግርኛ", u"", u"Tigrinya"),
114
+    (u"tl-PH", u"Filipino", u"Pilipinas", u""),
115
+    (u"tr-TR", u"Türkçe", u"Türkiye", u"Turkish"),
116
+    (u"tt", u"Татарча", u"", u"Tatar"),
117
+    (u"uk-UA", u"Українська", u"Україна", u"Ukrainian"),
118
+    (u"ur", u"اردو", u"", u"Urdu"),
119
+    (u"uz", u"O‘zbek", u"", u"Uzbek"),
120
+    (u"ve", u"Venda", u"", u"Venda"),
121
+    (u"vi-VN", u"Tiếng Việt", u"Công Hòa Xã Hội Chủ Nghĩa Việt Nam", u"Vietnamese"),
122
+    (u"vo", u"Volapük", u"", u"Volapük"),
123
+    (u"wa", u"Walon", u"", u"Walloon"),
124
+    (u"war", u"Winaray", u"", u"Waray-Waray"),
125
+    (u"xh", u"Xhosa", u"", u"Xhosa"),
126
+    (u"zh", u"中文", u"", u"Chinese"),
127
+    (u"zh-CN", u"中文", u"中国", u"Chinese"),
128
+    (u"zh-HK", u"中文", u"香港", u"Chinese"),
129
+    (u"zh-TW", u"中文", u"台湾", u"Chinese"),
130
+    (u"zu", u"Isi-Zulu", u"", u"Zulu")
131
+)

+ 21
- 2
searx/preferences.py View File

@@ -95,6 +95,25 @@ class MultipleChoiceSetting(EnumStringSetting):
95 95
         resp.set_cookie(name, ','.join(self.value), max_age=COOKIE_MAX_AGE)
96 96
 
97 97
 
98
+class SearchLanguageSetting(EnumStringSetting):
99
+    """Available choices may change, so user's value may not be in choices anymore"""
100
+
101
+    def parse(self, data):
102
+        if data not in self.choices and data != self.value:
103
+            # hack to give some backwards compatibility with old language cookies
104
+            data = str(data).replace('_', '-')
105
+            lang = data.split('-')[0]
106
+            if data in self.choices:
107
+                pass
108
+            elif lang in self.choices:
109
+                data = lang
110
+            elif data == 'ar-XA':
111
+                data = 'ar-SA'
112
+            else:
113
+                data = self.value
114
+        self.value = data
115
+
116
+
98 117
 class MapSetting(Setting):
99 118
     """Setting of a value that has to be translated in order to be storable"""
100 119
 
@@ -216,8 +235,8 @@ class Preferences(object):
216 235
         super(Preferences, self).__init__()
217 236
 
218 237
         self.key_value_settings = {'categories': MultipleChoiceSetting(['general'], choices=categories),
219
-                                   'language': EnumStringSetting(settings['search']['language'],
220
-                                                                 choices=LANGUAGE_CODES),
238
+                                   'language': SearchLanguageSetting(settings['search']['language'],
239
+                                                                     choices=LANGUAGE_CODES),
221 240
                                    'locale': EnumStringSetting(settings['ui']['default_locale'],
222 241
                                                                choices=settings['locales'].keys() + ['']),
223 242
                                    'autocomplete': EnumStringSetting(settings['search']['autocomplete'],

+ 7
- 4
searx/query.py View File

@@ -71,21 +71,24 @@ class RawTextQuery(object):
71 71
                 # check if any language-code is equal with
72 72
                 # declared language-codes
73 73
                 for lc in language_codes:
74
-                    lang_id, lang_name, country = map(str.lower, lc)
74
+                    lang_id, lang_name, country, english_name = map(unicode.lower, lc)
75 75
 
76 76
                     # if correct language-code is found
77 77
                     # set it as new search-language
78 78
                     if lang == lang_id\
79 79
                        or lang_id.startswith(lang)\
80 80
                        or lang == lang_name\
81
+                       or lang == english_name\
81 82
                        or lang.replace('_', ' ') == country:
82 83
                         parse_next = True
83
-                        self.languages.append(lang)
84
-                        break
84
+                        self.languages.append(lang_id)
85
+                        # to ensure best match (first match is not necessarily the best one)
86
+                        if lang == lang_id:
87
+                            break
85 88
 
86 89
             # this force a engine or category
87 90
             if query_part[0] == '!' or query_part[0] == '?':
88
-                prefix = query_part[1:].replace('_', ' ')
91
+                prefix = query_part[1:].replace('-', ' ')
89 92
 
90 93
                 # check if prefix is equal with engine shortcut
91 94
                 if prefix in engine_shortcuts:

+ 5
- 1
searx/search.py View File

@@ -211,10 +211,14 @@ def get_search_query_from_webapp(preferences, form):
211 211
     # set query
212 212
     query = raw_text_query.getSearchQuery()
213 213
 
214
-    # get last selected language in query, if possible
214
+    # set specific language if set on request, query or preferences
215 215
     # TODO support search with multible languages
216 216
     if len(raw_text_query.languages):
217 217
         query_lang = raw_text_query.languages[-1]
218
+    elif 'language' in form:
219
+        query_lang = form.get('language')
220
+    else:
221
+        query_lang = preferences.get_value('language')
218 222
 
219 223
     query_time_range = form.get('time_range')
220 224
 

+ 5
- 0
searx/static/plugins/js/search_on_category_select.js View File

@@ -15,5 +15,10 @@ $(document).ready(function() {
15 15
                 $('#search_form').submit();
16 16
             }
17 17
         });
18
+        $('#language').change(function(e) {
19
+            if($('#q').val()) {
20
+                $('#search_form').submit();
21
+            }
22
+        });
18 23
     }
19 24
 });

+ 3
- 3
searx/templates/courgette/preferences.html View File

@@ -13,9 +13,9 @@
13 13
         <legend>{{ _('Search language') }}</legend>
14 14
         <p>
15 15
             <select name='language'>
16
-                <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Automatic') }}</option>
17
-                {% for lang_id,lang_name,country_name in language_codes | sort(attribute=1) %}
18
-                <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{{ lang_name }} ({{ country_name }}) - {{ lang_id }}</option>
16
+                <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option>
17
+                {% for lang_id,lang_name,country_name,english_name in language_codes | sort(attribute=1) %}
18
+                <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{{ lang_name }} {% if country_name %}({{ country_name }}) {% endif %}- {{ lang_id }}</option>
19 19
                 {% endfor %}
20 20
             </select>
21 21
         </p>

+ 3
- 3
searx/templates/legacy/preferences.html View File

@@ -14,9 +14,9 @@
14 14
         <legend>{{ _('Search language') }}</legend>
15 15
         <p>
16 16
         <select name='language'>
17
-            <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Automatic') }}</option>
18
-            {% for lang_id,lang_name,country_name in language_codes | sort(attribute=1) %}
19
-            <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{{ lang_name }} ({{ country_name }}) - {{ lang_id }}</option>
17
+            <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option>
18
+            {% for lang_id,lang_name,country_name,english_name in language_codes | sort(attribute=1) %}
19
+            <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{{ lang_name }} {% if country_name %}({{ country_name }}) {% endif %}- {{ lang_id }}</option>
20 20
             {% endfor %}
21 21
         </select>
22 22
         </p>

+ 1
- 0
searx/templates/oscar/advanced.html View File

@@ -6,4 +6,5 @@
6 6
 <div id="advanced-search-container">
7 7
     {% include 'oscar/categories.html' %}
8 8
     {% include 'oscar/time-range.html' %}
9
+    {% include 'oscar/languages.html' %}
9 10
 </div>

+ 12
- 0
searx/templates/oscar/languages.html View File

@@ -0,0 +1,12 @@
1
+{% if preferences %}
2
+<select class="form-control" name='language'>
3
+{% else %}
4
+<select class="time_range" id='language' name='language'>
5
+{% endif %}
6
+	<option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option>
7
+		{% for lang_id,lang_name,country_name,english_name in language_codes | sort(attribute=1) %}
8
+		<option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>
9
+			{{ lang_name }} {% if country_name %}({{ country_name }}) {% endif %}- {{ lang_id }}
10
+		</option>
11
+		{% endfor %}
12
+</select>

+ 5
- 6
searx/templates/oscar/preferences.html View File

@@ -40,12 +40,7 @@
40 40
                     {% set language_label = _('Search language') %}
41 41
                     {% set language_info = _('What language do you prefer for search?') %}
42 42
                     {{ preferences_item_header(language_info, language_label, rtl) }}
43
-                        <select class="form-control" name='language'>
44
-                            <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Automatic') }}</option>
45
-                            {% for lang_id,lang_name,country_name in language_codes | sort(attribute=1) %}
46
-                            <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{{ lang_name }} ({{ country_name }}) - {{ lang_id }}</option>
47
-                            {% endfor %}
48
-                        </select>
43
+						{% include 'oscar/languages.html' %}
49 44
                     {{ preferences_item_footer(language_info, language_label, rtl) }}
50 45
 
51 46
                     {% set locale_label = _('Interface language') %}
@@ -153,6 +148,7 @@
153 148
 				    <th>{{ _("Allow") }}</th>
154 149
 				    <th>{{ _("Engine name") }}</th>
155 150
 				    <th>{{ _("Shortcut") }}</th>
151
+				    <th>{{ _("Language support") }}</th>
156 152
 				    <th>{{ _("SafeSearch") }}</th>
157 153
 				    <th>{{ _("Time range") }}</th>
158 154
 				    <th>{{ _("Avg. time") }}</th>
@@ -161,6 +157,7 @@
161 157
 				    <th>{{ _("Max time") }}</th>
162 158
 				    <th>{{ _("Avg. time") }}</th>
163 159
 				    <th>{{ _("SafeSearch") }}</th>
160
+				    <th>{{ _("Language support") }}</th>
164 161
 				    <th>{{ _("Shortcut") }}</th>
165 162
 				    <th>{{ _("Engine name") }}</th>
166 163
 				    <th>{{ _("Allow") }}</th>
@@ -175,6 +172,7 @@
175 172
                                     </td>
176 173
                                     <th>{{ search_engine.name }}</th>
177 174
 				    <td>{{ shortcuts[search_engine.name] }}</td>
175
+				    <td><input type="checkbox" {{ "checked" if current_language == 'all' or current_language in search_engine.supported_languages or current_language.split('-')[0] in search_engine.supported_languages else ""}} readonly="readonly" disabled="disabled"></td>
178 176
 				    <td><input type="checkbox" {{ "checked" if search_engine.safesearch==True else ""}} readonly="readonly" disabled="disabled"></td>
179 177
 				    <td><input type="checkbox" {{ "checked" if search_engine.time_range_support==True else ""}} readonly="readonly" disabled="disabled"></td>
180 178
 				    <td class="{{ 'danger' if stats[search_engine.name]['warn_time'] else '' }}">{{ 'N/A' if stats[search_engine.name].time==None else stats[search_engine.name].time }}</td>
@@ -183,6 +181,7 @@
183 181
 				    <td class="{{ 'danger' if stats[search_engine.name]['warn_timeout'] else '' }}">{{ search_engine.timeout }}</td>
184 182
 				    <td class="{{ 'danger' if stats[search_engine.name]['warn_time'] else '' }}">{{ 'N/A' if stats[search_engine.name].time==None else stats[search_engine.name].time }}</td>
185 183
 				    <td><input type="checkbox" {{ "checked" if search_engine.safesearch==True else ""}} readonly="readonly" disabled="disabled"></td>
184
+				    <td><input type="checkbox" {{ "checked" if current_language == 'all' or current_language in search_engine.supported_languages or current_language.split('-')[0] in search_engine.supported_languages else ""}} readonly="readonly" disabled="disabled"></td>
186 185
 				    <td>{{ shortcuts[search_engine.name] }}</td>
187 186
                                     <th>{{ search_engine.name }}</th>
188 187
                                     <td class="onoff-checkbox">

+ 3
- 3
searx/templates/pix-art/preferences.html View File

@@ -9,9 +9,9 @@
9 9
         <legend>{{ _('Search language') }}</legend>
10 10
         <p>
11 11
         <select name='language'>
12
-            <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Automatic') }}</option>
13
-            {% for lang_id,lang_name,country_name in language_codes | sort(attribute=1) %}
14
-            <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{{ lang_name }} ({{ country_name }}) - {{ lang_id }}</option>
12
+            <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option>
13
+            {% for lang_id,lang_name,country_name,english_name in language_codes | sort(attribute=1) %}
14
+            <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{{ lang_name }} {% if country_name %}({{ country_name }}) {% endif %}- {{ lang_id }}</option>
15 15
             {% endfor %}
16 16
         </select>
17 17
         </p>

+ 8
- 4
searx/webapp.py View File

@@ -330,6 +330,10 @@ def render(template_name, override_theme=None, **kwargs):
330 330
 
331 331
     kwargs['safesearch'] = str(request.preferences.get_value('safesearch'))
332 332
 
333
+    kwargs['language_codes'] = language_codes
334
+    if 'current_language' not in kwargs:
335
+        kwargs['current_language'] = request.preferences.get_value('language')
336
+
333 337
     # override url_for function in templates
334 338
     kwargs['url_for'] = url_for_theme
335 339
 
@@ -510,6 +514,7 @@ def index():
510 514
         answers=result_container.answers,
511 515
         infoboxes=result_container.infoboxes,
512 516
         paging=result_container.paging,
517
+        current_language=search_query.lang,
513 518
         base_url=get_base_url(),
514 519
         theme=get_current_theme_name(),
515 520
         favicons=global_favicons[themes.index(get_current_theme_name())]
@@ -552,7 +557,7 @@ def autocompleter():
552 557
         if not language or language == 'all':
553 558
             language = 'en'
554 559
         else:
555
-            language = language.split('_')[0]
560
+            language = language.split('-')[0]
556 561
         # run autocompletion
557 562
         raw_results.extend(completer(raw_text_query.getSearchQuery(), language))
558 563
 
@@ -615,9 +620,7 @@ def preferences():
615 620
     return render('preferences.html',
616 621
                   locales=settings['locales'],
617 622
                   current_locale=get_locale(),
618
-                  current_language=lang,
619 623
                   image_proxy=image_proxy,
620
-                  language_codes=language_codes,
621 624
                   engines_by_category=categories,
622 625
                   stats=stats,
623 626
                   answerers=[{'info': a.self_info(), 'keywords': a.keywords} for a in answerers],
@@ -627,7 +630,8 @@ def preferences():
627 630
                   themes=themes,
628 631
                   plugins=plugins,
629 632
                   allowed_plugins=allowed_plugins,
630
-                  theme=get_current_theme_name())
633
+                  theme=get_current_theme_name(),
634
+                  preferences=True)
631 635
 
632 636
 
633 637
 @app.route('/image_proxy', methods=['GET'])

+ 3
- 3
tests/robot/test_basic.robot View File

@@ -101,11 +101,11 @@ Change search language
101 101
     Page Should Contain  about
102 102
     Page Should Contain  preferences
103 103
     Go To  http://localhost:11111/preferences
104
-    List Selection Should Be  language  Automatic
105
-    Select From List  language  Turkish (Turkey) - tr_TR
104
+    List Selection Should Be  language  Default language
105
+    Select From List  language  Türkçe (Türkiye) - tr-TR
106 106
     Submit Preferences
107 107
     Go To  http://localhost:11111/preferences
108
-    List Selection Should Be  language  Turkish (Turkey) - tr_TR
108
+    List Selection Should Be  language  Türkçe (Türkiye) - tr-TR
109 109
 
110 110
 Change autocomplete
111 111
     Page Should Contain  about

+ 32
- 0
tests/unit/engines/test_bing.py View File

@@ -86,3 +86,35 @@ class TestBingEngine(SearxTestCase):
86 86
         self.assertEqual(results[0]['title'], 'This should be the title')
87 87
         self.assertEqual(results[0]['url'], 'http://this.should.be.the.link/')
88 88
         self.assertEqual(results[0]['content'], 'This should be the content.')
89
+
90
+    def test_fetch_supported_languages(self):
91
+        html = """<html></html>"""
92
+        response = mock.Mock(text=html)
93
+        results = bing._fetch_supported_languages(response)
94
+        self.assertEqual(type(results), list)
95
+        self.assertEqual(len(results), 0)
96
+
97
+        html = """
98
+        <html>
99
+            <body>
100
+                <form>
101
+                    <div id="limit-languages">
102
+                        <div>
103
+                            <div><input id="es" value="es"></input></div>
104
+                        </div>
105
+                        <div>
106
+                            <div><input id="pt_BR" value="pt_BR"></input></div>
107
+                            <div><input id="pt_PT" value="pt_PT"></input></div>
108
+                        </div>
109
+                    </div>
110
+                </form>
111
+            </body>
112
+        </html>
113
+        """
114
+        response = mock.Mock(text=html)
115
+        languages = bing._fetch_supported_languages(response)
116
+        self.assertEqual(type(languages), list)
117
+        self.assertEqual(len(languages), 3)
118
+        self.assertIn('es', languages)
119
+        self.assertIn('pt-BR', languages)
120
+        self.assertIn('pt-PT', languages)

+ 37
- 0
tests/unit/engines/test_dailymotion.py View File

@@ -1,3 +1,4 @@
1
+# -*- coding: utf-8 -*-
1 2
 from collections import defaultdict
2 3
 import mock
3 4
 from searx.engines import dailymotion
@@ -72,3 +73,39 @@ class TestDailymotionEngine(SearxTestCase):
72 73
         results = dailymotion.response(response)
73 74
         self.assertEqual(type(results), list)
74 75
         self.assertEqual(len(results), 0)
76
+
77
+    def test_fetch_supported_languages(self):
78
+        json = r"""
79
+        {"list":[{"code":"af","name":"Afrikaans","native_name":"Afrikaans",
80
+                  "localized_name":"Afrikaans","display_name":"Afrikaans"},
81
+                 {"code":"ar","name":"Arabic","native_name":"\u0627\u0644\u0639\u0631\u0628\u064a\u0629",
82
+                  "localized_name":"Arabic","display_name":"Arabic"},
83
+                 {"code":"la","name":"Latin","native_name":null,
84
+                  "localized_name":"Latin","display_name":"Latin"}
85
+        ]}
86
+        """
87
+        response = mock.Mock(text=json)
88
+        languages = dailymotion._fetch_supported_languages(response)
89
+        self.assertEqual(type(languages), dict)
90
+        self.assertEqual(len(languages), 3)
91
+        self.assertIn('af', languages)
92
+        self.assertIn('ar', languages)
93
+        self.assertIn('la', languages)
94
+
95
+        self.assertEqual(type(languages['af']), dict)
96
+        self.assertEqual(type(languages['ar']), dict)
97
+        self.assertEqual(type(languages['la']), dict)
98
+
99
+        self.assertIn('name', languages['af'])
100
+        self.assertIn('name', languages['ar'])
101
+        self.assertNotIn('name', languages['la'])
102
+
103
+        self.assertIn('english_name', languages['af'])
104
+        self.assertIn('english_name', languages['ar'])
105
+        self.assertIn('english_name', languages['la'])
106
+
107
+        self.assertEqual(languages['af']['name'], 'Afrikaans')
108
+        self.assertEqual(languages['af']['english_name'], 'Afrikaans')
109
+        self.assertEqual(languages['ar']['name'], u'العربية')
110
+        self.assertEqual(languages['ar']['english_name'], 'Arabic')
111
+        self.assertEqual(languages['la']['english_name'], 'Latin')

+ 26
- 1
tests/unit/engines/test_duckduckgo.py View File

@@ -11,7 +11,7 @@ class TestDuckduckgoEngine(SearxTestCase):
11 11
         query = 'test_query'
12 12
         dicto = defaultdict(dict)
13 13
         dicto['pageno'] = 1
14
-        dicto['language'] = 'de_CH'
14
+        dicto['language'] = 'de-CH'
15 15
         dicto['time_range'] = ''
16 16
         params = duckduckgo.request(query, dicto)
17 17
         self.assertIn('url', params)
@@ -19,6 +19,17 @@ class TestDuckduckgoEngine(SearxTestCase):
19 19
         self.assertIn('duckduckgo.com', params['url'])
20 20
         self.assertIn('ch-de', params['url'])
21 21
 
22
+        # when ddg uses non standard code
23
+        dicto['language'] = 'en-GB'
24
+        params = duckduckgo.request(query, dicto)
25
+        self.assertIn('uk-en', params['url'])
26
+
27
+        # no country given
28
+        duckduckgo.supported_languages = ['de-CH', 'en-US']
29
+        dicto['language'] = 'de'
30
+        params = duckduckgo.request(query, dicto)
31
+        self.assertIn('ch-de', params['url'])
32
+
22 33
     def test_no_url_in_request_year_time_range(self):
23 34
         dicto = defaultdict(dict)
24 35
         query = 'test_query'
@@ -73,3 +84,17 @@ class TestDuckduckgoEngine(SearxTestCase):
73 84
         self.assertEqual(results[0]['title'], 'This is the title')
74 85
         self.assertEqual(results[0]['url'], u'http://this.should.be.the.link/ű')
75 86
         self.assertEqual(results[0]['content'], 'This should be the content.')
87
+
88
+    def test_fetch_supported_languages(self):
89
+        js = """some code...regions:{
90
+        "wt-wt":"All Results","ar-es":"Argentina","au-en":"Australia","at-de":"Austria","be-fr":"Belgium (fr)"
91
+        }some more code..."""
92
+        response = mock.Mock(text=js)
93
+        languages = duckduckgo._fetch_supported_languages(response)
94
+        self.assertEqual(type(languages), list)
95
+        self.assertEqual(len(languages), 5)
96
+        self.assertIn('wt-WT', languages)
97
+        self.assertIn('es-AR', languages)
98
+        self.assertIn('en-AU', languages)
99
+        self.assertIn('de-AT', languages)
100
+        self.assertIn('fr-BE', languages)

+ 4
- 0
tests/unit/engines/test_duckduckgo_definitions.py View File

@@ -21,10 +21,14 @@ class TestDDGDefinitionsEngine(SearxTestCase):
21 21
         query = 'test_query'
22 22
         dicto = defaultdict(dict)
23 23
         dicto['pageno'] = 1
24
+        dicto['language'] = 'es'
24 25
         params = duckduckgo_definitions.request(query, dicto)
25 26
         self.assertIn('url', params)
26 27
         self.assertIn(query, params['url'])
27 28
         self.assertIn('duckduckgo.com', params['url'])
29
+        self.assertIn('headers', params)
30
+        self.assertIn('Accept-Language', params['headers'])
31
+        self.assertIn('es', params['headers']['Accept-Language'])
28 32
 
29 33
     def test_response(self):
30 34
         self.assertRaises(AttributeError, duckduckgo_definitions.response, None)

+ 31
- 0
tests/unit/engines/test_gigablast.py View File

@@ -15,6 +15,12 @@ class TestGigablastEngine(SearxTestCase):
15 15
         self.assertTrue('url' in params)
16 16
         self.assertTrue(query in params['url'])
17 17
         self.assertTrue('gigablast.com' in params['url'])
18
+        self.assertTrue('xx' in params['url'])
19
+
20
+        dicto['language'] = 'en-US'
21
+        params = gigablast.request(query, dicto)
22
+        self.assertTrue('en' in params['url'])
23
+        self.assertFalse('en-US' in params['url'])
18 24
 
19 25
     def test_response(self):
20 26
         self.assertRaises(AttributeError, gigablast.response, None)
@@ -83,3 +89,28 @@ class TestGigablastEngine(SearxTestCase):
83 89
         self.assertEqual(results[0]['title'], 'South by Southwest 2016')
84 90
         self.assertEqual(results[0]['url'], 'www.sxsw.com')
85 91
         self.assertEqual(results[0]['content'], 'This should be the content.')
92
+
93
+    def test_fetch_supported_languages(self):
94
+        html = """<html></html>"""
95
+        response = mock.Mock(text=html)
96
+        results = gigablast._fetch_supported_languages(response)
97
+        self.assertEqual(type(results), list)
98
+        self.assertEqual(len(results), 0)
99
+
100
+        html = """
101
+        <html>
102
+            <body>
103
+                <span id="menu2">
104
+                    <a href="/search?&rxikd=1&qlang=xx"></a>
105
+                    <a href="/search?&rxikd=1&qlang=en"></a>
106
+                    <a href="/search?&rxikd=1&qlang=fr"></a>
107
+                </span>
108
+            </body>
109
+        </html>
110
+        """
111
+        response = mock.Mock(text=html)
112
+        languages = gigablast._fetch_supported_languages(response)
113
+        self.assertEqual(type(languages), list)
114
+        self.assertEqual(len(languages), 2)
115
+        self.assertIn('en', languages)
116
+        self.assertIn('fr', languages)

+ 58
- 1
tests/unit/engines/test_google.py View File

@@ -18,7 +18,7 @@ class TestGoogleEngine(SearxTestCase):
18 18
         query = 'test_query'
19 19
         dicto = defaultdict(dict)
20 20
         dicto['pageno'] = 1
21
-        dicto['language'] = 'fr_FR'
21
+        dicto['language'] = 'fr-FR'
22 22
         dicto['time_range'] = ''
23 23
         params = google.request(query, dicto)
24 24
         self.assertIn('url', params)
@@ -177,3 +177,60 @@ class TestGoogleEngine(SearxTestCase):
177 177
         self.assertEqual(results[0]['title'], '')
178 178
         self.assertEqual(results[0]['content'], '')
179 179
         self.assertEqual(results[0]['img_src'], 'https://this.is.the.image/image.jpg')
180
+
181
+    def test_fetch_supported_languages(self):
182
+        html = """<html></html>"""
183
+        response = mock.Mock(text=html)
184
+        languages = google._fetch_supported_languages(response)
185
+        self.assertEqual(type(languages), dict)
186
+        self.assertEqual(len(languages), 0)
187
+
188
+        html = u"""
189
+        <html>
190
+            <body>
191
+                <table>
192
+                    <tbody>
193
+                        <tr>
194
+                            <td>
195
+                                <font>
196
+                                    <label>
197
+                                        <span id="ten">English</span>
198
+                                    </label>
199
+                                </font>
200
+                            </td>
201
+                            <td>
202
+                                <font>
203
+                                    <label>
204
+                                        <span id="tzh-CN">中文 (简体)</span>
205
+                                    </label>
206
+                                    <label>
207
+                                        <span id="tzh-TW">中文 (繁體)</span>
208
+                                    </label>
209
+                                </font>
210
+                            </td>
211
+                        </tr>
212
+                    </tbody>
213
+                </table>
214
+            </body>
215
+        </html>
216
+        """
217
+        response = mock.Mock(text=html)
218
+        languages = google._fetch_supported_languages(response)
219
+        self.assertEqual(type(languages), dict)
220
+        self.assertEqual(len(languages), 3)
221
+
222
+        self.assertIn('en', languages)
223
+        self.assertIn('zh-CN', languages)
224
+        self.assertIn('zh-TW', languages)
225
+
226
+        self.assertEquals(type(languages['en']), dict)
227
+        self.assertEquals(type(languages['zh-CN']), dict)
228
+        self.assertEquals(type(languages['zh-TW']), dict)
229
+
230
+        self.assertIn('name', languages['en'])
231
+        self.assertIn('name', languages['zh-CN'])
232
+        self.assertIn('name', languages['zh-TW'])
233
+
234
+        self.assertEquals(languages['en']['name'], 'English')
235
+        self.assertEquals(languages['zh-CN']['name'], u'中文 (简体)')
236
+        self.assertEquals(languages['zh-TW']['name'], u'中文 (繁體)')

+ 1
- 1
tests/unit/engines/test_qwant.py View File

@@ -10,7 +10,7 @@ class TestQwantEngine(SearxTestCase):
10 10
         query = 'test_query'
11 11
         dicto = defaultdict(dict)
12 12
         dicto['pageno'] = 0
13
-        dicto['language'] = 'fr_FR'
13
+        dicto['language'] = 'fr-FR'
14 14
         qwant.categories = ['']
15 15
         params = qwant.request(query, dicto)
16 16
         self.assertIn('url', params)

+ 6
- 1
tests/unit/engines/test_subtitleseeker.py View File

@@ -10,6 +10,7 @@ class TestSubtitleseekerEngine(SearxTestCase):
10 10
         query = 'test_query'
11 11
         dicto = defaultdict(dict)
12 12
         dicto['pageno'] = 1
13
+        dicto['language'] = 'fr-FR'
13 14
         params = subtitleseeker.request(query, dicto)
14 15
         self.assertTrue('url' in params)
15 16
         self.assertTrue(query in params['url'])
@@ -17,7 +18,7 @@ class TestSubtitleseekerEngine(SearxTestCase):
17 18
 
18 19
     def test_response(self):
19 20
         dicto = defaultdict(dict)
20
-        dicto['language'] = 'fr_FR'
21
+        dicto['language'] = 'fr-FR'
21 22
         response = mock.Mock(search_params=dicto)
22 23
 
23 24
         self.assertRaises(AttributeError, subtitleseeker.response, None)
@@ -68,6 +69,10 @@ class TestSubtitleseekerEngine(SearxTestCase):
68 69
         self.assertIn('1039 Subs', results[0]['content'])
69 70
         self.assertIn('Alternative Title', results[0]['content'])
70 71
 
72
+        dicto['language'] = 'pt-BR'
73
+        results = subtitleseeker.response(response)
74
+        self.assertEqual(results[0]['url'], 'http://this.is.the.url/Brazilian/')
75
+
71 76
         html = """
72 77
         <div class="boxRows">
73 78
             <div class="boxRowsInner" style="width:600px;">

+ 28
- 1
tests/unit/engines/test_swisscows.py View File

@@ -10,7 +10,7 @@ class TestSwisscowsEngine(SearxTestCase):
10 10
         query = 'test_query'
11 11
         dicto = defaultdict(dict)
12 12
         dicto['pageno'] = 1
13
-        dicto['language'] = 'de_DE'
13
+        dicto['language'] = 'de-DE'
14 14
         params = swisscows.request(query, dicto)
15 15
         self.assertTrue('url' in params)
16 16
         self.assertTrue(query in params['url'])
@@ -126,3 +126,30 @@ class TestSwisscowsEngine(SearxTestCase):
126 126
         self.assertEqual(results[2]['url'], 'http://de.wikipedia.org/wiki/Datei:This should.svg')
127 127
         self.assertEqual(results[2]['img_src'], 'http://ts2.mm.This/should.png')
128 128
         self.assertEqual(results[2]['template'], 'images.html')
129
+
130
+    def test_fetch_supported_languages(self):
131
+        html = """<html></html>"""
132
+        response = mock.Mock(text=html)
133
+        languages = swisscows._fetch_supported_languages(response)
134
+        self.assertEqual(type(languages), list)
135
+        self.assertEqual(len(languages), 0)
136
+
137
+        html = """
138
+        <html>
139
+            <div id="regions-popup">
140
+                <div>
141
+                    <ul>
142
+                        <li><a data-val="browser"></a></li>
143
+                        <li><a data-val="de-CH"></a></li>
144
+                        <li><a data-val="fr-CH"></a></li>
145
+                    </ul>
146
+                </div>
147
+            </div>
148
+        </html>
149
+        """
150
+        response = mock.Mock(text=html)
151
+        languages = swisscows._fetch_supported_languages(response)
152
+        self.assertEqual(type(languages), list)
153
+        self.assertEqual(len(languages), 3)
154
+        self.assertIn('de-CH', languages)
155
+        self.assertIn('fr-CH', languages)

+ 100
- 1
tests/unit/engines/test_wikipedia.py View File

@@ -8,9 +8,11 @@ from searx.testing import SearxTestCase
8 8
 class TestWikipediaEngine(SearxTestCase):
9 9
 
10 10
     def test_request(self):
11
+        wikipedia.supported_languages = ['fr', 'en']
12
+
11 13
         query = 'test_query'
12 14
         dicto = defaultdict(dict)
13
-        dicto['language'] = 'fr_FR'
15
+        dicto['language'] = 'fr-FR'
14 16
         params = wikipedia.request(query, dicto)
15 17
         self.assertIn('url', params)
16 18
         self.assertIn(query, params['url'])
@@ -27,6 +29,10 @@ class TestWikipediaEngine(SearxTestCase):
27 29
         params = wikipedia.request(query, dicto)
28 30
         self.assertIn('en', params['url'])
29 31
 
32
+        dicto['language'] = 'xx'
33
+        params = wikipedia.request(query, dicto)
34
+        self.assertIn('en', params['url'])
35
+
30 36
     def test_response(self):
31 37
         dicto = defaultdict(dict)
32 38
         dicto['language'] = 'fr'
@@ -158,3 +164,96 @@ class TestWikipediaEngine(SearxTestCase):
158 164
         self.assertEqual(len(results), 2)
159 165
         self.assertEqual(results[1]['infobox'], u'披頭四樂隊')
160 166
         self.assertIn(u'披头士乐队...', results[1]['content'])
167
+
168
+    def test_fetch_supported_languages(self):
169
+        html = u"""<html></html>"""
170
+        response = mock.Mock(text=html)
171
+        languages = wikipedia._fetch_supported_languages(response)
172
+        self.assertEqual(type(languages), dict)
173
+        self.assertEqual(len(languages), 0)
174
+
175
+        html = u"""
176
+        <html>
177
+            <body>
178
+                <div>
179
+                    <div>
180
+                        <h3>Table header</h3>
181
+                        <table class="sortable jquery-tablesorter">
182
+                            <thead>
183
+                                <tr>
184
+                                    <th>N</th>
185
+                                    <th>Language</th>
186
+                                    <th>Language (local)</th>
187
+                                    <th>Wiki</th>
188
+                                    <th>Articles</th>
189
+                                </tr>
190
+                            </thead>
191
+                            <tbody>
192
+                                <tr>
193
+                                    <td>2</td>
194
+                                    <td><a>Swedish</a></td>
195
+                                    <td><a>Svenska</a></td>
196
+                                    <td><a>sv</a></td>
197
+                                    <td><a><b>3000000</b></a></td>
198
+                                </tr>
199
+                                <tr>
200
+                                    <td>3</td>
201
+                                    <td><a>Cebuano</a></td>
202
+                                    <td><a>Sinugboanong Binisaya</a></td>
203
+                                    <td><a>ceb</a></td>
204
+                                    <td><a><b>3000000</b></a></td>
205
+                                </tr>
206
+                            </tbody>
207
+                        </table>
208
+                        <h3>Table header</h3>
209
+                        <table class="sortable jquery-tablesorter">
210
+                            <thead>
211
+                                <tr>
212
+                                    <th>N</th>
213
+                                    <th>Language</th>
214
+                                    <th>Language (local)</th>
215
+                                    <th>Wiki</th>
216
+                                    <th>Articles</th>
217
+                                </tr>
218
+                            </thead>
219
+                            <tbody>
220
+                                <tr>
221
+                                    <td>2</td>
222
+                                    <td><a>Norwegian (Bokmål)</a></td>
223
+                                    <td><a>Norsk (Bokmål)</a></td>
224
+                                    <td><a>no</a></td>
225
+                                    <td><a><b>100000</b></a></td>
226
+                                </tr>
227
+                            </tbody>
228
+                        </table>
229
+                    </div>
230
+                </div>
231
+            </body>
232
+        </html>
233
+        """
234
+        response = mock.Mock(text=html)
235
+        languages = wikipedia._fetch_supported_languages(response)
236
+        self.assertEqual(type(languages), dict)
237
+        self.assertEqual(len(languages), 3)
238
+
239
+        self.assertIn('sv', languages)
240
+        self.assertIn('ceb', languages)
241
+        self.assertIn('no', languages)
242
+
243
+        self.assertEqual(type(languages['sv']), dict)
244
+        self.assertEqual(type(languages['ceb']), dict)
245
+        self.assertEqual(type(languages['no']), dict)
246
+
247
+        self.assertIn('name', languages['sv'])
248
+        self.assertIn('english_name', languages['sv'])
249
+        self.assertIn('articles', languages['sv'])
250
+
251
+        self.assertEqual(languages['sv']['name'], 'Svenska')
252
+        self.assertEqual(languages['sv']['english_name'], 'Swedish')
253
+        self.assertEqual(languages['sv']['articles'], 3000000)
254
+        self.assertEqual(languages['ceb']['name'], 'Sinugboanong Binisaya')
255
+        self.assertEqual(languages['ceb']['english_name'], 'Cebuano')
256
+        self.assertEqual(languages['ceb']['articles'], 3000000)
257
+        self.assertEqual(languages['no']['name'], u'Norsk (Bokmål)')
258
+        self.assertEqual(languages['no']['english_name'], u'Norwegian (Bokmål)')
259
+        self.assertEqual(languages['no']['articles'], 100000)

+ 30
- 0
tests/unit/engines/test_yahoo.py View File

@@ -147,3 +147,33 @@ class TestYahooEngine(SearxTestCase):
147 147
         results = yahoo.response(response)
148 148
         self.assertEqual(type(results), list)
149 149
         self.assertEqual(len(results), 0)
150
+
151
+    def test_fetch_supported_languages(self):
152
+        html = """<html></html>"""
153
+        response = mock.Mock(text=html)
154
+        results = yahoo._fetch_supported_languages(response)
155
+        self.assertEqual(type(results), list)
156
+        self.assertEqual(len(results), 0)
157
+
158
+        html = """
159
+        <html>
160
+            <div>
161
+                <div id="yschlang">
162
+                    <span>
163
+                        <label><input value="lang_ar"></input></label>
164
+                    </span>
165
+                    <span>
166
+                        <label><input value="lang_zh_chs"></input></label>
167
+                        <label><input value="lang_zh_cht"></input></label>
168
+                    </span>
169
+                </div>
170
+            </div>
171
+        </html>
172
+        """
173
+        response = mock.Mock(text=html)
174
+        languages = yahoo._fetch_supported_languages(response)
175
+        self.assertEqual(type(languages), list)
176
+        self.assertEqual(len(languages), 3)
177
+        self.assertIn('ar', languages)
178
+        self.assertIn('zh-chs', languages)
179
+        self.assertIn('zh-cht', languages)

+ 22
- 1
tests/unit/test_preferences.py View File

@@ -1,4 +1,4 @@
1
-from searx.preferences import (EnumStringSetting, MapSetting, MissingArgumentException,
1
+from searx.preferences import (EnumStringSetting, MapSetting, MissingArgumentException, SearchLanguageSetting,
2 2
                                MultipleChoiceSetting, PluginsSetting, ValidationException)
3 3
 from searx.testing import SearxTestCase
4 4
 
@@ -88,6 +88,27 @@ class TestSettings(SearxTestCase):
88 88
         setting.parse('2')
89 89
         self.assertEquals(setting.get_value(), ['2'])
90 90
 
91
+    # search language settings
92
+    def test_lang_setting_valid_choice(self):
93
+        setting = SearchLanguageSetting('all', choices=['all', 'de', 'en'])
94
+        setting.parse('de')
95
+        self.assertEquals(setting.get_value(), 'de')
96
+
97
+    def test_lang_setting_invalid_choice(self):
98
+        setting = SearchLanguageSetting('all', choices=['all', 'de', 'en'])
99
+        setting.parse('xx')
100
+        self.assertEquals(setting.get_value(), 'all')
101
+
102
+    def test_lang_setting_old_cookie_choice(self):
103
+        setting = SearchLanguageSetting('all', choices=['all', 'es', 'es-ES'])
104
+        setting.parse('es_XA')
105
+        self.assertEquals(setting.get_value(), 'es')
106
+
107
+    def test_lang_setting_old_cookie_format(self):
108
+        setting = SearchLanguageSetting('all', choices=['all', 'es', 'es-ES'])
109
+        setting.parse('es_ES')
110
+        self.assertEquals(setting.get_value(), 'es-ES')
111
+
91 112
     # plugins settings
92 113
     def test_plugins_setting_all_default_enabled(self):
93 114
         plugin1 = PluginStub('plugin1', True)

+ 171
- 0
utils/fetch_languages.py View File

@@ -0,0 +1,171 @@
1
+# -*- coding: utf-8 -*-
2
+
3
+# This script generates languages.py by merging the languages supported by each engine.
4
+#
5
+# The country names are obtained from http://api.geonames.org which requires registering as a user.
6
+#
7
+# Output files (engines_languages.json and languages.py)
8
+# are written in current directory to avoid overwriting in case something goes wrong.
9
+
10
+from requests import get
11
+from urllib import urlencode
12
+from lxml.html import fromstring
13
+from json import loads, dumps
14
+import io
15
+from sys import path
16
+path.append('../searx')  # noqa
17
+from searx.engines import engines
18
+
19
# Geonames API, queried for country names.
# ADD USER NAME HERE: while this stays empty, get_country_name() returns ''
# without sending any request.
geonames_user = ''
country_names_url = 'http://api.geonames.org/countryInfoJSON?{parameters}'

# Names of the generated files (relative, hence written to the current directory).
engines_languages_file = 'engines_languages.json'
languages_file = 'languages.py'

# engine name -> languages fetched from that engine
engines_languages = dict()
# language code -> data collected for that language
languages = dict()
30
+
31
# To filter out invalid codes and dialects.
def valid_code(lang_code):
    """Tell whether ``lang_code`` may be included in the generated language list.

    Returns False for codes starting with 'xx', for the explicitly rejected
    codes and countries listed below and for anything is_dialect() flags;
    True otherwise.
    """
    # sl-SL is technically not invalid, but still a mistake
    rejected_codes = ('sl-SL', 'wt-WT', 'jw')
    rejected_countries = ('UK', 'XA', 'XL')

    if lang_code[:2] == 'xx':
        return False
    if lang_code in rejected_codes:
        return False
    # the country is taken to be the last two characters of the code
    if lang_code[-2:] in rejected_countries:
        return False
    if is_dialect(lang_code):
        return False

    return True
44
+
45
+
46
# Language codes with any additional tags other than language and country.
def is_dialect(lang_code):
    """Tell whether ``lang_code`` is more than a plain language or language-country code.

    A code counts as a dialect when it has more than two dash-separated tags,
    when its first tag is longer than three characters, or when the second of
    exactly two tags is longer than two characters.
    """
    tags = lang_code.split('-')
    language_too_long = len(tags[0]) > 3

    if len(tags) == 1:
        return language_too_long
    if len(tags) == 2:
        return language_too_long or len(tags[1]) > 2

    # three or more tags
    return True
55
+
56
+
57
# Get country name in specified language.
def get_country_name(locale):
    """Look up the name of a locale's country on geonames.

    ``locale`` is expected as "language-COUNTRY" (e.g. ``de-CH``); the
    language part is sent as the ``lang`` parameter of the request.

    Returns '' when no geonames user is configured, when ``locale`` does not
    consist of exactly two dash-separated parts, or when the response does
    not contain exactly one result. Otherwise returns the ``countryName`` of
    that result ('' if the field is missing).
    """
    # compare by value; an identity test against a string literal only works
    # by accident of string interning
    if not geonames_user:
        return ''

    parts = locale.split('-')
    if len(parts) != 2:
        return ''
    language, country = parts

    query = urlencode({'lang': language,
                       'country': country,
                       'username': geonames_user})
    response = get(country_names_url.format(parameters=query))
    content = loads(response.text).get('geonames', None)
    if content is None or len(content) != 1:
        print("No country name found for " + language + "-" + country)
        return ''

    return content[0].get('countryName', '')
77
+
78
+
79
# Fetches supported languages for each engine and writes json file with those.
def fetch_supported_languages():
    """Collect every engine's supported languages and dump them as JSON.

    Engines without a ``fetch_supported_languages`` attribute are skipped.
    The results are stored in ``engines_languages`` (keyed by engine name)
    and written to ``engines_languages_file``.
    """
    for engine_name in engines:
        engine = engines[engine_name]
        if not hasattr(engine, 'fetch_supported_languages'):
            continue

        try:
            engines_languages[engine_name] = engine.fetch_supported_languages()
        except Exception as e:
            # best effort: a failing engine must not abort the whole run,
            # but the output has to tell which engine failed
            print("Failed to fetch languages for " + engine_name + ": " + repr(e))

    # write json file
    # NOTE: `unicode` and the `encoding` argument of dumps() exist in Python 2 only
    with io.open(engines_languages_file, "w", encoding="utf-8") as f:
        f.write(unicode(dumps(engines_languages, ensure_ascii=False, encoding="utf-8")))
91
+
92
+
93
# Join all language lists.
# Iterate all languages supported by each engine.
def join_language_lists():
    """Merge the languages of all engines into ``languages``.

    Steps:
      1. Seed ``languages`` with the valid wikipedia entries.
      2. Add every valid locale of every engine that is still missing or
         unnamed; engines that report a dict contribute their entry, engines
         that report a plain list contribute an empty one.
      3. Complete each entry: take missing names from the entry of the bare
         language, drop locales that remain without a name and look up the
         country name of "language-COUNTRY" locales.
    """
    # include wikipedia first for more accurate language names
    # (.get: the wikipedia entry is missing when fetching it failed)
    wikipedia_languages = engines_languages.get('wikipedia', {})
    languages.update({code: lang for code, lang
                      in wikipedia_languages.items()
                      if valid_code(code)})

    for engine_name in engines_languages:
        engine_languages = engines_languages[engine_name]
        for locale in engine_languages:
            if not valid_code(locale):
                continue

            # if language is not on list or if it has no name yet
            if locale not in languages or not languages[locale].get('name'):
                if isinstance(engine_languages, dict):
                    languages[locale] = engine_languages[locale]
                else:
                    languages[locale] = {}

    # get locales that have no name or country yet
    # (iterate over a copy of the keys, entries are deleted inside the loop)
    for locale in list(languages):
        # entry of the bare language, e.g. 'de' for 'de-CH'
        base = languages.get(locale.split('-')[0], {})

        # try to get language names
        if not languages[locale].get('name'):
            name = base.get('name', None)
            if not name:
                # filter out locales with no name
                del languages[locale]
                continue
            languages[locale]['name'] = name

        # try to get language name in english
        if not languages[locale].get('english_name'):
            languages[locale]['english_name'] = base.get('english_name', '')

        # try to get country name
        if locale.find('-') > 0 and not languages[locale].get('country'):
            languages[locale]['country'] = get_country_name(locale) or ''
132
+
133
+
134
# Remove countryless language if language is featured in only one country.
def filter_single_country_languages():
    """Drop the bare entry of languages that have exactly one regional variant.

    Example: with 'sv' and 'sv-SE' as the only Swedish entries, 'sv' is
    removed and 'sv-SE' is kept. Languages with several regional variants,
    or with none, are left untouched.

    Counting the variants per language (instead of walking the sorted codes)
    also covers the alphabetically last language and does not fail when a
    language has regional variants but no bare entry.
    """
    variants_per_language = {}
    for code in languages:
        lang, dash, _country = code.partition('-')
        if dash:
            variants_per_language[lang] = variants_per_language.get(lang, 0) + 1

    for lang, variants in variants_per_language.items():
        if variants == 1:
            # the bare entry may not exist at all
            languages.pop(lang, None)
146
+
147
+
148
# Write languages.py.
def write_languages_file():
    """Write ``languages`` to ``languages_file`` as a Python module.

    The module defines ``language_codes``, a tuple holding one
    ``(code, name, country, english_name)`` tuple per language, sorted by
    code. Names are cut at the first " (".

    Every entry keeps its trailing comma, so the generated literal is a tuple
    of tuples for any number of languages, including none or a single one.
    """
    # NOTE(review): the generated header mentions utils/update_search_languages.py,
    # which is not the name this script is shown under - confirm which is right.
    header = u'# -*- coding: utf-8 -*-\n'\
             u'# list of language codes\n'\
             u'# this file is generated automatically by utils/update_search_languages.py\n'\
             u'\nlanguage_codes = ('

    entries = []
    for code in sorted(languages):
        lang = languages[code]
        entries.append(u'\n    (u"{0}", u"{1}", u"{2}", u"{3}"),'.format(
            code,
            lang['name'].split(' (')[0],
            lang.get('country', ''),
            lang.get('english_name', '').split(' (')[0]))

    file_content = header + u''.join(entries) + u'\n)\n'

    # the context manager closes the file even when writing fails
    with io.open(languages_file, 'w', encoding='utf-8') as new_file:
        new_file.write(file_content)
165
+
166
+
167
if __name__ == "__main__":
    # the steps depend on each other and have to run in this order:
    # fetch per-engine data, merge it, prune it, write the result
    pipeline = (
        fetch_supported_languages,
        join_language_lists,
        filter_single_country_languages,
        write_languages_file,
    )
    for step in pipeline:
        step()
+    write_languages_file()