Browse Source

tests for _fetch_supported_languages in engines

marc 8 years ago
parent
commit
af35eee10b

+ 1
- 3256
searx/data/engines_languages.json
File diff suppressed because it is too large
View File


BIN
searx/engines/.yandex.py.swp View File


+ 10
- 3
searx/engines/__init__.py View File

@@ -21,6 +21,7 @@ import sys
21 21
 from flask_babel import gettext
22 22
 from operator import itemgetter
23 23
 from json import loads
24
+from requests import get
24 25
 from searx import settings
25 26
 from searx import logger
26 27
 from searx.utils import load_module
@@ -79,9 +80,6 @@ def load_engine(engine_data):
79 80
         if not hasattr(engine, arg_name):
80 81
             setattr(engine, arg_name, arg_value)
81 82
 
82
-    if engine_data['name'] in languages:
83
-        setattr(engine, 'supported_languages', languages[engine_data['name']])
84
-
85 83
     # checking required variables
86 84
     for engine_attr in dir(engine):
87 85
         if engine_attr.startswith('_'):
@@ -91,6 +89,15 @@ def load_engine(engine_data):
91 89
                          .format(engine.name, engine_attr))
92 90
             sys.exit(1)
93 91
 
92
+    # assign supported languages from json file
93
+    if engine_data['name'] in languages:
94
+        setattr(engine, 'supported_languages', languages[engine_data['name']])
95
+
96
+    # assign language fetching method if auxiliary method exists
97
+    if hasattr(engine, '_fetch_supported_languages'):
98
+        setattr(engine, 'fetch_supported_languages',
99
+                lambda: engine._fetch_supported_languages(get(engine.supported_languages_url)))
100
+
94 101
     engine.stats = {
95 102
         'result_count': 0,
96 103
         'search_count': 0,

+ 2
- 4
searx/engines/bing.py View File

@@ -15,7 +15,6 @@
15 15
 
16 16
 from urllib import urlencode
17 17
 from lxml import html
18
-from requests import get
19 18
 from searx.engines.xpath import extract_text
20 19
 
21 20
 # engine dependent config
@@ -86,10 +85,9 @@ def response(resp):
86 85
 
87 86
 
88 87
 # get supported languages from their site
89
-def fetch_supported_languages():
88
+def _fetch_supported_languages(resp):
90 89
     supported_languages = []
91
-    response = get(supported_languages_url)
92
-    dom = html.fromstring(response.text)
90
+    dom = html.fromstring(resp.text)
93 91
     options = dom.xpath('//div[@id="limit-languages"]//input')
94 92
     for option in options:
95 93
         code = option.xpath('./@id')[0].replace('_', '-')

+ 1
- 1
searx/engines/bing_images.py View File

@@ -19,7 +19,7 @@ from urllib import urlencode
19 19
 from lxml import html
20 20
 from json import loads
21 21
 import re
22
-from searx.engines.bing import fetch_supported_languages
22
+from searx.engines.bing import _fetch_supported_languages, supported_languages_url
23 23
 
24 24
 # engine dependent config
25 25
 categories = ['images']

+ 1
- 1
searx/engines/bing_news.py View File

@@ -17,7 +17,7 @@ from datetime import datetime
17 17
 from dateutil import parser
18 18
 from lxml import etree
19 19
 from searx.utils import list_get
20
-from searx.engines.bing import fetch_supported_languages
20
+from searx.engines.bing import _fetch_supported_languages, supported_languages_url
21 21
 
22 22
 # engine dependent config
23 23
 categories = ['news']

+ 2
- 3
searx/engines/dailymotion.py View File

@@ -80,11 +80,10 @@ def response(resp):
80 80
 
81 81
 
82 82
 # get supported languages from their site
83
-def fetch_supported_languages():
83
+def _fetch_supported_languages(resp):
84 84
     supported_languages = {}
85 85
 
86
-    response = get(supported_languages_url)
87
-    response_json = loads(response.text)
86
+    response_json = loads(resp.text)
88 87
 
89 88
     for language in response_json['list']:
90 89
         supported_languages[language['code']] = {}

+ 2
- 3
searx/engines/duckduckgo.py View File

@@ -119,11 +119,10 @@ def response(resp):
119 119
 
120 120
 
121 121
 # get supported languages from their site
122
-def fetch_supported_languages():
123
-    response = get(supported_languages_url)
122
+def _fetch_supported_languages(resp):
124 123
 
125 124
     # response is a js file with regions as an embedded object
126
-    response_page = response.text
125
+    response_page = resp.text
127 126
     response_page = response_page[response_page.find('regions:{') + 8:]
128 127
     response_page = response_page[:response_page.find('}') + 1]
129 128
 

+ 1
- 1
searx/engines/duckduckgo_definitions.py View File

@@ -4,7 +4,7 @@ from re import compile, sub
4 4
 from lxml import html
5 5
 from searx.utils import html_to_text
6 6
 from searx.engines.xpath import extract_text
7
-from searx.engines.duckduckgo import fetch_supported_languages
7
+from searx.engines.duckduckgo import _fetch_supported_languages, supported_languages_url
8 8
 
9 9
 url = 'https://api.duckduckgo.com/'\
10 10
     + '?{query}&format=json&pretty=0&no_redirect=1&d=1'

+ 2
- 4
searx/engines/gigablast.py View File

@@ -14,7 +14,6 @@ from json import loads
14 14
 from random import randint
15 15
 from time import time
16 16
 from urllib import urlencode
17
-from requests import get
18 17
 from lxml.html import fromstring
19 18
 
20 19
 # engine dependent config
@@ -91,10 +90,9 @@ def response(resp):
91 90
 
92 91
 
93 92
 # get supported languages from their site
94
-def fetch_supported_languages():
93
+def _fetch_supported_languages(resp):
95 94
     supported_languages = []
96
-    response = get(supported_languages_url)
97
-    dom = fromstring(response.text)
95
+    dom = fromstring(resp.text)
98 96
     links = dom.xpath('//span[@id="menu2"]/a')
99 97
     for link in links:
100 98
         code = link.xpath('./@href')[0][-2:]

+ 5
- 7
searx/engines/google.py View File

@@ -12,7 +12,6 @@ import re
12 12
 from urllib import urlencode
13 13
 from urlparse import urlparse, parse_qsl
14 14
 from lxml import html, etree
15
-from requests import get
16 15
 from searx.engines.xpath import extract_text, extract_url
17 16
 from searx.search import logger
18 17
 
@@ -364,14 +363,13 @@ def attributes_to_html(attributes):
364 363
 
365 364
 
366 365
 # get supported languages from their site
367
-def fetch_supported_languages():
366
+def _fetch_supported_languages(resp):
368 367
     supported_languages = {}
369
-    response = get(supported_languages_url)
370
-    dom = html.fromstring(response.text)
371
-    options = dom.xpath('//select[@name="hl"]/option')
368
+    dom = html.fromstring(resp.text)
369
+    options = dom.xpath('//table//td/font/label/span')
372 370
     for option in options:
373
-        code = option.xpath('./@value')[0].split('-')[0]
374
-        name = option.text[:-1].title()
371
+        code = option.xpath('./@id')[0][1:]
372
+        name = option.text.title()
375 373
         supported_languages[code] = {"name": name}
376 374
 
377 375
     return supported_languages

+ 1
- 1
searx/engines/google_news.py View File

@@ -13,7 +13,7 @@
13 13
 from lxml import html
14 14
 from urllib import urlencode
15 15
 from json import loads
16
-from searx.engines.google import fetch_supported_languages
16
+from searx.engines.google import _fetch_supported_languages, supported_languages_url
17 17
 
18 18
 # search-url
19 19
 categories = ['news']

+ 4
- 4
searx/engines/swisscows.py View File

@@ -13,7 +13,6 @@
13 13
 from json import loads
14 14
 from urllib import urlencode, unquote
15 15
 import re
16
-from requests import get
17 16
 from lxml.html import fromstring
18 17
 
19 18
 # engine dependent config
@@ -25,6 +24,8 @@ language_support = True
25 24
 base_url = 'https://swisscows.ch/'
26 25
 search_string = '?{query}&page={page}'
27 26
 
27
+supported_languages_url = base_url
28
+
28 29
 # regex
29 30
 regex_json = re.compile(r'initialData: {"Request":(.|\n)*},\s*environment')
30 31
 regex_json_remove_start = re.compile(r'^initialData:\s*')
@@ -113,10 +114,9 @@ def response(resp):
113 114
 
114 115
 
115 116
 # get supported languages from their site
116
-def fetch_supported_languages():
117
+def _fetch_supported_languages(resp):
117 118
     supported_languages = []
118
-    response = get(base_url)
119
-    dom = fromstring(response.text)
119
+    dom = fromstring(resp.text)
120 120
     options = dom.xpath('//div[@id="regions-popup"]//ul/li/a')
121 121
     for option in options:
122 122
         code = option.xpath('./@data-val')[0]

+ 1
- 1
searx/engines/wikidata.py View File

@@ -15,7 +15,7 @@ from searx import logger
15 15
 from searx.poolrequests import get
16 16
 from searx.engines.xpath import extract_text
17 17
 from searx.utils import format_date_by_locale
18
-from searx.engines.wikipedia import fetch_supported_languages
18
+from searx.engines.wikipedia import _fetch_supported_languages, supported_languages_url
19 19
 
20 20
 from json import loads
21 21
 from lxml.html import fromstring

+ 2
- 4
searx/engines/wikipedia.py View File

@@ -12,7 +12,6 @@
12 12
 
13 13
 from json import loads
14 14
 from urllib import urlencode, quote
15
-from requests import get
16 15
 from lxml.html import fromstring
17 16
 
18 17
 
@@ -119,10 +118,9 @@ def response(resp):
119 118
 
120 119
 
121 120
 # get supported languages from their site
122
-def fetch_supported_languages():
121
+def _fetch_supported_languages(resp):
123 122
     supported_languages = {}
124
-    response = get(supported_languages_url)
125
-    dom = fromstring(response.text)
123
+    dom = fromstring(resp.text)
126 124
     tables = dom.xpath('//table[contains(@class,"sortable")]')
127 125
     for table in tables:
128 126
         # exclude header row

+ 3
- 5
searx/engines/yahoo.py View File

@@ -14,7 +14,6 @@
14 14
 from urllib import urlencode
15 15
 from urlparse import unquote
16 16
 from lxml import html
17
-from requests import get
18 17
 from searx.engines.xpath import extract_text, extract_url
19 18
 
20 19
 # engine dependent config
@@ -144,13 +143,12 @@ def response(resp):
144 143
 
145 144
 
146 145
 # get supported languages from their site
147
-def fetch_supported_languages():
146
+def _fetch_supported_languages(resp):
148 147
     supported_languages = []
149
-    response = get(supported_languages_url)
150
-    dom = html.fromstring(response.text)
148
+    dom = html.fromstring(resp.text)
151 149
     options = dom.xpath('//div[@id="yschlang"]/span/label/input')
152 150
     for option in options:
153
-        code = option.xpath('./@value')[0][5:]
151
+        code = option.xpath('./@value')[0][5:].replace('_', '-')
154 152
         supported_languages.append(code)
155 153
 
156 154
     return supported_languages

+ 1
- 1
searx/engines/yahoo_news.py View File

@@ -12,7 +12,7 @@
12 12
 from urllib import urlencode
13 13
 from lxml import html
14 14
 from searx.engines.xpath import extract_text, extract_url
15
-from searx.engines.yahoo import parse_url, fetch_supported_languages
15
+from searx.engines.yahoo import parse_url, _fetch_supported_languages, supported_languages_url
16 16
 from datetime import datetime, timedelta
17 17
 import re
18 18
 from dateutil import parser

+ 25
- 85
searx/languages.py View File

@@ -3,36 +3,27 @@
3 3
 # this file is generated automatically by utils/update_search_languages.py
4 4
 
5 5
 language_codes = (
6
-    (u"ach", u"Acoli", u"", u""),
7 6
     (u"af", u"Afrikaans", u"", u""),
8
-    (u"ak", u"Akan", u"", u""),
9
-    (u"am", u"አማርኛ", u"", u""),
7
+    (u"am", u"አማርኛ", u"", u"Amharic"),
10 8
     (u"ar-SA", u"العربية", u"المملكة العربية السعودية", u"Arabic"),
11 9
     (u"az", u"Azərbaycanca", u"", u"Azerbaijani"),
12
-    (u"ban", u"Balinese", u"", u""),
13 10
     (u"be", u"Беларуская", u"", u"Belarusian"),
14
-    (u"bem", u"Ichibemba", u"", u""),
15 11
     (u"bg-BG", u"Български", u"България", u"Bulgarian"),
16
-    (u"bn", u"বাংলা", u"", u""),
17
-    (u"br", u"Brezhoneg", u"", u""),
18
-    (u"bs", u"Bosanski", u"", u""),
12
+    (u"bn", u"বাংলা", u"", u"Bengali"),
13
+    (u"br", u"Brezhoneg", u"", u"Breton"),
14
+    (u"bs", u"Bosnian", u"", u"Bosnian"),
19 15
     (u"ca", u"Català", u"", u"Catalan"),
20 16
     (u"ca-CT", u"Català", u"", u"Catalan"),
21 17
     (u"ca-ES", u"Català", u"Espanya", u"Catalan"),
22 18
     (u"ce", u"Нохчийн", u"", u"Chechen"),
23 19
     (u"ceb", u"Sinugboanong Binisaya", u"", u"Cebuano"),
24
-    (u"chr", u"ᏣᎳᎩ", u"", u""),
25
-    (u"ckb", u"Central Kurdish", u"", u""),
26
-    (u"co", u"Corsican", u"", u""),
27
-    (u"crs", u"Seychellois Creole", u"", u""),
28 20
     (u"cs-CZ", u"Čeština", u"Česko", u"Czech"),
29
-    (u"cy", u"Cymraeg", u"", u""),
21
+    (u"cy", u"Cymraeg", u"", u"Welsh"),
30 22
     (u"da-DK", u"Dansk", u"Danmark", u"Danish"),
31 23
     (u"de", u"Deutsch", u"", u"German"),
32 24
     (u"de-AT", u"Deutsch", u"Österreich", u"German"),
33 25
     (u"de-CH", u"Deutsch", u"Schweiz", u"German"),
34 26
     (u"de-DE", u"Deutsch", u"Deutschland", u"German"),
35
-    (u"ee", u"Eʋegbe", u"", u""),
36 27
     (u"el-GR", u"Ελληνικά", u"Ελλάδα", u"Greek"),
37 28
     (u"en", u"English", u"", u"English"),
38 29
     (u"en-AU", u"English", u"Australia", u"English"),
@@ -60,30 +51,20 @@ language_codes = (
60 51
     (u"eu", u"Euskara", u"", u"Basque"),
61 52
     (u"fa", u"فارسی", u"", u"Persian"),
62 53
     (u"fi-FI", u"Suomi", u"Suomi", u"Finnish"),
63
-    (u"fo", u"Føroyskt", u"", u""),
64 54
     (u"fr", u"Français", u"", u"French"),
65 55
     (u"fr-BE", u"Français", u"Belgique", u"French"),
66 56
     (u"fr-CA", u"Français", u"Canada", u"French"),
67 57
     (u"fr-CH", u"Français", u"Suisse", u"French"),
68 58
     (u"fr-FR", u"Français", u"France", u"French"),
69
-    (u"fy", u"West-Frysk", u"", u""),
70
-    (u"ga", u"Gaeilge", u"", u""),
71
-    (u"gaa", u"Ga", u"", u""),
72
-    (u"gd", u"Gàidhlig", u"", u""),
59
+    (u"ga", u"Gaeilge", u"", u"Irish"),
73 60
     (u"gl", u"Galego", u"", u"Galician"),
74
-    (u"gn", u"Guarani", u"", u""),
75
-    (u"gu", u"ગુજરાતી", u"", u""),
76
-    (u"ha", u"Hausa", u"", u""),
77
-    (u"haw", u"ʻŌlelo HawaiʻI", u"", u""),
61
+    (u"gu", u"ગુજરાતી", u"", u"Gujarati"),
78 62
     (u"he-IL", u"עברית", u"ישראל", u"Hebrew"),
79 63
     (u"hi", u"हिन्दी", u"", u"Hindi"),
80 64
     (u"hr-HR", u"Hrvatski", u"Hrvatska", u"Croatian"),
81
-    (u"ht", u"Haitian Creole", u"", u""),
82 65
     (u"hu-HU", u"Magyar", u"Magyarország", u"Hungarian"),
83 66
     (u"hy", u"Հայերեն", u"", u"Armenian"),
84
-    (u"ia", u"Interlingua", u"", u""),
85 67
     (u"id-ID", u"Bahasa Indonesia", u"Indonesia", u"Indonesian"),
86
-    (u"ig", u"Igbo", u"", u""),
87 68
     (u"is", u"Íslenska", u"", u""),
88 69
     (u"it", u"Italiano", u"", u"Italian"),
89 70
     (u"it-CH", u"Italiano", u"Svizzera", u"Italian"),
@@ -91,86 +72,48 @@ language_codes = (
91 72
     (u"iw", u"עברית", u"", u""),
92 73
     (u"ja-JP", u"日本語", u"日本", u"Japanese"),
93 74
     (u"ka", u"ქართული", u"", u"Georgian"),
94
-    (u"kg", u"Kongo", u"", u""),
95 75
     (u"kk", u"Қазақша", u"", u"Kazakh"),
96
-    (u"km", u"ខ្មែរ", u"", u""),
97
-    (u"kn", u"ಕನ್ನಡ", u"", u""),
76
+    (u"kn", u"ಕನ್ನಡ", u"", u"Kannada"),
98 77
     (u"ko-KR", u"한국어", u"대한민국", u"Korean"),
99
-    (u"kri", u"Krio", u"", u""),
100
-    (u"ky", u"Кыргызча", u"", u""),
101 78
     (u"la", u"Latina", u"", u"Latin"),
102
-    (u"lg", u"Luganda", u"", u""),
103
-    (u"ln", u"Lingála", u"", u""),
104
-    (u"lo", u"ລາວ", u"", u""),
105
-    (u"loz", u"Lozi", u"", u""),
106 79
     (u"lt-LT", u"Lietuvių", u"Lietuva", u"Lithuanian"),
107
-    (u"lua", u"Luba-Lulua", u"", u""),
108 80
     (u"lv-LV", u"Latviešu", u"Latvijas Republika", u""),
109
-    (u"mfe", u"Kreol Morisien", u"", u""),
110
-    (u"mg", u"Malagasy", u"", u""),
111
-    (u"mi", u"Maori", u"", u""),
81
+    (u"mi", u"Reo Māori", u"", u"Maori"),
112 82
     (u"min", u"Minangkabau", u"", u"Minangkabau"),
113
-    (u"mk", u"Македонски", u"", u""),
114
-    (u"ml", u"മലയാളം", u"", u""),
115
-    (u"mn", u"Монгол", u"", u""),
116
-    (u"mr", u"मराठी", u"", u""),
83
+    (u"mk", u"Македонски", u"", u"Macedonian"),
84
+    (u"mn", u"Монгол", u"", u"Mongolian"),
85
+    (u"mr", u"मराठी", u"", u"Marathi"),
117 86
     (u"ms-MY", u"Bahasa Melayu", u"Malaysia", u"Malay"),
118
-    (u"mt", u"Malti", u"", u""),
119
-    (u"my", u"ဗမာ", u"", u""),
87
+    (u"mt", u"Malti", u"", u"Maltese"),
120 88
     (u"nb-NO", u"Norwegian Bokmål", u"Norge", u"Norwegian Bokmål"),
121
-    (u"ne", u"नेपाली", u"", u""),
122 89
     (u"nl", u"Nederlands", u"", u"Dutch"),
123 90
     (u"nl-BE", u"Nederlands", u"België", u"Dutch"),
124 91
     (u"nl-NL", u"Nederlands", u"Nederland", u"Dutch"),
125 92
     (u"nn", u"Nynorsk", u"", u"Norwegian"),
126 93
     (u"no-NO", u"Norsk", u"Norge", u"Norwegian"),
127
-    (u"nso", u"Northern Sotho", u"", u""),
128
-    (u"ny", u"Nyanja", u"", u""),
129
-    (u"nyn", u"Runyankore", u"", u""),
130
-    (u"oc", u"Occitan", u"", u""),
131
-    (u"om", u"Oromoo", u"", u""),
132
-    (u"or", u"ଓଡ଼ିଆ", u"", u""),
133
-    (u"pa", u"ਪੰਜਾਬੀ", u"", u""),
134
-    (u"pcm", u"Nigerian Pidgin", u"", u""),
94
+    (u"oc", u"Occitan", u"", u"Occitan"),
95
+    (u"or", u"Oriya", u"", u"Oriya"),
96
+    (u"pa", u"ਪੰਜਾਬੀ", u"", u"Panjabi"),
135 97
     (u"pl-PL", u"Polski", u"Rzeczpospolita Polska", u"Polish"),
136
-    (u"ps", u"پښتو", u"", u""),
98
+    (u"ps", u"Pushto", u"", u"Pushto"),
137 99
     (u"pt", u"Português", u"", u"Portuguese"),
138 100
     (u"pt-BR", u"Português", u"Brasil", u"Portuguese"),
139 101
     (u"pt-PT", u"Português", u"Portugal", u"Portuguese"),
140
-    (u"qu", u"Runasimi", u"", u""),
141
-    (u"rm", u"Rumantsch", u"", u""),
142
-    (u"rn", u"Ikirundi", u"", u""),
143 102
     (u"ro-RO", u"Română", u"România", u"Romanian"),
144 103
     (u"ru-RU", u"Русский", u"Россия", u"Russian"),
145
-    (u"rw", u"Kinyarwanda", u"", u""),
146
-    (u"sd", u"Sindhi", u"", u""),
104
+    (u"rw", u"Ikinyarwanda", u"", u"Kinyarwanda"),
147 105
     (u"sh", u"Srpskohrvatski / Српскохрватски", u"", u"Serbo-Croatian"),
148
-    (u"si", u"සිංහල", u"", u""),
149 106
     (u"sk-SK", u"Slovenčina", u"Slovenská republika", u"Slovak"),
150 107
     (u"sl", u"Slovenščina", u"", u"Slovenian"),
151
-    (u"sn", u"Chishona", u"", u""),
152
-    (u"so", u"Soomaali", u"", u""),
153
-    (u"sq", u"Shqip", u"", u""),
154 108
     (u"sr", u"Српски / Srpski", u"", u"Serbian"),
155
-    (u"st", u"Southern Sotho", u"", u""),
156
-    (u"su", u"Sundanese", u"", u""),
157 109
     (u"sv-SE", u"Svenska", u"Sverige", u"Swedish"),
158 110
     (u"sw", u"Kiswahili", u"", u""),
159
-    (u"ta", u"தமிழ்", u"", u""),
160
-    (u"te", u"తెలుగు", u"", u""),
161
-    (u"tg", u"Tajik", u"", u""),
111
+    (u"ta", u"தமிழ்", u"", u"Tamil"),
162 112
     (u"th-TH", u"ไทย", u"ไทย", u"Thai"),
163
-    (u"ti", u"ትግርኛ", u"", u""),
164
-    (u"tk", u"Turkmen", u"", u""),
113
+    (u"ti", u"ትግርኛ", u"", u"Tigrinya"),
165 114
     (u"tl-PH", u"Filipino", u"Pilipinas", u""),
166
-    (u"tlh", u"Klingon", u"", u""),
167
-    (u"tn", u"Tswana", u"", u""),
168
-    (u"to", u"Lea Fakatonga", u"", u""),
169 115
     (u"tr-TR", u"Türkçe", u"Türkiye", u"Turkish"),
170
-    (u"tt", u"Tatar", u"", u""),
171
-    (u"tum", u"Tumbuka", u"", u""),
172
-    (u"tw", u"Twi", u"", u""),
173
-    (u"ug", u"ئۇيغۇرچە", u"", u""),
116
+    (u"tt", u"Татарча", u"", u"Tatar"),
174 117
     (u"uk-UA", u"Українська", u"Україна", u"Ukrainian"),
175 118
     (u"ur", u"اردو", u"", u"Urdu"),
176 119
     (u"uz", u"O‘zbek", u"", u"Uzbek"),
@@ -179,13 +122,10 @@ language_codes = (
179 122
     (u"vo", u"Volapük", u"", u"Volapük"),
180 123
     (u"wa", u"Walon", u"", u"Walloon"),
181 124
     (u"war", u"Winaray", u"", u"Waray-Waray"),
182
-    (u"wo", u"Wolof", u"", u""),
183
-    (u"xh", u"Xhosa", u"", u""),
184
-    (u"yi", u"ייִדיש", u"", u""),
185
-    (u"yo", u"Èdè Yorùbá", u"", u""),
125
+    (u"xh", u"Xhosa", u"", u"Xhosa"),
186 126
     (u"zh", u"中文", u"", u"Chinese"),
187
-    (u"zh-CN", u"中文", u"中国", u"Chinese"),
127
+    (u"zh-CN", u"中文", u"中国", u""),
188 128
     (u"zh-HK", u"中文", u"香港", u"Chinese"),
189
-    (u"zh-TW", u"中文", u"台湾", u"Chinese"),
190
-    (u"zu", u"Isizulu", u"", u"")
129
+    (u"zh-TW", u"中文", u"台湾", u""),
130
+    (u"zu", u"Isi-Zulu", u"", u"Zulu")
191 131
 )

+ 32
- 0
tests/unit/engines/test_bing.py View File

@@ -86,3 +86,35 @@ class TestBingEngine(SearxTestCase):
86 86
         self.assertEqual(results[0]['title'], 'This should be the title')
87 87
         self.assertEqual(results[0]['url'], 'http://this.should.be.the.link/')
88 88
         self.assertEqual(results[0]['content'], 'This should be the content.')
89
+
90
+    def test_fetch_supported_languages(self):
91
+        html = """<html></html>"""
92
+        response = mock.Mock(text=html)
93
+        results = bing._fetch_supported_languages(response)
94
+        self.assertEqual(type(results), list)
95
+        self.assertEqual(len(results), 0)
96
+
97
+        html = """
98
+        <html>
99
+            <body>
100
+                <form>
101
+                    <div id="limit-languages">
102
+                        <div>
103
+                            <div><input id="es" value="es"></input></div>
104
+                        </div>
105
+                        <div>
106
+                            <div><input id="pt_BR" value="pt_BR"></input></div>
107
+                            <div><input id="pt_PT" value="pt_PT"></input></div>
108
+                        </div>
109
+                    </div>
110
+                </form>
111
+            </body>
112
+        </html>
113
+        """
114
+        response = mock.Mock(text=html)
115
+        languages = bing._fetch_supported_languages(response)
116
+        self.assertEqual(type(languages), list)
117
+        self.assertEqual(len(languages), 3)
118
+        self.assertIn('es', languages)
119
+        self.assertIn('pt-BR', languages)
120
+        self.assertIn('pt-PT', languages)

+ 37
- 0
tests/unit/engines/test_dailymotion.py View File

@@ -1,3 +1,4 @@
1
+# -*- coding: utf-8 -*-
1 2
 from collections import defaultdict
2 3
 import mock
3 4
 from searx.engines import dailymotion
@@ -72,3 +73,39 @@ class TestDailymotionEngine(SearxTestCase):
72 73
         results = dailymotion.response(response)
73 74
         self.assertEqual(type(results), list)
74 75
         self.assertEqual(len(results), 0)
76
+
77
+    def test_fetch_supported_languages(self):
78
+        json = r"""
79
+        {"list":[{"code":"af","name":"Afrikaans","native_name":"Afrikaans",
80
+                  "localized_name":"Afrikaans","display_name":"Afrikaans"},
81
+                 {"code":"ar","name":"Arabic","native_name":"\u0627\u0644\u0639\u0631\u0628\u064a\u0629",
82
+                  "localized_name":"Arabic","display_name":"Arabic"},
83
+                 {"code":"la","name":"Latin","native_name":null,
84
+                  "localized_name":"Latin","display_name":"Latin"}
85
+        ]}
86
+        """
87
+        response = mock.Mock(text=json)
88
+        languages = dailymotion._fetch_supported_languages(response)
89
+        self.assertEqual(type(languages), dict)
90
+        self.assertEqual(len(languages), 3)
91
+        self.assertIn('af', languages)
92
+        self.assertIn('ar', languages)
93
+        self.assertIn('la', languages)
94
+
95
+        self.assertEqual(type(languages['af']), dict)
96
+        self.assertEqual(type(languages['ar']), dict)
97
+        self.assertEqual(type(languages['la']), dict)
98
+
99
+        self.assertIn('name', languages['af'])
100
+        self.assertIn('name', languages['ar'])
101
+        self.assertNotIn('name', languages['la'])
102
+
103
+        self.assertIn('english_name', languages['af'])
104
+        self.assertIn('english_name', languages['ar'])
105
+        self.assertIn('english_name', languages['la'])
106
+
107
+        self.assertEqual(languages['af']['name'], 'Afrikaans')
108
+        self.assertEqual(languages['af']['english_name'], 'Afrikaans')
109
+        self.assertEqual(languages['ar']['name'], u'العربية')
110
+        self.assertEqual(languages['ar']['english_name'], 'Arabic')
111
+        self.assertEqual(languages['la']['english_name'], 'Latin')

+ 14
- 0
tests/unit/engines/test_duckduckgo.py View File

@@ -84,3 +84,17 @@ class TestDuckduckgoEngine(SearxTestCase):
84 84
         self.assertEqual(results[0]['title'], 'This is the title')
85 85
         self.assertEqual(results[0]['url'], u'http://this.should.be.the.link/ű')
86 86
         self.assertEqual(results[0]['content'], 'This should be the content.')
87
+
88
+    def test_fetch_supported_languages(self):
89
+        js = """some code...regions:{
90
+        "wt-wt":"All Results","ar-es":"Argentina","au-en":"Australia","at-de":"Austria","be-fr":"Belgium (fr)"
91
+        }some more code..."""
92
+        response = mock.Mock(text=js)
93
+        languages = duckduckgo._fetch_supported_languages(response)
94
+        self.assertEqual(type(languages), list)
95
+        self.assertEqual(len(languages), 5)
96
+        self.assertIn('wt-WT', languages)
97
+        self.assertIn('es-AR', languages)
98
+        self.assertIn('en-AU', languages)
99
+        self.assertIn('de-AT', languages)
100
+        self.assertIn('fr-BE', languages)

+ 25
- 0
tests/unit/engines/test_gigablast.py View File

@@ -89,3 +89,28 @@ class TestGigablastEngine(SearxTestCase):
89 89
         self.assertEqual(results[0]['title'], 'South by Southwest 2016')
90 90
         self.assertEqual(results[0]['url'], 'www.sxsw.com')
91 91
         self.assertEqual(results[0]['content'], 'This should be the content.')
92
+
93
+    def test_fetch_supported_languages(self):
94
+        html = """<html></html>"""
95
+        response = mock.Mock(text=html)
96
+        results = gigablast._fetch_supported_languages(response)
97
+        self.assertEqual(type(results), list)
98
+        self.assertEqual(len(results), 0)
99
+
100
+        html = """
101
+        <html>
102
+            <body>
103
+                <span id="menu2">
104
+                    <a href="/search?&rxikd=1&qlang=xx"></a>
105
+                    <a href="/search?&rxikd=1&qlang=en"></a>
106
+                    <a href="/search?&rxikd=1&qlang=fr"></a>
107
+                </span>
108
+            </body>
109
+        </html>
110
+        """
111
+        response = mock.Mock(text=html)
112
+        languages = gigablast._fetch_supported_languages(response)
113
+        self.assertEqual(type(languages), list)
114
+        self.assertEqual(len(languages), 2)
115
+        self.assertIn('en', languages)
116
+        self.assertIn('fr', languages)

+ 57
- 0
tests/unit/engines/test_google.py View File

@@ -177,3 +177,60 @@ class TestGoogleEngine(SearxTestCase):
177 177
         self.assertEqual(results[0]['title'], '')
178 178
         self.assertEqual(results[0]['content'], '')
179 179
         self.assertEqual(results[0]['img_src'], 'https://this.is.the.image/image.jpg')
180
+
181
+    def test_fetch_supported_languages(self):
182
+        html = """<html></html>"""
183
+        response = mock.Mock(text=html)
184
+        languages = google._fetch_supported_languages(response)
185
+        self.assertEqual(type(languages), dict)
186
+        self.assertEqual(len(languages), 0)
187
+
188
+        html = u"""
189
+        <html>
190
+            <body>
191
+                <table>
192
+                    <tbody>
193
+                        <tr>
194
+                            <td>
195
+                                <font>
196
+                                    <label>
197
+                                        <span id="ten">English</span>
198
+                                    </label>
199
+                                </font>
200
+                            </td>
201
+                            <td>
202
+                                <font>
203
+                                    <label>
204
+                                        <span id="tzh-CN">中文 (简体)</span>
205
+                                    </label>
206
+                                    <label>
207
+                                        <span id="tzh-TW">中文 (繁體)</span>
208
+                                    </label>
209
+                                </font>
210
+                            </td>
211
+                        </tr>
212
+                    </tbody>
213
+                </table>
214
+            </body>
215
+        </html>
216
+        """
217
+        response = mock.Mock(text=html)
218
+        languages = google._fetch_supported_languages(response)
219
+        self.assertEqual(type(languages), dict)
220
+        self.assertEqual(len(languages), 3)
221
+
222
+        self.assertIn('en', languages)
223
+        self.assertIn('zh-CN', languages)
224
+        self.assertIn('zh-TW', languages)
225
+
226
+        self.assertEquals(type(languages['en']), dict)
227
+        self.assertEquals(type(languages['zh-CN']), dict)
228
+        self.assertEquals(type(languages['zh-TW']), dict)
229
+
230
+        self.assertIn('name', languages['en'])
231
+        self.assertIn('name', languages['zh-CN'])
232
+        self.assertIn('name', languages['zh-TW'])
233
+
234
+        self.assertEquals(languages['en']['name'], 'English')
235
+        self.assertEquals(languages['zh-CN']['name'], u'中文 (简体)')
236
+        self.assertEquals(languages['zh-TW']['name'], u'中文 (繁體)')

+ 27
- 0
tests/unit/engines/test_swisscows.py View File

@@ -126,3 +126,30 @@ class TestSwisscowsEngine(SearxTestCase):
126 126
         self.assertEqual(results[2]['url'], 'http://de.wikipedia.org/wiki/Datei:This should.svg')
127 127
         self.assertEqual(results[2]['img_src'], 'http://ts2.mm.This/should.png')
128 128
         self.assertEqual(results[2]['template'], 'images.html')
129
+
130
+    def test_fetch_supported_languages(self):
131
+        html = """<html></html>"""
132
+        response = mock.Mock(text=html)
133
+        languages = swisscows._fetch_supported_languages(response)
134
+        self.assertEqual(type(languages), list)
135
+        self.assertEqual(len(languages), 0)
136
+
137
+        html = """
138
+        <html>
139
+            <div id="regions-popup">
140
+                <div>
141
+                    <ul>
142
+                        <li><a data-val="browser"></a></li>
143
+                        <li><a data-val="de-CH"></a></li>
144
+                        <li><a data-val="fr-CH"></a></li>
145
+                    </ul>
146
+                </div>
147
+            </div>
148
+        </html>
149
+        """
150
+        response = mock.Mock(text=html)
151
+        languages = swisscows._fetch_supported_languages(response)
152
+        self.assertEqual(type(languages), list)
153
+        self.assertEqual(len(languages), 3)
154
+        self.assertIn('de-CH', languages)
155
+        self.assertIn('fr-CH', languages)

+ 93
- 0
tests/unit/engines/test_wikipedia.py View File

@@ -164,3 +164,96 @@ class TestWikipediaEngine(SearxTestCase):
164 164
         self.assertEqual(len(results), 2)
165 165
         self.assertEqual(results[1]['infobox'], u'披頭四樂隊')
166 166
         self.assertIn(u'披头士乐队...', results[1]['content'])
167
+
168
+    def test_fetch_supported_languages(self):
169
+        html = u"""<html></html>"""
170
+        response = mock.Mock(text=html)
171
+        languages = wikipedia._fetch_supported_languages(response)
172
+        self.assertEqual(type(languages), dict)
173
+        self.assertEqual(len(languages), 0)
174
+
175
+        html = u"""
176
+        <html>
177
+            <body>
178
+                <div>
179
+                    <div>
180
+                        <h3>Table header</h3>
181
+                        <table class="sortable jquery-tablesorter">
182
+                            <thead>
183
+                                <tr>
184
+                                    <th>N</th>
185
+                                    <th>Language</th>
186
+                                    <th>Language (local)</th>
187
+                                    <th>Wiki</th>
188
+                                    <th>Articles</th>
189
+                                </tr>
190
+                            </thead>
191
+                            <tbody>
192
+                                <tr>
193
+                                    <td>2</td>
194
+                                    <td><a>Swedish</a></td>
195
+                                    <td><a>Svenska</a></td>
196
+                                    <td><a>sv</a></td>
197
+                                    <td><a><b>3000000</b></a></td>
198
+                                </tr>
199
+                                <tr>
200
+                                    <td>3</td>
201
+                                    <td><a>Cebuano</a></td>
202
+                                    <td><a>Sinugboanong Binisaya</a></td>
203
+                                    <td><a>ceb</a></td>
204
+                                    <td><a><b>3000000</b></a></td>
205
+                                </tr>
206
+                            </tbody>
207
+                        </table>
208
+                        <h3>Table header</h3>
209
+                        <table class="sortable jquery-tablesorter">
210
+                            <thead>
211
+                                <tr>
212
+                                    <th>N</th>
213
+                                    <th>Language</th>
214
+                                    <th>Language (local)</th>
215
+                                    <th>Wiki</th>
216
+                                    <th>Articles</th>
217
+                                </tr>
218
+                            </thead>
219
+                            <tbody>
220
+                                <tr>
221
+                                    <td>2</td>
222
+                                    <td><a>Norwegian (Bokmål)</a></td>
223
+                                    <td><a>Norsk (Bokmål)</a></td>
224
+                                    <td><a>no</a></td>
225
+                                    <td><a><b>100000</b></a></td>
226
+                                </tr>
227
+                            </tbody>
228
+                        </table>
229
+                    </div>
230
+                </div>
231
+            </body>
232
+        </html>
233
+        """
234
+        response = mock.Mock(text=html)
235
+        languages = wikipedia._fetch_supported_languages(response)
236
+        self.assertEqual(type(languages), dict)
237
+        self.assertEqual(len(languages), 3)
238
+
239
+        self.assertIn('sv', languages)
240
+        self.assertIn('ceb', languages)
241
+        self.assertIn('no', languages)
242
+
243
+        self.assertEqual(type(languages['sv']), dict)
244
+        self.assertEqual(type(languages['ceb']), dict)
245
+        self.assertEqual(type(languages['no']), dict)
246
+
247
+        self.assertIn('name', languages['sv'])
248
+        self.assertIn('english_name', languages['sv'])
249
+        self.assertIn('articles', languages['sv'])
250
+
251
+        self.assertEqual(languages['sv']['name'], 'Svenska')
252
+        self.assertEqual(languages['sv']['english_name'], 'Swedish')
253
+        self.assertEqual(languages['sv']['articles'], 3000000)
254
+        self.assertEqual(languages['ceb']['name'], 'Sinugboanong Binisaya')
255
+        self.assertEqual(languages['ceb']['english_name'], 'Cebuano')
256
+        self.assertEqual(languages['ceb']['articles'], 3000000)
257
+        self.assertEqual(languages['no']['name'], u'Norsk (Bokmål)')
258
+        self.assertEqual(languages['no']['english_name'], u'Norwegian (Bokmål)')
259
+        self.assertEqual(languages['no']['articles'], 100000)

+ 30
- 0
tests/unit/engines/test_yahoo.py View File

@@ -147,3 +147,33 @@ class TestYahooEngine(SearxTestCase):
147 147
         results = yahoo.response(response)
148 148
         self.assertEqual(type(results), list)
149 149
         self.assertEqual(len(results), 0)
150
+
151
+    def test_fetch_supported_languages(self):
152
+        html = """<html></html>"""
153
+        response = mock.Mock(text=html)
154
+        results = yahoo._fetch_supported_languages(response)
155
+        self.assertEqual(type(results), list)
156
+        self.assertEqual(len(results), 0)
157
+
158
+        html = """
159
+        <html>
160
+            <div>
161
+                <div id="yschlang">
162
+                    <span>
163
+                        <label><input value="lang_ar"></input></label>
164
+                    </span>
165
+                    <span>
166
+                        <label><input value="lang_zh_chs"></input></label>
167
+                        <label><input value="lang_zh_cht"></input></label>
168
+                    </span>
169
+                </div>
170
+            </div>
171
+        </html>
172
+        """
173
+        response = mock.Mock(text=html)
174
+        languages = yahoo._fetch_supported_languages(response)
175
+        self.assertEqual(type(languages), list)
176
+        self.assertEqual(len(languages), 3)
177
+        self.assertIn('ar', languages)
178
+        self.assertIn('zh-chs', languages)
179
+        self.assertIn('zh-cht', languages)

+ 8
- 4
utils/fetch_languages.py View File

@@ -84,7 +84,7 @@ def fetch_supported_languages():
84 84
 
85 85
     # write json file
86 86
     f = io.open(engines_languages_file, "w", encoding="utf-8")
87
-    f.write(unicode(dumps(engines_languages, indent=4, ensure_ascii=False, encoding="utf-8")))
87
+    f.write(unicode(dumps(engines_languages, ensure_ascii=False, encoding="utf-8")))
88 88
     f.close()
89 89
 
90 90
 
@@ -110,18 +110,22 @@ def join_language_lists():
110 110
                 else:
111 111
                     languages[locale] = {}
112 112
 
113
-    # get locales that have no name yet
113
+    # get locales that have no name or country yet
114 114
     for locale in languages.keys():
115 115
         if not languages[locale].get('name'):
116
-            # try to get language and country names
116
+            # try to get language names
117 117
             name = languages.get(locale.split('-')[0], {}).get('name', None)
118 118
             if name:
119 119
                 languages[locale]['name'] = name
120
-                languages[locale]['country'] = get_country_name(locale) or ''
121 120
                 languages[locale]['english_name'] = languages.get(locale.split('-')[0], {}).get('english_name', '')
122 121
             else:
123 122
                 # filter out locales with no name
124 123
                 del languages[locale]
124
+                continue
125
+
126
+        # try to get country name
127
+        if locale.find('-') > 0 and not languages[locale].get('country'):
128
+            languages[locale]['country'] = get_country_name(locale) or ''
125 129
 
126 130
 
127 131
 # Remove countryless language if language is featured in only one country.