tests for _fetch_supported_languages in engines

marc · 8 years ago · commit af35eee10b

+1 -3256  searx/data/engines_languages.json  (diff not shown: file too large)


BIN  searx/engines/.yandex.py.swp


+10 -3  searx/engines/__init__.py

@@ -21,6 +21,7 @@
 from flask_babel import gettext
 from operator import itemgetter
 from json import loads
+from requests import get
 from searx import settings
 from searx import logger
 from searx.utils import load_module
@@ -79,9 +80,6 @@
         if not hasattr(engine, arg_name):
             setattr(engine, arg_name, arg_value)
 
-    if engine_data['name'] in languages:
-        setattr(engine, 'supported_languages', languages[engine_data['name']])
-
     # checking required variables
     for engine_attr in dir(engine):
         if engine_attr.startswith('_'):
@@ -91,6 +89,15 @@
                          .format(engine.name, engine_attr))
             sys.exit(1)
 
+    # assign supported languages from json file
+    if engine_data['name'] in languages:
+        setattr(engine, 'supported_languages', languages[engine_data['name']])
+
+    # assign language fetching method if auxiliary method exists
+    if hasattr(engine, '_fetch_supported_languages'):
+        setattr(engine, 'fetch_supported_languages',
+                lambda: engine._fetch_supported_languages(get(engine.supported_languages_url)))
+
     engine.stats = {
         'result_count': 0,
         'search_count': 0,
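
With this change, an engine module opts in by defining a supported_languages_url and a private _fetch_supported_languages(resp) parser; load_engine() then binds a public, argument-free fetch_supported_languages() onto the module. A minimal self-contained sketch of that wiring, assuming a hypothetical example engine (real searx engines are modules loaded from settings.yml, not classes defined inline):

# Sketch of the binding done in load_engine() above; all names here are illustrative only.
from requests import get


class ExampleEngine(object):
    # page listing the languages the engine supports (hypothetical URL)
    supported_languages_url = 'https://example.org/preferences'

    @staticmethod
    def _fetch_supported_languages(resp):
        # parse the downloaded page; a real engine would use lxml here
        return resp.text.split(',')


engine = ExampleEngine()

if hasattr(engine, '_fetch_supported_languages'):
    # bind a no-argument fetcher: download the page, hand the response to the parser
    setattr(engine, 'fetch_supported_languages',
            lambda: engine._fetch_supported_languages(get(engine.supported_languages_url)))

# utils/fetch_languages.py can now simply call engine.fetch_supported_languages()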

+2 -4  searx/engines/bing.py

@@ -15,7 +15,6 @@
 
 from urllib import urlencode
 from lxml import html
-from requests import get
 from searx.engines.xpath import extract_text
 
 # engine dependent config
@@ -86,10 +85,9 @@
 
 
 # get supported languages from their site
-def fetch_supported_languages():
+def _fetch_supported_languages(resp):
     supported_languages = []
-    response = get(supported_languages_url)
-    dom = html.fromstring(response.text)
+    dom = html.fromstring(resp.text)
     options = dom.xpath('//div[@id="limit-languages"]//input')
     for option in options:
         code = option.xpath('./@id')[0].replace('_', '-')
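
Because the parser now takes a response object instead of fetching the page itself, it can be exercised offline with any object exposing a .text attribute, which is what the new unit tests below do with mock.Mock. A hedged, self-contained sketch (the HTML fragment is invented and only mimics the shape of Bing's language form; the expected output assumes the rest of the function appends each code, as in the current bing.py):

# Offline sketch: feed bing's parser a canned response instead of a live page.
from searx.engines import bing


class FakeResponse(object):
    # minimal stand-in for requests.Response; only .text is needed
    text = '<div id="limit-languages"><input id="pt_BR"/><input id="es"/></div>'


print(bing._fetch_supported_languages(FakeResponse()))
# expected: ['pt-BR', 'es']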

+1 -1  searx/engines/bing_images.py

@@ -19,7 +19,7 @@
 from lxml import html
 from json import loads
 import re
-from searx.engines.bing import fetch_supported_languages
+from searx.engines.bing import _fetch_supported_languages, supported_languages_url
 
 # engine dependent config
 categories = ['images']

+1 -1  searx/engines/bing_news.py

@@ -17,7 +17,7 @@
 from dateutil import parser
 from lxml import etree
 from searx.utils import list_get
-from searx.engines.bing import fetch_supported_languages
+from searx.engines.bing import _fetch_supported_languages, supported_languages_url
 
 # engine dependent config
 categories = ['news']

+2 -3  searx/engines/dailymotion.py

@@ -80,11 +80,10 @@
 
 
 # get supported languages from their site
-def fetch_supported_languages():
+def _fetch_supported_languages(resp):
     supported_languages = {}
 
-    response = get(supported_languages_url)
-    response_json = loads(response.text)
+    response_json = loads(resp.text)
 
     for language in response_json['list']:
         supported_languages[language['code']] = {}

+2 -3  searx/engines/duckduckgo.py

@@ -119,11 +119,10 @@
 
 
 # get supported languages from their site
-def fetch_supported_languages():
-    response = get(supported_languages_url)
+def _fetch_supported_languages(resp):
 
     # response is a js file with regions as an embedded object
-    response_page = response.text
+    response_page = resp.text
     response_page = response_page[response_page.find('regions:{') + 8:]
     response_page = response_page[:response_page.find('}') + 1]
 

+1 -1  searx/engines/duckduckgo_definitions.py

@@ -4,7 +4,7 @@
 from lxml import html
 from searx.utils import html_to_text
 from searx.engines.xpath import extract_text
-from searx.engines.duckduckgo import fetch_supported_languages
+from searx.engines.duckduckgo import _fetch_supported_languages, supported_languages_url
 
 url = 'https://api.duckduckgo.com/'\
     + '?{query}&format=json&pretty=0&no_redirect=1&d=1'

+2 -4  searx/engines/gigablast.py

@@ -14,7 +14,6 @@
 from random import randint
 from time import time
 from urllib import urlencode
-from requests import get
 from lxml.html import fromstring
 
 # engine dependent config
@@ -91,10 +90,9 @@
 
 
 # get supported languages from their site
-def fetch_supported_languages():
+def _fetch_supported_languages(resp):
     supported_languages = []
-    response = get(supported_languages_url)
-    dom = fromstring(response.text)
+    dom = fromstring(resp.text)
     links = dom.xpath('//span[@id="menu2"]/a')
     for link in links:
         code = link.xpath('./@href')[0][-2:]

+5 -7  searx/engines/google.py

@@ -12,7 +12,6 @@
 from urllib import urlencode
 from urlparse import urlparse, parse_qsl
 from lxml import html, etree
-from requests import get
 from searx.engines.xpath import extract_text, extract_url
 from searx.search import logger
 
@@ -364,14 +363,13 @@
 
 
 # get supported languages from their site
-def fetch_supported_languages():
+def _fetch_supported_languages(resp):
     supported_languages = {}
-    response = get(supported_languages_url)
-    dom = html.fromstring(response.text)
-    options = dom.xpath('//select[@name="hl"]/option')
+    dom = html.fromstring(resp.text)
+    options = dom.xpath('//table//td/font/label/span')
     for option in options:
-        code = option.xpath('./@value')[0].split('-')[0]
-        name = option.text[:-1].title()
+        code = option.xpath('./@id')[0][1:]
+        name = option.text.title()
         supported_languages[code] = {"name": name}
 
     return supported_languages

+1 -1  searx/engines/google_news.py

@@ -13,7 +13,7 @@
 from lxml import html
 from urllib import urlencode
 from json import loads
-from searx.engines.google import fetch_supported_languages
+from searx.engines.google import _fetch_supported_languages, supported_languages_url
 
 # search-url
 categories = ['news']

+4 -4  searx/engines/swisscows.py

@@ -13,7 +13,6 @@
 from json import loads
 from urllib import urlencode, unquote
 import re
-from requests import get
 from lxml.html import fromstring
 
 # engine dependent config
@@ -25,6 +24,8 @@
 base_url = 'https://swisscows.ch/'
 search_string = '?{query}&page={page}'
 
+supported_languages_url = base_url
+
 # regex
 regex_json = re.compile(r'initialData: {"Request":(.|\n)*},\s*environment')
 regex_json_remove_start = re.compile(r'^initialData:\s*')
@@ -113,10 +114,9 @@
 
 
 # get supported languages from their site
-def fetch_supported_languages():
+def _fetch_supported_languages(resp):
     supported_languages = []
-    response = get(base_url)
-    dom = fromstring(response.text)
+    dom = fromstring(resp.text)
     options = dom.xpath('//div[@id="regions-popup"]//ul/li/a')
     for option in options:
         code = option.xpath('./@data-val')[0]

+1 -1  searx/engines/wikidata.py

@@ -15,7 +15,7 @@
 from searx.poolrequests import get
 from searx.engines.xpath import extract_text
 from searx.utils import format_date_by_locale
-from searx.engines.wikipedia import fetch_supported_languages
+from searx.engines.wikipedia import _fetch_supported_languages, supported_languages_url
 
 from json import loads
 from lxml.html import fromstring

+2 -4  searx/engines/wikipedia.py

@@ -12,7 +12,6 @@
 
 from json import loads
 from urllib import urlencode, quote
-from requests import get
 from lxml.html import fromstring
 
 
@@ -119,10 +118,9 @@
 
 
 # get supported languages from their site
-def fetch_supported_languages():
+def _fetch_supported_languages(resp):
     supported_languages = {}
-    response = get(supported_languages_url)
-    dom = fromstring(response.text)
+    dom = fromstring(resp.text)
     tables = dom.xpath('//table[contains(@class,"sortable")]')
     for table in tables:
         # exclude header row

+3 -5  searx/engines/yahoo.py

@@ -14,7 +14,6 @@
 from urllib import urlencode
 from urlparse import unquote
 from lxml import html
-from requests import get
 from searx.engines.xpath import extract_text, extract_url
 
 # engine dependent config
@@ -144,13 +143,12 @@
 
 
 # get supported languages from their site
-def fetch_supported_languages():
+def _fetch_supported_languages(resp):
     supported_languages = []
-    response = get(supported_languages_url)
-    dom = html.fromstring(response.text)
+    dom = html.fromstring(resp.text)
     options = dom.xpath('//div[@id="yschlang"]/span/label/input')
     for option in options:
-        code = option.xpath('./@value')[0][5:]
+        code = option.xpath('./@value')[0][5:].replace('_', '-')
         supported_languages.append(code)
 
     return supported_languages

+1 -1  searx/engines/yahoo_news.py

@@ -12,7 +12,7 @@
 from urllib import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text, extract_url
-from searx.engines.yahoo import parse_url, fetch_supported_languages
+from searx.engines.yahoo import parse_url, _fetch_supported_languages, supported_languages_url
 from datetime import datetime, timedelta
 import re
 from dateutil import parser

+25 -85  searx/languages.py

@@ -3,36 +3,27 @@
 # this file is generated automatically by utils/update_search_languages.py
 
 language_codes = (
-    (u"ach", u"Acoli", u"", u""),
     (u"af", u"Afrikaans", u"", u""),
-    (u"ak", u"Akan", u"", u""),
-    (u"am", u"አማርኛ", u"", u""),
+    (u"am", u"አማርኛ", u"", u"Amharic"),
     (u"ar-SA", u"العربية", u"المملكة العربية السعودية", u"Arabic"),
     (u"az", u"Azərbaycanca", u"", u"Azerbaijani"),
-    (u"ban", u"Balinese", u"", u""),
     (u"be", u"Беларуская", u"", u"Belarusian"),
-    (u"bem", u"Ichibemba", u"", u""),
     (u"bg-BG", u"Български", u"България", u"Bulgarian"),
-    (u"bn", u"বাংলা", u"", u""),
-    (u"br", u"Brezhoneg", u"", u""),
-    (u"bs", u"Bosanski", u"", u""),
+    (u"bn", u"বাংলা", u"", u"Bengali"),
+    (u"br", u"Brezhoneg", u"", u"Breton"),
+    (u"bs", u"Bosnian", u"", u"Bosnian"),
     (u"ca", u"Català", u"", u"Catalan"),
     (u"ca-CT", u"Català", u"", u"Catalan"),
     (u"ca-ES", u"Català", u"Espanya", u"Catalan"),
     (u"ce", u"Нохчийн", u"", u"Chechen"),
     (u"ceb", u"Sinugboanong Binisaya", u"", u"Cebuano"),
-    (u"chr", u"ᏣᎳᎩ", u"", u""),
-    (u"ckb", u"Central Kurdish", u"", u""),
-    (u"co", u"Corsican", u"", u""),
-    (u"crs", u"Seychellois Creole", u"", u""),
     (u"cs-CZ", u"Čeština", u"Česko", u"Czech"),
-    (u"cy", u"Cymraeg", u"", u""),
+    (u"cy", u"Cymraeg", u"", u"Welsh"),
     (u"da-DK", u"Dansk", u"Danmark", u"Danish"),
     (u"de", u"Deutsch", u"", u"German"),
     (u"de-AT", u"Deutsch", u"Österreich", u"German"),
     (u"de-CH", u"Deutsch", u"Schweiz", u"German"),
     (u"de-DE", u"Deutsch", u"Deutschland", u"German"),
-    (u"ee", u"Eʋegbe", u"", u""),
     (u"el-GR", u"Ελληνικά", u"Ελλάδα", u"Greek"),
     (u"en", u"English", u"", u"English"),
     (u"en-AU", u"English", u"Australia", u"English"),
@@ -60,30 +51,20 @@
     (u"eu", u"Euskara", u"", u"Basque"),
     (u"fa", u"فارسی", u"", u"Persian"),
     (u"fi-FI", u"Suomi", u"Suomi", u"Finnish"),
-    (u"fo", u"Føroyskt", u"", u""),
     (u"fr", u"Français", u"", u"French"),
     (u"fr-BE", u"Français", u"Belgique", u"French"),
     (u"fr-CA", u"Français", u"Canada", u"French"),
     (u"fr-CH", u"Français", u"Suisse", u"French"),
     (u"fr-FR", u"Français", u"France", u"French"),
-    (u"fy", u"West-Frysk", u"", u""),
-    (u"ga", u"Gaeilge", u"", u""),
-    (u"gaa", u"Ga", u"", u""),
-    (u"gd", u"Gàidhlig", u"", u""),
+    (u"ga", u"Gaeilge", u"", u"Irish"),
     (u"gl", u"Galego", u"", u"Galician"),
-    (u"gn", u"Guarani", u"", u""),
-    (u"gu", u"ગુજરાતી", u"", u""),
-    (u"ha", u"Hausa", u"", u""),
-    (u"haw", u"ʻŌlelo HawaiʻI", u"", u""),
+    (u"gu", u"ગુજરાતી", u"", u"Gujarati"),
     (u"he-IL", u"עברית", u"ישראל", u"Hebrew"),
     (u"hi", u"हिन्दी", u"", u"Hindi"),
     (u"hr-HR", u"Hrvatski", u"Hrvatska", u"Croatian"),
-    (u"ht", u"Haitian Creole", u"", u""),
     (u"hu-HU", u"Magyar", u"Magyarország", u"Hungarian"),
     (u"hy", u"Հայերեն", u"", u"Armenian"),
-    (u"ia", u"Interlingua", u"", u""),
     (u"id-ID", u"Bahasa Indonesia", u"Indonesia", u"Indonesian"),
-    (u"ig", u"Igbo", u"", u""),
     (u"is", u"Íslenska", u"", u""),
     (u"it", u"Italiano", u"", u"Italian"),
     (u"it-CH", u"Italiano", u"Svizzera", u"Italian"),
@@ -91,86 +72,48 @@
     (u"iw", u"עברית", u"", u""),
     (u"ja-JP", u"日本語", u"日本", u"Japanese"),
     (u"ka", u"ქართული", u"", u"Georgian"),
-    (u"kg", u"Kongo", u"", u""),
     (u"kk", u"Қазақша", u"", u"Kazakh"),
-    (u"km", u"ខ្មែរ", u"", u""),
-    (u"kn", u"ಕನ್ನಡ", u"", u""),
+    (u"kn", u"ಕನ್ನಡ", u"", u"Kannada"),
     (u"ko-KR", u"한국어", u"대한민국", u"Korean"),
-    (u"kri", u"Krio", u"", u""),
-    (u"ky", u"Кыргызча", u"", u""),
     (u"la", u"Latina", u"", u"Latin"),
-    (u"lg", u"Luganda", u"", u""),
-    (u"ln", u"Lingála", u"", u""),
-    (u"lo", u"ລາວ", u"", u""),
-    (u"loz", u"Lozi", u"", u""),
     (u"lt-LT", u"Lietuvių", u"Lietuva", u"Lithuanian"),
-    (u"lua", u"Luba-Lulua", u"", u""),
     (u"lv-LV", u"Latviešu", u"Latvijas Republika", u""),
-    (u"mfe", u"Kreol Morisien", u"", u""),
-    (u"mg", u"Malagasy", u"", u""),
-    (u"mi", u"Maori", u"", u""),
+    (u"mi", u"Reo Māori", u"", u"Maori"),
     (u"min", u"Minangkabau", u"", u"Minangkabau"),
-    (u"mk", u"Македонски", u"", u""),
-    (u"ml", u"മലയാളം", u"", u""),
-    (u"mn", u"Монгол", u"", u""),
-    (u"mr", u"मराठी", u"", u""),
+    (u"mk", u"Македонски", u"", u"Macedonian"),
+    (u"mn", u"Монгол", u"", u"Mongolian"),
+    (u"mr", u"मराठी", u"", u"Marathi"),
     (u"ms-MY", u"Bahasa Melayu", u"Malaysia", u"Malay"),
-    (u"mt", u"Malti", u"", u""),
-    (u"my", u"ဗမာ", u"", u""),
+    (u"mt", u"Malti", u"", u"Maltese"),
     (u"nb-NO", u"Norwegian Bokmål", u"Norge", u"Norwegian Bokmål"),
-    (u"ne", u"नेपाली", u"", u""),
     (u"nl", u"Nederlands", u"", u"Dutch"),
     (u"nl-BE", u"Nederlands", u"België", u"Dutch"),
     (u"nl-NL", u"Nederlands", u"Nederland", u"Dutch"),
     (u"nn", u"Nynorsk", u"", u"Norwegian"),
     (u"no-NO", u"Norsk", u"Norge", u"Norwegian"),
-    (u"nso", u"Northern Sotho", u"", u""),
-    (u"ny", u"Nyanja", u"", u""),
-    (u"nyn", u"Runyankore", u"", u""),
-    (u"oc", u"Occitan", u"", u""),
-    (u"om", u"Oromoo", u"", u""),
-    (u"or", u"ଓଡ଼ିଆ", u"", u""),
-    (u"pa", u"ਪੰਜਾਬੀ", u"", u""),
-    (u"pcm", u"Nigerian Pidgin", u"", u""),
+    (u"oc", u"Occitan", u"", u"Occitan"),
+    (u"or", u"Oriya", u"", u"Oriya"),
+    (u"pa", u"ਪੰਜਾਬੀ", u"", u"Panjabi"),
     (u"pl-PL", u"Polski", u"Rzeczpospolita Polska", u"Polish"),
-    (u"ps", u"پښتو", u"", u""),
+    (u"ps", u"Pushto", u"", u"Pushto"),
     (u"pt", u"Português", u"", u"Portuguese"),
     (u"pt-BR", u"Português", u"Brasil", u"Portuguese"),
     (u"pt-PT", u"Português", u"Portugal", u"Portuguese"),
-    (u"qu", u"Runasimi", u"", u""),
-    (u"rm", u"Rumantsch", u"", u""),
-    (u"rn", u"Ikirundi", u"", u""),
     (u"ro-RO", u"Română", u"România", u"Romanian"),
     (u"ru-RU", u"Русский", u"Россия", u"Russian"),
-    (u"rw", u"Kinyarwanda", u"", u""),
-    (u"sd", u"Sindhi", u"", u""),
+    (u"rw", u"Ikinyarwanda", u"", u"Kinyarwanda"),
     (u"sh", u"Srpskohrvatski / Српскохрватски", u"", u"Serbo-Croatian"),
-    (u"si", u"සිංහල", u"", u""),
     (u"sk-SK", u"Slovenčina", u"Slovenská republika", u"Slovak"),
     (u"sl", u"Slovenščina", u"", u"Slovenian"),
-    (u"sn", u"Chishona", u"", u""),
-    (u"so", u"Soomaali", u"", u""),
-    (u"sq", u"Shqip", u"", u""),
     (u"sr", u"Српски / Srpski", u"", u"Serbian"),
-    (u"st", u"Southern Sotho", u"", u""),
-    (u"su", u"Sundanese", u"", u""),
     (u"sv-SE", u"Svenska", u"Sverige", u"Swedish"),
     (u"sw", u"Kiswahili", u"", u""),
-    (u"ta", u"தமிழ்", u"", u""),
-    (u"te", u"తెలుగు", u"", u""),
-    (u"tg", u"Tajik", u"", u""),
+    (u"ta", u"தமிழ்", u"", u"Tamil"),
     (u"th-TH", u"ไทย", u"ไทย", u"Thai"),
-    (u"ti", u"ትግርኛ", u"", u""),
-    (u"tk", u"Turkmen", u"", u""),
+    (u"ti", u"ትግርኛ", u"", u"Tigrinya"),
    (u"tl-PH", u"Filipino", u"Pilipinas", u""),
-    (u"tlh", u"Klingon", u"", u""),
-    (u"tn", u"Tswana", u"", u""),
-    (u"to", u"Lea Fakatonga", u"", u""),
     (u"tr-TR", u"Türkçe", u"Türkiye", u"Turkish"),
-    (u"tt", u"Tatar", u"", u""),
-    (u"tum", u"Tumbuka", u"", u""),
-    (u"tw", u"Twi", u"", u""),
-    (u"ug", u"ئۇيغۇرچە", u"", u""),
+    (u"tt", u"Татарча", u"", u"Tatar"),
     (u"uk-UA", u"Українська", u"Україна", u"Ukrainian"),
     (u"ur", u"اردو", u"", u"Urdu"),
     (u"uz", u"O‘zbek", u"", u"Uzbek"),
@@ -179,13 +122,10 @@
     (u"vo", u"Volapük", u"", u"Volapük"),
     (u"wa", u"Walon", u"", u"Walloon"),
     (u"war", u"Winaray", u"", u"Waray-Waray"),
-    (u"wo", u"Wolof", u"", u""),
-    (u"xh", u"Xhosa", u"", u""),
-    (u"yi", u"ייִדיש", u"", u""),
-    (u"yo", u"Èdè Yorùbá", u"", u""),
+    (u"xh", u"Xhosa", u"", u"Xhosa"),
     (u"zh", u"中文", u"", u"Chinese"),
-    (u"zh-CN", u"中文", u"中国", u"Chinese"),
+    (u"zh-CN", u"中文", u"中国", u""),
     (u"zh-HK", u"中文", u"香港", u"Chinese"),
-    (u"zh-TW", u"中文", u"台湾", u"Chinese"),
-    (u"zu", u"Isizulu", u"", u"")
+    (u"zh-TW", u"中文", u"台湾", u""),
+    (u"zu", u"Isi-Zulu", u"", u"Zulu")
 )

+32 -0  tests/unit/engines/test_bing.py

@@ -86,3 +86,35 @@
         self.assertEqual(results[0]['title'], 'This should be the title')
         self.assertEqual(results[0]['url'], 'http://this.should.be.the.link/')
         self.assertEqual(results[0]['content'], 'This should be the content.')
+
+    def test_fetch_supported_languages(self):
+        html = """<html></html>"""
+        response = mock.Mock(text=html)
+        results = bing._fetch_supported_languages(response)
+        self.assertEqual(type(results), list)
+        self.assertEqual(len(results), 0)
+
+        html = """
+        <html>
+            <body>
+                <form>
+                    <div id="limit-languages">
+                        <div>
+                            <div><input id="es" value="es"></input></div>
+                        </div>
+                        <div>
+                            <div><input id="pt_BR" value="pt_BR"></input></div>
+                            <div><input id="pt_PT" value="pt_PT"></input></div>
+                        </div>
+                    </div>
+                </form>
+            </body>
+        </html>
+        """
+        response = mock.Mock(text=html)
+        languages = bing._fetch_supported_languages(response)
+        self.assertEqual(type(languages), list)
+        self.assertEqual(len(languages), 3)
+        self.assertIn('es', languages)
+        self.assertIn('pt-BR', languages)
+        self.assertIn('pt-PT', languages)

+37 -0  tests/unit/engines/test_dailymotion.py

@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 from collections import defaultdict
 import mock
 from searx.engines import dailymotion
@@ -72,3 +73,39 @@
         results = dailymotion.response(response)
         self.assertEqual(type(results), list)
         self.assertEqual(len(results), 0)
+
+    def test_fetch_supported_languages(self):
+        json = r"""
+        {"list":[{"code":"af","name":"Afrikaans","native_name":"Afrikaans",
+                  "localized_name":"Afrikaans","display_name":"Afrikaans"},
+                 {"code":"ar","name":"Arabic","native_name":"\u0627\u0644\u0639\u0631\u0628\u064a\u0629",
+                  "localized_name":"Arabic","display_name":"Arabic"},
+                 {"code":"la","name":"Latin","native_name":null,
+                  "localized_name":"Latin","display_name":"Latin"}
+        ]}
+        """
+        response = mock.Mock(text=json)
+        languages = dailymotion._fetch_supported_languages(response)
+        self.assertEqual(type(languages), dict)
+        self.assertEqual(len(languages), 3)
+        self.assertIn('af', languages)
+        self.assertIn('ar', languages)
+        self.assertIn('la', languages)
+
+        self.assertEqual(type(languages['af']), dict)
+        self.assertEqual(type(languages['ar']), dict)
+        self.assertEqual(type(languages['la']), dict)
+
+        self.assertIn('name', languages['af'])
+        self.assertIn('name', languages['ar'])
+        self.assertNotIn('name', languages['la'])
+
+        self.assertIn('english_name', languages['af'])
+        self.assertIn('english_name', languages['ar'])
+        self.assertIn('english_name', languages['la'])
+
+        self.assertEqual(languages['af']['name'], 'Afrikaans')
+        self.assertEqual(languages['af']['english_name'], 'Afrikaans')
+        self.assertEqual(languages['ar']['name'], u'العربية')
+        self.assertEqual(languages['ar']['english_name'], 'Arabic')
+        self.assertEqual(languages['la']['english_name'], 'Latin')

+14 -0  tests/unit/engines/test_duckduckgo.py

@@ -84,3 +84,17 @@
         self.assertEqual(results[0]['title'], 'This is the title')
         self.assertEqual(results[0]['url'], u'http://this.should.be.the.link/ű')
         self.assertEqual(results[0]['content'], 'This should be the content.')
+
+    def test_fetch_supported_languages(self):
+        js = """some code...regions:{
+        "wt-wt":"All Results","ar-es":"Argentina","au-en":"Australia","at-de":"Austria","be-fr":"Belgium (fr)"
+        }some more code..."""
+        response = mock.Mock(text=js)
+        languages = duckduckgo._fetch_supported_languages(response)
+        self.assertEqual(type(languages), list)
+        self.assertEqual(len(languages), 5)
+        self.assertIn('wt-WT', languages)
+        self.assertIn('es-AR', languages)
+        self.assertIn('en-AU', languages)
+        self.assertIn('de-AT', languages)
+        self.assertIn('fr-BE', languages)
+        self.assertIn('fr-BE', languages)

+ 25
- 0
tests/unit/engines/test_gigablast.py 查看文件

89
         self.assertEqual(results[0]['title'], 'South by Southwest 2016')
89
         self.assertEqual(results[0]['title'], 'South by Southwest 2016')
90
         self.assertEqual(results[0]['url'], 'www.sxsw.com')
90
         self.assertEqual(results[0]['url'], 'www.sxsw.com')
91
         self.assertEqual(results[0]['content'], 'This should be the content.')
91
         self.assertEqual(results[0]['content'], 'This should be the content.')
92
+
93
+    def test_fetch_supported_languages(self):
94
+        html = """<html></html>"""
95
+        response = mock.Mock(text=html)
96
+        results = gigablast._fetch_supported_languages(response)
97
+        self.assertEqual(type(results), list)
98
+        self.assertEqual(len(results), 0)
99
+
100
+        html = """
101
+        <html>
102
+            <body>
103
+                <span id="menu2">
104
+                    <a href="/search?&rxikd=1&qlang=xx"></a>
105
+                    <a href="/search?&rxikd=1&qlang=en"></a>
106
+                    <a href="/search?&rxikd=1&qlang=fr"></a>
107
+                </span>
108
+            </body>
109
+        </html>
110
+        """
111
+        response = mock.Mock(text=html)
112
+        languages = gigablast._fetch_supported_languages(response)
113
+        self.assertEqual(type(languages), list)
114
+        self.assertEqual(len(languages), 2)
115
+        self.assertIn('en', languages)
116
+        self.assertIn('fr', languages)

+57 -0  tests/unit/engines/test_google.py

@@ -177,3 +177,60 @@
         self.assertEqual(results[0]['title'], '')
         self.assertEqual(results[0]['content'], '')
         self.assertEqual(results[0]['img_src'], 'https://this.is.the.image/image.jpg')
+
+    def test_fetch_supported_languages(self):
+        html = """<html></html>"""
+        response = mock.Mock(text=html)
+        languages = google._fetch_supported_languages(response)
+        self.assertEqual(type(languages), dict)
+        self.assertEqual(len(languages), 0)
+
+        html = u"""
+        <html>
+            <body>
+                <table>
+                    <tbody>
+                        <tr>
+                            <td>
+                                <font>
+                                    <label>
+                                        <span id="ten">English</span>
+                                    </label>
+                                </font>
+                            </td>
+                            <td>
+                                <font>
+                                    <label>
+                                        <span id="tzh-CN">中文 (简体)</span>
+                                    </label>
+                                    <label>
+                                        <span id="tzh-TW">中文 (繁體)</span>
+                                    </label>
+                                </font>
+                            </td>
+                        </tr>
+                    </tbody>
+                </table>
+            </body>
+        </html>
+        """
+        response = mock.Mock(text=html)
+        languages = google._fetch_supported_languages(response)
+        self.assertEqual(type(languages), dict)
+        self.assertEqual(len(languages), 3)
+
+        self.assertIn('en', languages)
+        self.assertIn('zh-CN', languages)
+        self.assertIn('zh-TW', languages)
+
+        self.assertEquals(type(languages['en']), dict)
+        self.assertEquals(type(languages['zh-CN']), dict)
+        self.assertEquals(type(languages['zh-TW']), dict)
+
+        self.assertIn('name', languages['en'])
+        self.assertIn('name', languages['zh-CN'])
+        self.assertIn('name', languages['zh-TW'])
+
+        self.assertEquals(languages['en']['name'], 'English')
+        self.assertEquals(languages['zh-CN']['name'], u'中文 (简体)')
+        self.assertEquals(languages['zh-TW']['name'], u'中文 (繁體)')

+27 -0  tests/unit/engines/test_swisscows.py

@@ -126,3 +126,30 @@
         self.assertEqual(results[2]['url'], 'http://de.wikipedia.org/wiki/Datei:This should.svg')
         self.assertEqual(results[2]['img_src'], 'http://ts2.mm.This/should.png')
         self.assertEqual(results[2]['template'], 'images.html')
+
+    def test_fetch_supported_languages(self):
+        html = """<html></html>"""
+        response = mock.Mock(text=html)
+        languages = swisscows._fetch_supported_languages(response)
+        self.assertEqual(type(languages), list)
+        self.assertEqual(len(languages), 0)
+
+        html = """
+        <html>
+            <div id="regions-popup">
+                <div>
+                    <ul>
+                        <li><a data-val="browser"></a></li>
+                        <li><a data-val="de-CH"></a></li>
+                        <li><a data-val="fr-CH"></a></li>
+                    </ul>
+                </div>
+            </div>
+        </html>
+        """
+        response = mock.Mock(text=html)
+        languages = swisscows._fetch_supported_languages(response)
+        self.assertEqual(type(languages), list)
+        self.assertEqual(len(languages), 3)
+        self.assertIn('de-CH', languages)
+        self.assertIn('fr-CH', languages)

+93 -0  tests/unit/engines/test_wikipedia.py

@@ -164,3 +164,96 @@
         self.assertEqual(len(results), 2)
         self.assertEqual(results[1]['infobox'], u'披頭四樂隊')
         self.assertIn(u'披头士乐队...', results[1]['content'])
+
+    def test_fetch_supported_languages(self):
+        html = u"""<html></html>"""
+        response = mock.Mock(text=html)
+        languages = wikipedia._fetch_supported_languages(response)
+        self.assertEqual(type(languages), dict)
+        self.assertEqual(len(languages), 0)
+
+        html = u"""
+        <html>
+            <body>
+                <div>
+                    <div>
+                        <h3>Table header</h3>
+                        <table class="sortable jquery-tablesorter">
+                            <thead>
+                                <tr>
+                                    <th>N</th>
+                                    <th>Language</th>
+                                    <th>Language (local)</th>
+                                    <th>Wiki</th>
+                                    <th>Articles</th>
+                                </tr>
+                            </thead>
+                            <tbody>
+                                <tr>
+                                    <td>2</td>
+                                    <td><a>Swedish</a></td>
+                                    <td><a>Svenska</a></td>
+                                    <td><a>sv</a></td>
+                                    <td><a><b>3000000</b></a></td>
+                                </tr>
+                                <tr>
+                                    <td>3</td>
+                                    <td><a>Cebuano</a></td>
+                                    <td><a>Sinugboanong Binisaya</a></td>
+                                    <td><a>ceb</a></td>
+                                    <td><a><b>3000000</b></a></td>
+                                </tr>
+                            </tbody>
+                        </table>
+                        <h3>Table header</h3>
+                        <table class="sortable jquery-tablesorter">
+                            <thead>
+                                <tr>
+                                    <th>N</th>
+                                    <th>Language</th>
+                                    <th>Language (local)</th>
+                                    <th>Wiki</th>
+                                    <th>Articles</th>
+                                </tr>
+                            </thead>
+                            <tbody>
+                                <tr>
+                                    <td>2</td>
+                                    <td><a>Norwegian (Bokmål)</a></td>
+                                    <td><a>Norsk (Bokmål)</a></td>
+                                    <td><a>no</a></td>
+                                    <td><a><b>100000</b></a></td>
+                                </tr>
+                            </tbody>
+                        </table>
+                    </div>
+                </div>
+            </body>
+        </html>
+        """
+        response = mock.Mock(text=html)
+        languages = wikipedia._fetch_supported_languages(response)
+        self.assertEqual(type(languages), dict)
+        self.assertEqual(len(languages), 3)
+
+        self.assertIn('sv', languages)
+        self.assertIn('ceb', languages)
+        self.assertIn('no', languages)
+
+        self.assertEqual(type(languages['sv']), dict)
+        self.assertEqual(type(languages['ceb']), dict)
+        self.assertEqual(type(languages['no']), dict)
+
+        self.assertIn('name', languages['sv'])
+        self.assertIn('english_name', languages['sv'])
+        self.assertIn('articles', languages['sv'])
+
+        self.assertEqual(languages['sv']['name'], 'Svenska')
+        self.assertEqual(languages['sv']['english_name'], 'Swedish')
+        self.assertEqual(languages['sv']['articles'], 3000000)
+        self.assertEqual(languages['ceb']['name'], 'Sinugboanong Binisaya')
+        self.assertEqual(languages['ceb']['english_name'], 'Cebuano')
+        self.assertEqual(languages['ceb']['articles'], 3000000)
+        self.assertEqual(languages['no']['name'], u'Norsk (Bokmål)')
+        self.assertEqual(languages['no']['english_name'], u'Norwegian (Bokmål)')
+        self.assertEqual(languages['no']['articles'], 100000)

+30 -0  tests/unit/engines/test_yahoo.py

@@ -147,3 +147,33 @@
         results = yahoo.response(response)
         self.assertEqual(type(results), list)
         self.assertEqual(len(results), 0)
+
+    def test_fetch_supported_languages(self):
+        html = """<html></html>"""
+        response = mock.Mock(text=html)
+        results = yahoo._fetch_supported_languages(response)
+        self.assertEqual(type(results), list)
+        self.assertEqual(len(results), 0)
+
+        html = """
+        <html>
+            <div>
+                <div id="yschlang">
+                    <span>
+                        <label><input value="lang_ar"></input></label>
+                    </span>
+                    <span>
+                        <label><input value="lang_zh_chs"></input></label>
+                        <label><input value="lang_zh_cht"></input></label>
+                    </span>
+                </div>
+            </div>
+        </html>
+        """
+        response = mock.Mock(text=html)
+        languages = yahoo._fetch_supported_languages(response)
+        self.assertEqual(type(languages), list)
+        self.assertEqual(len(languages), 3)
+        self.assertIn('ar', languages)
+        self.assertIn('zh-chs', languages)
+        self.assertIn('zh-cht', languages)

+8 -4  utils/fetch_languages.py

@@ -84,7 +84,7 @@
 
     # write json file
     f = io.open(engines_languages_file, "w", encoding="utf-8")
-    f.write(unicode(dumps(engines_languages, indent=4, ensure_ascii=False, encoding="utf-8")))
+    f.write(unicode(dumps(engines_languages, ensure_ascii=False, encoding="utf-8")))
     f.close()
 
 
@@ -110,18 +110,22 @@
                 else:
                     languages[locale] = {}
 
-    # get locales that have no name yet
+    # get locales that have no name or country yet
     for locale in languages.keys():
         if not languages[locale].get('name'):
-            # try to get language and country names
+            # try to get language names
            name = languages.get(locale.split('-')[0], {}).get('name', None)
            if name:
                 languages[locale]['name'] = name
-                languages[locale]['country'] = get_country_name(locale) or ''
                 languages[locale]['english_name'] = languages.get(locale.split('-')[0], {}).get('english_name', '')
             else:
                 # filter out locales with no name
                 del languages[locale]
+                continue
+
+        # try to get country name
+        if locale.find('-') > 0 and not languages[locale].get('country'):
+            languages[locale]['country'] = get_country_name(locale) or ''
 
 
 # Remove countryless language if language is featured in only one country.
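
The reworked loop separates the two fallbacks: a locale that already has a language name no longer skips the country lookup, and only locales whose name cannot be resolved at all are dropped. A simplified, self-contained sketch of the corrected flow (get_country_name() is stubbed here; the real helper is defined elsewhere in utils/fetch_languages.py):

# Simplified sketch of the name/country fallback after this change.
# get_country_name() is a stub standing in for the real helper.
def get_country_name(locale):
    return {'pt-BR': 'Brasil', 'de-AT': u'Österreich'}.get(locale, '')


languages = {
    'pt': {'name': u'Português', 'english_name': 'Portuguese'},
    'pt-BR': {'name': u'Português'},  # already has a name, but no country yet
    'xx-YY': {},                      # no name anywhere -> dropped
}

for locale in list(languages.keys()):
    if not languages[locale].get('name'):
        # inherit the name from the base language if possible
        name = languages.get(locale.split('-')[0], {}).get('name', None)
        if name:
            languages[locale]['name'] = name
            languages[locale]['english_name'] = languages.get(locale.split('-')[0], {}).get('english_name', '')
        else:
            del languages[locale]
            continue

    # the country is now filled in for every hyphenated locale,
    # not only for those whose name had to be inherited
    if locale.find('-') > 0 and not languages[locale].get('country'):
        languages[locale]['country'] = get_country_name(locale) or ''

# languages['pt-BR'] -> {'name': u'Português', 'country': 'Brasil'}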