瀏覽代碼

add language support for qwant

closes issue #863
marc 8 年之前
父節點
當前提交
805fb02ed1
共有 5 個文件被更改,包括 54 次插入和 9 次删除
  1. 1
    1
      searx/data/engines_languages.json
  2. 25
    1
      searx/engines/qwant.py
  3. 3
    5
      searx/languages.py
  4. 21
    0
      tests/unit/engines/test_qwant.py
  5. 4
    2
      utils/fetch_languages.py

+ 1
- 1
searx/data/engines_languages.json
文件差異過大導致無法顯示
查看文件


+ 25
- 1
searx/engines/qwant.py 查看文件

@@ -20,6 +20,7 @@ from searx.utils import html_to_text
20 20
 categories = None
21 21
 paging = True
22 22
 language_support = True
23
+supported_languages_url = 'https://qwant.com/region'
23 24
 
24 25
 category_to_keyword = {'general': 'web',
25 26
                        'images': 'images',
@@ -46,6 +47,13 @@ def request(query, params):
46 47
 
47 48
     # add language tag if specified
48 49
     if params['language'] != 'all':
50
+        if params['language'].find('-') < 0:
51
+            # tries to get a country code from language
52
+            for lang in supported_languages:
53
+                lc = lang.split('-')
54
+                if params['language'] == lc[0]:
55
+                    params['language'] = lang
56
+                    break
49 57
         params['url'] += '&locale=' + params['language'].replace('-', '_').lower()
50 58
 
51 59
     return params
@@ -96,5 +104,21 @@ def response(resp):
96 104
                             'publishedDate': published_date,
97 105
                             'content': content})
98 106
 
99
-    # return results
100 107
     return results
108
+
109
+
110
+# get supported languages from their site
111
+def _fetch_supported_languages(resp):
112
+    # list of regions is embedded in page as a js object
113
+    response_text = resp.text
114
+    response_text = response_text[response_text.find('regionalisation'):]
115
+    response_text = response_text[response_text.find('{'):response_text.find(');')]
116
+
117
+    regions_json = loads(response_text)
118
+
119
+    supported_languages = []
120
+    for lang in regions_json['languages'].values():
121
+        for country in lang['countries']:
122
+            supported_languages.append(lang['code'] + '-' + country)
123
+
124
+    return supported_languages

+ 3
- 5
searx/languages.py 查看文件

@@ -5,9 +5,6 @@
5 5
 language_codes = (
6 6
     (u"ar-SA", u"العربية", u"", u"Arabic"),
7 7
     (u"bg-BG", u"Български", u"", u"Bulgarian"),
8
-    (u"ca", u"Català", u"", u"Catalan"),
9
-    (u"ca-CT", u"Català", u"", u"Catalan"),
10
-    (u"ca-ES", u"Català", u"Espanya", u"Catalan"),
11 8
     (u"cs-CZ", u"Čeština", u"", u"Czech"),
12 9
     (u"da-DK", u"Dansk", u"", u"Danish"),
13 10
     (u"de", u"Deutsch", u"", u"German"),
@@ -18,7 +15,9 @@ language_codes = (
18 15
     (u"en", u"English", u"", u"English"),
19 16
     (u"en-AU", u"English", u"Australia", u"English"),
20 17
     (u"en-CA", u"English", u"Canada", u"English"),
18
+    (u"en-CY", u"English", u"Cyprus", u"English"),
21 19
     (u"en-GB", u"English", u"United Kingdom", u"English"),
20
+    (u"en-GD", u"English", u"Grenada", u"English"),
22 21
     (u"en-ID", u"English", u"Indonesia", u"English"),
23 22
     (u"en-IE", u"English", u"Ireland", u"English"),
24 23
     (u"en-IN", u"English", u"India", u"English"),
@@ -54,10 +53,10 @@ language_codes = (
54 53
     (u"ko-KR", u"한국어", u"", u"Korean"),
55 54
     (u"lt-LT", u"Lietuvių", u"", u"Lithuanian"),
56 55
     (u"lv-LV", u"Latviešu", u"", u"Latvian"),
56
+    (u"ms-MY", u"Bahasa Melayu", u"", u"Malay"),
57 57
     (u"nl", u"Nederlands", u"", u"Dutch"),
58 58
     (u"nl-BE", u"Nederlands", u"België", u"Dutch"),
59 59
     (u"nl-NL", u"Nederlands", u"Nederland", u"Dutch"),
60
-    (u"no-NO", u"Norsk", u"", u"Norwegian"),
61 60
     (u"pl-PL", u"Polski", u"", u"Polish"),
62 61
     (u"pt", u"Português", u"", u"Portuguese"),
63 62
     (u"pt-BR", u"Português", u"Brasil", u"Portuguese"),
@@ -69,7 +68,6 @@ language_codes = (
69 68
     (u"sv-SE", u"Svenska", u"", u"Swedish"),
70 69
     (u"th-TH", u"ไทย", u"", u"Thai"),
71 70
     (u"tr-TR", u"Türkçe", u"", u"Turkish"),
72
-    (u"uk-UA", u"Українська", u"", u"Ukrainian"),
73 71
     (u"vi-VN", u"Tiếng Việt", u"", u"Vietnamese"),
74 72
     (u"zh", u"中文", u"", u"Chinese"),
75 73
     (u"zh-CN", u"中文", u"中国", u"Chinese"),

+ 21
- 0
tests/unit/engines/test_qwant.py 查看文件

@@ -25,6 +25,11 @@ class TestQwantEngine(SearxTestCase):
25 25
         self.assertFalse('fr' in params['url'])
26 26
         self.assertIn('news', params['url'])
27 27
 
28
+        qwant.supported_languages = ['en', 'fr-FR', 'fr-CA']
29
+        dicto['language'] = 'fr'
30
+        params = qwant.request(query, dicto)
31
+        self.assertIn('fr_fr', params['url'])
32
+
28 33
     def test_response(self):
29 34
         self.assertRaises(AttributeError, qwant.response, None)
30 35
         self.assertRaises(AttributeError, qwant.response, [])
@@ -315,3 +320,19 @@ class TestQwantEngine(SearxTestCase):
315 320
         results = qwant.response(response)
316 321
         self.assertEqual(type(results), list)
317 322
         self.assertEqual(len(results), 0)
323
+
324
+    def test_fetch_supported_languages(self):
325
+        page = """some code...
326
+        config_set('project.regionalisation', {"continents":{},"languages":
327
+        {"de":{"code":"de","name":"Deutsch","countries":["DE","CH","AT"]},
328
+        "it":{"code":"it","name":"Italiano","countries":["IT","CH"]}}});
329
+        some more code..."""
330
+        response = mock.Mock(text=page)
331
+        languages = qwant._fetch_supported_languages(response)
332
+        self.assertEqual(type(languages), list)
333
+        self.assertEqual(len(languages), 5)
334
+        self.assertIn('de-DE', languages)
335
+        self.assertIn('de-CH', languages)
336
+        self.assertIn('de-AT', languages)
337
+        self.assertIn('it-IT', languages)
338
+        self.assertIn('it-CH', languages)

+ 4
- 2
utils/fetch_languages.py 查看文件

@@ -14,7 +14,8 @@ from json import loads, dumps
14 14
 import io
15 15
 from sys import path
16 16
 path.append('../searx')  # noqa
17
-from searx.engines import engines
17
+from searx import settings
18
+from searx.engines import initialize_engines, engines
18 19
 
19 20
 # Geonames API for country names.
20 21
 geonames_user = ''  # ADD USER NAME HERE
@@ -77,6 +78,7 @@ def get_country_name(locale):
77 78
 
78 79
 # Fetches supported languages for each engine and writes json file with those.
79 80
 def fetch_supported_languages():
81
+    initialize_engines(settings['engines'])
80 82
     for engine_name in engines:
81 83
         if hasattr(engines[engine_name], 'fetch_supported_languages'):
82 84
             try:
@@ -117,7 +119,7 @@ def join_language_lists():
117 119
                     languages[lang]['counter'].append(engine_name)
118 120
 
119 121
     # filter list to include only languages supported by most engines
120
-    min_supported_engines = int(0.75 * len(engines_languages))
122
+    min_supported_engines = int(0.70 * len(engines_languages))
121 123
     languages = {code: lang for code, lang
122 124
                  in languages.iteritems()
123 125
                  if len(lang.get('counter', [])) >= min_supported_engines or