Parcourir la source

[enh] search language support init

asciimoo il y a 11 ans
Parent
révision
2a788c8f29

+ 11
- 1
searx/engines/__init__.py Voir le fichier

53
 for engine_data in settings['engines']:
53
 for engine_data in settings['engines']:
54
     engine_name = engine_data['engine']
54
     engine_name = engine_data['engine']
55
     engine = load_module(engine_name + '.py')
55
     engine = load_module(engine_name + '.py')
56
+
56
     if not hasattr(engine, 'paging'):
57
     if not hasattr(engine, 'paging'):
57
         engine.paging = False
58
         engine.paging = False
59
+
60
+    if not hasattr(engine, 'language_support'):
61
+        #engine.language_support = False
62
+        engine.language_support = True
63
+
58
     for param_name in engine_data:
64
     for param_name in engine_data:
59
         if param_name == 'engine':
65
         if param_name == 'engine':
60
             continue
66
             continue
158
     return sorted(results, key=itemgetter('score'), reverse=True)
164
     return sorted(results, key=itemgetter('score'), reverse=True)
159
 
165
 
160
 
166
 
161
-def search(query, request, selected_engines, pageno=1):
167
+def search(query, request, selected_engines, pageno=1, lang='all'):
162
     global engines, categories, number_of_searches
168
     global engines, categories, number_of_searches
163
     requests = []
169
     requests = []
164
     results = {}
170
     results = {}
176
         if pageno > 1 and not engine.paging:
182
         if pageno > 1 and not engine.paging:
177
             continue
183
             continue
178
 
184
 
185
+        if lang != 'all' and not engine.language_support:
186
+            continue
187
+
179
         request_params = default_request_params()
188
         request_params = default_request_params()
180
         request_params['headers']['User-Agent'] = user_agent
189
         request_params['headers']['User-Agent'] = user_agent
181
         request_params['category'] = selected_engine['category']
190
         request_params['category'] = selected_engine['category']
182
         request_params['started'] = datetime.now()
191
         request_params['started'] = datetime.now()
183
         request_params['pageno'] = pageno
192
         request_params['pageno'] = pageno
193
+        request_params['language'] = lang
184
         request_params = engine.request(query, request_params)
194
         request_params = engine.request(query, request_params)
185
 
195
 
186
         callback = make_callback(
196
         callback = make_callback(

+ 9
- 3
searx/engines/bing.py Voir le fichier

4
 
4
 
5
 base_url = 'http://www.bing.com/'
5
 base_url = 'http://www.bing.com/'
6
 search_string = 'search?{query}&first={offset}'
6
 search_string = 'search?{query}&first={offset}'
7
-locale = 'en-US'  # see http://msdn.microsoft.com/en-us/library/dd251064.aspx
8
-
9
 paging = True
7
 paging = True
8
+language_support = True
10
 
9
 
11
 
10
 
12
 def request(query, params):
11
 def request(query, params):
13
     offset = (params['pageno'] - 1) * 10 + 1
12
     offset = (params['pageno'] - 1) * 10 + 1
13
+    if params['language'] == 'all':
14
+        language = 'en-US'
15
+    else:
16
+        language = params['language'].replace('_', '-')
14
     search_path = search_string.format(
17
     search_path = search_string.format(
15
-        query=urlencode({'q': query, 'setmkt': locale}),
18
+        query=urlencode({'q': query, 'setmkt': language}),
16
         offset=offset)
19
         offset=offset)
20
+
21
+    params['cookies']['SRCHHPGUSR'] = \
22
+        'NEWWND=0&NRSLT=-1&SRCHLANG=' + language.split('-')[0]
17
     #if params['category'] == 'images':
23
     #if params['category'] == 'images':
18
     #    params['url'] = base_url + 'images/' + search_path
24
     #    params['url'] = base_url + 'images/' + search_path
19
     params['url'] = base_url + search_path
25
     params['url'] = base_url + search_path

+ 9
- 4
searx/engines/google.py Voir le fichier

5
 
5
 
6
 categories = ['general']
6
 categories = ['general']
7
 
7
 
8
-paging = True
9
-
10
 url = 'https://ajax.googleapis.com/'
8
 url = 'https://ajax.googleapis.com/'
11
-search_url = url + 'ajax/services/search/web?v=1.0&start={offset}&rsz=large&safe=off&filter=off&{query}'  # noqa
9
+search_url = url + 'ajax/services/search/web?v=2.0&start={offset}&rsz=large&safe=off&filter=off&{query}&hl={language}'  # noqa
10
+
11
+paging = True
12
+language_support = True
12
 
13
 
13
 
14
 
14
 def request(query, params):
15
 def request(query, params):
15
     offset = (params['pageno'] - 1) * 8
16
     offset = (params['pageno'] - 1) * 8
17
+    language = 'en-US'
18
+    if params['language'] != 'all':
19
+        language = params['language'].replace('_', '-')
16
     params['url'] = search_url.format(offset=offset,
20
     params['url'] = search_url.format(offset=offset,
17
-                                      query=urlencode({'q': query}))
21
+                                      query=urlencode({'q': query}),
22
+                                      language=language)
18
     return params
23
     return params
19
 
24
 
20
 
25
 

+ 30
- 0
searx/engines/wikipedia.py Voir le fichier

1
+from json import loads
2
+from urllib import urlencode, quote
3
+
4
# Base URL template; ``{language}`` is filled with the short language code
# chosen in request() (e.g. 'en').
url = 'https://{language}.wikipedia.org/'

# API search URL template built on top of ``url``; request() fills in
# ``{query}`` (a urlencoded ``srsearch=...`` pair) and ``{offset}``.
search_url = url + 'w/api.php?action=query&list=search&{query}&srprop=timestamp&format=json&sroffset={offset}'  # noqa

# Maximum number of hits response() keeps from one API reply.
number_of_results = 10

# The search dispatcher skips engines whose ``language_support`` is false
# when a specific search language is selected.
language_support = True
11
+
12
+
13
def request(query, params):
    """Fill in ``params['url']`` for a Wikipedia full-text search.

    The language edition comes from ``params['language']``: ``'all'`` selects
    English, any other value is cut at the first underscore
    (``'de_AT'`` -> ``'de'``).  The shortened code is written back into
    ``params['language']`` -- presumably so response() can read it again via
    ``resp.search_params`` when building result links.

    Returns the same ``params`` dict.
    """
    # The offset advances by ten hits per (1-based) page.
    start = 10 * (params['pageno'] - 1)

    requested = params['language']
    language = 'en' if requested == 'all' else requested.split('_')[0]
    params['language'] = language

    params['url'] = search_url.format(
        language=language,
        offset=start,
        query=urlencode({'srsearch': query}))
    return params
24
+
25
+
26
def response(resp):
    """Turn a JSON search reply into a list of result dicts.

    The hit list is read from ``query.search`` in the decoded body; missing
    keys yield an empty list.  At most ``number_of_results`` hits are kept,
    and each becomes ``{'url': ..., 'title': ...}`` where the URL points at
    the article under ``wiki/`` on the language edition recorded in
    ``resp.search_params['language']``.
    """
    payload = loads(resp.text)
    hits = payload.get('query', {}).get('search', [])
    limit = int(number_of_results)

    results = []
    for hit in hits[:limit]:
        base = url.format(language=resp.search_params['language'])
        # Spaces become underscores, then the UTF-8 bytes of the title are
        # percent-encoded for use in the path.
        page = quote(hit['title'].replace(' ', '_').encode('utf-8'))
        results.append({'url': base + 'wiki/' + page,
                        'title': hit['title']})
    return results

+ 59
- 0
searx/languages.py Voir le fichier

1
# Search languages offered in the preferences page.
#
# Each entry is a 3-tuple ``(locale id, language name, country name)``.  The
# locale id (first element) is the value stored in the ``language`` cookie
# and validated against this table by the web application; engines receive
# it as ``params['language']``.
#
# NOTE(review): "sl_SL" looks suspicious -- the ISO 3166 country code for
# Slovenia is "SI".  Confirm against the upstream market-code list the
# engines expect before changing it.
language_codes = (
    ("ar_XA", "Arabic", "Arabia"),
    ("bg_BG", "Bulgarian", "Bulgaria"),
    ("cs_CZ", "Czech", "Czech Republic"),
    ("de_DE", "German", "Germany"),
    ("da_DK", "Danish", "Denmark"),
    ("de_AT", "German", "Austria"),
    ("de_CH", "German", "Switzerland"),
    ("el_GR", "Greek", "Greece"),
    ("en_AU", "English", "Australia"),
    ("en_CA", "English", "Canada"),
    ("en_GB", "English", "United Kingdom"),
    ("en_ID", "English", "Indonesia"),
    ("en_IE", "English", "Ireland"),
    ("en_IN", "English", "India"),
    ("en_MY", "English", "Malaysia"),
    ("en_NZ", "English", "New Zealand"),
    ("en_PH", "English", "Philippines"),
    ("en_SG", "English", "Singapore"),
    ("en_US", "English", "United States"),
    ("en_XA", "English", "Arabia"),
    ("en_ZA", "English", "South Africa"),
    ("es_AR", "Spanish", "Argentina"),
    ("es_CL", "Spanish", "Chile"),
    ("es_ES", "Spanish", "Spain"),
    ("es_MX", "Spanish", "Mexico"),
    ("es_US", "Spanish", "United States"),
    ("es_XL", "Spanish", "Latin America"),
    ("et_EE", "Estonian", "Estonia"),
    ("fi_FI", "Finnish", "Finland"),
    ("fr_BE", "French", "Belgium"),
    ("fr_CA", "French", "Canada"),
    ("fr_CH", "French", "Switzerland"),
    ("fr_FR", "French", "France"),
    ("he_IL", "Hebrew", "Israel"),
    ("hr_HR", "Croatian", "Croatia"),
    ("hu_HU", "Hungarian", "Hungary"),
    ("it_IT", "Italian", "Italy"),
    ("ja_JP", "Japanese", "Japan"),
    ("ko_KR", "Korean", "Korea"),
    ("lt_LT", "Lithuanian", "Lithuania"),
    ("lv_LV", "Latvian", "Latvia"),
    ("nb_NO", "Norwegian", "Norway"),
    ("nl_BE", "Dutch", "Belgium"),
    ("nl_NL", "Dutch", "Netherlands"),
    ("pl_PL", "Polish", "Poland"),
    ("pt_BR", "Portuguese", "Brazil"),
    ("pt_PT", "Portuguese", "Portugal"),
    ("ro_RO", "Romanian", "Romania"),
    ("ru_RU", "Russian", "Russia"),
    ("sk_SK", "Slovak", "Slovak Republic"),
    ("sl_SL", "Slovenian", "Slovenia"),
    ("sv_SE", "Swedish", "Sweden"),
    ("th_TH", "Thai", "Thailand"),
    ("tr_TR", "Turkish", "Turkey"),
    ("uk_UA", "Ukrainian", "Ukraine"),
    ("zh_CN", "Chinese", "China"),
    ("zh_HK", "Chinese", "Hong Kong SAR"),
    ("zh_TW", "Chinese", "Taiwan"))

+ 1
- 2
searx/settings.yml Voir le fichier

7
 
7
 
8
 engines:
8
 engines:
9
   - name : wikipedia
9
   - name : wikipedia
10
-    engine : mediawiki
11
-    url    : https://en.wikipedia.org/
10
+    engine : wikipedia
12
     number_of_results : 1
11
     number_of_results : 1
13
     paging : False
12
     paging : False
14
 
13
 

+ 2
- 1
searx/static/css/style.css Voir le fichier

152
 #results { margin: 10px; padding: 0; margin-bottom: 20px; }
152
 #results { margin: 10px; padding: 0; margin-bottom: 20px; }
153
 
153
 
154
 #sidebar { position: absolute; left: 54em; width: 12em; margin: 0 2px 5px 5px; padding: 0 2px 2px 2px; }
154
 #sidebar { position: absolute; left: 54em; width: 12em; margin: 0 2px 5px 5px; padding: 0 2px 2px 2px; }
155
-#suggestions span { display: block; margin: 0 2px 10px 2px; padding: 0; }
155
+#suggestions span { display: block; margin: 0 2px 2px 2px; padding: 0; }
156
 #suggestions form { display: block; }
156
 #suggestions form { display: block; }
157
 #suggestions input { padding: 2px 6px; margin: 2px 4px;  font-size: 0.8em; display: inline-block; background: #3498DB; color: #FFFFFF; border-radius: 4px; border: 0; cursor: pointer; }
157
 #suggestions input { padding: 2px 6px; margin: 2px 4px;  font-size: 0.8em; display: inline-block; background: #3498DB; color: #FFFFFF; border-radius: 4px; border: 0; cursor: pointer; }
158
 
158
 
177
 }
177
 }
178
 
178
 
179
 #apis {
179
 #apis {
180
+    margin-top: 8px;
180
     clear: both;
181
     clear: both;
181
 }
182
 }
182
 
183
 

+ 11
- 0
searx/templates/preferences.html Voir le fichier

12
         </p>
12
         </p>
13
     </fieldset>
13
     </fieldset>
14
     <fieldset>
14
     <fieldset>
15
+        <legend>{{ _('Search language') }}</legend>
16
+        <p>
17
+        <select name='language'>
18
+            <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Automatic') }}</option>
19
+            {% for lang_id,lang_name,country_name in language_codes %}
20
+            <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{{ lang_name }} ({{ country_name }})</option>
21
+            {% endfor %}
22
+        </select>
23
+        </p>
24
+    </fieldset>
25
+    <fieldset>
15
         <legend>{{ _('Interface language') }}</legend>
26
         <legend>{{ _('Interface language') }}</legend>
16
         <p>
27
         <p>
17
         <select name='locale'>
28
         <select name='locale'>

+ 30
- 2
searx/webapp.py Voir le fichier

29
 from searx.engines import search, categories, engines, get_engines_stats
29
 from searx.engines import search, categories, engines, get_engines_stats
30
 from searx.utils import UnicodeWriter
30
 from searx.utils import UnicodeWriter
31
 from searx.utils import highlight_content, html_to_text
31
 from searx.utils import highlight_content, html_to_text
32
+from searx.languages import language_codes
32
 
33
 
33
 from flask.ext.babel import Babel
34
 from flask.ext.babel import Babel
34
 
35
 
117
 @app.route('/', methods=['GET', 'POST'])
118
 @app.route('/', methods=['GET', 'POST'])
118
 def index():
119
 def index():
119
     paging = False
120
     paging = False
121
+    lang = 'all'
122
+
123
+    if request.cookies.get('language')\
124
+       and request.cookies['language'] in (x[0] for x in language_codes):
125
+        lang = request.cookies['language']
120
 
126
 
121
     if request.method == 'POST':
127
     if request.method == 'POST':
122
         request_data = request.form
128
         request_data = request.form
159
                                      'name': x.name}
165
                                      'name': x.name}
160
                                     for x in categories[categ])
166
                                     for x in categories[categ])
161
 
167
 
162
-    results, suggestions = search(query, request, selected_engines, pageno)
168
+    results, suggestions = search(query,
169
+                                  request,
170
+                                  selected_engines,
171
+                                  pageno,
172
+                                  lang)
163
 
173
 
164
     for result in results:
174
     for result in results:
165
         if not paging and engines[result['engine']].paging:
175
         if not paging and engines[result['engine']].paging:
232
 
242
 
233
 @app.route('/preferences', methods=['GET', 'POST'])
243
 @app.route('/preferences', methods=['GET', 'POST'])
234
 def preferences():
244
 def preferences():
245
+    lang = None
246
+
247
+    if request.cookies.get('language')\
248
+       and request.cookies['language'] in (x[0] for x in language_codes):
249
+        lang = request.cookies['language']
235
 
250
 
236
     if request.method == 'POST':
251
     if request.method == 'POST':
237
         selected_categories = []
252
         selected_categories = []
244
                 selected_categories.append(category)
259
                 selected_categories.append(category)
245
             elif pd_name == 'locale' and pd in settings['locales']:
260
             elif pd_name == 'locale' and pd in settings['locales']:
246
                 locale = pd
261
                 locale = pd
262
+            elif pd_name == 'language' and (pd == 'all' or
263
+                                            pd in (x[0] for
264
+                                                   x in language_codes)):
265
+                lang = pd
247
 
266
 
248
         resp = make_response(redirect('/'))
267
         resp = make_response(redirect('/'))
249
 
268
 
254
                 max_age=60 * 60 * 24 * 7 * 4
273
                 max_age=60 * 60 * 24 * 7 * 4
255
             )
274
             )
256
 
275
 
276
+        if lang:
277
+            # cookie max age: 4 weeks
278
+            resp.set_cookie(
279
+                'language', lang,
280
+                max_age=60 * 60 * 24 * 7 * 4
281
+            )
282
+
257
         if selected_categories:
283
         if selected_categories:
258
             # cookie max age: 4 weeks
284
             # cookie max age: 4 weeks
259
             resp.set_cookie(
285
             resp.set_cookie(
263
         return resp
289
         return resp
264
     return render('preferences.html',
290
     return render('preferences.html',
265
                   locales=settings['locales'],
291
                   locales=settings['locales'],
266
-                  current_locale=get_locale())
292
+                  current_locale=get_locale(),
293
+                  current_language=lang or 'all',
294
+                  language_codes=language_codes)
267
 
295
 
268
 
296
 
269
 @app.route('/stats', methods=['GET'])
297
 @app.route('/stats', methods=['GET'])