Browse Source

[fix] url path unquoted check to avoid duplications

Adam Tauber 10 years ago
parent
commit
a07b2b514c
1 changed file with 8 additions and 2 deletions
  1. 8
    2
      searx/engines/__init__.py

+ 8
- 2
searx/engines/__init__.py View File

21
 from imp import load_source
21
 from imp import load_source
22
 from itertools import izip_longest, chain
22
 from itertools import izip_longest, chain
23
 from operator import itemgetter
23
 from operator import itemgetter
24
-from urlparse import urlparse
24
+from urlparse import urlparse, unquote
25
 from datetime import datetime
25
 from datetime import datetime
26
 import grequests
26
 import grequests
27
 from flask.ext.babel import gettext
27
 from flask.ext.babel import gettext
153
     results = []
153
     results = []
154
     # deduplication + scoring
154
     # deduplication + scoring
155
     for i, res in enumerate(flat_res):
155
     for i, res in enumerate(flat_res):
156
+
156
         res['parsed_url'] = urlparse(res['url'])
157
         res['parsed_url'] = urlparse(res['url'])
158
+
157
         res['host'] = res['parsed_url'].netloc
159
         res['host'] = res['parsed_url'].netloc
158
 
160
 
159
         if res['host'].startswith('www.'):
161
         if res['host'].startswith('www.'):
172
             p1 = res['parsed_url'].path[:-1] if res['parsed_url'].path.endswith('/') else res['parsed_url'].path  # noqa
174
             p1 = res['parsed_url'].path[:-1] if res['parsed_url'].path.endswith('/') else res['parsed_url'].path  # noqa
173
             p2 = new_res['parsed_url'].path[:-1] if new_res['parsed_url'].path.endswith('/') else new_res['parsed_url'].path  # noqa
175
             p2 = new_res['parsed_url'].path[:-1] if new_res['parsed_url'].path.endswith('/') else new_res['parsed_url'].path  # noqa
174
             if res['host'] == new_res['host'] and\
176
             if res['host'] == new_res['host'] and\
175
-               p1 == p2 and\
177
+               unquote(p1) == unquote(p2) and\
176
                res['parsed_url'].query == new_res['parsed_url'].query and\
178
                res['parsed_url'].query == new_res['parsed_url'].query and\
177
                res.get('template') == new_res.get('template'):
179
                res.get('template') == new_res.get('template'):
178
                 duplicated = new_res
180
                 duplicated = new_res
222
         request_params['language'] = lang
224
         request_params['language'] = lang
223
         request_params = engine.request(query.encode('utf-8'), request_params)
225
         request_params = engine.request(query.encode('utf-8'), request_params)
224
 
226
 
227
+        if request_params['url'] is None:
228
+            # TODO add support of offline engines
229
+            pass
230
+
225
         callback = make_callback(
231
         callback = make_callback(
226
             selected_engine['name'],
232
             selected_engine['name'],
227
             results,
233
             results,