Sfoglia il codice sorgente

[fix] url path unquoted check to avoid duplications

Adam Tauber 10 anni fa
parent
commit
a07b2b514c
1 file modificato con 8 aggiunte e 2 eliminazioni
  1. searx/engines/__init__.py (+8, -2)

+ 8
- 2
searx/engines/__init__.py Vedi File

@@ -21,7 +21,7 @@ import sys
21 21
 from imp import load_source
22 22
 from itertools import izip_longest, chain
23 23
 from operator import itemgetter
24
-from urlparse import urlparse
24
+from urlparse import urlparse, unquote
25 25
 from datetime import datetime
26 26
 import grequests
27 27
 from flask.ext.babel import gettext
@@ -153,7 +153,9 @@ def score_results(results):
153 153
     results = []
154 154
     # deduplication + scoring
155 155
     for i, res in enumerate(flat_res):
156
+
156 157
         res['parsed_url'] = urlparse(res['url'])
158
+
157 159
         res['host'] = res['parsed_url'].netloc
158 160
 
159 161
         if res['host'].startswith('www.'):
@@ -172,7 +174,7 @@ def score_results(results):
172 174
             p1 = res['parsed_url'].path[:-1] if res['parsed_url'].path.endswith('/') else res['parsed_url'].path  # noqa
173 175
             p2 = new_res['parsed_url'].path[:-1] if new_res['parsed_url'].path.endswith('/') else new_res['parsed_url'].path  # noqa
174 176
             if res['host'] == new_res['host'] and\
175
-               p1 == p2 and\
177
+               unquote(p1) == unquote(p2) and\
176 178
                res['parsed_url'].query == new_res['parsed_url'].query and\
177 179
                res.get('template') == new_res.get('template'):
178 180
                 duplicated = new_res
@@ -222,6 +224,10 @@ def search(query, request, selected_engines, pageno=1, lang='all'):
222 224
         request_params['language'] = lang
223 225
         request_params = engine.request(query.encode('utf-8'), request_params)
224 226
 
227
+        if request_params['url'] is None:
228
+            # TODO add support of offline engines
229
+            pass
230
+
225 231
         callback = make_callback(
226 232
             selected_engine['name'],
227 233
             results,