|
@@ -21,7 +21,7 @@ import sys
|
21
|
21
|
from imp import load_source
|
22
|
22
|
from itertools import izip_longest, chain
|
23
|
23
|
from operator import itemgetter
|
24
|
|
-from urlparse import urlparse
|
|
24
|
+from urlparse import urlparse, unquote
|
25
|
25
|
from datetime import datetime
|
26
|
26
|
import grequests
|
27
|
27
|
from flask.ext.babel import gettext
|
|
@@ -153,7 +153,9 @@ def score_results(results):
|
153
|
153
|
results = []
|
154
|
154
|
# deduplication + scoring
|
155
|
155
|
for i, res in enumerate(flat_res):
|
|
156
|
+
|
156
|
157
|
res['parsed_url'] = urlparse(res['url'])
|
|
158
|
+
|
157
|
159
|
res['host'] = res['parsed_url'].netloc
|
158
|
160
|
|
159
|
161
|
if res['host'].startswith('www.'):
|
|
@@ -172,7 +174,7 @@ def score_results(results):
|
172
|
174
|
p1 = res['parsed_url'].path[:-1] if res['parsed_url'].path.endswith('/') else res['parsed_url'].path # noqa
|
173
|
175
|
p2 = new_res['parsed_url'].path[:-1] if new_res['parsed_url'].path.endswith('/') else new_res['parsed_url'].path # noqa
|
174
|
176
|
if res['host'] == new_res['host'] and\
|
175
|
|
- p1 == p2 and\
|
|
177
|
+ unquote(p1) == unquote(p2) and\
|
176
|
178
|
res['parsed_url'].query == new_res['parsed_url'].query and\
|
177
|
179
|
res.get('template') == new_res.get('template'):
|
178
|
180
|
duplicated = new_res
|
|
@@ -222,6 +224,10 @@ def search(query, request, selected_engines, pageno=1, lang='all'):
|
222
|
224
|
request_params['language'] = lang
|
223
|
225
|
request_params = engine.request(query.encode('utf-8'), request_params)
|
224
|
226
|
|
|
227
|
+ if request_params['url'] is None:
|
|
228
|
+ # TODO add support of offline engines
|
|
229
|
+ pass
|
|
230
|
+
|
225
|
231
|
callback = make_callback(
|
226
|
232
|
selected_engine['name'],
|
227
|
233
|
results,
|