|
|
|
|
131
|
engines[engine_name].stats['result_count'] += len(engine_results)
|
131
|
engines[engine_name].stats['result_count'] += len(engine_results)
|
132
|
flat_res = filter(None, chain.from_iterable(izip_longest(*results.values())))
|
132
|
flat_res = filter(None, chain.from_iterable(izip_longest(*results.values())))
|
133
|
flat_len = len(flat_res)
|
133
|
flat_len = len(flat_res)
|
|
|
134
|
+ engines_len = len(selected_engines)
|
134
|
results = []
|
135
|
results = []
|
135
|
# deduplication + scoring
|
136
|
# deduplication + scoring
|
136
|
for i,res in enumerate(flat_res):
|
137
|
for i,res in enumerate(flat_res):
|
137
|
res['parsed_url'] = urlparse(res['url'])
|
138
|
res['parsed_url'] = urlparse(res['url'])
|
138
|
res['engines'] = [res['engine']]
|
139
|
res['engines'] = [res['engine']]
|
139
|
- score = (flat_len - i - flat_len%len(engines))*settings.weights.get(res['engine'], 1)
|
|
|
|
|
140
|
+ score = int((flat_len - i)/engines_len)*settings.weights.get(res['engine'], 1)
|
140
|
duplicated = False
|
141
|
duplicated = False
|
141
|
for new_res in results:
|
142
|
for new_res in results:
|
142
|
if res['parsed_url'].netloc == new_res['parsed_url'].netloc and\
|
143
|
if res['parsed_url'].netloc == new_res['parsed_url'].netloc and\
|