浏览代码

[enh] result ordering and deduplication

asciimoo 11 年前
父节点
当前提交
fa9c9e090b
共有 1 个文件被更改,包括 21 次插入和 1 次删除
  1. 21
    1
      searx/engines/__init__.py

+ 21
- 1
searx/engines/__init__.py 查看文件

@@ -4,6 +4,7 @@ from os import listdir
4 4
 from imp import load_source
5 5
 import grequests
6 6
 from itertools import izip_longest, chain
7
+from operator import itemgetter
7 8
 
8 9
 engine_dir = dirname(realpath(__file__))
9 10
 
@@ -56,4 +57,23 @@ def search(query, request, selected_engines):
56 57
                                 )
57 58
         requests.append(req)
58 59
     grequests.map(requests)
59
-    return list(filter(None, chain(*izip_longest(*results.values()))))
60
+    flat_res = list(filter(None, chain(*izip_longest(*results.values()))))
61
+    flat_len = len(flat_res)
62
+    results = []
63
+    # deduplication + scoring
64
+    for i,res in enumerate(flat_res):
65
+        score = flat_len - i
66
+        duplicated = False
67
+        for new_res in results:
68
+            if res['url'] == new_res['url']:
69
+                duplicated = new_res
70
+                break
71
+        if duplicated:
72
+            if len(res['content']) > len(duplicated):
73
+                duplicated['content'] = res['content']
74
+            duplicated['score'] += score
75
+        else:
76
+            res['score'] = score
77
+            results.append(res)
78
+
79
+    return sorted(results, key=itemgetter('score'), reverse=True)