Browse Source

[enh] result ordering and deduplication

asciimoo 11 years ago
parent
commit
fa9c9e090b
1 changed file with 21 additions and 1 deletion
  1. 21
    1
      searx/engines/__init__.py

+ 21
- 1
searx/engines/__init__.py View File

4
 from imp import load_source
4
 from imp import load_source
5
 import grequests
5
 import grequests
6
 from itertools import izip_longest, chain
6
 from itertools import izip_longest, chain
7
+from operator import itemgetter
7
 
8
 
8
 engine_dir = dirname(realpath(__file__))
9
 engine_dir = dirname(realpath(__file__))
9
 
10
 
56
                                 )
57
                                 )
57
         requests.append(req)
58
         requests.append(req)
58
     grequests.map(requests)
59
     grequests.map(requests)
59
-    return list(filter(None, chain(*izip_longest(*results.values()))))
60
+    flat_res = list(filter(None, chain(*izip_longest(*results.values()))))
61
+    flat_len = len(flat_res)
62
+    results = []
63
+    # deduplication + scoring
64
+    for i,res in enumerate(flat_res):
65
+        score = flat_len - i
66
+        duplicated = False
67
+        for new_res in results:
68
+            if res['url'] == new_res['url']:
69
+                duplicated = new_res
70
+                break
71
+        if duplicated:
72
+            if len(res['content']) > len(duplicated):
73
+                duplicated['content'] = res['content']
74
+            duplicated['score'] += score
75
+        else:
76
+            res['score'] = score
77
+            results.append(res)
78
+
79
+    return sorted(results, key=itemgetter('score'), reverse=True)