| 
				
			 | 
			
			
				@@ -123,6 +123,46 @@ def highlight_content(content, query): 
			 | 
		
	
		
			
			| 
				123
			 | 
			
				123
			 | 
			
			
				  
			 | 
		
	
		
			
			| 
				124
			 | 
			
				124
			 | 
			
			
				     return content 
			 | 
		
	
		
			
			| 
				125
			 | 
			
				125
			 | 
			
			
				  
			 | 
		
	
		
			
			| 
				
			 | 
			
				126
			 | 
			
			
				+def score_results(results): 
			 | 
		
	
		
			
			| 
				
			 | 
			
				127
			 | 
			
			
				+    flat_res = filter(None, chain.from_iterable(izip_longest(*results.values()))) 
			 | 
		
	
		
			
			| 
				
			 | 
			
				128
			 | 
			
			
				+    flat_len = len(flat_res) 
			 | 
		
	
		
			
			| 
				
			 | 
			
				129
			 | 
			
			
				+    engines_len = len(results) 
			 | 
		
	
		
			
			| 
				
			 | 
			
				130
			 | 
			
			
				+    results = [] 
			 | 
		
	
		
			
			| 
				
			 | 
			
				131
			 | 
			
			
				+    # deduplication + scoring 
			 | 
		
	
		
			
			| 
				
			 | 
			
				132
			 | 
			
			
				+    for i,res in enumerate(flat_res): 
			 | 
		
	
		
			
			| 
				
			 | 
			
				133
			 | 
			
			
				+        res['parsed_url'] = urlparse(res['url']) 
			 | 
		
	
		
			
			| 
				
			 | 
			
				134
			 | 
			
			
				+        res['engines'] = [res['engine']] 
			 | 
		
	
		
			
			| 
				
			 | 
			
				135
			 | 
			
			
				+        weight = 1.0 
			 | 
		
	
		
			
			| 
				
			 | 
			
				136
			 | 
			
			
				+        if hasattr(engines[res['engine']], 'weight'): 
			 | 
		
	
		
			
			| 
				
			 | 
			
				137
			 | 
			
			
				+            weight = float(engines[res['engine']].weight) 
			 | 
		
	
		
			
			| 
				
			 | 
			
				138
			 | 
			
			
				+        elif res['engine'] in settings.weights: 
			 | 
		
	
		
			
			| 
				
			 | 
			
				139
			 | 
			
			
				+            weight = float(settings.weights[res['engine']]) 
			 | 
		
	
		
			
			| 
				
			 | 
			
				140
			 | 
			
			
				+        score = int((flat_len - i)/engines_len)*weight+1 
			 | 
		
	
		
			
			| 
				
			 | 
			
				141
			 | 
			
			
				+        duplicated = False 
			 | 
		
	
		
			
			| 
				
			 | 
			
				142
			 | 
			
			
				+        for new_res in results: 
			 | 
		
	
		
			
			| 
				
			 | 
			
				143
			 | 
			
			
				+            p1 = res['parsed_url'].path[:-1] if res['parsed_url'].path.endswith('/') else res['parsed_url'].path 
			 | 
		
	
		
			
			| 
				
			 | 
			
				144
			 | 
			
			
				+            p2 = new_res['parsed_url'].path[:-1] if new_res['parsed_url'].path.endswith('/') else new_res['parsed_url'].path 
			 | 
		
	
		
			
			| 
				
			 | 
			
				145
			 | 
			
			
				+            if res['parsed_url'].netloc == new_res['parsed_url'].netloc and\ 
			 | 
		
	
		
			
			| 
				
			 | 
			
				146
			 | 
			
			
				+               p1 == p2 and\ 
			 | 
		
	
		
			
			| 
				
			 | 
			
				147
			 | 
			
			
				+               res['parsed_url'].query == new_res['parsed_url'].query and\ 
			 | 
		
	
		
			
			| 
				
			 | 
			
				148
			 | 
			
			
				+               res.get('template') == new_res.get('template'): 
			 | 
		
	
		
			
			| 
				
			 | 
			
				149
			 | 
			
			
				+                duplicated = new_res 
			 | 
		
	
		
			
			| 
				
			 | 
			
				150
			 | 
			
			
				+                break 
			 | 
		
	
		
			
			| 
				
			 | 
			
				151
			 | 
			
			
				+        if duplicated: 
			 | 
		
	
		
			
			| 
				
			 | 
			
				152
			 | 
			
			
				+            if len(res.get('content', '')) > len(duplicated.get('content', '')): 
			 | 
		
	
		
			
			| 
				
			 | 
			
				153
			 | 
			
			
				+                duplicated['content'] = res['content'] 
			 | 
		
	
		
			
			| 
				
			 | 
			
				154
			 | 
			
			
				+            duplicated['score'] += score 
			 | 
		
	
		
			
			| 
				
			 | 
			
				155
			 | 
			
			
				+            duplicated['engines'].append(res['engine']) 
			 | 
		
	
		
			
			| 
				
			 | 
			
				156
			 | 
			
			
				+            if duplicated['parsed_url'].scheme == 'https': 
			 | 
		
	
		
			
			| 
				
			 | 
			
				157
			 | 
			
			
				+                continue 
			 | 
		
	
		
			
			| 
				
			 | 
			
				158
			 | 
			
			
				+            elif res['parsed_url'].scheme == 'https': 
			 | 
		
	
		
			
			| 
				
			 | 
			
				159
			 | 
			
			
				+                duplicated['url'] = res['parsed_url'].geturl() 
			 | 
		
	
		
			
			| 
				
			 | 
			
				160
			 | 
			
			
				+                duplicated['parsed_url'] = res['parsed_url'] 
			 | 
		
	
		
			
			| 
				
			 | 
			
				161
			 | 
			
			
				+        else: 
			 | 
		
	
		
			
			| 
				
			 | 
			
				162
			 | 
			
			
				+            res['score'] = score 
			 | 
		
	
		
			
			| 
				
			 | 
			
				163
			 | 
			
			
				+            results.append(res) 
			 | 
		
	
		
			
			| 
				
			 | 
			
				164
			 | 
			
			
				+    return sorted(results, key=itemgetter('score'), reverse=True) 
			 | 
		
	
		
			
			| 
				
			 | 
			
				165
			 | 
			
			
				+ 
			 | 
		
	
		
			
			| 
				126
			 | 
			
				166
			 | 
			
			
				 def search(query, request, selected_engines): 
			 | 
		
	
		
			
			| 
				127
			 | 
			
				167
			 | 
			
			
				     global engines, categories, number_of_searches 
			 | 
		
	
		
			
			| 
				128
			 | 
			
				168
			 | 
			
			
				     requests = [] 
			 | 
		
	
	
		
			
			| 
				
			 | 
			
			
				@@ -165,43 +205,8 @@ def search(query, request, selected_engines): 
			 | 
		
	
		
			
			| 
				165
			 | 
			
				205
			 | 
			
			
				     for engine_name,engine_results in results.items(): 
			 | 
		
	
		
			
			| 
				166
			 | 
			
				206
			 | 
			
			
				         engines[engine_name].stats['search_count'] += 1 
			 | 
		
	
		
			
			| 
				167
			 | 
			
				207
			 | 
			
			
				         engines[engine_name].stats['result_count'] += len(engine_results) 
			 | 
		
	
		
			
			| 
				168
			 | 
			
				
			 | 
			
			
				-    flat_res = filter(None, chain.from_iterable(izip_longest(*results.values()))) 
			 | 
		
	
		
			
			| 
				169
			 | 
			
				
			 | 
			
			
				-    flat_len = len(flat_res) 
			 | 
		
	
		
			
			| 
				170
			 | 
			
				
			 | 
			
			
				-    engines_len = len(selected_engines) 
			 | 
		
	
		
			
			| 
				171
			 | 
			
				
			 | 
			
			
				-    results = [] 
			 | 
		
	
		
			
			| 
				172
			 | 
			
				
			 | 
			
			
				-    # deduplication + scoring 
			 | 
		
	
		
			
			| 
				173
			 | 
			
				
			 | 
			
			
				-    for i,res in enumerate(flat_res): 
			 | 
		
	
		
			
			| 
				174
			 | 
			
				
			 | 
			
			
				-        res['parsed_url'] = urlparse(res['url']) 
			 | 
		
	
		
			
			| 
				175
			 | 
			
				
			 | 
			
			
				-        res['engines'] = [res['engine']] 
			 | 
		
	
		
			
			| 
				176
			 | 
			
				
			 | 
			
			
				-        weight = 1.0 
			 | 
		
	
		
			
			| 
				177
			 | 
			
				
			 | 
			
			
				-        if hasattr(engines[res['engine']], 'weight'): 
			 | 
		
	
		
			
			| 
				178
			 | 
			
				
			 | 
			
			
				-            weight = float(engines[res['engine']].weight) 
			 | 
		
	
		
			
			| 
				179
			 | 
			
				
			 | 
			
			
				-        elif res['engine'] in settings.weights: 
			 | 
		
	
		
			
			| 
				180
			 | 
			
				
			 | 
			
			
				-            weight = float(settings.weights[res['engine']]) 
			 | 
		
	
		
			
			| 
				181
			 | 
			
				
			 | 
			
			
				-        score = int((flat_len - i)/engines_len)*weight+1 
			 | 
		
	
		
			
			| 
				182
			 | 
			
				
			 | 
			
			
				-        duplicated = False 
			 | 
		
	
		
			
			| 
				183
			 | 
			
				
			 | 
			
			
				-        for new_res in results: 
			 | 
		
	
		
			
			| 
				184
			 | 
			
				
			 | 
			
			
				-            p1 = res['parsed_url'].path[:-1] if res['parsed_url'].path.endswith('/') else res['parsed_url'].path 
			 | 
		
	
		
			
			| 
				185
			 | 
			
				
			 | 
			
			
				-            p2 = new_res['parsed_url'].path[:-1] if new_res['parsed_url'].path.endswith('/') else new_res['parsed_url'].path 
			 | 
		
	
		
			
			| 
				186
			 | 
			
				
			 | 
			
			
				-            if res['parsed_url'].netloc == new_res['parsed_url'].netloc and\ 
			 | 
		
	
		
			
			| 
				187
			 | 
			
				
			 | 
			
			
				-               p1 == p2 and\ 
			 | 
		
	
		
			
			| 
				188
			 | 
			
				
			 | 
			
			
				-               res['parsed_url'].query == new_res['parsed_url'].query and\ 
			 | 
		
	
		
			
			| 
				189
			 | 
			
				
			 | 
			
			
				-               res.get('template') == new_res.get('template'): 
			 | 
		
	
		
			
			| 
				190
			 | 
			
				
			 | 
			
			
				-                duplicated = new_res 
			 | 
		
	
		
			
			| 
				191
			 | 
			
				
			 | 
			
			
				-                break 
			 | 
		
	
		
			
			| 
				192
			 | 
			
				
			 | 
			
			
				-        if duplicated: 
			 | 
		
	
		
			
			| 
				193
			 | 
			
				
			 | 
			
			
				-            if len(res.get('content', '')) > len(duplicated.get('content', '')): 
			 | 
		
	
		
			
			| 
				194
			 | 
			
				
			 | 
			
			
				-                duplicated['content'] = res['content'] 
			 | 
		
	
		
			
			| 
				195
			 | 
			
				
			 | 
			
			
				-            duplicated['score'] += score 
			 | 
		
	
		
			
			| 
				196
			 | 
			
				
			 | 
			
			
				-            duplicated['engines'].append(res['engine']) 
			 | 
		
	
		
			
			| 
				197
			 | 
			
				
			 | 
			
			
				-            if duplicated['parsed_url'].scheme == 'https': 
			 | 
		
	
		
			
			| 
				198
			 | 
			
				
			 | 
			
			
				-                continue 
			 | 
		
	
		
			
			| 
				199
			 | 
			
				
			 | 
			
			
				-            elif res['parsed_url'].scheme == 'https': 
			 | 
		
	
		
			
			| 
				200
			 | 
			
				
			 | 
			
			
				-                duplicated['url'] = res['parsed_url'].geturl() 
			 | 
		
	
		
			
			| 
				201
			 | 
			
				
			 | 
			
			
				-                duplicated['parsed_url'] = res['parsed_url'] 
			 | 
		
	
		
			
			| 
				202
			 | 
			
				
			 | 
			
			
				-        else: 
			 | 
		
	
		
			
			| 
				203
			 | 
			
				
			 | 
			
			
				-            res['score'] = score 
			 | 
		
	
		
			
			| 
				204
			 | 
			
				
			 | 
			
			
				-            results.append(res) 
			 | 
		
	
		
			
			| 
				
			 | 
			
				208
			 | 
			
			
				+ 
			 | 
		
	
		
			
			| 
				
			 | 
			
				209
			 | 
			
			
				+    results = score_results(results) 
			 | 
		
	
		
			
			| 
				205
			 | 
			
				210
			 | 
			
			
				  
			 | 
		
	
		
			
			| 
				206
			 | 
			
				211
			 | 
			
			
				     for result in results: 
			 | 
		
	
		
			
			| 
				207
			 | 
			
				212
			 | 
			
			
				         if 'content' in result: 
			 | 
		
	
	
		
			
			| 
				
			 | 
			
			
				@@ -209,7 +214,7 @@ def search(query, request, selected_engines): 
			 | 
		
	
		
			
			| 
				209
			 | 
			
				214
			 | 
			
			
				         for res_engine in result['engines']: 
			 | 
		
	
		
			
			| 
				210
			 | 
			
				215
			 | 
			
			
				             engines[result['engine']].stats['score_count'] += result['score'] 
			 | 
		
	
		
			
			| 
				211
			 | 
			
				216
			 | 
			
			
				  
			 | 
		
	
		
			
			| 
				212
			 | 
			
				
			 | 
			
			
				-    return sorted(results, key=itemgetter('score'), reverse=True) 
			 | 
		
	
		
			
			| 
				
			 | 
			
				217
			 | 
			
			
				+    return results 
			 | 
		
	
		
			
			| 
				213
			 | 
			
				218
			 | 
			
			
				  
			 | 
		
	
		
			
			| 
				214
			 | 
			
				219
			 | 
			
			
				 def get_engines_stats(): 
			 | 
		
	
		
			
			| 
				215
			 | 
			
				220
			 | 
			
			
				     pageloads = [] 
			 |