|  | @@ -16,6 +16,7 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
 | 
	
		
			
			| 16 | 16 |  '''
 | 
	
		
			
			| 17 | 17 |  
 | 
	
		
			
			| 18 | 18 |  import grequests
 | 
	
		
			
			|  | 19 | +import re
 | 
	
		
			
			| 19 | 20 |  from itertools import izip_longest, chain
 | 
	
		
			
			| 20 | 21 |  from datetime import datetime
 | 
	
		
			
			| 21 | 22 |  from operator import itemgetter
 | 
	
	
		
			
			|  | @@ -76,6 +77,13 @@ def make_callback(engine_name, results, suggestions, callback, params):
 | 
	
		
			
			| 76 | 77 |  
 | 
	
		
			
			| 77 | 78 |      return process_callback
 | 
	
		
			
			| 78 | 79 |  
 | 
	
		
			
			|  | 80 | +# return the meaningful length of the content for a result
 | 
	
		
			
			|  | 81 | +def content_result_len(result):
 | 
	
		
			
			|  | 82 | +    if isinstance(result.get('content'), basestring):
 | 
	
		
			
			|  | 83 | +        content = re.sub('[,;:!?\./\\\\ ()-_]', '', result.get('content'))
 | 
	
		
			
			|  | 84 | +        return len(content) 
 | 
	
		
			
			|  | 85 | +    else:
 | 
	
		
			
			|  | 86 | +        return 0
 | 
	
		
			
			| 79 | 87 |  
 | 
	
		
			
			| 80 | 88 |  # score results and remove duplications
 | 
	
		
			
			| 81 | 89 |  def score_results(results):
 | 
	
	
		
			
			|  | @@ -110,6 +118,9 @@ def score_results(results):
 | 
	
		
			
			| 110 | 118 |          duplicated = False
 | 
	
		
			
			| 111 | 119 |  
 | 
	
		
			
			| 112 | 120 |          # check for duplicates
 | 
	
		
			
			|  | 121 | +        if 'content' in res:
 | 
	
		
			
			|  | 122 | +            res['content'] = re.sub(' +', ' ', res['content'].strip().replace('\n', ''))
 | 
	
		
			
			|  | 123 | +
 | 
	
		
			
			| 113 | 124 |          for new_res in results:
 | 
	
		
			
			| 114 | 125 |              # remove / from the end of the url if required
 | 
	
		
			
			| 115 | 126 |              p1 = res['parsed_url'].path[:-1] if res['parsed_url'].path.endswith('/') else res['parsed_url'].path  # noqa
 | 
	
	
		
			
			|  | @@ -126,7 +137,7 @@ def score_results(results):
 | 
	
		
			
			| 126 | 137 |          # merge duplicates together
 | 
	
		
			
			| 127 | 138 |          if duplicated:
 | 
	
		
			
			| 128 | 139 |              # using content with more text
 | 
	
		
			
			| 129 |  | -            if res.get('content') > duplicated.get('content'):
 | 
	
		
			
			|  | 140 | +            if content_result_len(res) > content_result_len(duplicated):
 | 
	
		
			
			| 130 | 141 |                  duplicated['content'] = res['content']
 | 
	
		
			
			| 131 | 142 |  
 | 
	
		
			
			| 132 | 143 |              # increase result-score
 |