|
@@ -106,8 +106,13 @@ def score_results(results):
|
106
|
106
|
res['host'] = res['host'].replace('www.', '', 1)
|
107
|
107
|
|
108
|
108
|
res['engines'] = [res['engine']]
|
|
109
|
+
|
109
|
110
|
weight = 1.0
|
110
|
111
|
|
|
112
|
+ # strip multiple spaces and newlines from content
|
|
113
|
+ if 'content' in res:
|
|
114
|
+ res['content'] = re.sub(' +', ' ', res['content'].strip().replace('\n', ''))
|
|
115
|
+
|
111
|
116
|
# get weight of this engine if possible
|
112
|
117
|
if hasattr(engines[res['engine']], 'weight'):
|
113
|
118
|
weight = float(engines[res['engine']].weight)
|
|
@@ -115,12 +120,8 @@ def score_results(results):
|
115
|
120
|
# calculate score for that engine
|
116
|
121
|
score = int((flat_len - i) / engines_len) * weight + 1
|
117
|
122
|
|
118
|
|
- duplicated = False
|
119
|
|
-
|
120
|
123
|
# check for duplicates
|
121
|
|
- if 'content' in res:
|
122
|
|
- res['content'] = re.sub(' +', ' ', res['content'].strip().replace('\n', ''))
|
123
|
|
-
|
|
124
|
+ duplicated = False
|
124
|
125
|
for new_res in results:
|
125
|
126
|
# remove / from the end of the url if required
|
126
|
127
|
p1 = res['parsed_url'].path[:-1] if res['parsed_url'].path.endswith('/') else res['parsed_url'].path # noqa
|