Browse Source

Add a plugin to remove trackers from results URLs

Cqoicebordel 10 years ago
parent
commit
617495cca8
2 changed files with 43 additions and 1 deletions
  1. 3
    1
      searx/plugins/__init__.py
  2. 40
    0
      searx/plugins/tracker_url_remover.py

+ 3
- 1
searx/plugins/__init__.py View File

@@ -21,7 +21,8 @@ logger = logger.getChild('plugins')
21 21
 
22 22
 from searx.plugins import (https_rewrite,
23 23
                            self_ip,
24
-                           search_on_category_select)
24
+                           search_on_category_select,
25
+                           tracker_url_remover)
25 26
 
26 27
 required_attrs = (('name', str),
27 28
                   ('description', str),
@@ -73,3 +74,4 @@ plugins = PluginStore()
73 74
 plugins.register(https_rewrite)
74 75
 plugins.register(self_ip)
75 76
 plugins.register(search_on_category_select)
77
+plugins.register(tracker_url_remover)

+ 40
- 0
searx/plugins/tracker_url_remover.py View File

@@ -0,0 +1,40 @@
1
+'''
2
+searx is free software: you can redistribute it and/or modify
3
+it under the terms of the GNU Affero General Public License as published by
4
+the Free Software Foundation, either version 3 of the License, or
5
+(at your option) any later version.
6
+
7
+searx is distributed in the hope that it will be useful,
8
+but WITHOUT ANY WARRANTY; without even the implied warranty of
9
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10
+GNU Affero General Public License for more details.
11
+
12
+You should have received a copy of the GNU Affero General Public License
13
+along with searx. If not, see < http://www.gnu.org/licenses/ >.
14
+
15
+(C) 2015 by Adam Tauber, <asciimoo@gmail.com>
16
+'''
17
+
18
+from flask.ext.babel import gettext
19
+import re
20
+
21
# Patterns for stripping tracking arguments out of a URL string.
# NOTE(review): none of these is anchored to the query component, so each one
# matches wherever its text occurs in the string it is applied to.

# "utm_" followed by one or more non-"&" characters and an optional "&".
re1 = re.compile(r'utm_[^&]+&?')
# "wkey" or "wemail" followed by one or more non-"&" characters and an
# optional "&".
re2 = re.compile(r'(wkey|wemail)[^&]+&?')
# A single "&" at the end of the string (or just before a final newline).
re3 = re.compile(r'&$')
# A string that consists of nothing but "?".
re4 = re.compile(r'^\?$')

# Plugin metadata; the strings are passed through gettext for translation.
name = gettext('Tracker URL remover')
description = gettext('Remove trackers arguments from the returned URL')
# presumably controls whether the plugin is enabled by default -- confirm
# against the plugin loader.
default_on = True
29
+
30
+
31
# Query-parameter prefixes that identify tracking arguments.
_TRACKER_PREFIXES = ('utm_', 'wkey', 'wemail')


def _strip_tracker_params(url):
    """Return ``url`` with tracking parameters removed from its query string.

    Only the query component (between the first "?" and the first "#") is
    inspected, so the path, parameter values and the fragment are never
    altered. A URL without any tracking parameter is returned unchanged.
    """
    # Split the fragment off first: a "?" after "#" is not a query separator.
    head, hash_sep, fragment = url.partition('#')
    base, query_sep, query = head.partition('?')
    if not query_sep:
        return url

    params = query.split('&')
    kept = [p for p in params if not p.startswith(_TRACKER_PREFIXES)]
    if len(kept) == len(params):
        return url

    # Removing a parameter next to an empty segment can leave a stray "&".
    new_query = '&'.join(kept).strip('&')
    cleaned = base
    if new_query:
        # Drop the "?" entirely when no parameter is left.
        cleaned += '?' + new_query
    return cleaned + hash_sep + fragment


def on_result(request, ctx):
    """Remove tracking arguments from the URL of the current result.

    Reads ``ctx['result']['url']``, drops every query parameter whose name
    starts with ``utm_``, ``wkey`` or ``wemail`` and writes the cleaned URL
    back to the same key.

    :param request: the current request object (unused here).
    :param ctx: mapping whose ``'result'`` entry is a mapping with a
        ``'url'`` string.
    :returns: always ``True``.
    :raises KeyError: if ``ctx`` has no ``'result'`` or the result no ``'url'``.
    """
    result = ctx['result']
    result['url'] = _strip_tracker_params(result['url'])
    return True