Selaa lähdekoodia

Merge pull request #97 from pointhi/https

Implementing https rewrite support
Adam Tauber 10 vuotta sitten
vanhempi
commit
20400c40c3
40 muutettua tiedostoa jossa 4691 lisäystä ja 13 poistoa
  1. 10
    0
      searx/__init__.py
  2. 135
    8
      searx/https_rewrite.py
  3. 17
    0
      searx/https_rules/00README
  4. 56
    0
      searx/https_rules/Bing.xml
  5. 69
    0
      searx/https_rules/Dailymotion.xml
  6. 53
    0
      searx/https_rules/Deviantart.xml
  7. 38
    0
      searx/https_rules/DuckDuckGo.xml
  8. 44
    0
      searx/https_rules/Flickr.xml
  9. 11
    0
      searx/https_rules/Github-Pages.xml
  10. 94
    0
      searx/https_rules/Github.xml
  11. 26
    0
      searx/https_rules/Google-mismatches.xml
  12. 14
    0
      searx/https_rules/Google.org.xml
  13. 143
    0
      searx/https_rules/GoogleAPIs.xml
  14. 6
    0
      searx/https_rules/GoogleCanada.xml
  15. 65
    0
      searx/https_rules/GoogleImages.xml
  16. 78
    0
      searx/https_rules/GoogleMainSearch.xml
  17. 67
    0
      searx/https_rules/GoogleMaps.xml
  18. 6
    0
      searx/https_rules/GoogleMelange.xml
  19. 135
    0
      searx/https_rules/GoogleSearch.xml
  20. 345
    0
      searx/https_rules/GoogleServices.xml
  21. 28
    0
      searx/https_rules/GoogleShopping.xml
  22. 7
    0
      searx/https_rules/GoogleSorry.xml
  23. 8
    0
      searx/https_rules/GoogleTranslate.xml
  24. 83
    0
      searx/https_rules/GoogleVideos.xml
  25. 17
    0
      searx/https_rules/GoogleWatchBlog.xml
  26. 21
    0
      searx/https_rules/Google_App_Engine.xml
  27. 16
    0
      searx/https_rules/Googleplex.com.xml
  28. 15
    0
      searx/https_rules/OpenStreetMap.xml
  29. 14
    0
      searx/https_rules/Rawgithub.com.xml
  30. 101
    0
      searx/https_rules/Soundcloud.xml
  31. 36
    0
      searx/https_rules/ThePirateBay.xml
  32. 18
    0
      searx/https_rules/Torproject.xml
  33. 169
    0
      searx/https_rules/Twitter.xml
  34. 75
    0
      searx/https_rules/Vimeo.xml
  35. 13
    0
      searx/https_rules/WikiLeaks.xml
  36. 107
    0
      searx/https_rules/Wikimedia.xml
  37. 2450
    0
      searx/https_rules/Yahoo.xml
  38. 46
    0
      searx/https_rules/YouTube.xml
  39. 3
    0
      searx/settings_robot.yml
  40. 52
    5
      searx/webapp.py

+ 10
- 0
searx/__init__.py Näytä tiedosto

17
 
17
 
18
 from os import environ
18
 from os import environ
19
 from os.path import realpath, dirname, join, abspath
19
 from os.path import realpath, dirname, join, abspath
20
+from searx.https_rewrite import load_https_rules
20
 try:
21
 try:
21
     from yaml import load
22
     from yaml import load
22
 except:
23
 except:
34
 else:
35
 else:
35
     settings_path = join(searx_dir, 'settings.yml')
36
     settings_path = join(searx_dir, 'settings.yml')
36
 
37
 
38
+if 'SEARX_HTTPS_REWRITE_PATH' in environ:
39
+    https_rewrite_path = environ['SEARX_HTTPS_REWRITE_PATH']
40
+else:
41
+    https_rewrite_path = join(searx_dir, 'https_rules')
37
 
42
 
38
 # load settings
43
 # load settings
39
 with open(settings_path) as settings_yaml:
44
 with open(settings_path) as settings_yaml:
40
     settings = load(settings_yaml)
45
     settings = load(settings_yaml)
46
+
47
+# load https rules only if https rewrite is enabled
48
+if settings.get('server', {}).get('https_rewrite'):
49
+    # load https rules
50
+    load_https_rules(https_rewrite_path)

+ 135
- 8
searx/https_rewrite.py Näytä tiedosto

1
+'''
2
+searx is free software: you can redistribute it and/or modify
3
+it under the terms of the GNU Affero General Public License as published by
4
+the Free Software Foundation, either version 3 of the License, or
5
+(at your option) any later version.
6
+
7
+searx is distributed in the hope that it will be useful,
8
+but WITHOUT ANY WARRANTY; without even the implied warranty of
9
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10
+GNU Affero General Public License for more details.
11
+
12
+You should have received a copy of the GNU Affero General Public License
13
+along with searx. If not, see < http://www.gnu.org/licenses/ >.
14
+
15
+(C) 2013- by Adam Tauber, <asciimoo@gmail.com>
16
+'''
17
+
1
 import re
18
 import re
19
+from lxml import etree
20
+from os import listdir
21
+from os.path import isfile, join
22
+
2
 
23
 
3
 # https://gitweb.torproject.org/\
24
 # https://gitweb.torproject.org/\
4
 # pde/https-everywhere.git/tree/4.0:/src/chrome/content/rules
25
 # pde/https-everywhere.git/tree/4.0:/src/chrome/content/rules
5
 
26
 
6
 # HTTPS rewrite rules
27
 # HTTPS rewrite rules
7
-https_rules = (
8
-    # from
9
-    (re.compile(r'^http://(www\.|m\.|)?xkcd\.(?:com|org)/', re.I | re.U),
10
-     # to
11
-     r'https://\1xkcd.com/'),
12
-    (re.compile(r'^https?://(?:ssl)?imgs\.xkcd\.com/', re.I | re.U),
13
-     r'https://sslimgs.xkcd.com/'),
14
-)
28
+https_rules = []
29
+
30
+
31
+# load a single ruleset from an xml file
32
+def load_single_https_ruleset(filepath):
33
+    ruleset = ()
34
+
35
+    # init parser
36
+    parser = etree.XMLParser()
37
+
38
+    # load and parse xml-file
39
+    try:
40
+        tree = etree.parse(filepath, parser)
41
+    except:
42
+        # TODO, error message
43
+        return ()
44
+
45
+    # get root node
46
+    root = tree.getroot()
47
+
48
+    #print(etree.tostring(tree))
49
+
50
+    # check if root is a node with the name ruleset
51
+    # TODO improve parsing
52
+    if root.tag != 'ruleset':        
53
+        return ()
54
+
55
+    # check if rule is deactivated by default
56
+    if root.attrib.get('default_off'):
57
+        return ()
58
+
59
+    # check if rule only works for specific platforms
60
+    if root.attrib.get('platform'):
61
+        return ()
62
+
63
+    hosts = []
64
+    rules = []
65
+    exclusions = []
66
+
67
+    # parse children of ruleset
68
+    for ruleset in root:
69
+        # this child defines a target
70
+        if ruleset.tag == 'target':
71
+            # check if required attributes are available
72
+            if not ruleset.attrib.get('host'):
73
+                continue
74
+
75
+            # convert host-rule to valid regex
76
+            host = ruleset.attrib.get('host').replace('.', '\.').replace('*', '.*')
77
+
78
+            # append to host list
79
+            hosts.append(host)
80
+
81
+        # this child defines a rule
82
+        elif ruleset.tag == 'rule':
83
+            # check if required attributes are available
84
+            if not ruleset.attrib.get('from')\
85
+               or not ruleset.attrib.get('to'):
86
+                continue
87
+
88
+            # TODO hack, which converts a javascript regex group into a valid python regex group
89
+            rule_from = ruleset.attrib.get('from').replace('$', '\\')
90
+            rule_to = ruleset.attrib.get('to').replace('$', '\\')
91
+
92
+            # TODO, not working yet because of the hack above, currently doing that in webapp.py
93
+            #rule_from_rgx = re.compile(rule_from, re.I)
94
+
95
+            # append rule
96
+            rules.append((rule_from, rule_to))
97
+
98
+        # this child defines an exclusion
99
+        elif ruleset.tag == 'exclusion':
100
+            # check if required attributes are available
101
+            if not ruleset.attrib.get('pattern'):
102
+                continue
103
+
104
+            exclusion_rgx = re.compile(ruleset.attrib.get('pattern'))
105
+
106
+            # append exclusion
107
+            exclusions.append(exclusion_rgx)
108
+
109
+    # convert list of possible hosts to a simple regex
110
+    # TODO compress regex to improve performance
111
+    try:
112
+        target_hosts = re.compile('^(' + '|'.join(hosts) + ')', re.I | re.U)
113
+    except:
114
+        return ()
115
+
116
+    # return ruleset
117
+    return (target_hosts, rules, exclusions)
118
+
119
+
120
+# load all https rewrite rules
121
+def load_https_rules(rules_path):
122
+    # add / to path if not set yet
123
+    if rules_path[-1:] != '/':
124
+        rules_path += '/'
125
+
126
+    # search all xml files which are stored in the https rule directory
127
+    xml_files = [ join(rules_path,f) for f in listdir(rules_path) if isfile(join(rules_path,f)) and f[-4:] == '.xml' ]
128
+
129
+    # load xml-files
130
+    for ruleset_file in xml_files:
131
+        # calculate rewrite-rules
132
+        ruleset = load_single_https_ruleset(ruleset_file)
133
+
134
+        # skip if no ruleset returned
135
+        if not ruleset:
136
+            continue
137
+
138
+        # append ruleset
139
+        https_rules.append(ruleset)
140
+        
141
+    print(' * {n} https-rules loaded'.format(n=len(https_rules)))

+ 17
- 0
searx/https_rules/00README Näytä tiedosto

1
+<!-- 
2
+This directory contains web site rewriting rules for the
3
+HTTPS Everywhere software, available from
4
+https://www.eff.org/https-everywhere
5
+
6
+These rules were contributed to the project by users and aim to
7
+enable routine secure access to as many different web sites as
8
+possible.  They are automatically installed together with the
9
+HTTPS Everywhere software.  The presence of these rules does not
10
+mean that an HTTPS Everywhere user accessed, or intended to
11
+access, any particular web site.
12
+
13
+For information about how to create additional HTTPS Everywhere
14
+rewriting rules to add support for new sites, please see
15
+
16
+https://www.eff.org/https-everywhere/rulesets
17
+-->

+ 56
- 0
searx/https_rules/Bing.xml Näytä tiedosto

1
+<!--
2
+	For other Microsoft coverage, see Microsoft.xml.
3
+
4
+
5
+	CDN buckets:
6
+
7
+		- a134.lm.akamai.net
8
+
9
+			- akam.bing.com
10
+			- *.mm.bing.net
11
+
12
+
13
+	Nonfunctional domains:
14
+
15
+		- m2.cn.bing.com
16
+		- origin.bj1.bing.com
17
+		- blogs.bing.com
18
+
19
+
20
+	Fully covered domains:
21
+
22
+		- bing.com subdomains:
23
+
24
+			- (www.)
25
+			- c.bing		(tracking beacons)
26
+			- cn.bing
27
+			- h.bing
28
+			- ssl
29
+			- testfamilysafety.bing
30
+			- udc.bing
31
+			- (www.)bing
32
+
33
+		- *.mm.bing.net
34
+		- api.bing.com
35
+
36
+-->
37
+<ruleset name="Bing">
38
+
39
+	<target host="bing.com" />
40
+	<target host="*.bing.com" />
41
+	<target host="*.mm.bing.net" />
42
+
43
+
44
+	<securecookie host=".*\.bing\.com$" name=".+" />
45
+
46
+
47
+	<rule from="^http://((?:c|cn|h|ssl|testfamilysafety|udc|www)\.)?bing\.com/"
48
+		to="https://$1bing.com/" />
49
+
50
+	<rule from="^http://([^/:@]*)\.mm\.bing\.net/"
51
+		to="https://$1.mm.bing.com/"/>
52
+
53
+	<rule from="^http://([^/:@]*)\.api\.bing\.net/"
54
+		to="https://$1.api.bing.com/"/>
55
+
56
+</ruleset>

+ 69
- 0
searx/https_rules/Dailymotion.xml Näytä tiedosto

1
+<!--
2
+	Nonfunctional domains:
3
+
4
+		- blog.dailymotion.com
5
+		- press.dailymotion.com		(shows steaw.com, CN: www.steaw.com)
6
+		- proxy-46.dailymotion.com
7
+		- publicite.dailymotion.com
8
+		- publisher.dailymotion.com	(reset)
9
+		- vid.ak.dmcdn.net		(403, Akamai)
10
+		- vid2.ak.dmcdn.net		(504, akamai)
11
+
12
+
13
+	Problematic domains:
14
+
15
+		- ak2.static.dailymotion.com	(mismatched, CN: *.dmcdn.net)
16
+		- support.dmcloud.net		(mismatched, CN: *.zendesk.com)
17
+
18
+
19
+	Partially covered domains:
20
+
21
+		- (www.)dailymotion.com
22
+
23
+			- cdn/manifest/video/\w+.mnft 403s
24
+			- crossdomain.xml breaks videos
25
+
26
+-->
27
+<ruleset name="Dailymotion (default off)" default_off="breaks some embedded videos">
28
+
29
+	<target host="dailymotion.com" />
30
+	<!--
31
+		* for cross-domain cookie.
32
+					-->
33
+	<target host="*.dailymotion.com" />
34
+		<!--
35
+			https://mail1.eff.org/pipermail/https-everywhere-rules/2012-July/001241.html
36
+													-->
37
+		<exclusion pattern="^http://(?:www\.)?dailymotion\.com/(?:cdn/[\w-]+/video/|crossdomain\.xml$)" />
38
+	<target host="ak2.static.dailymotion.com" />
39
+	<target host="*.dmcdn.net" />
40
+	<target host="dmcloud.net" />
41
+	<target host="*.dmcloud.net" />
42
+
43
+
44
+	<!--	Testing wrt embedded breakage.
45
+
46
+		securecookie host="^.*\.dailymotion\.com$" name=".+" /-->
47
+	<!--
48
+		Omniture tracking cookies:
49
+						-->
50
+	<securecookie host="^\.dailymotion\.com$" name="^s_\w+$" />
51
+	<securecookie host="^www\.dailymotion\.com$" name=".+" />
52
+
53
+
54
+	<rule from="^http://(erroracct\.|www\.)?dailymotion\.com/"
55
+		to="https://$1dailymotion.com/" />
56
+
57
+	<rule from="^http://(s\d|static(?:\d|s\d-ssl))\.dmcdn\.net/"
58
+		to="https://$1.dmcdn.net/" />
59
+
60
+	<rule from="^https?://ak2\.static\.dailymotion\.com/"
61
+		to="https://static1-ssl.dmcdn.net/" />
62
+
63
+	<rule from="^http://(s\.|www\.)?dmcloud\.net/"
64
+		to="https://$1dmcloud.net/" />
65
+
66
+	<rule from="^https?://support\.dmcloud\.net/"
67
+		to="https://dmcloud.zendesk.com/" />
68
+
69
+</ruleset>

+ 53
- 0
searx/https_rules/Deviantart.xml Näytä tiedosto

1
+<!--
2
+	For problematic rules, see Deviantart-mismatches.xml.
3
+
4
+
5
+	Other deviantArt rulesets:
6
+
7
+		- Sta.sh.xml
8
+
9
+
10
+	ToDo: Find edgecast URL for /(fc|th)\d+.
11
+
12
+
13
+	Mixed content:
14
+
15
+		- Images on *.....com from e.deviantart.net *
16
+
17
+	* Secured by us
18
+
19
+-->
20
+<ruleset name="DeviantArt (pending)" default_off="site operator says not ready yet">
21
+
22
+	<target host="deviantart.com" />
23
+	<target host="*.deviantart.com" />
24
+	<target host="deviantart.net" />
25
+	<target host="*.deviantart.net" />
26
+
27
+
28
+	<!--	Not secured by server:
29
+					-->
30
+	<!--securecookie host="^\.deviantart\.com$" name="^userinfo$" /-->
31
+
32
+	<securecookie host="^\.deviantart\.com$" name=".*" />
33
+
34
+
35
+	<!--	Redirects from com to net, but does so successfully by itself.
36
+										-->
37
+	<rule from="^http://([aei]|fc\d\d|s[ht]|th\d\d)\.deviantart\.(com|net)/"
38
+		to="https://$1.deviantart.$2/" />
39
+
40
+	<!--	This handles everything that isn't in the first rule.
41
+		Namely, usernames, backend, fc, th, and (www.).
42
+			These domains present a cert that is only
43
+		valid for .com.
44
+			Note that .net isn't used on DA, but .net does
45
+		redirect to .com, and we shouldn't break what would
46
+		otherwise work.
47
+			Mustn't rewrite from https here, as doing so
48
+		would conflict with the first rule.
49
+								-->
50
+	<rule from="^http://([^/:@\.]+\.)?deviantart\.(?:com|net)/"
51
+		to="https://$1deviantart.com/" />
52
+
53
+</ruleset>

+ 38
- 0
searx/https_rules/DuckDuckGo.xml Näytä tiedosto

1
+<!--
2
+	Problematic domains:
3
+
4
+		- www.dukgo.com		(mismatched, CN: dukgo.com)
5
+
6
+
7
+	Fully covered domains:
8
+
9
+		- (www.)dukgo.com	(www → ^)
10
+
11
+-->
12
+<ruleset name="DuckDuckGo">
13
+  <target host="duckduckgo.com" />
14
+  <target host="*.duckduckgo.com" />
15
+  <target host="ddg.gg" />
16
+  <target host="duck.co" />
17
+  <target host="i.duck.co" />
18
+	<target host="dukgo.com" />
19
+	<target host="www.dukgo.com" />
20
+
21
+  <exclusion pattern="^http://(help|meme)\.duckduckgo\.com/" />
22
+
23
+	<securecookie host="^duck\.co$" name=".*"/>
24
+
25
+  <rule from="^http://duckduckgo\.com/" to="https://duckduckgo.com/"/>
26
+  <rule from="^http://([^/:@\.]+)\.duckduckgo\.com/" to="https://$1.duckduckgo.com/"/>
27
+	<!-- TODO: What does ddg.gg/foo do? Runs query foo, redirects to homepage, or error? -->
28
+    <rule from="^http://ddg\.gg/$" to="https://duckduckgo.com/" />
29
+	
30
+    <rule from="^http://duck\.co/" to="https://duck.co/" />
31
+
32
+	<rule from="^http://i\.duck\.co/"
33
+		to="https://duckduckgo.com/"/>
34
+
35
+	<rule from="^http://(?:www\.)?dukgo\.com/"
36
+		to="https://dukgo.com/" />
37
+
38
+</ruleset>

+ 44
- 0
searx/https_rules/Flickr.xml Näytä tiedosto

1
+<!--
2
+	For other Yahoo coverage, see Yahoo.xml.
3
+
4
+
5
+	These altnames don't exist:
6
+
7
+		- www.blog.flickr.net
8
+		- www.code.flickr.net
9
+
10
+-->
11
+<ruleset name="Flickr">
12
+
13
+	<target host="flic.kr" />
14
+	<target host="*.flic.kr" />
15
+	<target host="flickr.com" />
16
+	<target host="*.flickr.com" />
17
+	<target host="*.flickr.net" />
18
+	<target host="*.staticflickr.com" />
19
+
20
+
21
+	<!--	Not secured by server:
22
+					-->
23
+	<!--securecookie host="^\.flic\.kr$" name="^BX$" /-->
24
+
25
+	<securecookie host="^\.flic\.kr$" name=".+" />
26
+	<securecookie host=".*\.flickr\.com$" name=".+" />
27
+
28
+
29
+	<rule from="^http://flic\.kr/"
30
+		to="https://flic.kr/" />
31
+
32
+	<rule from="^http://(api\.|www\.)?flickr\.com/"
33
+		to="https://$1flickr.com/" />
34
+
35
+	<rule from="^http://s(ecure|tatic)\.flickr\.com/"
36
+		to="https://s$1.flickr.com/" />
37
+
38
+	<rule from="^http://(c2|farm\d+)\.static(\.)?flickr\.com/"
39
+		to="https://$1.static$2flickr.com/" />
40
+
41
+	<rule from="^http://(blog|code)\.flickr\.net/"
42
+		to="https://$1.flickr.net/" />
43
+
44
+</ruleset>

+ 11
- 0
searx/https_rules/Github-Pages.xml Näytä tiedosto

1
+<!--
2
+	For other GitHub coverage, see Github.xml.
3
+-->
4
+<ruleset name="GitHub Pages">
5
+
6
+	<target host="*.github.io" />
7
+
8
+	<rule from="^http://([^/@:\.]+)\.github\.io/"
9
+		to="https://$1.github.io/" />
10
+
11
+</ruleset>

+ 94
- 0
searx/https_rules/Github.xml Näytä tiedosto

1
+<!--
2
+	Other GitHub rulesets:
3
+
4
+		- Github-Pages.xml
5
+		- Guag.es.xml
6
+		- Speaker_Deck.com.xml
7
+
8
+
9
+	CDN buckets:
10
+
11
+		- github-images.s3.amazonaws.com
12
+		- github.global.ssl.fastly.net
13
+		- a248.e.akamai.net/assets.github.com/
14
+		- a248.e.akamai.net/camo.github.com/
15
+		- s3.amazonaws.com/github/ | d24z2fz21y4fag.cloudfront.net
16
+		- github.myshopify.com
17
+
18
+
19
+	Fully covered domains:
20
+
21
+		- github.com subdomains:
22
+
23
+			- (www.)
24
+			- assets\d+
25
+			- assets-cdn
26
+			- bounty
27
+			- cloud
28
+			- f.cloud
29
+			- codeload
30
+			- developer
31
+			- eclipse
32
+			- enterprise
33
+			- gist
34
+			- gist-assets
35
+			- help
36
+			- identicons
37
+			- jobs
38
+			- mac
39
+			- mobile
40
+			- nodeload
41
+			- octodex
42
+			- pages
43
+			- raw
44
+			- rg3
45
+			- shop
46
+			- status
47
+			- support
48
+			- training
49
+			- try
50
+			- wiki
51
+			- windows
52
+
53
+		- collector.githubapp.com
54
+
55
+		- githubusercontent.com
56
+
57
+-->
58
+<ruleset name="GitHub">
59
+
60
+	<target host="github.com" />
61
+	<target host="*.github.com" />
62
+	<target host="github.io" />
63
+	<target host="*.githubusercontent.com" />
64
+	<target host="collector.githubapp.com" />
65
+
66
+
67
+	<!--	Secured by server:
68
+					-->
69
+	<!--securecookie host="^github\.com$" name="^(_gh_sess|tz|user_session)$" /-->
70
+	<!--securecookie host="^\.github\.com$" name="^(dotcom_user|logged_in)$" /-->
71
+	<!--securecookie host="^enterprise\.github\.com$" name="^(_enterprise_web|request_method)$" /-->
72
+	<!--securecookie host="^gist\.github\.com$" name="^_gist_session$" /-->
73
+	<!--securecookie host="^help\.github\.com$" name="^_help_session$" /-->
74
+	<!--
75
+		Not secured by server:
76
+					-->
77
+	<!--securecookie host="^status\.github\.com$" name="^rack\.session$" /-->
78
+
79
+	<securecookie host="^(?:.*\.)?github\.com$" name=".+" />
80
+
81
+
82
+	<rule from="^http://((?:assets\d+|assets-cdn|bounty|cloud|f\.cloud|codeload|developer|eclipse|enterprise|gist|gist-assets|help|identicons|jobs|mac|mobile|nodeload|octodex|pages|raw|rg3|shop|status|support|training|try|wiki|windows|www)\.)?github\.com/"
83
+		to="https://$1github.com/" />
84
+
85
+	<rule from="^http://collector\.githubapp\.com/"
86
+		to="https://collector.githubapp.com/" />
87
+
88
+	<rule from="^https?://github\.io/"
89
+		to="https://pages.github.com/" />
90
+
91
+	<rule from="^http://([^/@:\.]+)\.githubusercontent\.com/"
92
+		to="https://$1.githubusercontent.com/" />
93
+
94
+</ruleset>

+ 26
- 0
searx/https_rules/Google-mismatches.xml Näytä tiedosto

1
+<!--
2
+
3
+	Problematic domains:
4
+
5
+		- (www.)apture.com	(works, mismatched, CN: *.google.com)
6
+
7
+-->
8
+<ruleset name="Google (mismatches)" default_off="mismatches">
9
+
10
+	<!--	Akamai	-->
11
+	<target host="js.admeld.com"/>
12
+	<target host="apture.com" />
13
+	<target host="www.apture.com" />
14
+	<target host="googleartproject.com"/>
15
+	<target host="www.googleartproject.com"/>
16
+
17
+	<rule from="^http://js\.admeld\.com/"
18
+		to="https://js.admeld.com/"/>
19
+
20
+	<rule from="^https?://(?:www\.)?apture\.com/"
21
+		to="https://apture.com/" />
22
+
23
+	<rule from="^http://(?:www\.)?googleartproject\.com/"
24
+		to="https://www.googleartproject.com/"/>
25
+
26
+</ruleset>

+ 14
- 0
searx/https_rules/Google.org.xml Näytä tiedosto

1
+<!--
2
+	For other Google coverage, see GoogleServices.xml.
3
+
4
+-->
5
+<ruleset name="Google.org">
6
+
7
+	<target host="google.org" />
8
+	<target host="www.google.org" />
9
+
10
+
11
+	<rule from="^http://(www\.)?google\.org/"
12
+		to="https://$1google.org/" />
13
+
14
+</ruleset>

+ 143
- 0
searx/https_rules/GoogleAPIs.xml Näytä tiedosto

1
+<!--
2
+	For other Google coverage, see GoogleServices.xml.
3
+
4
+
5
+	Nonfunctional domains:
6
+
7
+		- hosted.gmodules.com *
8
+		- img0.gmodules.com *
9
+		- p.gmodules.com *
10
+
11
+	* 404; mismatched, CN: *.googleusercontent.com
12
+
13
+
14
+	Problematic domains:
15
+
16
+		- gmodules.com			(503, CN: www.google.com)
17
+		- www.gmodules.com		(503, CN: *.googleusercontent.com)
18
+		- gstatic.com			(404, valid cert)
19
+		- api.recaptcha.net		(works; mismatched, CN: google.com)
20
+
21
+
22
+	Partially covered domains:
23
+
24
+		- (www.)gmodules.com		(→ www.google.com)
25
+		- (www.)google.com
26
+		- chart.apis.google.com		(→ chart.googleapis.com)
27
+
28
+
29
+	Fully covered domains:
30
+
31
+		- api.google.com
32
+
33
+		- *.clients.google.com:
34
+
35
+			- linkhelp
36
+
37
+		- ssl.google-analytics.com
38
+		- www.google-analytics.com
39
+
40
+		- googleapis.com subdomains:
41
+
42
+			- ajax
43
+			- chart
44
+			- *.commondatastorage
45
+			- fonts
46
+			- *.storage
47
+			- www
48
+
49
+		- gstatic.com subdomains:
50
+
51
+			- (www.)	(^ → www)
52
+			- csi
53
+			- encrypted-tbn\d
54
+			- g0
55
+			- *.metric
56
+			- ssl
57
+			- t\d
58
+
59
+		- api.recaptcha.net	(→ www.google.com)
60
+		- api-secure.recaptcha.net
61
+		- gdata.youtube.com
62
+
63
+
64
+	ssl.google-analytics.com/ga.js sets __utm\w wildcard
65
+	cookies on whichever domain it is loaded from.
66
+
67
+-->
68
+<ruleset name="Google APIs">
69
+
70
+	<target host="gmodules.com" />
71
+	<target host="www.gmodules.com" />
72
+	<target host="google.com" />
73
+	<target host="apis.google.com" />
74
+	<target host="*.apis.google.com" />
75
+	<target host="*.clients.google.com" />
76
+	<target host="www.google.com" />
77
+	<target host="*.google-analytics.com" />
78
+	<target host="*.googleapis.com" />
79
+	<target host="gstatic.com" />
80
+	<target host="*.gstatic.com" />
81
+	<!--	Captive portal detection redirects to this URL, and many captive
82
+		portals break TLS, so exempt this redirect URL.
83
+		See GitHub bug #368
84
+							-->
85
+		<exclusion pattern="^http://www\.gstatic\.com/generate_204" />
86
+	<target host="*.recaptcha.net" />
87
+	<target host="gdata.youtube.com" />
88
+		<exclusion pattern="^http://gdata\.youtube\.com/crossdomain\.xml" />
89
+
90
+
91
+	<securecookie host="^ssl\.google-analytics\.com$" name=".+" />
92
+
93
+
94
+	<rule from="^http://(?:www\.)?gmodules\.com/ig/images/"
95
+		to="https://www.google.com/ig/images/" />
96
+
97
+	<!--	jsapi was causing problems on some sites that embed google maps:
98
+		https://trac.torproject.org/projects/tor/ticket/2335
99
+		Apparently now fixed; thanks, Google!
100
+							-->
101
+	<rule from="^http://(?:www\.)?google\.com/(afsonline/|chart|jsapi|recaptcha/|uds)"
102
+		to="https://www.google.com/$1" />
103
+
104
+	<rule from="^http://(api|[\w-]+\.client)s\.google\.com/"
105
+		to="https://$1s.google.com/" />
106
+
107
+	<rule from="^http://chart\.apis\.google\.com/chart"
108
+		to="https://chart.googleapis.com/chart" />
109
+
110
+	<rule from="^http://(ssl|www)\.google-analytics\.com/"
111
+		to="https://$1.google-analytics.com/" />
112
+
113
+	<rule from="^http://(ajax|chart|fonts|www)\.googleapis\.com/"
114
+		to="https://$1.googleapis.com/" />
115
+
116
+	<rule from="^http://([^@:\./]+\.)?(commondata)?storage\.googleapis\.com/"
117
+		to="https://$1$2storage.googleapis.com/" />
118
+
119
+	<!--	There is an interesting question about whether we should
120
+		append &strip=1 to all cache URLs.  This causes them to load
121
+		without images and styles, which is more secure but can look
122
+		worse.
123
+			Without &strip=1, the images and styles from the cached
124
+		pages still load from the original, typically unencrypted, page.
125
+			With &strip=1, the cached page will be text-only and
126
+		will come exclusively from Google's HTTPS server.
127
+									-->
128
+	<rule from="^http://(?:www\.)?gstatic\.com/"
129
+		to="https://www.gstatic.com/" />
130
+
131
+	<rule from="^http://(csi|encrypted-tbn\d|g0|[\w-]+\.metric|ssl|t\d)\.gstatic\.com/"
132
+		to="https://$1.gstatic.com/" />
133
+
134
+	<rule from="^http://api\.recaptcha\.net/"
135
+		to="https://www.google.com/recaptcha/api/" />
136
+
137
+	<rule from="^http://api-secure\.recaptcha\.net/"
138
+		to="https://api-secure.recaptcha.net/" />
139
+
140
+	<rule from="^http://gdata\.youtube\.com/"
141
+		to="https://gdata.youtube.com/" />
142
+
143
+</ruleset>

+ 6
- 0
searx/https_rules/GoogleCanada.xml Näytä tiedosto

1
+<ruleset name="GoogleCanada">
2
+	<target host="google.ca" />
3
+	<target host="*.google.ca" />
4
+	<rule from="^http://([^/:@\.]+)\.google\.ca/finance" to="https://$1.google.ca/finance"/>
5
+</ruleset>
6
+

+ 65
- 0
searx/https_rules/GoogleImages.xml Näytä tiedosto

1
+<!--
2
+	For other Google coverage, see GoogleServices.xml.
3
+
4
+
5
+	Problematic domains:
6
+
7
+		- www.google.bo *
8
+		- www.google.co *
9
+		- www.google.ec *
10
+		- www.google.in *
11
+		- www.google.kr *
12
+		- www.google.com.kz **
13
+		- www.google.com.lk *
14
+		- www.google.mx **
15
+		- www.google.sg *
16
+		- www.google.sl *
17
+		- www.google.ug *
18
+		- www.google.vn *
19
+
20
+	* 404; mismatched, CN: google.com
21
+	** Works; mismatched, CN: google.com
22
+
23
+-->
24
+<ruleset name="Google Images">
25
+
26
+	<target host="google.*" />
27
+	<target host="www.google.*" />
28
+	<target host="google.co.*" />
29
+	<target host="www.google.co.*" />
30
+	<target host="google.com" />
31
+	<target host="images.google.com" />
32
+	<target host="google.com.*" />
33
+	<target host="www.google.com.*" />
34
+		<!--
35
+			Only handle image-related paths in this ruleset:
36
+										-->
37
+		<exclusion pattern="^http://(?:www\.)?google(?:\.com?)?\.\w{2,3}/(?!(?:advanced_image_search|imghp|.*tb(?:m=isch|s=sbi)))" />
38
+
39
+
40
+	<rule from="^http://(?:www\.)?google\.com/"
41
+		to="https://www.google.com/" />
42
+
43
+	<rule from="^http://images\.google\.com/"
44
+		to="https://images.google.com/" />
45
+
46
+	<!--	First handle problematic domains:
47
+							-->
48
+	<rule from="^http://(?:www\.)?google\.co/"
49
+		to="https://www.google.com/" />
50
+
51
+	<rule from="^http://(?:www\.)?google\.(?:co\.)?(in|kr|ug)/"
52
+		to="https://www.google.co.$1/" />
53
+
54
+	<rule from="^http://(?:www\.)?google\.(?:com\.)?(kz|lk)/"
55
+		to="https://www.google.$1/" />
56
+
57
+	<rule from="^http://(?:www\.)?google\.(?:com\.)?(bo|ec|mx|sg|sl|vn)/"
58
+		to="https://www.google.com.$1/" />
59
+
60
+	<!--	And then the rest:
61
+					-->
62
+	<rule from="^http://(?:www\.)?google\.(com?\.)?(ae|ar|at|au|bg|bh|br|ca|ch|cl|co|cr|cu|de|eg|es|fi|fr|gh|gt|hr|id|ie|il|it|jo|jp|jm|ke|kw|lb|ly|my|na|ng|nl|no|nz|om|pa|pe|pk|pl|pt|py|qa|ro|ru|rw|sa|se|sv|th|tr|uk|uy|ve|za|zw)/"
63
+		to="https://www.google.$1$2/" />
64
+
65
+</ruleset>

+ 78
- 0
searx/https_rules/GoogleMainSearch.xml Näytä tiedosto

1
+<ruleset name="Search www.google.com">
2
+
3
+<!-- 
4
+Enabling this ruleset should cause searches to go to
5
+https://www.google.com rather than https://encrypted.google.com.  Note that
6
+the filename is important; it must be before GoogleSearch.xml in a bash
7
+expansion of src/chrome/content/rules/*.xml in order to take precedence. 
8
+-->
9
+
10
+  <target host="*.google.com" />
11
+  <target host="google.com" />
12
+  <target host="www.google.com.*" />
13
+  <target host="google.com.*" />
14
+  <target host="www.google.co.*" />
15
+  <target host="google.co.*" />
16
+  <target host="www.google.*" />
17
+  <target host="google.*" />
18
+  <!-- beyond clients1 these do not currently exist in the ccTLDs,
19
+       but just in case... -->
20
+  <target host="clients1.google.com.*" />
21
+  <target host="clients2.google.com.*" />
22
+  <target host="clients3.google.com.*" />
23
+  <target host="clients4.google.com.*" />
24
+  <target host="clients5.google.com.*" />
25
+  <target host="clients6.google.com.*" />
26
+  <target host="clients1.google.co.*" />
27
+  <target host="clients2.google.co.*" />
28
+  <target host="clients3.google.co.*" />
29
+  <target host="clients4.google.co.*" />
30
+  <target host="clients5.google.co.*" />
31
+  <target host="clients6.google.co.*" />
32
+  <target host="clients1.google.*" />
33
+  <target host="clients2.google.*" />
34
+  <target host="clients3.google.*" />
35
+  <target host="clients4.google.*" />
36
+  <target host="clients5.google.*" />
37
+  <target host="clients6.google.*" />
38
+
39
+  <rule from="^http://www\.google\.com/$"
40
+          to="https://www.google.com/"/>
41
+
42
+  <!-- The most basic case. -->
43
+
44
+  <rule from="^http://(?:www\.)?google\.com/search"
45
+          to="https://www.google.com/search"/>
46
+
47
+  <!-- A very annoying exception that we seem to need for the basic case -->
48
+
49
+  <exclusion pattern="^http://(?:www\.)?google\.com/search.*tbs=shop" />
50
+  <exclusion pattern="^http://clients[0-9]\.google\.com/.*client=products.*" />
51
+  <exclusion pattern="^http://suggestqueries\.google\.com/.*client=.*" />
52
+
53
+  <!-- https://trac.torproject.org/projects/tor/ticket/9713 -->
54
+
55
+  <exclusion pattern="^http://clients[0-9]\.google\.com/ocsp" />
56
+
57
+  <!-- This is necessary for image results links from web search results -->
58
+
59
+  <exclusion pattern="^http://(?:www\.)?google\.com/search.*tbm=isch.*" />
60
+
61
+  <rule from="^http://(?:www\.)?google\.com/webhp"
62
+          to="https://www.google.com/webhp"/>
63
+
64
+  <rule from="^http://(?:www\.)?google\.com/#"
65
+          to="https://www.google.com/#"/>
66
+
67
+  <rule from="^http://(?:www\.)?google\.com/$"
68
+          to="https://www.google.com/"/>
69
+
70
+   <!-- Completion urls look like this:
71
+
72
+http://clients2.google.co.jp/complete/search?hl=ja&client=hp&expIds=17259,24660,24729,24745&q=m&cp=1 HTTP/1.1\r\n
73
+
74
+   -->
75
+  <rule from="^http://clients[0-9]\.google\.com/complete/search"
76
+          to="https://clients1.google.com/complete/search"/>
77
+
78
+</ruleset>

+ 67
- 0
searx/https_rules/GoogleMaps.xml Näytä tiedosto

1
+<!--
2
+	Problematic domains:
3
+
4
+		- khms *
5
+		- khms[0-3] *
6
+
7
+	* $ 404s
8
+
9
+
10
+	Fully covered domains:
11
+
12
+		- google.com subdomains:
13
+
14
+			- khms
15
+			- khms[0-3]
16
+
17
+-->
18
+<ruleset name="Google Maps">
19
+
20
+	<target host="maps.google.*" />
21
+		<!--
22
+			https://trac.torproject.org/projects/tor/ticket/8627
23
+										-->
24
+		<exclusion pattern="^http://maps\.google\.com/local_url" />
25
+		<exclusion pattern="^http://maps\.google\.gr/transitathens" />
26
+	<target host="maps.google.co.*" />
27
+	<target host="khms.google.com" />
28
+	<target host="khms0.google.com" />
29
+	<target host="khms1.google.com" />
30
+	<target host="khms2.google.com" />
31
+	<target host="khms3.google.com" />
32
+	<target host="maps-api-ssl.google.com" />
33
+	<target host="mw2.google.com" />
34
+	<target host="maps.google.com.*" />
35
+	<target host="maps.googleapis.com" />
36
+		<!--
37
+			https://mail1.eff.org/pipermail/https-everywhere-rules/2012-September/001317.html
38
+														-->
39
+		<!--exclusion pattern="^http://maps\.googleapis\.com/map(files/lib/map_1_20\.swf|sapi/publicapi\?file=flashapi)" /-->
40
+		<exclusion pattern="^http://maps\.googleapis\.com/map(?:files/lib/map_\d+_\d+\.swf|sapi/publicapi\?file=flashapi)" />
41
+	<target host="maps.gstatic.com" />
42
+
43
+
44
+	<!--securecookie host="^maps\.google\.(com?\.)?(au|ca|gh|ie|in|jm|ke|lk|my|n[agz]|pk|rw|sl|sg|ug|uk|za|zw)$" name=".+" /-->
45
+	<securecookie host="^maps\.google\.[\w.]{2,6}$" name=".+" />
46
+	<securecookie host="^maps\.g(?:oogle|oogleapis|static)\.com$" name=".+" />
47
+	<securecookie host="^maps-api-ssl\.google\.com$" name=".+" />
48
+
49
+
50
+	<rule from="^http://maps\.google\.([^/]+)/"
51
+		to="https://maps.google.$1/" />
52
+
53
+	<!--	http://khms.../$ 404s:
54
+					-->
55
+	<rule from="^http://khms\d?\.google\.com/+\??$"
56
+		to="https://www.google.com/" />
57
+
58
+	<rule from="^http://(khms\d?|maps-api-ssl|mw2)\.google\.com/"
59
+		to="https://$1.google.com/" />
60
+
61
+	<rule from="^http://maps\.g(oogleapis|static)\.com/"
62
+		to="https://maps.g$1.com/" />
63
+
64
+	<rule from="^https://maps\.googleapis\.com/map(?=files/lib/map_\d+_\d+\.swf|sapi/publicapi\?file=flashapi)"
65
+		to="http://maps.googleapis.com/map" downgrade="1" />
66
+
67
+</ruleset>

+ 6
- 0
searx/https_rules/GoogleMelange.xml Näytä tiedosto

1
+<ruleset name="GoogleMelange">
2
+  <target host="www.google-melange.com" />
3
+  <target host="google-melange.com" />
4
+
5
+  <rule from="^http://(www\.)?google-melange\.com/" to="https://www.google-melange.com/" />
6
+</ruleset>

+ 135
- 0
searx/https_rules/GoogleSearch.xml Näytä tiedosto

1
+<ruleset name="Google Search">
2
+
3
+	<target host="google.com" />
4
+	<target host="*.google.com" />
5
+	<target host="google.com.*" />
6
+	<target host="www.google.com.*" />
7
+	<target host="google.co.*" />
8
+	<target host="www.google.co.*" />
9
+	<target host="google.*" />
10
+	<target host="www.google.*" />
11
+	<!--
12
+		Beyond clients1 these do not currently
13
+		exist in the ccTLDs, but just in case...
14
+							-->
15
+	<target host="clients1.google.com.*" />
16
+	<target host="clients2.google.com.*" />
17
+	<target host="clients3.google.com.*" />
18
+	<target host="clients4.google.com.*" />
19
+	<target host="clients5.google.com.*" />
20
+	<target host="clients6.google.com.*" />
21
+	<target host="clients1.google.co.*" />
22
+	<target host="clients2.google.co.*" />
23
+	<target host="clients3.google.co.*" />
24
+	<target host="clients4.google.co.*" />
25
+	<target host="clients5.google.co.*" />
26
+	<target host="clients6.google.co.*" />
27
+	<target host="clients1.google.*" />
28
+	<target host="clients2.google.*" />
29
+	<target host="clients3.google.*" />
30
+	<target host="clients4.google.*" />
31
+	<target host="clients5.google.*" />
32
+	<target host="clients6.google.*" />
33
+
34
+
35
+	<!--	Some Google pages can generate naive links back to the
36
+		unencrypted version of encrypted.google.com, which is
37
+		a 301 but theoretically vulnerable to SSL stripping.
38
+									-->
39
+	<rule from="^http://encrypted\.google\.com/"
40
+		to="https://encrypted.google.com/" />
41
+
42
+	<!--	The most basic case.
43
+					-->
44
+	<rule from="^http://(?:www\.)?google\.com/search"
45
+		to="https://encrypted.google.com/search" />
46
+
47
+	<!--	A very annoying exception that we
48
+		seem to need for the basic case
49
+						-->
50
+	<exclusion pattern="^http://(?:www\.)?google\.com/search.*tbs=shop" />
51
+	<exclusion pattern="^http://clients\d\.google\.com/.*client=products.*" />
52
+	<exclusion pattern="^http://suggestqueries\.google\.com/.*client=.*" />
53
+
54
+  <!-- https://trac.torproject.org/projects/tor/ticket/9713 
55
+         -->
56
+
57
+  <exclusion pattern="^http://clients[0-9]\.google\.com/ocsp" />
58
+
59
+
60
+	<!--	This is necessary for image results
61
+		links from web search results
62
+						-->
63
+	<exclusion pattern="^http://(?:www\.)?google\.com/search.*tbm=isch.*" />
64
+
65
+	<rule from="^http://(?:www\.)?google\.com/about"
66
+		to="https://www.google.com/about" />
67
+
68
+	<!--	There are two distinct cases for these firefox searches	-->
69
+
70
+	<rule from="^http://(?:www\.)?google(?:\.com?)?\.[a-z]{2}/firefox/?$"
71
+		to="https://encrypted.google.com/" />
72
+
73
+	<rule from="^http://(?:www\.)?google(?:\.com?)?\.[a-z]{2}/firefox"
74
+		to="https://encrypted.google.com/webhp" />
75
+
76
+	<rule from="^http://(?:www\.)?google\.com/webhp"
77
+		to="https://encrypted.google.com/webhp" />
78
+
79
+	<rule from="^http://codesearch\.google\.com/"
80
+		to="https://codesearch.google.com/" />
81
+
82
+	<rule from="^http://(?:www\.)?google\.com/codesearch"
83
+		to="https://www.google.com/codesearch" />
84
+
85
+	<rule from="^http://(?:www\.)?google\.com/#"
86
+		to="https://encrypted.google.com/#" />
87
+
88
+	<rule from="^http://(?:www\.)?google\.com/$"
89
+		to="https://encrypted.google.com/" />
90
+
91
+	<!--	Google supports IPv6 search, including
92
+		HTTPS with a valid certificate!	-->
93
+	<rule from="^http://ipv6\.google\.com/"
94
+		to="https://ipv6.google.com/" />
95
+
96
+	<!--	most google international sites look like
97
+		"google.fr", some look like "google.co.jp",
98
+		and some crazy ones like "google.com.au"	-->
99
+
100
+	<rule from="^http://(www\.)?google(\.com?)?\.([a-z]{2})/(search\?|#)"
101
+		to="https://$1google$2.$3/$4" />
102
+
103
+	<!--	Language preference setting	-->
104
+	<rule from="^http://(www\.)?google(\.com?)?\.([a-z]{2})/setprefs"
105
+	to="https://$1google$2.$3/setprefs" />
106
+
107
+	<!--	Completion urls look like this:
108
+
109
+http://clients2.google.co.jp/complete/search?hl=ja&client=hp&expIds=17259,24660,24729,24745&q=m&cp=1 HTTP/1.1\r\n
110
+
111
+		-->
112
+	<rule from="^http://clients\d\.google\.com/complete/search"
113
+		to="https://clients1.google.com/complete/search" />
114
+
115
+	<rule from="^http://clients\d\.google(\.com?\.[a-z]{2})/complete/search"
116
+		to="https://clients1.google.$1/complete/search" />
117
+
118
+	<rule from="^http://clients\d\.google\.([a-z]{2})/complete/search"
119
+		to="https://clients1.google.$1/complete/search" />
120
+
121
+	<rule from="^http://suggestqueries\.google\.com/complete/search"
122
+		to="https://clients1.google.com/complete/search" />
123
+
124
+	<rule from="^http://(www\.)?google\.(com?\.)?([a-z]{2})/(?:webhp)?$"
125
+		to="https://$1google.$2$3/" />
126
+
127
+	<!--	If there are URL parameters, keep them.	-->
128
+	<rule from="^http://(www\.)?google\.(com?\.)?([a-z]{2})/(?:webhp)?\?"
129
+		to="https://$1google.$2$3/webhp?" />
130
+
131
+	<!-- teapot -->
132
+	<rule from="^http://(www\.)?google(\.com?)?\.([a-z]{2})/teapot"
133
+		to="https://$1google$2.$3/teapot" />
134
+
135
+</ruleset>

+ 345
- 0
searx/https_rules/GoogleServices.xml Näytä tiedosto

1
+<!--
2
+	Other Google rulesets:
3
+
4
+		- 2mdn.net.xml
5
+		- Admeld.xml
6
+		- ChannelIntelligence.com.xml
7
+		- Doubleclick.net.xml
8
+		- FeedBurner.xml
9
+		- Google.org.xml
10
+		- GoogleAPIs.xml
11
+		- Google_App_Engine.xml
12
+		- GoogleImages.xml
13
+		- GoogleShopping.xml
14
+		- Ingress.xml
15
+		- Meebo.xml
16
+		- Orkut.xml
17
+		- Postini.xml
18
+		- WebM_Project.org.xml
19
+
20
+
21
+	Nonfunctional domains:
22
+
23
+		- feedproxy.google.com			(404, valid cert)
24
+		- partnerpage.google.com *
25
+		- safebrowsing.clients.google.com	(404, mismatched)
26
+		- (www.)googlesyndicatedsearch.com	(404; mismatched, CN: google.com)
27
+		- buttons.googlesyndication.com *
28
+
29
+	* 404, valid cert
30
+
31
+
32
+	Nonfunctional google.com paths:
33
+
34
+		- analytics	(redirects to http)
35
+		- imgres
36
+		- gadgets *
37
+		- hangouts	(404)
38
+		- u/		(404)
39
+
40
+	* Redirects to http
41
+
42
+
43
+	Problematic domains:
44
+
45
+		- www.goo.gl		(404; mismatched, CN: *.google.com)
46
+
47
+		- google.com subdomains:
48
+
49
+			- books		(googlebooks/, images/, & intl/ 404, but works when rewritten to www)
50
+			- cbks0 ****
51
+			- earth *
52
+			- gg		($ 404s)
53
+			- knoll *
54
+			- scholar **
55
+			- trends *
56
+
57
+		- news.google.cctld **
58
+		- scholar.google.cctld **
59
+		- *-opensocial.googleusercontent.com ***
60
+
61
+	**** $ 404s
62
+	* 404, valid cert
63
+	** Redirects to http, valid cert
64
+	*** Breaks followers widget - https://trac.torproject.org/projects/tor/ticket/7294
65
+
66
+
67
+	Partially covered domains:
68
+
69
+		- google.cctld subdomains:
70
+
71
+			- scholar	(→ www)
72
+
73
+		- google.com subdomains:
74
+
75
+			- (www.)
76
+			- cbks0		($ 404s)
77
+			- gg		($ 404s)
78
+			- news		(→ www)
79
+			- scholar	(→ www)
80
+
81
+		- *.googleusercontent.com	(*-opensocial excluded)
82
+
83
+
84
+	Fully covered domains:
85
+
86
+		- lh[3-6].ggpht.com
87
+		- (www.)goo.gl		(www → ^)
88
+
89
+		- google.com subdomains:
90
+
91
+			- accounts
92
+			- adwords
93
+			- apis
94
+			- appengine
95
+			- books		(→ encrypted)
96
+			- calendar
97
+			- checkout
98
+			- chrome
99
+			- clients[12]
100
+			- code
101
+			- *.corp
102
+			- developers
103
+			- dl
104
+			- docs
105
+			- docs\d
106
+			- \d.docs
107
+			- drive
108
+			- earth		(→ www)
109
+			- encrypted
110
+			- encrypted-tbn[123]
111
+			- feedburner
112
+			- fiber
113
+			- finance
114
+			- glass
115
+			- groups
116
+			- health
117
+			- helpouts
118
+			- history
119
+			- hostedtalkgadget
120
+			- id
121
+			- investor
122
+			- knol
123
+			- knoll		(→ knol)
124
+			- lh\d
125
+			- mail
126
+			- chatenabled.mail
127
+			- pack
128
+			- picasaweb
129
+			- pki
130
+			- play
131
+			- plus
132
+			- plusone
133
+			- productforums
134
+			- profiles
135
+			- safebrowsing-cache
136
+			- cert-test.sandbox
137
+			- plus.sandbox
138
+			- sb-ssl
139
+			- script
140
+			- security
141
+			- services
142
+			- servicessites
143
+			- sites
144
+			- spreadsheets
145
+			- spreadsheets\d
146
+			- support
147
+			- talk
148
+			- talkgadget
149
+			- tbn2			(→ encrypted-tbn2)
150
+			- tools
151
+			- trends		(→ www)
152
+
153
+		- partner.googleadservices.com
154
+		- (www.)googlecode.com
155
+		- *.googlecode.com	(per-project subdomains)
156
+		- googlesource.com
157
+		- *.googlesource.com
158
+		- pagead2.googlesyndication.com
159
+		- tpc.googlesyndication.com
160
+		- mail-attachment.googleusercontent.com
161
+		- webcache.googleusercontent.com
162
+
163
+
164
+	XXX: Needs more testing
165
+
166
+-->
167
+<ruleset name="Google Services">
168
+
169
+	<target host="*.ggpht.com" />
170
+	<target host="gmail.com" />
171
+	<target host="www.gmail.com" />
172
+	<target host="goo.gl" />
173
+	<target host="www.goo.gl" />
174
+	<target host="google.*" />
175
+	<target host="accounts.google.*" />
176
+	<target host="adwords.google.*" />
177
+	<target host="finance.google.*" />
178
+	<target host="groups.google.*" />
179
+	<target host="it.google.*" />
180
+	<target host="news.google.*" />
181
+		<exclusion pattern="^http://(?:news\.)?google\.com/(?:archivesearch|newspapers)" />
182
+	<target host="picasaweb.google.*" />
183
+	<target host="scholar.google.*" />
184
+	<target host="www.google.*" />
185
+	<target host="*.google.ca" />
186
+	<target host="google.co.*" />
187
+	<target host="accounts.google.co.*" />
188
+	<target host="adwords.google.co.*" />
189
+	<target host="finance.google.co.*" />
190
+	<target host="groups.google.co.*" />
191
+	<target host="id.google.co.*" />
192
+	<target host="news.google.co.*" />
193
+	<target host="picasaweb.google.co.*" />
194
+	<target host="scholar.google.co.*" />
195
+	<target host="www.google.co.*" />
196
+	<target host="google.com" />
197
+	<target host="*.google.com" />
198
+		<exclusion pattern="^http://(?:www\.)?google\.com/analytics/*(?:/[^/]+)?(?:\?.*)?$" />
199
+		<!--exclusion pattern="^http://books\.google\.com/(?!books/(\w+\.js|css/|javascript/)|favicon\.ico|googlebooks/|images/|intl/)" /-->
200
+		<exclusion pattern="^http://cbks0\.google\.com/(?:$|\?)" />
201
+		<exclusion pattern="^http://gg\.google\.com/(?!csi(?:$|\?))" />
202
+	<target host="google.com.*" />
203
+	<target host="accounts.google.com.*" />
204
+	<target host="adwords.google.com.*" />
205
+	<target host="groups.google.com.*" />
206
+	<target host="id.google.com.*" />
207
+	<target host="news.google.com.*" />
208
+	<target host="picasaweb.google.com.*" />
209
+	<target host="scholar.google.com.*" />
210
+	<target host="www.google.com.*" />
211
+	<target host="partner.googleadservices.com" />
212
+	<target host="googlecode.com" />
213
+	<target host="*.googlecode.com" />
214
+	<target host="googlemail.com" />
215
+	<target host="www.googlemail.com" />
216
+	<target host="googlesource.com" />
217
+	<target host="*.googlesource.com" />
218
+	<target host="*.googlesyndication.com" />
219
+	<target host="www.googletagservices.com" />
220
+	<target host="googleusercontent.com" />
221
+	<target host="*.googleusercontent.com" />
222
+		<!--
223
+			Necessary for the Followers widget:
224
+
225
+				 https://trac.torproject.org/projects/tor/ticket/7294
226
+											-->
227
+		<exclusion pattern="http://[^@:\./]+-opensocial\.googleusercontent\.com" />
228
+
229
+
230
+	<!--	Can we secure any of these wildcard cookies safely?
231
+									-->
232
+	<!--securecookie host="^\.google\.com$" name="^(hl|I4SUserLocale|NID|PREF|S)$" /-->
233
+	<!--securecookie host="^\.google\.[\w.]{2,6}$" name="^(hl|I4SUserLocale|NID|PREF|S|S_awfe)$" /-->
234
+	<securecookie host="^(?:accounts|adwords|\.code|login\.corp|developers|docs|\d\.docs|fiber|mail|picasaweb|plus|\.?productforums|support)\.google\.[\w.]{2,6}$" name=".+" />
235
+	<securecookie host="^www\.google\.com$" name="^GoogleAccountsLocale_session$" />
236
+	<securecookie host="^mail-attachment\.googleusercontent\.com$" name=".+" />
237
+	<securecookie host="^gmail\.com$" name=".+" />
238
+	<securecookie host="^www\.gmail\.com$" name=".+" />
239
+	<securecookie host="^googlemail\.com$" name=".+" />
240
+	<securecookie host="^www\.googlemail\.com$" name=".+" />
241
+
242
+
243
+	<!--    - lh 3-6 exist
244
+		- All appear identical
245
+		- Identical to lh\d.googleusercontent.com
246
+					-->
247
+	<rule from="^http://lh(\d)\.ggpht\.com/"
248
+		to="https://lh$1.ggpht.com/" />
249
+
250
+	<rule from="^http://lh(\d)\.google\.ca/"
251
+		to="https://lh$1.google.ca/" />
252
+
253
+
254
+	<rule from="^http://(www\.)?g(oogle)?mail\.com/"
255
+		to="https://$1g$2mail.com/" />
256
+
257
+	<rule from="^http://(?:www\.)?goo\.gl/"
258
+		to="https://goo.gl/" />
259
+
260
+
261
+	<!--	Redirects to http when rewritten to www:
262
+							-->
263
+	<rule from="^http://books\.google\.com/"
264
+		to="https://encrypted.google.com/" />
265
+
266
+	<!--	tisp$ 404s:
267
+				-->
268
+	<rule from="^http://(?:www\.)?google\.((?:com?\.)?\w{2,3})/tisp(?=$|\?)"
269
+		to="https://www.google.$1/tisp/" />
270
+
271
+	<!--	Paths that work on all in google.*
272
+							-->
273
+	<rule from="^http://(?:www\.)?google\.((?:com?\.)?\w{2,3})/(accounts|adplanner|ads|adsense|adwords|analytics|bookmarks|chrome|contacts|coop|cse|css|culturalinstitute|doodles|earth|favicon\.ico|finance|get|goodtoknow|googleblogs|grants|green|hostednews|images|intl|js|landing|logos|mapmaker|newproducts|news|nexus|patents|policies|prdhp|profiles|products|reader|s2|settings|shopping|support|tisp|tools|transparencyreport|trends|urchin|webmasters)(?=$|[?/])"
274
+		 to="https://www.google.$1/$2" />
275
+
276
+	<!--	Paths that 404 on .cctld, but work on .com:
277
+								-->
278
+	<rule from="^http://(?:www\.)?google\.(?:com?\.)?\w{2,3}/(?=calendar|dictionary|doubleclick|help|ideas|pacman|postini|powermeter|url)"
279
+		 to="https://www.google.com/" />
280
+
281
+	<rule from="^http://(?:www\.)?google\.(?:com?\.)?\w{2,3}/custom"
282
+		 to="https://www.google.com/cse" />
283
+
284
+	<!--	Paths that only exist/work on .com
285
+							-->
286
+	<rule from="^http://(?:www\.)?google\.com/(\+|appsstatus|books|buzz|extern_js|glass|googlebooks|ig|insights|moderator|phone|safebrowsing|videotargetting|webfonts)(?=$|[?/])"
287
+		to="https://www.google.com/$1" />
288
+
289
+	<!--	Subdomains that work on all in google.*
290
+							-->
291
+	<rule from="^http://(accounts|adwords|finance|groups|id|picasaweb|)\.google\.((?:com?\.)?\w{2,3})/"
292
+		to="https://$1.google.$2/" />
293
+
294
+	<!--	Subdomains that only exist/work on .com
295
+							-->
296
+	<rule from="^http://(apis|appengine|books|calendar|cbks0|chat|checkout|chrome|clients[12]|code|[\w-]+\.corp|developers|dl|docs\d?|\d\.docs|drive|encrypted|encrypted-tbn[123]|feedburner|fiber|fonts|gg|glass||health|helpouts|history|(?:hosted)?talkgadget|investor|lh\d|(?:chatenabled\.)?mail|pack|pki|play|plus(?:\.sandbox)?|plusone|productforums|profiles|safebrowsing-cache|cert-test\.sandbox|sb-ssl|script|security|services|servicessites|sites|spreadsheets\d?|support|talk|tools)\.google\.com/"
297
+		to="https://$1.google.com/" />
298
+
299
+	<exclusion pattern="^http://clients[0-9]\.google\.com/ocsp"/>
300
+
301
+	<rule from="^http://earth\.google\.com/"
302
+		to="https://www.google.com/earth/" />
303
+
304
+	<rule from="^http://scholar\.google\.((?:com?\.)?\w{2,3})/intl/"
305
+		to="https://www.google.$1/intl/" />
306
+
307
+	<rule from="^http://(?:encrypted-)?tbn2\.google\.com/"
308
+		to="https://encrypted-tbn2.google.com/" />
309
+
310
+
311
+	<rule from="^http://knoll?\.google\.com/"
312
+		to="https://knol.google.com/" />
313
+
314
+
315
+	<rule from="^http://news\.google\.(?:com?\.)?\w{2,3}/(?:$|news|newshp)"
316
+		to="https://www.google.com/news" />
317
+
318
+	<rule from="^http://trends\.google\.com/"
319
+		 to="https://www.google.com/trends" />
320
+
321
+
322
+	<rule from="^http://([^/:@\.]+\.)?googlecode\.com/"
323
+		 to="https://$1googlecode.com/" />
324
+
325
+	<rule from="^http://([^\./]+\.)?googlesource\.com/"
326
+		to="https://$1googlesource.com/" />
327
+
328
+
329
+	<rule from="^http://partner\.googleadservices\.com/"
330
+		 to="https://partner.googleadservices.com/" />
331
+
332
+	<rule from="^http://(pagead2|tpc)\.googlesyndication\.com/"
333
+		 to="https://$1.googlesyndication.com/" />
334
+
335
+	<!--	!www doesn't exist.
336
+					-->
337
+	<rule from="^http://www\.googletagservices\.com/tag/js/"
338
+		to="https://www.googletagservices.com/tag/js/" />
339
+
340
+
341
+	<rule from="^http://([^@:\./]+)\.googleusercontent\.com/"
342
+		to="https://$1.googleusercontent.com/" />
343
+	
344
+
345
+</ruleset>

+ 28
- 0
searx/https_rules/GoogleShopping.xml Näytä tiedosto

1
+<!--
2
+	For other Google coverage, see GoogleServices.xml.
3
+
4
+-->
5
+<ruleset name="Google Shopping">
6
+
7
+	<target host="google.*" />
8
+	<target host="www.google.*" />
9
+	<target host="google.co.*" />
10
+	<target host="www.google.co.*" />
11
+	<target host="*.google.com" />
12
+	<target host="google.com.*" />
13
+	<target host="www.google.com.*" />
14
+
15
+
16
+	<rule from="^http://encrypted\.google\.com/(prdhp|shopping)" 
17
+		to="https://www.google.com/$1" />
18
+
19
+	<rule from="^http://shopping\.google\.com/"
20
+		to="https://shopping.google.com/" />
21
+
22
+	<rule from="^http://(?:encrypted|www)\.google\.com/(.*tbm=shop)"
23
+		to="https://www.google.com/$1" />
24
+
25
+	<rule from="^http://(?:www\.)?google\.((?:com?\.)?(?:ae|ar|at|au|bg|bh|bo|br|ca|ch|cl|cr|co|cu|de|ec|eg|es|fi|fr|gh|gt|hr|id|ie|il|in|it|jm|jo|jp|ke|kr|kw|kz|lb|lk|ly|mx|my|na|ng|nl|no|nz|om|pa|pe|pk|pl|pt|py|qa|ro|ru|rw|sa|sg|sl|se|sv|th|tr|ug|uk|uy|ve|vn|za|zw))/(?=prdhp|shopping)"
26
+		to="https://www.google.com/$1" />
27
+
28
+</ruleset>

+ 7
- 0
searx/https_rules/GoogleSorry.xml Näytä tiedosto

1
+<ruleset name="GoogleSorry">
2
+  <target host="sorry.google.com" />
3
+  <target host="www.google.com" />
4
+  <target host="google.com" />
5
+
6
+  <rule from="^http://((sorry|www)\.)?google\.com/sorry/" to="https://sorry.google.com/sorry/" />
7
+</ruleset>

+ 8
- 0
searx/https_rules/GoogleTranslate.xml Näytä tiedosto

1
+<ruleset name="Google Translate (broken)" default_off="redirect loops">
2
+  <target host="translate.googleapis.com" />
3
+  <target host="translate.google.com" />
4
+
5
+  <rule from="^http://translate\.googleapis\.com/" to="https://translate.googleapis.com/"/>
6
+  <rule from="^http://translate\.google\.com/"
7
+      to="https://translate.google.com/" />
8
+</ruleset>

+ 83
- 0
searx/https_rules/GoogleVideos.xml Näytä tiedosto

1
+<ruleset name="Google Videos">
2
+  <target host="*.google.com" />
3
+  <target host="google.com" />
4
+  <target host="www.google.com.*" />
5
+  <target host="google.com.*" />
6
+  <target host="www.google.co.*" />
7
+  <target host="google.co.*" />
8
+  <target host="www.google.*" />
9
+  <target host="google.*" />
10
+
11
+  <rule from="^http://encrypted\.google\.com/videohp" 
12
+          to="https://encrypted.google.com/videohp" />
13
+
14
+  <!-- https://videos.google.com is currently broken; work around that... -->
15
+  <rule from="^https?://videos?\.google\.com/$"
16
+          to="https://encrypted.google.com/videohp" />
17
+  <rule from="^http://(?:www\.)?google\.com/videohp"
18
+	  to="https://encrypted.google.com/videohp" />
19
+  <rule from="^http://(?:images|www|encrypted)\.google\.com/(.*tbm=isch)"
20
+          to="https://encrypted.google.com/$1" />
21
+
22
+  <rule
23
+   from="^http://(?:www\.)?google\.(?:com?\.)?(?:au|ca|gh|ie|in|jm|ke|lk|my|na|ng|nz|pk|rw|sl|sg|ug|uk|za|zw)/videohp"
24
+     to="https://encrypted.google.com/videohp" />
25
+  <rule
26
+   from="^http://(?:www\.)?google\.(?:com?\.)?(?:ar|bo|cl|co|cu|cr|ec|es|gt|mx|pa|pe|py|sv|uy|ve)/videohp$"
27
+    to="https://encrypted.google.com/videohp?hl=es" />
28
+  <rule
29
+   from="^http://(?:www\.)?google\.(?:com\.)?(?:ae|bh|eg|jo|kw|lb|ly|om|qa|sa)/videohp$"
30
+     to="https://encrypted.google.com/videohp?hl=ar" />
31
+  <rule from="^http://(?:www\.)?google\.(?:at|ch|de)/videohp$"
32
+          to="https://encrypted.google.com/videohp?hl=de" />
33
+  <rule from="^http://(?:www\.)?google\.(fr|nl|it|pl|ru|bg|pt|ro|hr|fi|no)/videohp$"
34
+          to="https://encrypted.google.com/videohp?hl=$1" />
35
+  <rule from="^http://(?:www\.)?google\.com?\.(id|th|tr)/videohp$"
36
+          to="https://encrypted.google.com/videohp?hl=$1" />
37
+  <rule from="^http://(?:www\.)?google\.com\.il/videohp$"
38
+          to="https://encrypted.google.com/videohp?hl=he" />
39
+  <rule from="^http://(?:www\.)?google\.com\.kr/videohp$"
40
+          to="https://encrypted.google.com/videohp?hl=ko" />
41
+  <rule from="^http://(?:www\.)?google\.com\.kz/videohp$"
42
+          to="https://encrypted.google.com/videohp?hl=kk" />
43
+  <rule from="^http://(?:www\.)?google\.com\.jp/videohp$"
44
+          to="https://encrypted.google.com/videohp?hl=ja" />
45
+  <rule from="^http://(?:www\.)?google\.com\.vn/videohp$"
46
+          to="https://encrypted.google.com/videohp?hl=vi" />
47
+  <rule from="^http://(?:www\.)?google\.com\.br/videohp$"
48
+          to="https://encrypted.google.com/videohp?hl=pt-BR" />
49
+  <rule from="^http://(?:www\.)?google\.se/videohp$"
50
+          to="https://encrypted.google.com/videohp?hl=sv" />
51
+
52
+<!-- If there are URL parameters, keep them. -->
53
+  <rule
54
+   from="^http://(?:www\.)?google\.(?:com?\.)?(?:ar|bo|cl|co|cu|cr|ec|es|gt|mx|pa|pe|py|sv|uy|ve)/videohp\?"
55
+    to="https://encrypted.google.com/videohp?hl=es&#38;" />
56
+  <rule
57
+   from="^http://(?:www\.)?google\.(?:com\.)?(?:ae|bh|eg|jo|kw|lb|ly|om|qa|sa)/videohp\?"
58
+     to="https://encrypted.google.com/videohp?hl=ar&#38;" />
59
+  <rule from="^http://(?:www\.)?google\.(?:at|ch|de)/videohp\?"
60
+          to="https://encrypted.google.com/videohp?hl=de&#38;" />
61
+  <rule from="^http://(?:www\.)?google\.(fr|nl|it|pl|ru|bg|pt|ro|hr|fi|no)/videohp\?"
62
+          to="https://encrypted.google.com/videohp?hl=$1&#38;" />
63
+  <rule from="^http://(?:www\.)?google\.com?\.(id|th|tr)/videohp\?"
64
+          to="https://encrypted.google.com/videohp?hl=$1&#38;" />
65
+  <rule from="^http://(?:www\.)?google\.com\.il/videohp\?"
66
+          to="https://encrypted.google.com/videohp?hl=he&#38;" />
67
+  <rule from="^http://(?:www\.)?google\.com\.kr/videohp\?"
68
+          to="https://encrypted.google.com/videohp?hl=ko&#38;" />
69
+  <rule from="^http://(?:www\.)?google\.com\.kz/videohp\?"
70
+          to="https://encrypted.google.com/videohp?hl=kk&#38;" />
71
+  <rule from="^http://(?:www\.)?google\.com\.jp/videohp\?"
72
+          to="https://encrypted.google.com/videohp?hl=ja&#38;" />
73
+  <rule from="^http://(?:www\.)?google\.com\.vn/videohp\?"
74
+          to="https://encrypted.google.com/videohp?hl=vi&#38;" />
75
+  <rule from="^http://(?:www\.)?google\.com\.br/videohp\?"
76
+          to="https://encrypted.google.com/videohp?hl=pt-BR&#38;" />
77
+  <rule from="^http://(?:www\.)?google\.se/videohp\?"
78
+          to="https://encrypted.google.com/videohp?hl=sv&#38;" />
79
+
80
+	<rule from="^http://video\.google\.com/ThumbnailServer2"
81
+		to="https://video.google.com/ThumbnailServer2" />
82
+
83
+</ruleset>

+ 17
- 0
searx/https_rules/GoogleWatchBlog.xml Näytä tiedosto

1
+<!--
2
+	gwbhrd.appspot.com
3
+
4
+-->
5
+<ruleset name="GoogleWatchBlog">
6
+
7
+	<target host="googlewatchblog.de" />
8
+	<target host="*.googlewatchblog.de" />
9
+
10
+
11
+	<securecookie host="^(?:www)?\.googlewatchblog\.de$" name=".+" />
12
+
13
+
14
+	<rule from="^http://(static\.|www\.)?googlewatchblog\.de/"
15
+		to="https://$1googlewatchblog.de/" />
16
+
17
+</ruleset>

+ 21
- 0
searx/https_rules/Google_App_Engine.xml Näytä tiedosto

1
+<!--
2
+	For other Google coverage, see GoogleServices.xml.
3
+
4
+-->
5
+<ruleset name="Google App Engine">
6
+
7
+	<target host="appspot.com" />
8
+	<target host="*.appspot.com" />
9
+		<!--
10
+			Redirects to http for some reason.
11
+								-->
12
+		<exclusion pattern="^http://photomunchers\.appspot\.com/" />
13
+
14
+
15
+	<securecookie host="^.+\.appspot\.com$" name=".+" />
16
+
17
+
18
+	<rule from="^http://([^@:\./]+\.)?appspot\.com/"
19
+		 to="https://$1appspot.com/" />
20
+
21
+</ruleset>

+ 16
- 0
searx/https_rules/Googleplex.com.xml Näytä tiedosto

1
+<!-- This rule was automatically generated based on an HSTS
2
+     preload rule in the Chromium browser.  See 
3
+     https://src.chromium.org/viewvc/chrome/trunk/src/net/base/transport_security_state.cc
4
+     for the list of preloads.  Sites are added to the Chromium HSTS
5
+     preload list on request from their administrators, so HTTPS should
6
+     work properly everywhere on this site.
7
+ 
8
+     Because Chromium and derived browsers automatically force HTTPS for
9
+     every access to this site, this rule applies only to Firefox. -->
10
+<ruleset name="Googleplex.com (default off)" platform="firefox" default_off="Certificate error">
11
+  <target host="googleplex.com" />
12
+
13
+  <securecookie host="^googleplex\.com$" name=".+" />
14
+
15
+  <rule from="^http://googleplex\.com/" to="https://googleplex.com/" />
16
+</ruleset>

+ 15
- 0
searx/https_rules/OpenStreetMap.xml Näytä tiedosto

1
+<ruleset name="OpenStreetMap">
2
+
3
+	<target host="openstreetmap.org"/>
4
+	<target host="*.openstreetmap.org"/>
5
+
6
+	<rule from="^http://(?:www\.)?openstreetmap\.org/"
7
+		to="https://www.openstreetmap.org/"/>
8
+
9
+	<rule from="^http://tile\.openstreetmap\.org/"
10
+		to="https://a.tile.openstreetmap.org/"/>
11
+
12
+	<rule from="^http://(blog|help|lists|nominatim|piwik|taginfo|[abc]\.tile|trac|wiki)\.openstreetmap\.org/"
13
+		to="https://$1.openstreetmap.org/"/>
14
+
15
+</ruleset>

+ 14
- 0
searx/https_rules/Rawgithub.com.xml Näytä tiedosto

1
+<!--
2
+	www: cert only matches ^rawgithub.com
3
+
4
+-->
5
+<ruleset name="rawgithub.com">
6
+
7
+	<target host="rawgithub.com" />
8
+	<target host="www.rawgithub.com" />
9
+
10
+
11
+	<rule from="^http://(?:www\.)?rawgithub\.com/"
12
+		to="https://rawgithub.com/" />
13
+
14
+</ruleset>

+ 101
- 0
searx/https_rules/Soundcloud.xml Näytä tiedosto

1
+<!--
2
+
3
+	CDN buckets:
4
+
5
+		- akmedia-a.akamaihd.net
6
+
7
+		- soundcloud.assistly.com
8
+
9
+			- help.soundcloud.com
10
+
11
+		- cs70.wac.edgecastcdn.net
12
+
13
+			- a1.sndcdn.com
14
+			- i1.sndcdn.com
15
+			- w1.sndcdn.com
16
+
17
+		- wpc.658D.edgecastcdn.net
18
+		- m-a.sndcdn.com.edgesuite.net 
19
+		- soundcloud.gettyimages.com
20
+
21
+		- scbackstage.wpengine.netdna-cdn.com
22
+
23
+			- ssl doesn't exist
24
+			- backstage.soundcloud.com
25
+
26
+		- soundcloud.wpengine.netdna-cdn.com
27
+
28
+			- -ssl doesn't exist
29
+			- blog.soundcloud.com
30
+
31
+		- gs1.wpc.v2cdn.netcdn.net
32
+		- gs1.wpc.v2cdn.net
33
+
34
+			- ec-media.soundcloud.com
35
+
36
+	Nonfunctional soundcloud.com subdomains:
37
+
38
+		- help		(redirects to http, mismatched, CN: *.assistly.com)
39
+		- m		(redirects to http)
40
+		- media
41
+		- status	(times out)
42
+
43
+
44
+	Problematic domains:
45
+
46
+		- m-a.sndcdn.com	(works, akamai)
47
+
48
+
49
+	Partially covered domains:
50
+
51
+		- backstage.soundcloud.com
52
+
53
+
54
+	Fully covered domains:
55
+
56
+		- sndcdn.com subdomains:
57
+
58
+			- a[12]
59
+			- api
60
+			- i[1-4]
61
+			- w[12]
62
+			- wis
63
+
64
+		- soundcloud.com subdomains:
65
+
66
+			- (www.)
67
+			- api
68
+			- blog
69
+			- connect
70
+			- developers
71
+			- ec-media
72
+			- eventlogger
73
+			- help-assets
74
+			- media
75
+			- visuals
76
+			- w
77
+
78
+-->
79
+<ruleset name="Soundcloud (partial)">
80
+
81
+	<target host="scbackstage.wpengine.netdna-cdn.com" />
82
+	<target host="soundcloud.wpengine.netdna-cdn.com" />
83
+	<target host="*.sndcdn.com" />
84
+	<target host="soundcloud.com" />
85
+	<target host="*.soundcloud.com" />
86
+		<exclusion pattern="^https?://(?:scbackstage\.wpengine\.netdna-cdn|backstage\.soundcloud)\.com/(?!wp-content/)" />
87
+
88
+
89
+	<rule from="^http://([aiw]\d|api|wis)\.sndcdn\.com/"
90
+		to="https://$1.sndcdn.com/" />
91
+
92
+	<rule from="^http://((?:api|backstage|blog|connect|developers|ec-media|eventlogger|help-assets|media|visuals|w|www)\.)?soundcloud\.com/"
93
+		to="https://$1soundcloud.com/" />
94
+
95
+	<rule from="^https?://scbackstage\.wpengine\.netdna-cdn\.com/"
96
+		to="https://backstage.soundcloud.com/" />
97
+
98
+	<rule from="^https?://soundcloud\.wpengine\.netdna-cdn\.com/"
99
+		to="https://blog.soundcloud.com/" />
100
+
101
+</ruleset>

+ 36
- 0
searx/https_rules/ThePirateBay.xml Näytä tiedosto

1
+<!--
2
+  Nonfunctional:
3
+
4
+    - image.bayimg.com
5
+    - (www.)thepiratebay.sx		(http reply)
6
+
7
+
8
+  For problematic rules, see ThePirateBay-mismatches.xml.
9
+
10
+-->
11
+<ruleset name="The Pirate Bay (partial)">
12
+
13
+  <target host="suprbay.org" />
14
+  <target host="*.suprbay.org" />
15
+  <!--	* for cross-domain cookie	-->
16
+  <target host="*.forum.suprbay.org" />
17
+  <target host="thepiratebay.org"/>
18
+  <target host="*.thepiratebay.org"/>
19
+  <target host="thepiratebay.se"/>
20
+  <target host="*.thepiratebay.se"/>
21
+
22
+  <securecookie host="^.*\.suprbay\.org$" name=".*" />
23
+  <securecookie host="^(.*\.)?thepiratebay\.se$" name=".*"/>
24
+
25
+
26
+  <!--	Cert doesn't match (www.), redirects like so.	-->
27
+  <rule from="^https?://(?:forum\.|www\.)?suprbay\.org/"
28
+    to="https://forum.suprbay.org/" />
29
+
30
+  <rule from="^http://(?:www\.)?thepiratebay\.(?:org|se)/"
31
+    to="https://thepiratebay.se/"/>
32
+
33
+  <rule from="^http://(rss|static|torrents)\.thepiratebay\.(?:org|se)/"
34
+    to="https://$1.thepiratebay.se/"/>
35
+
36
+</ruleset>

+ 18
- 0
searx/https_rules/Torproject.xml Näytä tiedosto

1
+<ruleset name="Tor Project">
2
+
3
+	<target host="torproject.org" />
4
+	<target host="*.torproject.org" />
5
+		<exclusion pattern="^http://torperf\.torproject\.org/" />
6
+
7
+
8
+	<!--	Not secured by server:
9
+					-->
10
+	<!--securecookie host="^\.blog\.torproject\.org$" name="^SESS[0-9a-f]{32}$" /-->
11
+
12
+	<securecookie host="^(?:.*\.)?torproject\.org$" name=".+" />
13
+
14
+
15
+	<rule from="^http://([^/:@\.]+\.)?torproject\.org/"
16
+		 to="https://$1torproject.org/" />
17
+
18
+</ruleset>

+ 169
- 0
searx/https_rules/Twitter.xml Näytä tiedosto

1
+<!--
2
+	Other Twitter rulesets:
3
+
4
+		- Twitter_Community.com.xml
5
+
6
+
7
+	Nonfunctional domains:
8
+
9
+		- status.twitter.com *
10
+		- status.twitter.jp *
11
+
12
+	* Tumblr
13
+
14
+
15
+	CDN buckets:
16
+
17
+		- a1095.g.akamai.net/=/1095/134446/1d/platform.twitter.com/ | platform2.twitter.com.edgesuite.net
18
+
19
+			- platform2.twitter.com
20
+
21
+		- twitter-any.s3.amazonaws.com
22
+		- twitter-blog.s3.amazonaws.com
23
+
24
+		- d2rdfnizen5apl.cloudfront.net
25
+
26
+			- s.twimg.com
27
+
28
+		- ssl2.twitter.com.edgekey.net
29
+		- twitter.github.com
30
+
31
+
32
+	Problematic domains:
33
+
34
+		- twimg.com subdomains:
35
+
36
+			- a5 *
37
+			- s		(cloudfront)
38
+
39
+		- twitter.com subdomains:
40
+
41
+			- platform[0-3]		(403, akamai)
42
+
43
+	* akamai
44
+
45
+
46
+	Fully covered domains:
47
+
48
+		- (www.)t.co		(www → ^)
49
+
50
+		- twimg.com subdomains:
51
+
52
+			- a[5-9]	(→ si0)
53
+			- a\d
54
+			- abs
55
+			- dnt
56
+			- ea
57
+			- g
58
+			- g2
59
+			- gu
60
+			- hca
61
+			- jp
62
+			- ma
63
+			- ma[0123]
64
+			- o
65
+			- p
66
+			- pbs
67
+			- r
68
+			- s		(→ d2rdfnizen5apl.cloudfront.net)
69
+			- si[0-5]
70
+			- syndication
71
+			- cdn.syndication
72
+			- tailfeather
73
+			- ton
74
+			- v
75
+			- widgets
76
+
77
+		- twitter.com subdomains:
78
+
79
+			- (www.)
80
+			- 201[012]
81
+			- about
82
+			- ads
83
+			- analytics
84
+			- api
85
+			- cdn.api
86
+			- urls.api
87
+			- blog
88
+			- business
89
+			- preview.cdn
90
+			- preview-dev.cdn
91
+			- preview-stage.cdn
92
+			- de
93
+			- dev
94
+			- en
95
+			- engineering
96
+			- es
97
+			- firefox
98
+			- fr
99
+			- it
100
+			- ja
101
+			- jp
102
+			- m
103
+			- media
104
+			- mobile
105
+			- music
106
+			- oauth
107
+			- p
108
+			- pic
109
+			- platform
110
+			- platform[0-3]		(→ platform)
111
+			- widgets.platform
112
+			- search
113
+			- static
114
+			- support
115
+			- transparency
116
+			- upload
117
+
118
+
119
+	These altnames don't exist:
120
+
121
+		- i3.twimg.com
122
+		- p-dev.twimg.com
123
+		- vmtc.twimg.com
124
+
125
+		- cdn-dev.api.twitter.com
126
+
127
+-->
128
+<ruleset name="Twitter">
129
+
130
+	<target host="t.co" />
131
+	<target host="*.t.co" />
132
+	<target host="*.twimg.com" />
133
+	<target host="twitter.com" />
134
+	<target host="*.twitter.com" />
135
+
136
+
137
+	<!--	Secured by server:
138
+					-->
139
+	<!--securecookie host="^\.twitter\.com$" name="^_twitter_sess$" /-->
140
+	<!--securecookie host="^support\.twitter\.com$" name="^_help_center_session$" /-->
141
+	<!--
142
+		Not secured by server:
143
+					-->
144
+	<!--securecookie host="^\.t\.co$" name="^muc$" /-->
145
+	<!--securecookie host="^\.twitter\.com$" name="^guest_id$" /-->
146
+
147
+	<securecookie host="^\.t\.co$" name=".+" />
148
+	<securecookie host="^(?:.*\.)?twitter\.com$" name=".+" />
149
+
150
+
151
+	<rule from="^http://(?:www\.)?t\.co/"
152
+		to="https://t.co/" />
153
+
154
+	<rule from="^http://a[5-9]\.twimg\.com/"
155
+		to="https://si0.twimg.com/" />
156
+
157
+	<rule from="^http://(abs|a\d|dnt|ea|g[2u]?|hca|jp|ma\d?|o|p|pbs|r|si\d|(?:cdn\.)?syndication|tailfeather|ton|v|widgets)\.twimg\.com/"
158
+		to="https://$1.twimg.com/" />
159
+
160
+	<rule from="^http://s\.twimg\.com/"
161
+		to="https://d2rdfnizen5apl.cloudfront.net/" />
162
+
163
+	<rule from="^http://((?:201\d|about|ads|analytics|blog|(?:cdn\.|urls\.)?api|business|preview(?:-dev|-stage)?\.cdn|de|dev|engineering|en|es|firefox|fr|it|ja|jp|m|media|mobile|music|oauth|p|pic|platform|widgets\.platform|search|static|support|transparency|upload|www)\.)?twitter\.com/"
164
+		to="https://$1twitter.com/" />
165
+
166
+	<rule from="^http://platform\d\.twitter\.com/"
167
+		to="https://platform.twitter.com/" />
168
+
169
+</ruleset>

+ 75
- 0
searx/https_rules/Vimeo.xml Näytä tiedosto

1
+<!--
2
+	CDN buckets:
3
+
4
+		- av.vimeo.com.edgesuite.net
5
+
6
+			- a808.g.akamai.net
7
+
8
+		- pdl.vimeocdn.com.edgesuite.net
9
+
10
+			- a1189.g.akamai.net
11
+
12
+
13
+	Problematic subdomains:
14
+
15
+		- av	(pdl.../crossdomain.xml restricts to port 80)
16
+		- pdl	(works, akamai)
17
+
18
+
19
+	Partially covered subdomains:
20
+
21
+		- developer	(some pages redirect to http)
22
+		- pdl		(→ akamai)
23
+
24
+
25
+	Fully covered subdomains:
26
+
27
+		- (www.)
28
+		- secure
29
+
30
+
31
+Default off per https://trac.torproject.org/projects/tor/ticket/7569 -->
32
+<ruleset name="Vimeo (default off)" default_off="breaks some video embedding">
33
+
34
+	<target host="vimeo.com" />
35
+	<target host="*.vimeo.com" />
36
+		<exclusion pattern="^http://av\.vimeo\.com/crossdomain\.xml" />
37
+		<!--exclusion pattern="^http://developer\.vimeo\.com/($|\?|(apps|guidelines|help|player)($|[?/]))" /-->
38
+		<exclusion pattern="^http://developer\.vimeo\.com/(?!apis(?:$|[?/])|favicon\.ico)" />
39
+	<target host="*.vimeocdn.com" />
40
+		<!--
41
+			Uses crossdomain.xml from s3.amazonaws.com, which sets secure="false"
42
+
43
+				https://mail1.eff.org/pipermail/https-everywhere/2012-October/001583.html
44
+			-->
45
+		<exclusion pattern="^http://a\.vimeocdn\.com/p/flash/moogaloop/" />
46
+
47
+		<!--	We cannot secure streams because crossdomain.xml
48
+			restricts to port 80 :(
49
+						-->
50
+		<exclusion pattern="^http://pdl\.vimeocdn\.com/(?!crossdomain\.xml)" />
51
+
52
+
53
+	<!--	Tracking cookies:
54
+					-->
55
+	<securecookie host="^\.(?:player\.)?vimeo\.com$" name="^__utm\w$" />
56
+
57
+
58
+	<rule from="^http://((?:developer|player|secure|www)\.)?vimeo\.com/"
59
+		to="https://$1vimeo.com/" />
60
+
61
+	<rule from="^http://av\.vimeo\.com/"
62
+		to="https://a248.e.akamai.net/f/808/9207/8m/av.vimeo.com/" />
63
+
64
+	<!--	a & b: Akamai	-->
65
+	<rule from="^http://(?:secure-)?([ab])\.vimeocdn\.com/"
66
+		to="https://secure-$1.vimeocdn.com/" />
67
+
68
+	<rule from="^http://i\.vimeocdn\.com/"
69
+		to="https://i.vimeocdn.com/" />
70
+
71
+	<rule from="^http://pdl\.vimeocdn\.com/"
72
+		to="https://a248.e.akamai.net/f/1189/4415/8d/pdl.vimeocdn.com/" />
73
+
74
+</ruleset>
75
+

+ 13
- 0
searx/https_rules/WikiLeaks.xml Näytä tiedosto

1
+<ruleset name="WikiLeaks">
2
+
3
+	<target host="wikileaks.org" />
4
+	<target host="*.wikileaks.org" />
5
+
6
+
7
+	<securecookie host="^(?:w*\.)?wikileaks\.org$" name=".+" />
8
+
9
+
10
+	<rule from="^http://((?:chat|search|shop|www)\.)?wikileaks\.org/"
11
+		to="https://$1wikileaks.org/" />
12
+
13
+</ruleset>

+ 107
- 0
searx/https_rules/Wikimedia.xml Näytä tiedosto

1
+<!--
2
+	Wikipedia and other Wikimedia Foundation wikis previously had no real HTTPS support, and
3
+	URLs had to be rewritten to https://secure.wikimedia.org/$wikitype/$language/ . This is no
4
+	longer the case, see https://blog.wikimedia.org/2011/10/03/native-https-support-enabled-for-all-wikimedia-foundation-wikis/ ,
5
+	so this file is a lot simpler these days.
6
+
7
+
8
+	Mixed content:
9
+
10
+		- Images, on:
11
+
12
+			- stats.wikimedia.org from upload.wikimedia.org *
13
+			- stats.wikimedia.org from wikimediafoundation.org *
14
+
15
+	* Secured by us
16
+
17
+-->
18
+<ruleset name="Wikimedia">
19
+
20
+	<target host="enwp.org" />
21
+	<target host="frwp.org" />
22
+
23
+	<target host="mediawiki.org" />
24
+	<target host="www.mediawiki.org" />
25
+	<target host="wikimedia.org" />
26
+	<target host="*.wikimedia.org" />
27
+		<exclusion pattern="^http://(?:apt|cs|cz|parsoid-lb\.eqiad|status|torrus|ubuntu)\.wikimedia\.org" />
28
+		<!-- https://mail1.eff.org/pipermail/https-everywhere-rules/2012-June/001189.html -->
29
+		<exclusion pattern="^http://lists\.wikimedia\.org/pipermail(?:$|/)" />
30
+	<target host="wikimediafoundation.org" />
31
+	<target host="www.wikimediafoundation.org" />
32
+
33
+	<!-- Wikimedia projects (also some wikimedia.org subdomains) -->
34
+	<target host="wikibooks.org" />
35
+	<target host="*.wikibooks.org" />
36
+	<target host="wikidata.org" />
37
+	<target host="*.wikidata.org" />
38
+	<target host="wikinews.org" />
39
+	<target host="*.wikinews.org" />
40
+	<target host="wikipedia.org" />
41
+	<target host="*.wikipedia.org" />
42
+	<target host="wikiquote.org" />
43
+	<target host="*.wikiquote.org" />
44
+	<target host="wikisource.org" />
45
+	<target host="*.wikisource.org" />
46
+	<target host="wikiversity.org" />
47
+	<target host="*.wikiversity.org" />
48
+	<target host="wikivoyage.org" />
49
+	<target host="*.wikivoyage.org" />
50
+	<target host="wiktionary.org" />
51
+	<target host="*.wiktionary.org" />
52
+
53
+	<!-- Wikimedia chapters -->
54
+	<target host="wikimedia.ca" />
55
+	<target host="www.wikimedia.ca" />
56
+
57
+	<!-- Wikimedia Tool Labs -->
58
+	<target host="tools.wmflabs.org" />
59
+	<target host="icinga.wmflabs.org" />
60
+	<target host="ganglia.wmflabs.org" />
61
+
62
+	<!--	Not secured by server:
63
+					-->
64
+	<!--securecookie host="^\.wiki(books|ipedia)\.org$" name="^GeoIP$" /-->
65
+
66
+	<securecookie host="^^\.wik(?:ibooks|idata|imedia|inews|ipedia|iquote|isource|iversity|ivoyage|tionary)\.org$" name="^GeoIP$" />
67
+	<securecookie host="^([^@:/]+\.)?wik(ibooks|idata|inews|ipedia|iquote|isource|iversity|ivoyage|tionary)\.org$" name=".*" />
68
+	<securecookie host="^(species|commons|meta|incubator|wikitech).wikimedia.org$" name=".*" />
69
+	<securecookie host="^(?:www\.)?mediawiki\.org$" name=".*" />
70
+	<securecookie host="^wikimediafoundation.org$" name=".*" />
71
+
72
+	<rule from="^http://(en|fr)wp\.org/"
73
+		to="https://$1.wikipedia.org/wiki/" />
74
+
75
+	<rule from="^http://(?:www\.)?mediawiki\.org/"
76
+		to="https://www.mediawiki.org/" />
77
+
78
+	<rule from="^https?://download\.wikipedia\.org/"
79
+		to="https://dumps.wikimedia.org/" />
80
+
81
+	<rule from="^https?://(download|dataset2|sitemap)\.wikimedia\.org/"
82
+		to="https://dumps.wikimedia.org/" />
83
+
84
+	<rule from="^https?://(labs-ns[01]|virt0)\.wikimedia\.org/"
85
+		to="https://wikitech.wikimedia.org/" />	
86
+
87
+	<rule from="^https?://noboard\.chapters\.wikimedia\.org/"
88
+		to="https://noboard-chapters.wikimedia.org/" />
89
+
90
+	<rule from="^https?://wg\.en\.wikipedia\.org/"
91
+		to="https://wg-en.wikipedia.org/" />
92
+
93
+	<rule from="^https?://arbcom\.(de|en|fi|nl)\.wikipedia\.org/"
94
+		to="https://arbcom-$1.wikipedia.org/" />
95
+
96
+	<rule from="^http://([^@:/]+\.)?wik(ibooks|idata|imedia|inews|ipedia|iquote|isource|iversity|ivoyage|tionary)\.org/"
97
+		to="https://$1wik$2.org/" />
98
+
99
+	<rule from="^http://(www\.)?wikimediafoundation\.org/"
100
+		to="https://$1wikimediafoundation.org/" />
101
+
102
+	<rule from="^http://(www\.)?wikimedia\.ca/"
103
+		to="https://wikimedia.ca/" />
104
+
105
+	<rule from="^http://([^@:/]+)\.wmflabs\.org/"
106
+		to="https://$1.wmflabs.org/" />
107
+</ruleset>

+ 2450
- 0
searx/https_rules/Yahoo.xml
File diff suppressed because it is too large
Näytä tiedosto


+ 46
- 0
searx/https_rules/YouTube.xml Näytä tiedosto

1
+<ruleset name="YouTube (partial)">
2
+
3
+	<target host="youtube.com" />
4
+	<target host="*.youtube.com" />
5
+		<exclusion pattern="^http://(?:www\.)?youtube\.com/crossdomain\.xml"/>
6
+		<exclusion pattern="^http://(?:www\.)?youtube\.com/(?:apiplayer|api_video_info)"/>
7
+        <exclusion pattern="^http://(?:[^/@:\.]+\.)?ytimg\.com/.*apiplayer[0-9]*\.swf"/>
8
+	<target host="*.ytimg.com" />
9
+	<target host="youtu.be" />
10
+	<target host="youtube-nocookie.com"/>
11
+	<target host="www.youtube-nocookie.com"/>
12
+	<target host="*.googlevideo.com"/>
13
+                <exclusion pattern="^http://([^/@:\.]+)\.googlevideo\.com/crossdomain\.xml"/>
14
+
15
+
16
+	<!--	Not secured by server:
17
+					-->
18
+	<!--securecookie host="^\.youtube\.com$" name="^(GEUP|PREF|VISITOR_INFO1_LIVE|YSC)$" /-->
19
+
20
+	<!--	observed ^. cookies:
21
+			- use_hitbox
22
+			- VISITOR_INFO1_LIVE
23
+			- recently_watched_video_id_list
24
+			- .youtube.com		-->
25
+	<securecookie host="^\.youtube\.com" name=".*"/>
26
+
27
+
28
+	<rule from="^http://(www\.)?youtube\.com/"
29
+		to="https://$1youtube.com/"/>
30
+
31
+	<rule from="^http://(br|de|es|fr|il|img|insight|jp|m|nl|uk)\.youtube\.com/"
32
+		to="https://$1.youtube.com/"/>
33
+
34
+	<rule from="^http://([^/@:\.]+)\.ytimg\.com/"
35
+		to="https://$1.ytimg.com/"/>
36
+
37
+	<rule from="^http://youtu\.be/"
38
+		to="https://youtu.be/"/>
39
+
40
+	<rule from="^http://(?:www\.)?youtube-nocookie\.com/"
41
+		to="https://www.youtube-nocookie.com/"/>
42
+
43
+	<rule from="^http://([^/@:\.]+)\.googlevideo\.com/"
44
+	        to="https://$1.googlevideo.com/"/>
45
+
46
+</ruleset>

+ 3
- 0
searx/settings_robot.yml Näytä tiedosto

4
     debug : False
4
     debug : False
5
     request_timeout : 3.0 # seconds
5
     request_timeout : 3.0 # seconds
6
     base_url: False
6
     base_url: False
7
+    themes_path : ""
8
+    default_theme : default
9
+    https_rewrite : True
7
 
10
 
8
 engines:
11
 engines:
9
   - name : general_dummy
12
   - name : general_dummy

+ 52
- 5
searx/webapp.py Näytä tiedosto

50
 from searx.query import Query
50
 from searx.query import Query
51
 from searx.autocomplete import backends as autocomplete_backends
51
 from searx.autocomplete import backends as autocomplete_backends
52
 
52
 
53
+from urlparse import urlparse
54
+import re
55
+
53
 
56
 
54
 static_path, templates_path, themes =\
57
 static_path, templates_path, themes =\
55
     get_themes(settings['themes_path']
58
     get_themes(settings['themes_path']
206
         if not search.paging and engines[result['engine']].paging:
209
         if not search.paging and engines[result['engine']].paging:
207
             search.paging = True
210
             search.paging = True
208
 
211
 
212
+        # check if HTTPS rewrite is required 
209
         if settings['server']['https_rewrite']\
213
         if settings['server']['https_rewrite']\
210
            and result['parsed_url'].scheme == 'http':
214
            and result['parsed_url'].scheme == 'http':
211
 
215
 
212
-            for http_regex, https_url in https_rules:
213
-                if http_regex.match(result['url']):
214
-                    result['url'] = http_regex.sub(https_url, result['url'])
215
-                    # TODO result['parsed_url'].scheme
216
+            skip_https_rewrite = False
217
+
218
+            # check if HTTPS rewrite is possible
219
+            for target, rules, exclusions in https_rules:
220
+
221
+                # check if the target regex matches the url
222
+                if target.match(result['url']):
223
+                    # process exclusions
224
+                    for exclusion in exclusions:
225
+                        # check if the exclusion matches the url
226
+                        if exclusion.match(result['url']):
227
+                            skip_https_rewrite = True
228
+                            break
229
+
230
+                    # skip https rewrite if required
231
+                    if skip_https_rewrite:
232
+                        break
233
+
234
+                    # process rules
235
+                    for rule in rules:
236
+                        try:
237
+                            # TODO, precompile rule
238
+                            p = re.compile(rule[0])
239
+                            
240
+                            # rewrite url if possible
241
+                            new_result_url = p.sub(rule[1], result['url'])
242
+                        except:
243
+                            break
244
+
245
+                        # parse new url
246
+                        new_parsed_url = urlparse(new_result_url)
247
+
248
+                        # continue if nothing was rewritten
249
+                        if result['url'] == new_result_url:
250
+                            continue
251
+
252
+                        # get domainname from result
253
+                        # TODO, only works correctly with TLDs like asdf.com, not for asdf.com.de
254
+                        # TODO, use publicsuffix instead of this rewrite rule
255
+                        old_result_domainname = '.'.join(result['parsed_url'].hostname.split('.')[-2:])
256
+                        new_result_domainname = '.'.join(new_parsed_url.hostname.split('.')[-2:])
257
+
258
+                        # check if rewritten hostname is the same, to protect against wrong or malicious rewrite rules
259
+                        if old_result_domainname == new_result_domainname:
260
+                            # set new url
261
+                            result['url'] = new_result_url
262
+
263
+                    # target has matched, do not search over the other rules 
216
                     break
264
                     break
217
 
265
 
218
-        # HTTPS rewrite
219
         if search.request_data.get('format', 'html') == 'html':
266
         if search.request_data.get('format', 'html') == 'html':
220
             if 'content' in result:
267
             if 'content' in result:
221
                 result['content'] = highlight_content(result['content'],
268
                 result['content'] = highlight_content(result['content'],