Browse Source

[enh] https rewrite basics

Adam Tauber 10 years ago
parent
commit
96c8b20a04
3 changed files with 34 additions and 3 deletions
  1. 14
    0
      searx/https_rewrite.py
  2. 2
    1
      searx/settings.yml
  3. 18
    2
      searx/webapp.py

+ 14
- 0
searx/https_rewrite.py View File

import re

# Rules adapted from the HTTPS Everywhere project:
# https://gitweb.torproject.org/\
# pde/https-everywhere.git/tree/4.0:/src/chrome/content/rules

# Shared flags for every rewrite pattern: case-insensitive, unicode-aware.
_RULE_FLAGS = re.I | re.U

# HTTPS rewrite rules: each entry pairs a compiled "from" pattern with
# its "to" replacement string (backreferences allowed).
# NOTE: the empty alternative in (www\.|m\.|)? keeps group 1 defined even
# for bare domains, so \1 in the replacement never hits an unmatched group.
https_rules = (
    (re.compile(r'^http://(www\.|m\.|)?xkcd\.(?:com|org)/', _RULE_FLAGS),
     r'https://\1xkcd.com/'),
    (re.compile(r'^https?://(?:ssl)?imgs\.xkcd\.com/', _RULE_FLAGS),
     r'https://sslimgs.xkcd.com/'),
)

+ 2
- 1
searx/settings.yml View File

1
 server:
1
 server:
2
     port : 8888
2
     port : 8888
3
     secret_key : "ultrasecretkey" # change this!
3
     secret_key : "ultrasecretkey" # change this!
4
-    debug : True
4
+    debug : False
5
     request_timeout : 2.0 # seconds
5
     request_timeout : 2.0 # seconds
6
     base_url : False
6
     base_url : False
7
     themes_path : ""
7
     themes_path : ""
8
     default_theme : default
8
     default_theme : default
9
+    https_rewrite : True
9
 
10
 
10
 engines:
11
 engines:
11
   - name : wikipedia
12
   - name : wikipedia

+ 18
- 2
searx/webapp.py View File

41
 from searx.utils import (
41
 from searx.utils import (
42
     UnicodeWriter, highlight_content, html_to_text, get_themes
42
     UnicodeWriter, highlight_content, html_to_text, get_themes
43
 )
43
 )
44
+from searx.https_rewrite import https_rules
44
 from searx.languages import language_codes
45
 from searx.languages import language_codes
45
 from searx.search import Search
46
 from searx.search import Search
46
 from searx.autocomplete import backends as autocomplete_backends
47
 from searx.autocomplete import backends as autocomplete_backends
47
 
48
 
48
 
49
 
49
-static_path, templates_path, themes = get_themes(settings['themes_path'] if \
50
-    settings.get('themes_path', None) else searx_dir)
50
+static_path, templates_path, themes =\
51
+    get_themes(settings['themes_path']
52
+               if settings.get('themes_path')
53
+               else searx_dir)
51
 default_theme = settings['default_theme'] if \
54
 default_theme = settings['default_theme'] if \
52
     settings.get('default_theme', None) else 'default'
55
     settings.get('default_theme', None) else 'default'
53
 
56
 
192
                                                    search.lang)
195
                                                    search.lang)
193
 
196
 
194
     for result in search.results:
197
     for result in search.results:
198
+
195
         if not search.paging and engines[result['engine']].paging:
199
         if not search.paging and engines[result['engine']].paging:
196
             search.paging = True
200
             search.paging = True
201
+
202
+        if settings['server']['https_rewrite']\
203
+           and result['parsed_url'].scheme == 'http':
204
+
205
+            for http_regex, https_url in https_rules:
206
+                if http_regex.match(result['url']):
207
+                    result['url'] = http_regex.sub(https_url, result['url'])
208
+                    # TODO result['parsed_url'].scheme
209
+                    break
210
+
211
+        # HTTPS rewrite
197
         if search.request_data.get('format', 'html') == 'html':
212
         if search.request_data.get('format', 'html') == 'html':
198
             if 'content' in result:
213
             if 'content' in result:
199
                 result['content'] = highlight_content(result['content'],
214
                 result['content'] = highlight_content(result['content'],
206
             # removing html content and whitespace duplications
221
             # removing html content and whitespace duplications
207
             result['title'] = ' '.join(html_to_text(result['title'])
222
             result['title'] = ' '.join(html_to_text(result['title'])
208
                                        .strip().split())
223
                                        .strip().split())
224
+
209
         if len(result['url']) > 74:
225
         if len(result['url']) > 74:
210
             url_parts = result['url'][:35], result['url'][-35:]
226
             url_parts = result['url'][:35], result['url'][-35:]
211
             result['pretty_url'] = u'{0}[...]{1}'.format(*url_parts)
227
             result['pretty_url'] = u'{0}[...]{1}'.format(*url_parts)