Browse Source

[enh] reduce the number of outgoing plain-HTTP connections.

Alexandre Flament 10 years ago
parent
commit
78edc16e66

+ 2
- 2
searx/autocomplete.py View File

111
 
111
 
112
 
112
 
113
 def dbpedia(query):
113
 def dbpedia(query):
114
-    # dbpedia autocompleter
114
+    # dbpedia autocompleter, no HTTPS
115
     autocomplete_url = 'http://lookup.dbpedia.org/api/search.asmx/KeywordSearch?'  # noqa
115
     autocomplete_url = 'http://lookup.dbpedia.org/api/search.asmx/KeywordSearch?'  # noqa
116
 
116
 
117
     response = get(autocomplete_url
117
     response = get(autocomplete_url
139
 
139
 
140
 def google(query):
140
 def google(query):
141
     # google autocompleter
141
     # google autocompleter
142
-    autocomplete_url = 'http://suggestqueries.google.com/complete/search?client=toolbar&'  # noqa
142
+    autocomplete_url = 'https://suggestqueries.google.com/complete/search?client=toolbar&'  # noqa
143
 
143
 
144
     response = get(autocomplete_url
144
     response = get(autocomplete_url
145
                    + urlencode(dict(q=query)))
145
                    + urlencode(dict(q=query)))

+ 3
- 0
searx/engines/dailymotion.py View File

60
         publishedDate = datetime.fromtimestamp(res['created_time'], None)
60
         publishedDate = datetime.fromtimestamp(res['created_time'], None)
61
         embedded = embedded_url.format(videoid=res['id'])
61
         embedded = embedded_url.format(videoid=res['id'])
62
 
62
 
63
+        # http to https
64
+        thumbnail = thumbnail.replace("http://", "https://")
65
+
63
         results.append({'template': 'videos.html',
66
         results.append({'template': 'videos.html',
64
                         'url': url,
67
                         'url': url,
65
                         'title': title,
68
                         'title': title,

+ 7
- 1
searx/engines/deviantart.py View File

22
 
22
 
23
 # search-url
23
 # search-url
24
 base_url = 'https://www.deviantart.com/'
24
 base_url = 'https://www.deviantart.com/'
25
-search_url = base_url+'search?offset={offset}&{query}'
25
+search_url = base_url+'browse/all/?offset={offset}&{query}'
26
 
26
 
27
 
27
 
28
 # do search-request
28
 # do search-request
56
         thumbnail_src = link.xpath('.//img')[0].attrib.get('src')
56
         thumbnail_src = link.xpath('.//img')[0].attrib.get('src')
57
         img_src = regex.sub('/', thumbnail_src)
57
         img_src = regex.sub('/', thumbnail_src)
58
 
58
 
59
+        # http to https, remove domain sharding
60
+        thumbnail_src = re.sub(r"https?://(th|fc)\d+.", "https://th01.", thumbnail_src)
61
+        thumbnail_src = re.sub(r"http://", "https://", thumbnail_src)
62
+
63
+        url = re.sub(r"http://(.*)\.deviantart\.com/", "https://\\1.deviantart.com/", url)
64
+
59
         # append result
65
         # append result
60
         results.append({'url': url,
66
         results.append({'url': url,
61
                         'title': title,
67
                         'title': title,

+ 3
- 0
searx/engines/digg.py View File

58
         pubdate = result.xpath(pubdate_xpath)[0].attrib.get('datetime')
58
         pubdate = result.xpath(pubdate_xpath)[0].attrib.get('datetime')
59
         publishedDate = parser.parse(pubdate)
59
         publishedDate = parser.parse(pubdate)
60
 
60
 
61
+        # http to https
62
+        thumbnail = thumbnail.replace("http://static.digg.com", "https://static.digg.com")
63
+
61
         # append result
64
         # append result
62
         results.append({'url': url,
65
         results.append({'url': url,
63
                         'title': title,
66
                         'title': title,

+ 1
- 1
searx/engines/gigablast.py View File

17
 paging = True
17
 paging = True
18
 number_of_results = 5
18
 number_of_results = 5
19
 
19
 
20
-# search-url
20
+# search-url, invalid HTTPS certificate
21
 base_url = 'http://gigablast.com/'
21
 base_url = 'http://gigablast.com/'
22
 search_string = 'search?{query}&n={number_of_results}&s={offset}&xml=1&qh=0'
22
 search_string = 'search?{query}&n={number_of_results}&s={offset}&xml=1&qh=0'
23
 
23
 

+ 3
- 0
searx/engines/google_images.py View File

56
             continue
56
             continue
57
         thumbnail_src = result['tbUrl']
57
         thumbnail_src = result['tbUrl']
58
 
58
 
59
+        # http to https
60
+        thumbnail_src = thumbnail_src.replace("http://", "https://")
61
+
59
         # append result
62
         # append result
60
         results.append({'url': href,
63
         results.append({'url': href,
61
                         'title': title,
64
                         'title': title,

+ 2
- 2
searx/engines/www1x.py View File

19
 categories = ['images']
19
 categories = ['images']
20
 paging = False
20
 paging = False
21
 
21
 
22
-# search-url
23
-base_url = 'http://1x.com'
22
+# search-url, HTTPS
23
+base_url = 'https://1x.com'
24
 search_url = base_url+'/backend/search.php?{query}'
24
 search_url = base_url+'/backend/search.php?{query}'
25
 
25
 
26
 
26
 

+ 1
- 1
searx/tests/engines/test_deviantart.py View File

75
         self.assertEqual(results[0]['title'], 'Title of image')
75
         self.assertEqual(results[0]['title'], 'Title of image')
76
         self.assertEqual(results[0]['url'], 'http://url.of.result/2nd.part.of.url')
76
         self.assertEqual(results[0]['url'], 'http://url.of.result/2nd.part.of.url')
77
         self.assertNotIn('content', results[0])
77
         self.assertNotIn('content', results[0])
78
-        self.assertEqual(results[0]['thumbnail_src'], 'http://url.of.thumbnail')
78
+        self.assertEqual(results[0]['thumbnail_src'], 'https://url.of.thumbnail')
79
 
79
 
80
         html = """
80
         html = """
81
         <span class="tt-fh-tc" style="width: 202px;">
81
         <span class="tt-fh-tc" style="width: 202px;">

+ 1
- 1
searx/tests/engines/test_google_images.py View File

65
         self.assertEqual(len(results), 1)
65
         self.assertEqual(len(results), 1)
66
         self.assertEqual(results[0]['title'], 'This is the title')
66
         self.assertEqual(results[0]['title'], 'This is the title')
67
         self.assertEqual(results[0]['url'], 'http://this.is.the.url')
67
         self.assertEqual(results[0]['url'], 'http://this.is.the.url')
68
-        self.assertEqual(results[0]['thumbnail_src'], 'http://thumbnail.url')
68
+        self.assertEqual(results[0]['thumbnail_src'], 'https://thumbnail.url')
69
         self.assertEqual(results[0]['img_src'], 'http://image.url.jpg')
69
         self.assertEqual(results[0]['img_src'], 'http://image.url.jpg')
70
         self.assertEqual(results[0]['content'], '<b>test</b>')
70
         self.assertEqual(results[0]['content'], '<b>test</b>')
71
 
71
 

+ 2
- 2
searx/tests/engines/test_www1x.py View File

51
         results = www1x.response(response)
51
         results = www1x.response(response)
52
         self.assertEqual(type(results), list)
52
         self.assertEqual(type(results), list)
53
         self.assertEqual(len(results), 1)
53
         self.assertEqual(len(results), 1)
54
-        self.assertEqual(results[0]['url'], 'http://1x.com/photo/123456')
55
-        self.assertEqual(results[0]['thumbnail_src'], 'http://1x.com/images/user/testimage-123456.jpg')
54
+        self.assertEqual(results[0]['url'], 'https://1x.com/photo/123456')
55
+        self.assertEqual(results[0]['thumbnail_src'], 'https://1x.com/images/user/testimage-123456.jpg')
56
         self.assertEqual(results[0]['content'], '')
56
         self.assertEqual(results[0]['content'], '')
57
         self.assertEqual(results[0]['template'], 'images.html')
57
         self.assertEqual(results[0]['template'], 'images.html')