Pārlūkot izejas kodu

[fix] prevent google engine from redirecting

nid/pref cookies are also removed
Adam Tauber 9 gadus atpakaļ
vecāks
revīzija
5cea4f9445
2 mainīti faili ar 2 papildinājumiem un 34 dzēšanām
  1. 1
    29
      searx/engines/google.py
  2. 1
    5
      searx/tests/engines/test_google.py

+ 1
- 29
searx/engines/google.py Parādīt failu

13
 from urllib import urlencode
13
 from urllib import urlencode
14
 from urlparse import urlparse, parse_qsl
14
 from urlparse import urlparse, parse_qsl
15
 from lxml import html, etree
15
 from lxml import html, etree
16
-from searx.poolrequests import get
17
 from searx.engines.xpath import extract_text, extract_url
16
 from searx.engines.xpath import extract_text, extract_url
18
 from searx.search import logger
17
 from searx.search import logger
19
 
18
 
91
 search_path = '/search'
90
 search_path = '/search'
92
 search_url = ('https://{hostname}' +
91
 search_url = ('https://{hostname}' +
93
               search_path +
92
               search_path +
94
-              '?{query}&start={offset}&gbv=1')
93
+              '?{query}&start={offset}&gbv=1&gws_rd=cr')
95
 
94
 
96
 # other URLs
95
 # other URLs
97
 map_hostname_start = 'maps.google.'
96
 map_hostname_start = 'maps.google.'
129
 property_address = "Address"
128
 property_address = "Address"
130
 property_phone = "Phone number"
129
 property_phone = "Phone number"
131
 
130
 
132
-# cookies
133
-pref_cookie = ''
134
-nid_cookie = {}
135
-
136
-
137
-# see https://support.google.com/websearch/answer/873?hl=en
138
-def get_google_pref_cookie():
139
-    global pref_cookie
140
-    if pref_cookie == '':
141
-        resp = get('https://www.google.com/ncr', allow_redirects=False)
142
-        pref_cookie = resp.cookies["PREF"]
143
-    return pref_cookie
144
-
145
-
146
-def get_google_nid_cookie(google_hostname):
147
-    global nid_cookie
148
-    if google_hostname not in nid_cookie:
149
-        resp = get('https://' + google_hostname)
150
-        nid_cookie[google_hostname] = resp.cookies.get("NID", None)
151
-    return nid_cookie[google_hostname]
152
-
153
 
131
 
154
 # remove google-specific tracking-url
132
 # remove google-specific tracking-url
155
 def parse_url(url_string, google_hostname):
133
 def parse_url(url_string, google_hostname):
201
 
179
 
202
     params['headers']['Accept-Language'] = language
180
     params['headers']['Accept-Language'] = language
203
     params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
181
     params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
204
-    if google_hostname == default_hostname:
205
-        try:
206
-            params['cookies']['PREF'] = get_google_pref_cookie()
207
-        except:
208
-            logger.warning('cannot fetch PREF cookie')
209
-    params['cookies']['NID'] = get_google_nid_cookie(google_hostname)
210
 
182
 
211
     params['google_hostname'] = google_hostname
183
     params['google_hostname'] = google_hostname
212
 
184
 

+ 1
- 5
searx/tests/engines/test_google.py Parādīt failu

9
 class TestGoogleEngine(SearxTestCase):
9
 class TestGoogleEngine(SearxTestCase):
10
 
10
 
11
     def mock_response(self, text):
11
     def mock_response(self, text):
12
-        response = mock.Mock(text=text, url='https://www.google.com/search?q=test&start=0&gbv=1')
12
+        response = mock.Mock(text=text, url='https://www.google.com/search?q=test&start=0&gbv=1&gws_rd=cr')
13
         response.search_params = mock.Mock()
13
         response.search_params = mock.Mock()
14
         response.search_params.get = mock.Mock(return_value='www.google.com')
14
         response.search_params.get = mock.Mock(return_value='www.google.com')
15
         return response
15
         return response
23
         self.assertIn('url', params)
23
         self.assertIn('url', params)
24
         self.assertIn(query, params['url'])
24
         self.assertIn(query, params['url'])
25
         self.assertIn('google.fr', params['url'])
25
         self.assertIn('google.fr', params['url'])
26
-        self.assertNotIn('PREF', params['cookies'])
27
-        self.assertIn('NID', params['cookies'])
28
         self.assertIn('fr', params['headers']['Accept-Language'])
26
         self.assertIn('fr', params['headers']['Accept-Language'])
29
 
27
 
30
         dicto['language'] = 'all'
28
         dicto['language'] = 'all'
31
         params = google.request(query, dicto)
29
         params = google.request(query, dicto)
32
         self.assertIn('google.com', params['url'])
30
         self.assertIn('google.com', params['url'])
33
         self.assertIn('en', params['headers']['Accept-Language'])
31
         self.assertIn('en', params['headers']['Accept-Language'])
34
-        # self.assertIn('PREF', params['cookies'])
35
-        self.assertIn('NID', params['cookies'])
36
 
32
 
37
     def test_response(self):
33
     def test_response(self):
38
         self.assertRaises(AttributeError, google.response, None)
34
         self.assertRaises(AttributeError, google.response, None)