浏览代码

[fix] google engine: depending on the IP of the searx instance, each searx request was making two HTTP requests (see https://support.google.com/websearch/answer/873?hl=en )

dalf 10 年前
父节点
当前提交
0a83be0ec9
共有 1 个文件被更改,包括 13 次插入0 次删除
  1. 13
    0
      searx/engines/google.py

+ 13
- 0
searx/engines/google.py 查看文件

11
 from urllib import urlencode
11
 from urllib import urlencode
12
 from urlparse import urlparse, parse_qsl
12
 from urlparse import urlparse, parse_qsl
13
 from lxml import html
13
 from lxml import html
14
+from searx.poolrequests import get
14
 from searx.engines.xpath import extract_text, extract_url
15
 from searx.engines.xpath import extract_text, extract_url
15
 
16
 
16
 # engine dependent config
17
 # engine dependent config
39
 image_url_xpath = './@href'
40
 image_url_xpath = './@href'
40
 image_img_src_xpath = './img/@src'
41
 image_img_src_xpath = './img/@src'
41
 
42
 
43
+pref_cookie = ''
44
+
45
+
46
+# see https://support.google.com/websearch/answer/873?hl=en
47
def get_google_pref_cookie():
    """Return the value of Google's ``PREF`` cookie, fetching it at most once.

    The value is cached in the module-level ``pref_cookie``. While the cache
    is still the empty string, a GET request is sent to
    ``https://www.google.com/ncr`` (without following redirects) and the
    ``PREF`` cookie from that response is stored.

    Returns:
        The cached cookie value, or ``''`` when the response did not set a
        ``PREF`` cookie (in which case the lookup is retried on the next
        call).
    """
    global pref_cookie
    if pref_cookie == '':
        resp = get('https://www.google.com/ncr', allow_redirects=False)
        # A missing PREF cookie must not raise KeyError: the caller runs this
        # while building every search request, so fall back to '' instead.
        pref_cookie = resp.cookies.get("PREF", '')
    return pref_cookie
53
+
42
 
54
 
43
 # remove google-specific tracking-url
55
 # remove google-specific tracking-url
44
 def parse_url(url_string):
56
 def parse_url(url_string):
64
                                       query=urlencode({'q': query}))
76
                                       query=urlencode({'q': query}))
65
 
77
 
66
     params['headers']['Accept-Language'] = language
78
     params['headers']['Accept-Language'] = language
79
+    params['cookies']['PREF'] = get_google_pref_cookie()
67
 
80
 
68
     return params
81
     return params
69
 
82