浏览代码

[fix] use html result page in google images (previous endpoint stopped working)

Adam Tauber 7 年前
父节点
当前提交
57e7e9da98
共有 1 个文件被更改,包括 16 次插入和 25 次删除
  1. searx/engines/google_images.py(16 次插入,25 次删除)

+ 16
- 25
searx/engines/google_images.py 查看文件

13
 from datetime import date, timedelta
13
 from datetime import date, timedelta
14
 from json import loads
14
 from json import loads
15
 from lxml import html
15
 from lxml import html
16
-from searx.url_utils import urlencode
16
+from searx.url_utils import urlencode, urlparse, parse_qs
17
 
17
 
18
 
18
 
19
 # engine dependent config
19
 # engine dependent config
25
 
25
 
26
 search_url = 'https://www.google.com/search'\
26
 search_url = 'https://www.google.com/search'\
27
     '?{query}'\
27
     '?{query}'\
28
-    '&asearch=ichunk'\
29
-    '&async=_id:rg_s,_pms:s'\
30
     '&tbm=isch'\
28
     '&tbm=isch'\
31
-    '&yv=2'\
29
+    '&gbv=1'\
30
+    '&sa=G'\
32
     '&{search_options}'
31
     '&{search_options}'
33
 time_range_attr = "qdr:{range}"
32
 time_range_attr = "qdr:{range}"
34
 time_range_custom_attr = "cdr:1,cd_min:{start},cd_max{end}"
33
 time_range_custom_attr = "cdr:1,cd_min:{start},cd_max{end}"
66
 def response(resp):
65
 def response(resp):
67
     results = []
66
     results = []
68
 
67
 
69
-    g_result = loads(resp.text)
70
-
71
-    dom = html.fromstring(g_result[1][1])
68
+    dom = html.fromstring(resp.text)
72
 
69
 
73
     # parse results
70
     # parse results
74
-    for result in dom.xpath('//div[@data-ved]'):
75
-
76
-        try:
77
-            metadata = loads(''.join(result.xpath('./div[contains(@class, "rg_meta")]/text()')))
78
-        except:
79
-            continue
80
-
81
-        thumbnail_src = metadata['tu']
82
-
83
-        # http to https
84
-        thumbnail_src = thumbnail_src.replace("http://", "https://")
85
-
71
+    for img in dom.xpath('//a'):
72
+        r = {
73
+            'title': u' '.join(img.xpath('.//div[class="rg_ilmbg"]//text()')),
74
+            'content': '',
75
+            'template': 'images.html',
76
+        }
77
+        url = urlparse(img.xpath('.//@href')[0])
78
+        query = parse_qs(url.query)
79
+        r['url'] = query['imgrefurl'][0]
80
+        r['img_src'] = query['imgurl'][0]
81
+        r['thumbnail_src'] = r['img_src']
86
         # append result
82
         # append result
87
-        results.append({'url': metadata['ru'],
88
-                        'title': metadata['pt'],
89
-                        'content': metadata['s'],
90
-                        'thumbnail_src': thumbnail_src,
91
-                        'img_src': metadata['ou'],
92
-                        'template': 'images.html'})
83
+        results.append(r)
93
 
84
 
94
     # return results
85
     # return results
95
     return results
86
     return results