Procházet zdrojové kódy

[fix] use html result page in google images (previous endpoint stopped working)

Adam Tauber před 6 roky
rodič
revize
57e7e9da98
1 změněný soubor: 16 přidání a 25 odebrání
  1. 16
    25
      searx/engines/google_images.py

+ 16
- 25
searx/engines/google_images.py Zobrazit soubor

@@ -13,7 +13,7 @@
13 13
 from datetime import date, timedelta
14 14
 from json import loads
15 15
 from lxml import html
16
-from searx.url_utils import urlencode
16
+from searx.url_utils import urlencode, urlparse, parse_qs
17 17
 
18 18
 
19 19
 # engine dependent config
@@ -25,10 +25,9 @@ number_of_results = 100
25 25
 
26 26
 search_url = 'https://www.google.com/search'\
27 27
     '?{query}'\
28
-    '&asearch=ichunk'\
29
-    '&async=_id:rg_s,_pms:s'\
30 28
     '&tbm=isch'\
31
-    '&yv=2'\
29
+    '&gbv=1'\
30
+    '&sa=G'\
32 31
     '&{search_options}'
33 32
 time_range_attr = "qdr:{range}"
34 33
 time_range_custom_attr = "cdr:1,cd_min:{start},cd_max{end}"
@@ -66,30 +65,22 @@ def request(query, params):
66 65
 def response(resp):
67 66
     results = []
68 67
 
69
-    g_result = loads(resp.text)
70
-
71
-    dom = html.fromstring(g_result[1][1])
68
+    dom = html.fromstring(resp.text)
72 69
 
73 70
     # parse results
74
-    for result in dom.xpath('//div[@data-ved]'):
75
-
76
-        try:
77
-            metadata = loads(''.join(result.xpath('./div[contains(@class, "rg_meta")]/text()')))
78
-        except:
79
-            continue
80
-
81
-        thumbnail_src = metadata['tu']
82
-
83
-        # http to https
84
-        thumbnail_src = thumbnail_src.replace("http://", "https://")
85
-
71
+    for img in dom.xpath('//a'):
72
+        r = {
73
+            'title': u' '.join(img.xpath('.//div[class="rg_ilmbg"]//text()')),
74
+            'content': '',
75
+            'template': 'images.html',
76
+        }
77
+        url = urlparse(img.xpath('.//@href')[0])
78
+        query = parse_qs(url.query)
79
+        r['url'] = query['imgrefurl'][0]
80
+        r['img_src'] = query['imgurl'][0]
81
+        r['thumbnail_src'] = r['img_src']
86 82
         # append result
87
-        results.append({'url': metadata['ru'],
88
-                        'title': metadata['pt'],
89
-                        'content': metadata['s'],
90
-                        'thumbnail_src': thumbnail_src,
91
-                        'img_src': metadata['ou'],
92
-                        'template': 'images.html'})
83
+        results.append(r)
93 84
 
94 85
     # return results
95 86
     return results