Parcourir la source

Merge branch 'master' of github.com:nicholasks/searx

Nicholas Kegler il y a 6 ans
Parent
révision
f34733c3fe

+ 1
- 1
searx/engines/base.py Voir le fichier

@@ -55,7 +55,7 @@ shorcut_dict = {
55 55
 def request(query, params):
56 56
     # replace shortcuts with API advanced search keywords
57 57
     for key in shorcut_dict.keys():
58
-        query = re.sub(str(key), str(shorcut_dict[key]), query)
58
+        query = re.sub(key, shorcut_dict[key], str(query))
59 59
 
60 60
     # basic search
61 61
     offset = (params['pageno'] - 1) * number_of_results

+ 16
- 25
searx/engines/google_images.py Voir le fichier

@@ -13,7 +13,7 @@
13 13
 from datetime import date, timedelta
14 14
 from json import loads
15 15
 from lxml import html
16
-from searx.url_utils import urlencode
16
+from searx.url_utils import urlencode, urlparse, parse_qs
17 17
 
18 18
 
19 19
 # engine dependent config
@@ -25,10 +25,9 @@ number_of_results = 100
25 25
 
26 26
 search_url = 'https://www.google.com/search'\
27 27
     '?{query}'\
28
-    '&asearch=ichunk'\
29
-    '&async=_id:rg_s,_pms:s'\
30 28
     '&tbm=isch'\
31
-    '&yv=2'\
29
+    '&gbv=1'\
30
+    '&sa=G'\
32 31
     '&{search_options}'
33 32
 time_range_attr = "qdr:{range}"
34 33
 time_range_custom_attr = "cdr:1,cd_min:{start},cd_max{end}"
@@ -66,30 +65,22 @@ def request(query, params):
66 65
 def response(resp):
67 66
     results = []
68 67
 
69
-    g_result = loads(resp.text)
70
-
71
-    dom = html.fromstring(g_result[1][1])
68
+    dom = html.fromstring(resp.text)
72 69
 
73 70
     # parse results
74
-    for result in dom.xpath('//div[@data-ved]'):
75
-
76
-        try:
77
-            metadata = loads(''.join(result.xpath('./div[contains(@class, "rg_meta")]/text()')))
78
-        except:
79
-            continue
80
-
81
-        thumbnail_src = metadata['tu']
82
-
83
-        # http to https
84
-        thumbnail_src = thumbnail_src.replace("http://", "https://")
85
-
71
+    for img in dom.xpath('//a'):
72
+        r = {
73
+            'title': u' '.join(img.xpath('.//div[class="rg_ilmbg"]//text()')),
74
+            'content': '',
75
+            'template': 'images.html',
76
+        }
77
+        url = urlparse(img.xpath('.//@href')[0])
78
+        query = parse_qs(url.query)
79
+        r['url'] = query['imgrefurl'][0]
80
+        r['img_src'] = query['imgurl'][0]
81
+        r['thumbnail_src'] = r['img_src']
86 82
         # append result
87
-        results.append({'url': metadata['ru'],
88
-                        'title': metadata['pt'],
89
-                        'content': metadata['s'],
90
-                        'thumbnail_src': thumbnail_src,
91
-                        'img_src': metadata['ou'],
92
-                        'template': 'images.html'})
83
+        results.append(r)
93 84
 
94 85
     # return results
95 86
     return results

+ 1
- 0
searx/utils.py Voir le fichier

@@ -33,6 +33,7 @@ if sys.version_info[0] == 3:
33 33
     unichr = chr
34 34
     unicode = str
35 35
     IS_PY2 = False
36
+    basestring = str
36 37
 else:
37 38
     IS_PY2 = True
38 39
 

+ 0
- 15
tests/unit/engines/test_google_images.py Voir le fichier

@@ -25,18 +25,3 @@ class TestGoogleImagesEngine(SearxTestCase):
25 25
         self.assertRaises(AttributeError, google_images.response, [])
26 26
         self.assertRaises(AttributeError, google_images.response, '')
27 27
         self.assertRaises(AttributeError, google_images.response, '[]')
28
-
29
-        html = r"""
30
-["rg_s",["dom","\u003Cstyle\u003E.rg_kn,.rg_s{}.rg_bx{display:-moz-inline-box;display:inline-block;margin-top:0;margin-right:12px;margin-bottom:12px;margin-left:0;overflow:hidden;position:relative;vertical-align:top;z-index:1}.rg_meta{display:none}.rg_l{display:inline-block;height:100%;position:absolute;text-decoration:none;width:100%}.rg_l:focus{outline:0}.rg_i{border:0;color:rgba(0,0,0,0);display:block;-webkit-touch-callout:none;}.rg_an,.rg_anbg,.rg_ilm,.rg_ilmbg{right:0;bottom:0;box-sizing:border-box;-moz-box-sizing:border-box;color:#fff;font:normal 11px arial,sans-serif;line-height:100%;white-space:nowrap;width:100%}.rg_anbg,.rg_ilmbg{background:rgba(51,51,51,0.8);margin-left:0;padding:2px 4px;position:absolute}.rg_ilmn{bottom:0;display:block;overflow:hidden;text-overflow:ellipsis;white-space:nowrap}.rg_ilm{display:none}#rg_s.rg_kn .rg_l:focus .rg_ilm{display:block}.rg_kn .rg_bx:hover .rg_ilm,.rg_bx:hover .rg_anbg{display:none}.rg_bx:hover .rg_ilm,.rg_anbg,.rg_kn .rg_bx:hover .rg_anbg{display:block}\u003C\/style\u003E\u003Cdiv eid=\"qlKuV-T3BoqksAHMnaroAw\" id=\"isr_scm_0\" style=\"display:none\"\u003E\u003C\/div\u003E\u003Cdiv data-cei=\"qlKuV-T3BoqksAHMnaroAw\" class=\"rg_add_chunk\"\u003E\u003C!--m--\u003E\u003Cdiv class=\"rg_di rg_bx rg_el ivg-i\" data-ved=\"0ahUKEwjk9PCm-7zOAhUKEiwKHcyOCj0QMwgCKAAwAA\"\u003E\u003Ca jsaction=\"fire.ivg_o;mouseover:str.hmov;mouseout:str.hmou\" class=\"rg_l\" style=\"background:rgb(170,205,240)\"\u003E\u003Cimg data-sz=\"f\" name=\"5eykIeMjmCk7xM:\" src=\"https:\/\/encrypted-tbn0.gstatic.com\/images?q=tbn\" class=\"rg_i rg_ic\" alt=\"Image result for south\" jsaction=\"load:str.tbn\" onload=\"google.aft\u0026\u0026google.aft(this)\"\u003E\u003Cdiv class=\"_aOd rg_ilm\"\u003E\u003Cdiv class=\"rg_ilmbg\"\u003E\u003Cspan class=\"rg_ilmn\"\u003E 566\u0026nbsp;\u0026#215;\u0026nbsp;365 - en.wikipedia.org \u003C\/span\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/a\u003E\u003Cdiv class=\"rg_meta\"\u003E{\"id\":\"5eykIeMjmCk7xM:\",\"isu\":\"en.wikipedia.org\",\"itg\":false,\"ity\":\"png\",\"oh\":365,\"ou\":\"https:\/\/upload.wikimedia.org\/wikipedia\/commons\/e\/e4\/Us_south_census.png\",\"ow\":566,\"pt\":\"Southern United States - Wikipedia, the free encyclopedia\",\"rid\":\"cErfE02-v-VcAM\",\"ru\":\"https:\/\/en.wikipedia.org\/wiki\/Southern_United_States\",\"s\":\"The Southern United States as defined by the United States Census Bureau.\",\"sc\":1,\"th\":180,\"tu\":\"https:\/\/encrypted-tbn0.gstatic.com\/images?q\\u003dtbn\",\"tw\":280}\u003C\/div\u003E\u003C\/div\u003E\u003C!--n--\u003E\u003C!--m--\u003E\u003Cdiv class=\"rg_di rg_bx rg_el ivg-i\" data-ved=\"0ahUKEwjk9PCm-7zOAhUKEiwKHcyOCj0QMwgDKAEwAQ\"\u003E\u003Ca jsaction=\"fire.ivg_o;mouseover:str.hmov;mouseout:str.hmou\" class=\"rg_l\" style=\"background:rgb(249,252,249)\"\u003E\u003Cimg data-sz=\"f\" name=\"eRjGCc0cFyVkKM:\" src=\"https:\/\/encrypted-tbn2.gstatic.com\/images?q=tbn:ANd9GcSI7SZlbDwdMCgGXzJkpwgdn9uL41xUJ1IiIcKs0qW43_Yp0EhEsg\" class=\"rg_i rg_ic\" alt=\"Image result for south\" jsaction=\"load:str.tbn\" onload=\"google.aft\u0026\u0026google.aft(this)\"\u003E\u003Cdiv class=\"_aOd rg_ilm\"\u003E\u003Cdiv class=\"rg_ilmbg\"\u003E\u003Cspan class=\"rg_ilmn\"\u003E 2000\u0026nbsp;\u0026#215;\u0026nbsp;1002 - commons.wikimedia.org \u003C\/span\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/a\u003E\u003Cdiv class=\"rg_meta\"\u003E{\"id\":\"eRjGCc0cFyVkKM:\",\"isu\":\"commons.wikimedia.org\",\"itg\":false,\"ity\":\"png\",\"oh\":1002,\"ou\":\"https:\/\/upload.wikimedia.org\/wikipedia\/commons\/thumb\/8\/84\/South_plate.svg\/2000px-South_plate.svg.png\",\"ow\":2000,\"pt\":\"File:South plate.svg - Wikimedia Commons\",\"rid\":\"F8TVsT2GBLb6RM\",\"ru\":\"https:\/\/commons.wikimedia.org\/wiki\/File:South_plate.svg\",\"s\":\"This image rendered as PNG in other widths: 200px, 500px, 1000px, 2000px.\",\"sc\":1,\"th\":159,\"tu\":\"https:\/\/encrypted-tbn2.gstatic.com\/images?q\\u003dtbn:ANd9GcSI7SZlbDwdMCgGXzJkpwgdn9uL41xUJ1IiIcKs0qW43_Yp0EhEsg\",\"tw\":317}\u003C\/div\u003E\u003C\/div\u003E\u003C!--n--\u003E\u003C\/div\u003E"]]"""  # noqa
31
-        response = mock.Mock(text=html)
32
-        results = google_images.response(response)
33
-        self.assertEqual(type(results), list)
34
-        self.assertEqual(len(results), 2)
35
-        self.assertEqual(results[0]['title'], u'Southern United States - Wikipedia, the free encyclopedia')
36
-        self.assertEqual(results[0]['url'], 'https://en.wikipedia.org/wiki/Southern_United_States')
37
-        self.assertEqual(results[0]['img_src'],
38
-                         'https://upload.wikimedia.org/wikipedia/commons/e/e4/Us_south_census.png')
39
-        self.assertEqual(results[0]['content'],
40
-                         'The Southern United States as defined by the United States Census Bureau.')
41
-        self.assertEqual(results[0]['thumbnail_src'],
42
-                         'https://encrypted-tbn0.gstatic.com/images?q=tbn')