Explorar el Código

Merge pull request #542 from ukwt/fix538

[fix] incorrect URLs in Reddit image search results - closes #538
Adam Tauber hace 9 años
padre
commit
817c74e523
Se han modificado 2 ficheros con 11 adiciones y 5 borrados
  1. searx/engines/reddit.py (+5, -3)
  2. tests/unit/engines/test_reddit.py (+6, -2)

searx/engines/reddit.py (+5, -3) Ver fichero

@@ -13,7 +13,7 @@
13 13
 import json
14 14
 from cgi import escape
15 15
 from urllib import urlencode
16
-from urlparse import urlparse
16
+from urlparse import urlparse, urljoin
17 17
 from datetime import datetime
18 18
 
19 19
 # engine dependent config
@@ -21,7 +21,8 @@ categories = ['general', 'images', 'news', 'social media']
21 21
 page_size = 25
22 22
 
23 23
 # search-url
24
-search_url = 'https://www.reddit.com/search.json?{query}'
24
+base_url = 'https://www.reddit.com/'
25
+search_url = base_url + 'search.json?{query}'
25 26
 
26 27
 
27 28
 # do search-request
@@ -52,7 +53,7 @@ def response(resp):
52 53
 
53 54
         # extract post information
54 55
         params = {
55
-            'url': data['url'],
56
+            'url': urljoin(base_url, data['permalink']),
56 57
             'title': data['title']
57 58
         }
58 59
 
@@ -61,6 +62,7 @@ def response(resp):
61 62
         url_info = urlparse(thumbnail)
62 63
         # netloc & path
63 64
         if url_info[1] != '' and url_info[2] != '':
65
+            params['img_src'] = data['url']
64 66
             params['thumbnail_src'] = thumbnail
65 67
             params['template'] = 'images.html'
66 68
             img_results.append(params)

tests/unit/engines/test_reddit.py (+6, -2) Ver fichero

@@ -25,7 +25,8 @@ class TestRedditEngine(SearxTestCase):
25 25
             "data": {
26 26
                 "children": [{
27 27
                     "data": {
28
-                        "url": "http://google.com/",
28
+                        "url": "http://google2.com/",
29
+                        "permalink": "http://google.com/",
29 30
                         "title": "Title number one",
30 31
                         "selftext": "Sample",
31 32
                         "created_utc": 1401219957.0,
@@ -33,7 +34,8 @@ class TestRedditEngine(SearxTestCase):
33 34
                     }
34 35
                 }, {
35 36
                     "data": {
36
-                        "url": "https://reddit.com/",
37
+                        "url": "https://reddit2.com/",
38
+                        "permalink": "https://reddit.com/",
37 39
                         "title": "Title number two",
38 40
                         "selftext": "Dominus vobiscum",
39 41
                         "created_utc": 1438792533.0,
@@ -55,6 +57,7 @@ class TestRedditEngine(SearxTestCase):
55 57
         self.assertEqual(r['url'], 'http://google.com/')
56 58
         self.assertEqual(r['title'], 'Title number one')
57 59
         self.assertEqual(r['template'], 'images.html')
60
+        self.assertEqual(r['img_src'], 'http://google2.com/')
58 61
         self.assertEqual(r['thumbnail_src'], 'http://image.com/picture.jpg')
59 62
 
60 63
         # testing second result (self-post)
@@ -65,3 +68,4 @@ class TestRedditEngine(SearxTestCase):
65 68
         created = datetime.fromtimestamp(1438792533.0)
66 69
         self.assertEqual(r['publishedDate'], created)
67 70
         self.assertTrue('thumbnail_src' not in r)
71
+        self.assertTrue('img_src' not in r)