瀏覽代碼

Merge pull request #542 from ukwt/fix538

[fix] incorrect URLs in Reddit image search results - closes #538
Adam Tauber 9 年之前
父節點
當前提交
817c74e523
共有 2 個文件被更改,包括 11 次插入和 5 次删除
  1. 5
    3
      searx/engines/reddit.py
  2. 6
    2
      tests/unit/engines/test_reddit.py

+ 5
- 3
searx/engines/reddit.py 查看文件

13
 import json
13
 import json
14
 from cgi import escape
14
 from cgi import escape
15
 from urllib import urlencode
15
 from urllib import urlencode
16
-from urlparse import urlparse
16
+from urlparse import urlparse, urljoin
17
 from datetime import datetime
17
 from datetime import datetime
18
 
18
 
19
 # engine dependent config
19
 # engine dependent config
21
 page_size = 25
21
 page_size = 25
22
 
22
 
23
 # search-url
23
 # search-url
24
-search_url = 'https://www.reddit.com/search.json?{query}'
24
+base_url = 'https://www.reddit.com/'
25
+search_url = base_url + 'search.json?{query}'
25
 
26
 
26
 
27
 
27
 # do search-request
28
 # do search-request
52
 
53
 
53
         # extract post information
54
         # extract post information
54
         params = {
55
         params = {
55
-            'url': data['url'],
56
+            'url': urljoin(base_url, data['permalink']),
56
             'title': data['title']
57
             'title': data['title']
57
         }
58
         }
58
 
59
 
61
         url_info = urlparse(thumbnail)
62
         url_info = urlparse(thumbnail)
62
         # netloc & path
63
         # netloc & path
63
         if url_info[1] != '' and url_info[2] != '':
64
         if url_info[1] != '' and url_info[2] != '':
65
+            params['img_src'] = data['url']
64
             params['thumbnail_src'] = thumbnail
66
             params['thumbnail_src'] = thumbnail
65
             params['template'] = 'images.html'
67
             params['template'] = 'images.html'
66
             img_results.append(params)
68
             img_results.append(params)

+ 6
- 2
tests/unit/engines/test_reddit.py 查看文件

25
             "data": {
25
             "data": {
26
                 "children": [{
26
                 "children": [{
27
                     "data": {
27
                     "data": {
28
-                        "url": "http://google.com/",
28
+                        "url": "http://google2.com/",
29
+                        "permalink": "http://google.com/",
29
                         "title": "Title number one",
30
                         "title": "Title number one",
30
                         "selftext": "Sample",
31
                         "selftext": "Sample",
31
                         "created_utc": 1401219957.0,
32
                         "created_utc": 1401219957.0,
33
                     }
34
                     }
34
                 }, {
35
                 }, {
35
                     "data": {
36
                     "data": {
36
-                        "url": "https://reddit.com/",
37
+                        "url": "https://reddit2.com/",
38
+                        "permalink": "https://reddit.com/",
37
                         "title": "Title number two",
39
                         "title": "Title number two",
38
                         "selftext": "Dominus vobiscum",
40
                         "selftext": "Dominus vobiscum",
39
                         "created_utc": 1438792533.0,
41
                         "created_utc": 1438792533.0,
55
         self.assertEqual(r['url'], 'http://google.com/')
57
         self.assertEqual(r['url'], 'http://google.com/')
56
         self.assertEqual(r['title'], 'Title number one')
58
         self.assertEqual(r['title'], 'Title number one')
57
         self.assertEqual(r['template'], 'images.html')
59
         self.assertEqual(r['template'], 'images.html')
60
+        self.assertEqual(r['img_src'], 'http://google2.com/')
58
         self.assertEqual(r['thumbnail_src'], 'http://image.com/picture.jpg')
61
         self.assertEqual(r['thumbnail_src'], 'http://image.com/picture.jpg')
59
 
62
 
60
         # testing second result (self-post)
63
         # testing second result (self-post)
65
         created = datetime.fromtimestamp(1438792533.0)
68
         created = datetime.fromtimestamp(1438792533.0)
66
         self.assertEqual(r['publishedDate'], created)
69
         self.assertEqual(r['publishedDate'], created)
67
         self.assertTrue('thumbnail_src' not in r)
70
         self.assertTrue('thumbnail_src' not in r)
71
+        self.assertTrue('img_src' not in r)