浏览代码

[mod] add/modify image fetching for bing_news, qwant and twitter engines

Alexandre Flament 8 年前
父节点
当前提交
f5128c7cb9
共有 4 个文件被更改,包括 27 次插入和 10 次删除
  1. searx/engines/bing_news.py (+2 -3)
  2. searx/engines/qwant.py (+16 -3)
  3. searx/engines/twitter.py (+6 -1)
  4. tests/unit/engines/test_bing_news.py (+3 -3)

+ 2
- 3
searx/engines/bing_news.py 查看文件

112
 
112
 
113
         # append result
113
         # append result
114
         if thumbnail is not None:
114
         if thumbnail is not None:
115
-            results.append({'template': 'videos.html',
116
-                            'url': url,
115
+            results.append({'url': url,
117
                             'title': title,
116
                             'title': title,
118
                             'publishedDate': publishedDate,
117
                             'publishedDate': publishedDate,
119
                             'content': content,
118
                             'content': content,
120
-                            'thumbnail': thumbnail})
119
+                            'img_src': thumbnail})
121
         else:
120
         else:
122
             results.append({'url': url,
121
             results.append({'url': url,
123
                             'title': title,
122
                             'title': title,

+ 16
- 3
searx/engines/qwant.py 查看文件

96
                             'thumbnail_src': thumbnail_src,
96
                             'thumbnail_src': thumbnail_src,
97
                             'img_src': img_src})
97
                             'img_src': img_src})
98
 
98
 
99
-        elif (category_to_keyword.get(categories[0], '') == 'news' or
100
-              category_to_keyword.get(categories[0], '') == 'social'):
99
+        elif category_to_keyword.get(categories[0], '') == 'social':
101
             published_date = datetime.fromtimestamp(result['date'], None)
100
             published_date = datetime.fromtimestamp(result['date'], None)
101
+            img_src = result.get('img', None)
102
+            results.append({'url': res_url,
103
+                            'title': title,
104
+                            'publishedDate': published_date,
105
+                            'content': content,
106
+                            'img_src': img_src})
102
 
107
 
108
+        elif category_to_keyword.get(categories[0], '') == 'news':
109
+            published_date = datetime.fromtimestamp(result['date'], None)
110
+            media = result.get('media', [])
111
+            if len(media) > 0:
112
+                img_src = media[0].get('pict', {}).get('url', None)
113
+            else:
114
+                img_src = None
103
             results.append({'url': res_url,
115
             results.append({'url': res_url,
104
                             'title': title,
116
                             'title': title,
105
                             'publishedDate': published_date,
117
                             'publishedDate': published_date,
106
-                            'content': content})
118
+                            'content': content,
119
+                            'img_src': img_src})
107
 
120
 
108
     return results
121
     return results
109
 
122
 

+ 6
- 1
searx/engines/twitter.py 查看文件

27
 
27
 
28
 # specific xpath variables
28
 # specific xpath variables
29
 results_xpath = '//li[@data-item-type="tweet"]'
29
 results_xpath = '//li[@data-item-type="tweet"]'
30
+avatar_xpath = './/img[contains(@class, "avatar")]/@src'
30
 link_xpath = './/small[@class="time"]//a'
31
 link_xpath = './/small[@class="time"]//a'
31
 title_xpath = './/span[contains(@class, "username")]'
32
 title_xpath = './/span[contains(@class, "username")]'
32
 content_xpath = './/p[contains(@class, "tweet-text")]'
33
 content_xpath = './/p[contains(@class, "tweet-text")]'
57
         try:
58
         try:
58
             link = tweet.xpath(link_xpath)[0]
59
             link = tweet.xpath(link_xpath)[0]
59
             content = extract_text(tweet.xpath(content_xpath)[0])
60
             content = extract_text(tweet.xpath(content_xpath)[0])
61
+            img_src = tweet.xpath(avatar_xpath)[0]
62
+            img_src = img_src.replace('_bigger', '_normal')
60
         except Exception:
63
         except Exception:
61
             continue
64
             continue
62
 
65
 
71
             results.append({'url': url,
74
             results.append({'url': url,
72
                             'title': title,
75
                             'title': title,
73
                             'content': content,
76
                             'content': content,
77
+                            'img_src': img_src,
74
                             'publishedDate': publishedDate})
78
                             'publishedDate': publishedDate})
75
         else:
79
         else:
76
             # append result
80
             # append result
77
             results.append({'url': url,
81
             results.append({'url': url,
78
                             'title': title,
82
                             'title': title,
79
-                            'content': content})
83
+                            'content': content,
84
+                            'img_src': img_src})
80
 
85
 
81
     # return results
86
     # return results
82
     return results
87
     return results

+ 3
- 3
tests/unit/engines/test_bing_news.py 查看文件

81
         self.assertEqual(results[0]['title'], 'Title')
81
         self.assertEqual(results[0]['title'], 'Title')
82
         self.assertEqual(results[0]['url'], 'http://url.of.article/')
82
         self.assertEqual(results[0]['url'], 'http://url.of.article/')
83
         self.assertEqual(results[0]['content'], 'Article Content')
83
         self.assertEqual(results[0]['content'], 'Article Content')
84
-        self.assertEqual(results[0]['thumbnail'], 'https://www.bing.com/th?id=ON.13371337133713371337133713371337')
84
+        self.assertEqual(results[0]['img_src'], 'https://www.bing.com/th?id=ON.13371337133713371337133713371337')
85
         self.assertEqual(results[1]['title'], 'Another Title')
85
         self.assertEqual(results[1]['title'], 'Another Title')
86
         self.assertEqual(results[1]['url'], 'http://another.url.of.article/')
86
         self.assertEqual(results[1]['url'], 'http://another.url.of.article/')
87
         self.assertEqual(results[1]['content'], 'Another Article Content')
87
         self.assertEqual(results[1]['content'], 'Another Article Content')
88
-        self.assertNotIn('thumbnail', results[1])
88
+        self.assertNotIn('img_src', results[1])
89
 
89
 
90
         html = """<?xml version="1.0" encoding="utf-8" ?>
90
         html = """<?xml version="1.0" encoding="utf-8" ?>
91
 <rss version="2.0" xmlns:News="https://www.bing.com:443/news/search?q=python&amp;setmkt=en-US&amp;first=1&amp;format=RSS">
91
 <rss version="2.0" xmlns:News="https://www.bing.com:443/news/search?q=python&amp;setmkt=en-US&amp;first=1&amp;format=RSS">
120
         self.assertEqual(results[0]['title'], 'Title')
120
         self.assertEqual(results[0]['title'], 'Title')
121
         self.assertEqual(results[0]['url'], 'http://another.url.of.article/')
121
         self.assertEqual(results[0]['url'], 'http://another.url.of.article/')
122
         self.assertEqual(results[0]['content'], 'Article Content')
122
         self.assertEqual(results[0]['content'], 'Article Content')
123
-        self.assertEqual(results[0]['thumbnail'], 'http://another.bing.com/image')
123
+        self.assertEqual(results[0]['img_src'], 'http://another.bing.com/image')
124
 
124
 
125
         html = """<?xml version="1.0" encoding="utf-8" ?>
125
         html = """<?xml version="1.0" encoding="utf-8" ?>
126
 <rss version="2.0" xmlns:News="https://www.bing.com:443/news/search?q=python&amp;setmkt=en-US&amp;first=1&amp;format=RSS">
126
 <rss version="2.0" xmlns:News="https://www.bing.com:443/news/search?q=python&amp;setmkt=en-US&amp;first=1&amp;format=RSS">