|
@@ -2,6 +2,7 @@ from collections import defaultdict
|
2
|
2
|
import mock
|
3
|
3
|
from searx.engines import bing_news
|
4
|
4
|
from searx.testing import SearxTestCase
|
|
5
|
+import lxml
|
5
|
6
|
|
6
|
7
|
|
7
|
8
|
class TestBingNewsEngine(SearxTestCase):
|
|
@@ -16,14 +17,10 @@ class TestBingNewsEngine(SearxTestCase):
|
16
|
17
|
self.assertIn(query, params['url'])
|
17
|
18
|
self.assertIn('bing.com', params['url'])
|
18
|
19
|
self.assertIn('fr', params['url'])
|
19
|
|
- self.assertIn('_FP', params['cookies'])
|
20
|
|
- self.assertIn('en', params['cookies']['_FP'])
|
21
|
20
|
|
22
|
21
|
dicto['language'] = 'all'
|
23
|
22
|
params = bing_news.request(query, dicto)
|
24
|
23
|
self.assertIn('en', params['url'])
|
25
|
|
- self.assertIn('_FP', params['cookies'])
|
26
|
|
- self.assertIn('en', params['cookies']['_FP'])
|
27
|
24
|
|
28
|
25
|
def test_response(self):
|
29
|
26
|
self.assertRaises(AttributeError, bing_news.response, None)
|
|
@@ -37,200 +34,105 @@ class TestBingNewsEngine(SearxTestCase):
|
37
|
34
|
response = mock.Mock(content='<html></html>')
|
38
|
35
|
self.assertEqual(bing_news.response(response), [])
|
39
|
36
|
|
40
|
|
- html = """
|
41
|
|
- <div class="sn_r">
|
42
|
|
- <div class="newstitle">
|
43
|
|
- <a href="http://url.of.article/" target="_blank" h="ID=news,5022.1">
|
44
|
|
- Title
|
45
|
|
- </a>
|
46
|
|
- </div>
|
47
|
|
- <div class="sn_img">
|
48
|
|
- <a href="http://url.of.article2/" target="_blank" h="ID=news,5024.1">
|
49
|
|
- <img class="rms_img" height="80" id="emb1" src="/image.src" title="Title" width="80" />
|
50
|
|
- </a>
|
51
|
|
- </div>
|
52
|
|
- <div class="sn_txt">
|
53
|
|
- <div class="sn_oi">
|
54
|
|
- <span class="sn_snip">Article Content</span>
|
55
|
|
- <div class="sn_ST">
|
56
|
|
- <cite class="sn_src">metronews.fr</cite>
|
57
|
|
- · 
|
58
|
|
- <span class="sn_tm">44 minutes ago</span>
|
59
|
|
- </div>
|
60
|
|
- </div>
|
61
|
|
- </div>
|
62
|
|
- </div>
|
63
|
|
- """
|
|
37
|
+ html = """<?xml version="1.0" encoding="utf-8" ?>
|
|
38
|
+<rss version="2.0" xmlns:News="https://www.bing.com:443/news/search?q=python&setmkt=en-US&first=1&format=RSS">
|
|
39
|
+ <channel>
|
|
40
|
+ <title>python - Bing News</title>
|
|
41
|
+ <link>https://www.bing.com:443/news/search?q=python&setmkt=en-US&first=1&format=RSS</link>
|
|
42
|
+ <description>Search results</description>
|
|
43
|
+ <image>
|
|
44
|
+ <url>http://10.53.64.9/rsslogo.gif</url>
|
|
45
|
+ <title>test</title>
|
|
46
|
+ <link>https://www.bing.com:443/news/search?q=test&setmkt=en-US&first=1&format=RSS</link>
|
|
47
|
+ </image>
|
|
48
|
+ <copyright>Copyright</copyright>
|
|
49
|
+ <item>
|
|
50
|
+ <title>Title</title>
|
|
51
|
+ <link>https://www.bing.com/news/apiclick.aspx?ref=FexRss&aid=&tid=c237eccc50bd4758b106a5e3c94fce09&url=http%3a%2f%2furl.of.article%2f&c=xxxxxxxxx&mkt=en-us</link>
|
|
52
|
+ <description>Article Content</description>
|
|
53
|
+ <pubDate>Tue, 02 Jun 2015 13:37:00 GMT</pubDate>
|
|
54
|
+ <News:Source>Infoworld</News:Source>
|
|
55
|
+ <News:Image>http://a1.bing4.com/th?id=ON.13371337133713371337133713371337&pid=News</News:Image>
|
|
56
|
+ <News:ImageSize>w={0}&h={1}&c=7</News:ImageSize>
|
|
57
|
+ <News:ImageKeepOriginalRatio></News:ImageKeepOriginalRatio>
|
|
58
|
+ <News:ImageMaxWidth>620</News:ImageMaxWidth>
|
|
59
|
+ <News:ImageMaxHeight>413</News:ImageMaxHeight>
|
|
60
|
+ </item>
|
|
61
|
+ <item>
|
|
62
|
+ <title>Another Title</title>
|
|
63
|
+ <link>https://www.bing.com/news/apiclick.aspx?ref=FexRss&aid=&tid=c237eccc50bd4758b106a5e3c94fce09&url=http%3a%2f%2fanother.url.of.article%2f&c=xxxxxxxxx&mkt=en-us</link>
|
|
64
|
+ <description>Another Article Content</description>
|
|
65
|
+ <pubDate>Tue, 02 Jun 2015 13:37:00 GMT</pubDate>
|
|
66
|
+ </item>
|
|
67
|
+ </channel>
|
|
68
|
+</rss>""" # noqa
|
64
|
69
|
response = mock.Mock(content=html)
|
65
|
70
|
results = bing_news.response(response)
|
66
|
71
|
self.assertEqual(type(results), list)
|
67
|
|
- self.assertEqual(len(results), 1)
|
|
72
|
+ self.assertEqual(len(results), 2)
|
68
|
73
|
self.assertEqual(results[0]['title'], 'Title')
|
69
|
74
|
self.assertEqual(results[0]['url'], 'http://url.of.article/')
|
70
|
75
|
self.assertEqual(results[0]['content'], 'Article Content')
|
|
76
|
+ self.assertEqual(results[0]['thumbnail'], 'https://www.bing.com/th?id=ON.13371337133713371337133713371337')
|
|
77
|
+ self.assertEqual(results[1]['title'], 'Another Title')
|
|
78
|
+ self.assertEqual(results[1]['url'], 'http://another.url.of.article/')
|
|
79
|
+ self.assertEqual(results[1]['content'], 'Another Article Content')
|
|
80
|
+ self.assertNotIn('thumbnail', results[1])
|
71
|
81
|
|
72
|
|
- html = """
|
73
|
|
- <div class="sn_r">
|
74
|
|
- <div class="newstitle">
|
75
|
|
- <a href="http://url.of.article/" target="_blank" h="ID=news,5022.1">
|
76
|
|
- Title
|
77
|
|
- </a>
|
78
|
|
- </div>
|
79
|
|
- <div class="sn_img">
|
80
|
|
- <a href="http://url.of.article2/" target="_blank" h="ID=news,5024.1">
|
81
|
|
- <img class="rms_img" height="80" id="emb1" src="/image.src" title="Title" width="80" />
|
82
|
|
- </a>
|
83
|
|
- </div>
|
84
|
|
- <div class="sn_txt">
|
85
|
|
- <div class="sn_oi">
|
86
|
|
- <span class="sn_snip">Article Content</span>
|
87
|
|
- <div class="sn_ST">
|
88
|
|
- <cite class="sn_src">metronews.fr</cite>
|
89
|
|
- · 
|
90
|
|
- <span class="sn_tm">44 minutes ago</span>
|
91
|
|
- </div>
|
92
|
|
- </div>
|
93
|
|
- </div>
|
94
|
|
- </div>
|
95
|
|
- <div class="sn_r">
|
96
|
|
- <div class="newstitle">
|
97
|
|
- <a href="http://url.of.article/" target="_blank" h="ID=news,5022.1">
|
98
|
|
- Title
|
99
|
|
- </a>
|
100
|
|
- </div>
|
101
|
|
- <div class="sn_img">
|
102
|
|
- <a href="http://url.of.article2/" target="_blank" h="ID=news,5024.1">
|
103
|
|
- <img class="rms_img" height="80" id="emb1" src="/image.src" title="Title" width="80" />
|
104
|
|
- </a>
|
105
|
|
- </div>
|
106
|
|
- <div class="sn_txt">
|
107
|
|
- <div class="sn_oi">
|
108
|
|
- <span class="sn_snip">Article Content</span>
|
109
|
|
- <div class="sn_ST">
|
110
|
|
- <cite class="sn_src">metronews.fr</cite>
|
111
|
|
- · 
|
112
|
|
- <span class="sn_tm">3 hours, 44 minutes ago</span>
|
113
|
|
- </div>
|
114
|
|
- </div>
|
115
|
|
- </div>
|
116
|
|
- </div>
|
117
|
|
- <div class="sn_r">
|
118
|
|
- <div class="newstitle">
|
119
|
|
- <a href="http://url.of.article/" target="_blank" h="ID=news,5022.1">
|
120
|
|
- Title
|
121
|
|
- </a>
|
122
|
|
- </div>
|
123
|
|
- <div class="sn_img">
|
124
|
|
- <a href="http://url.of.article2/" target="_blank" h="ID=news,5024.1">
|
125
|
|
- <img class="rms_img" height="80" id="emb1" src="/image.src" title="Title" width="80" />
|
126
|
|
- </a>
|
127
|
|
- </div>
|
128
|
|
- <div class="sn_txt">
|
129
|
|
- <div class="sn_oi">
|
130
|
|
- <span class="sn_snip">Article Content</span>
|
131
|
|
- <div class="sn_ST">
|
132
|
|
- <cite class="sn_src">metronews.fr</cite>
|
133
|
|
- · 
|
134
|
|
- <span class="sn_tm">44 hours ago</span>
|
135
|
|
- </div>
|
136
|
|
- </div>
|
137
|
|
- </div>
|
138
|
|
- </div>
|
139
|
|
- <div class="sn_r">
|
140
|
|
- <div class="newstitle">
|
141
|
|
- <a href="http://url.of.article/" target="_blank" h="ID=news,5022.1">
|
142
|
|
- Title
|
143
|
|
- </a>
|
144
|
|
- </div>
|
145
|
|
- <div class="sn_img">
|
146
|
|
- <a href="http://url.of.article2/" target="_blank" h="ID=news,5024.1">
|
147
|
|
- <img class="rms_img" height="80" id="emb1" src="/image.src" title="Title" width="80" />
|
148
|
|
- </a>
|
149
|
|
- </div>
|
150
|
|
- <div class="sn_txt">
|
151
|
|
- <div class="sn_oi">
|
152
|
|
- <span class="sn_snip">Article Content</span>
|
153
|
|
- <div class="sn_ST">
|
154
|
|
- <cite class="sn_src">metronews.fr</cite>
|
155
|
|
- · 
|
156
|
|
- <span class="sn_tm">2 days ago</span>
|
157
|
|
- </div>
|
158
|
|
- </div>
|
159
|
|
- </div>
|
160
|
|
- </div>
|
161
|
|
- <div class="sn_r">
|
162
|
|
- <div class="newstitle">
|
163
|
|
- <a href="http://url.of.article/" target="_blank" h="ID=news,5022.1">
|
164
|
|
- Title
|
165
|
|
- </a>
|
166
|
|
- </div>
|
167
|
|
- <div class="sn_img">
|
168
|
|
- <a href="http://url.of.article2/" target="_blank" h="ID=news,5024.1">
|
169
|
|
- <img class="rms_img" height="80" id="emb1" src="/image.src" title="Title" width="80" />
|
170
|
|
- </a>
|
171
|
|
- </div>
|
172
|
|
- <div class="sn_txt">
|
173
|
|
- <div class="sn_oi">
|
174
|
|
- <span class="sn_snip">Article Content</span>
|
175
|
|
- <div class="sn_ST">
|
176
|
|
- <cite class="sn_src">metronews.fr</cite>
|
177
|
|
- · 
|
178
|
|
- <span class="sn_tm">27/01/2015</span>
|
179
|
|
- </div>
|
180
|
|
- </div>
|
181
|
|
- </div>
|
182
|
|
- </div>
|
183
|
|
- <div class="sn_r">
|
184
|
|
- <div class="newstitle">
|
185
|
|
- <a href="http://url.of.article/" target="_blank" h="ID=news,5022.1">
|
186
|
|
- Title
|
187
|
|
- </a>
|
188
|
|
- </div>
|
189
|
|
- <div class="sn_img">
|
190
|
|
- <a href="http://url.of.article2/" target="_blank" h="ID=news,5024.1">
|
191
|
|
- <img class="rms_img" height="80" id="emb1" src="/image.src" title="Title" width="80" />
|
192
|
|
- </a>
|
193
|
|
- </div>
|
194
|
|
- <div class="sn_txt">
|
195
|
|
- <div class="sn_oi">
|
196
|
|
- <span class="sn_snip">Article Content</span>
|
197
|
|
- <div class="sn_ST">
|
198
|
|
- <cite class="sn_src">metronews.fr</cite>
|
199
|
|
- · 
|
200
|
|
- <span class="sn_tm">Il y a 3 heures</span>
|
201
|
|
- </div>
|
202
|
|
- </div>
|
203
|
|
- </div>
|
204
|
|
- </div>
|
205
|
|
- """
|
|
82
|
+ html = """<?xml version="1.0" encoding="utf-8" ?>
|
|
83
|
+<rss version="2.0" xmlns:News="https://www.bing.com:443/news/search?q=python&setmkt=en-US&first=1&format=RSS">
|
|
84
|
+ <channel>
|
|
85
|
+ <title>python - Bing News</title>
|
|
86
|
+ <link>https://www.bing.com:443/news/search?q=python&setmkt=en-US&first=1&format=RSS</link>
|
|
87
|
+ <description>Search results</description>
|
|
88
|
+ <image>
|
|
89
|
+ <url>http://10.53.64.9/rsslogo.gif</url>
|
|
90
|
+ <title>test</title>
|
|
91
|
+ <link>https://www.bing.com:443/news/search?q=test&setmkt=en-US&first=1&format=RSS</link>
|
|
92
|
+ </image>
|
|
93
|
+ <copyright>Copyright</copyright>
|
|
94
|
+ <item>
|
|
95
|
+ <title>Title</title>
|
|
96
|
+ <link>http://another.url.of.article/</link>
|
|
97
|
+ <description>Article Content</description>
|
|
98
|
+ <pubDate>garbage</pubDate>
|
|
99
|
+ <News:Source>Infoworld</News:Source>
|
|
100
|
+ <News:Image>http://another.bing.com/image</News:Image>
|
|
101
|
+ <News:ImageSize>w={0}&h={1}&c=7</News:ImageSize>
|
|
102
|
+ <News:ImageKeepOriginalRatio></News:ImageKeepOriginalRatio>
|
|
103
|
+ <News:ImageMaxWidth>620</News:ImageMaxWidth>
|
|
104
|
+ <News:ImageMaxHeight>413</News:ImageMaxHeight>
|
|
105
|
+ </item>
|
|
106
|
+ </channel>
|
|
107
|
+</rss>""" # noqa
|
206
|
108
|
response = mock.Mock(content=html)
|
207
|
109
|
results = bing_news.response(response)
|
208
|
110
|
self.assertEqual(type(results), list)
|
209
|
|
- self.assertEqual(len(results), 6)
|
|
111
|
+ self.assertEqual(len(results), 1)
|
|
112
|
+ self.assertEqual(results[0]['title'], 'Title')
|
|
113
|
+ self.assertEqual(results[0]['url'], 'http://another.url.of.article/')
|
|
114
|
+ self.assertEqual(results[0]['content'], 'Article Content')
|
|
115
|
+ self.assertEqual(results[0]['thumbnail'], 'http://another.bing.com/image')
|
|
116
|
+
|
|
117
|
+ html = """<?xml version="1.0" encoding="utf-8" ?>
|
|
118
|
+<rss version="2.0" xmlns:News="https://www.bing.com:443/news/search?q=python&setmkt=en-US&first=1&format=RSS">
|
|
119
|
+ <channel>
|
|
120
|
+ <title>python - Bing News</title>
|
|
121
|
+ <link>https://www.bing.com:443/news/search?q=python&setmkt=en-US&first=1&format=RSS</link>
|
|
122
|
+ <description>Search results</description>
|
|
123
|
+ <image>
|
|
124
|
+ <url>http://10.53.64.9/rsslogo.gif</url>
|
|
125
|
+ <title>test</title>
|
|
126
|
+ <link>https://www.bing.com:443/news/search?q=test&setmkt=en-US&first=1&format=RSS</link>
|
|
127
|
+ </image>
|
|
128
|
+ </channel>
|
|
129
|
+</rss>""" # noqa
|
210
|
130
|
|
211
|
|
- html = """
|
212
|
|
- <div class="newstitle">
|
213
|
|
- <a href="http://url.of.article/" target="_blank" h="ID=news,5022.1">
|
214
|
|
- Title
|
215
|
|
- </a>
|
216
|
|
- </div>
|
217
|
|
- <div class="sn_img">
|
218
|
|
- <a href="http://url.of.article2/" target="_blank" h="ID=news,5024.1">
|
219
|
|
- <img class="rms_img" height="80" id="emb1" src="/image.src" title="Title" width="80" />
|
220
|
|
- </a>
|
221
|
|
- </div>
|
222
|
|
- <div class="sn_txt">
|
223
|
|
- <div class="sn_oi">
|
224
|
|
- <span class="sn_snip">Article Content</span>
|
225
|
|
- <div class="sn_ST">
|
226
|
|
- <cite class="sn_src">metronews.fr</cite>
|
227
|
|
- · 
|
228
|
|
- <span class="sn_tm">44 minutes ago</span>
|
229
|
|
- </div>
|
230
|
|
- </div>
|
231
|
|
- </div>
|
232
|
|
- """
|
233
|
131
|
response = mock.Mock(content=html)
|
234
|
132
|
results = bing_news.response(response)
|
235
|
133
|
self.assertEqual(type(results), list)
|
236
|
134
|
self.assertEqual(len(results), 0)
|
|
135
|
+
|
|
136
|
+ html = """<?xml version="1.0" encoding="utf-8" ?>gabarge"""
|
|
137
|
+ response = mock.Mock(content=html)
|
|
138
|
+ self.assertRaises(lxml.etree.XMLSyntaxError, bing_news.response, response)
|