浏览代码

Add the Bing engine to the unit tests

Cqoicebordel 10 年前
父节点
当前提交
525af2a031
共有 4 个文件被更改，包括 97 次插入和 4 次删除
  1. 1
    0
      .gitignore
  2. 5
    4
      searx/engines/bing.py
  3. 90
    0
      searx/tests/engines/test_bing.py
  4. 1
    0
      searx/tests/test_engines.py

+ 1
- 0
.gitignore 查看文件

23
 parts/
23
 parts/
24
 searx.egg-info/
24
 searx.egg-info/
25
 var/
25
 var/
26
+node_modules/

+ 5
- 4
searx/engines/bing.py 查看文件

14
 from urllib import urlencode
14
 from urllib import urlencode
15
 from cgi import escape
15
 from cgi import escape
16
 from lxml import html
16
 from lxml import html
17
+from searx.engines.xpath import extract_text
17
 
18
 
18
 # engine dependent config
19
 # engine dependent config
19
 categories = ['general']
20
 categories = ['general']
55
     for result in dom.xpath('//div[@class="sa_cc"]'):
56
     for result in dom.xpath('//div[@class="sa_cc"]'):
56
         link = result.xpath('.//h3/a')[0]
57
         link = result.xpath('.//h3/a')[0]
57
         url = link.attrib.get('href')
58
         url = link.attrib.get('href')
58
-        title = ' '.join(link.xpath('.//text()'))
59
-        content = escape(' '.join(result.xpath('.//p//text()')))
59
+        title = extract_text(link)
60
+        content = escape(extract_text(result.xpath('.//p')))
60
 
61
 
61
         # append result
62
         # append result
62
         results.append({'url': url,
63
         results.append({'url': url,
71
     for result in dom.xpath('//li[@class="b_algo"]'):
72
     for result in dom.xpath('//li[@class="b_algo"]'):
72
         link = result.xpath('.//h2/a')[0]
73
         link = result.xpath('.//h2/a')[0]
73
         url = link.attrib.get('href')
74
         url = link.attrib.get('href')
74
-        title = ' '.join(link.xpath('.//text()'))
75
-        content = escape(' '.join(result.xpath('.//p//text()')))
75
+        title = extract_text(link)
76
+        content = escape(extract_text(result.xpath('.//p')))
76
 
77
 
77
         # append result
78
         # append result
78
         results.append({'url': url,
79
         results.append({'url': url,

+ 90
- 0
searx/tests/engines/test_bing.py 查看文件

1
+from collections import defaultdict
2
+import mock
3
+from searx.engines import bing
4
+from searx.testing import SearxTestCase
5
+
6
+
7
+class TestBingEngine(SearxTestCase):
8
+
9
+    def test_request(self):
10
+        query = 'test_query'
11
+        dicto = defaultdict(dict)
12
+        dicto['pageno'] = 0
13
+        dicto['language'] = 'fr_FR'
14
+        params = bing.request(query, dicto)
15
+        self.assertTrue('url' in params)
16
+        self.assertTrue(query in params['url'])
17
+        self.assertTrue('bing.com' in params['url'])
18
+        self.assertTrue('SRCHHPGUSR' in params['cookies'])
19
+        self.assertTrue('fr' in params['cookies']['SRCHHPGUSR'])
20
+
21
+        dicto['language'] = 'all'
22
+        params = bing.request(query, dicto)
23
+        self.assertTrue('SRCHHPGUSR' in params['cookies'])
24
+        self.assertTrue('en' in params['cookies']['SRCHHPGUSR'])
25
+
26
+    def test_response(self):
27
+        self.assertRaises(AttributeError, bing.response, None)
28
+        self.assertRaises(AttributeError, bing.response, [])
29
+        self.assertRaises(AttributeError, bing.response, '')
30
+        self.assertRaises(AttributeError, bing.response, '[]')
31
+
32
+        response = mock.Mock(content='<html></html>')
33
+        self.assertEqual(bing.response(response), [])
34
+
35
+        response = mock.Mock(content='<html></html>')
36
+        self.assertEqual(bing.response(response), [])
37
+
38
+        html = """
39
+        <div class="sa_cc" u="0|5109|4755453613245655|UAGjXgIrPH5yh-o5oNHRx_3Zta87f_QO">
40
+            <div Class="sa_mc">
41
+                <div class="sb_tlst">
42
+                    <h3>
43
+                        <a href="http://this.should.be.the.link/" h="ID=SERP,5124.1">
44
+                        <strong>This</strong> should be the title</a>
45
+                    </h3>
46
+                </div>
47
+                <div class="sb_meta"><cite><strong>this</strong>.meta.com</cite>
48
+                    <span class="c_tlbxTrg">
49
+                        <span class="c_tlbxH" H="BASE:CACHEDPAGEDEFAULT" K="SERP,5125.1">
50
+                        </span>
51
+                    </span>
52
+                </div>
53
+                <p><strong>This</strong> should be the content.</p>
54
+            </div>
55
+        </div>
56
+        """
57
+        response = mock.Mock(content=html)
58
+        results = bing.response(response)
59
+        self.assertEqual(type(results), list)
60
+        self.assertEqual(len(results), 1)
61
+        self.assertEqual(results[0]['title'], 'This should be the title')
62
+        self.assertEqual(results[0]['url'], 'http://this.should.be.the.link/')
63
+        self.assertEqual(results[0]['content'], 'This should be the content.')
64
+
65
+        html = """
66
+        <li class="b_algo" u="0|5109|4755453613245655|UAGjXgIrPH5yh-o5oNHRx_3Zta87f_QO">
67
+            <div Class="sa_mc">
68
+                <div class="sb_tlst">
69
+                    <h2>
70
+                        <a href="http://this.should.be.the.link/" h="ID=SERP,5124.1">
71
+                        <strong>This</strong> should be the title</a>
72
+                    </h2>
73
+                </div>
74
+                <div class="sb_meta"><cite><strong>this</strong>.meta.com</cite>
75
+                    <span class="c_tlbxTrg">
76
+                        <span class="c_tlbxH" H="BASE:CACHEDPAGEDEFAULT" K="SERP,5125.1">
77
+                        </span>
78
+                    </span>
79
+                </div>
80
+                <p><strong>This</strong> should be the content.</p>
81
+            </div>
82
+        </li>
83
+        """
84
+        response = mock.Mock(content=html)
85
+        results = bing.response(response)
86
+        self.assertEqual(type(results), list)
87
+        self.assertEqual(len(results), 1)
88
+        self.assertEqual(results[0]['title'], 'This should be the title')
89
+        self.assertEqual(results[0]['url'], 'http://this.should.be.the.link/')
90
+        self.assertEqual(results[0]['content'], 'This should be the content.')

+ 1
- 0
searx/tests/test_engines.py 查看文件

1
+from searx.tests.engines.test_bing import *  # noqa
1
 from searx.tests.engines.test_dummy import *  # noqa
2
 from searx.tests.engines.test_dummy import *  # noqa
2
 from searx.tests.engines.test_github import *  # noqa
3
 from searx.tests.engines.test_github import *  # noqa