Browse Source

Add Bing to the unit tests

Cqoicebordel 10 years ago
parent
commit
525af2a031
4 changed files with 97 additions and 4 deletions
  1. 1
    0
      .gitignore
  2. 5
    4
      searx/engines/bing.py
  3. 90
    0
      searx/tests/engines/test_bing.py
  4. 1
    0
      searx/tests/test_engines.py

+ 1
- 0
.gitignore View File

@@ -23,3 +23,4 @@ local/
23 23
 parts/
24 24
 searx.egg-info/
25 25
 var/
26
+node_modules/

+ 5
- 4
searx/engines/bing.py View File

@@ -14,6 +14,7 @@
14 14
 from urllib import urlencode
15 15
 from cgi import escape
16 16
 from lxml import html
17
+from searx.engines.xpath import extract_text
17 18
 
18 19
 # engine dependent config
19 20
 categories = ['general']
@@ -55,8 +56,8 @@ def response(resp):
55 56
     for result in dom.xpath('//div[@class="sa_cc"]'):
56 57
         link = result.xpath('.//h3/a')[0]
57 58
         url = link.attrib.get('href')
58
-        title = ' '.join(link.xpath('.//text()'))
59
-        content = escape(' '.join(result.xpath('.//p//text()')))
59
+        title = extract_text(link)
60
+        content = escape(extract_text(result.xpath('.//p')))
60 61
 
61 62
         # append result
62 63
         results.append({'url': url,
@@ -71,8 +72,8 @@ def response(resp):
71 72
     for result in dom.xpath('//li[@class="b_algo"]'):
72 73
         link = result.xpath('.//h2/a')[0]
73 74
         url = link.attrib.get('href')
74
-        title = ' '.join(link.xpath('.//text()'))
75
-        content = escape(' '.join(result.xpath('.//p//text()')))
75
+        title = extract_text(link)
76
+        content = escape(extract_text(result.xpath('.//p')))
76 77
 
77 78
         # append result
78 79
         results.append({'url': url,

+ 90
- 0
searx/tests/engines/test_bing.py View File

@@ -0,0 +1,90 @@
1
+from collections import defaultdict
2
+import mock
3
+from searx.engines import bing
4
+from searx.testing import SearxTestCase
5
+
6
+
7
+class TestBingEngine(SearxTestCase):
8
+
9
+    def test_request(self):
10
+        query = 'test_query'
11
+        dicto = defaultdict(dict)
12
+        dicto['pageno'] = 0
13
+        dicto['language'] = 'fr_FR'
14
+        params = bing.request(query, dicto)
15
+        self.assertTrue('url' in params)
16
+        self.assertTrue(query in params['url'])
17
+        self.assertTrue('bing.com' in params['url'])
18
+        self.assertTrue('SRCHHPGUSR' in params['cookies'])
19
+        self.assertTrue('fr' in params['cookies']['SRCHHPGUSR'])
20
+
21
+        dicto['language'] = 'all'
22
+        params = bing.request(query, dicto)
23
+        self.assertTrue('SRCHHPGUSR' in params['cookies'])
24
+        self.assertTrue('en' in params['cookies']['SRCHHPGUSR'])
25
+
26
+    def test_response(self):
27
+        self.assertRaises(AttributeError, bing.response, None)
28
+        self.assertRaises(AttributeError, bing.response, [])
29
+        self.assertRaises(AttributeError, bing.response, '')
30
+        self.assertRaises(AttributeError, bing.response, '[]')
31
+
32
+        response = mock.Mock(content='<html></html>')
33
+        self.assertEqual(bing.response(response), [])
34
+
35
+        response = mock.Mock(content='<html></html>')
36
+        self.assertEqual(bing.response(response), [])
37
+
38
+        html = """
39
+        <div class="sa_cc" u="0|5109|4755453613245655|UAGjXgIrPH5yh-o5oNHRx_3Zta87f_QO">
40
+            <div Class="sa_mc">
41
+                <div class="sb_tlst">
42
+                    <h3>
43
+                        <a href="http://this.should.be.the.link/" h="ID=SERP,5124.1">
44
+                        <strong>This</strong> should be the title</a>
45
+                    </h3>
46
+                </div>
47
+                <div class="sb_meta"><cite><strong>this</strong>.meta.com</cite>
48
+                    <span class="c_tlbxTrg">
49
+                        <span class="c_tlbxH" H="BASE:CACHEDPAGEDEFAULT" K="SERP,5125.1">
50
+                        </span>
51
+                    </span>
52
+                </div>
53
+                <p><strong>This</strong> should be the content.</p>
54
+            </div>
55
+        </div>
56
+        """
57
+        response = mock.Mock(content=html)
58
+        results = bing.response(response)
59
+        self.assertEqual(type(results), list)
60
+        self.assertEqual(len(results), 1)
61
+        self.assertEqual(results[0]['title'], 'This should be the title')
62
+        self.assertEqual(results[0]['url'], 'http://this.should.be.the.link/')
63
+        self.assertEqual(results[0]['content'], 'This should be the content.')
64
+
65
+        html = """
66
+        <li class="b_algo" u="0|5109|4755453613245655|UAGjXgIrPH5yh-o5oNHRx_3Zta87f_QO">
67
+            <div Class="sa_mc">
68
+                <div class="sb_tlst">
69
+                    <h2>
70
+                        <a href="http://this.should.be.the.link/" h="ID=SERP,5124.1">
71
+                        <strong>This</strong> should be the title</a>
72
+                    </h2>
73
+                </div>
74
+                <div class="sb_meta"><cite><strong>this</strong>.meta.com</cite>
75
+                    <span class="c_tlbxTrg">
76
+                        <span class="c_tlbxH" H="BASE:CACHEDPAGEDEFAULT" K="SERP,5125.1">
77
+                        </span>
78
+                    </span>
79
+                </div>
80
+                <p><strong>This</strong> should be the content.</p>
81
+            </div>
82
+        </li>
83
+        """
84
+        response = mock.Mock(content=html)
85
+        results = bing.response(response)
86
+        self.assertEqual(type(results), list)
87
+        self.assertEqual(len(results), 1)
88
+        self.assertEqual(results[0]['title'], 'This should be the title')
89
+        self.assertEqual(results[0]['url'], 'http://this.should.be.the.link/')
90
+        self.assertEqual(results[0]['content'], 'This should be the content.')

+ 1
- 0
searx/tests/test_engines.py View File

@@ -1,2 +1,3 @@
1
+from searx.tests.engines.test_bing import *  # noqa
1 2
 from searx.tests.engines.test_dummy import *  # noqa
2 3
 from searx.tests.engines.test_github import *  # noqa