Browse Source

Stackoverflow's unit test

Cqoicebordel 10 years ago
parent
commit
d20ddf9da1

+ 4
- 4
searx/engines/stackoverflow.py View File

12
 from cgi import escape
12
 from cgi import escape
13
 from urllib import urlencode
13
 from urllib import urlencode
14
 from lxml import html
14
 from lxml import html
15
+from searx.engines.xpath import extract_text
15
 
16
 
16
 # engine dependent config
17
 # engine dependent config
17
 categories = ['it']
18
 categories = ['it']
24
 # specific xpath variables
25
 # specific xpath variables
25
 results_xpath = '//div[contains(@class,"question-summary")]'
26
 results_xpath = '//div[contains(@class,"question-summary")]'
26
 link_xpath = './/div[@class="result-link"]//a|.//div[@class="summary"]//h3//a'
27
 link_xpath = './/div[@class="result-link"]//a|.//div[@class="summary"]//h3//a'
27
-title_xpath = './/text()'
28
-content_xpath = './/div[@class="excerpt"]//text()'
28
+content_xpath = './/div[@class="excerpt"]'
29
 
29
 
30
 
30
 
31
 # do search-request
31
 # do search-request
46
     for result in dom.xpath(results_xpath):
46
     for result in dom.xpath(results_xpath):
47
         link = result.xpath(link_xpath)[0]
47
         link = result.xpath(link_xpath)[0]
48
         href = urljoin(url, link.attrib.get('href'))
48
         href = urljoin(url, link.attrib.get('href'))
49
-        title = escape(' '.join(link.xpath(title_xpath)))
50
-        content = escape(' '.join(result.xpath(content_xpath)))
49
+        title = escape(extract_text(link))
50
+        content = escape(extract_text(result.xpath(content_xpath)))
51
 
51
 
52
         # append result
52
         # append result
53
         results.append({'url': href,
53
         results.append({'url': href,

+ 106
- 0
searx/tests/engines/test_stackoverflow.py View File

1
+from collections import defaultdict
2
+import mock
3
+from searx.engines import stackoverflow
4
+from searx.testing import SearxTestCase
5
+
6
+
7
+class TestStackoverflowEngine(SearxTestCase):
8
+
9
+    def test_request(self):
10
+        query = 'test_query'
11
+        dicto = defaultdict(dict)
12
+        dicto['pageno'] = 0
13
+        params = stackoverflow.request(query, dicto)
14
+        self.assertTrue('url' in params)
15
+        self.assertTrue(query in params['url'])
16
+        self.assertTrue('stackoverflow.com' in params['url'])
17
+
18
+    def test_response(self):
19
+        self.assertRaises(AttributeError, stackoverflow.response, None)
20
+        self.assertRaises(AttributeError, stackoverflow.response, [])
21
+        self.assertRaises(AttributeError, stackoverflow.response, '')
22
+        self.assertRaises(AttributeError, stackoverflow.response, '[]')
23
+
24
+        response = mock.Mock(text='<html></html>')
25
+        self.assertEqual(stackoverflow.response(response), [])
26
+
27
+        html = """
28
+        <div class="question-summary search-result" id="answer-id-1783426">
29
+            <div class="statscontainer">
30
+                <div class="statsarrow"></div>
31
+                <div class="stats">
32
+                    <div class="vote">
33
+                        <div class="votes answered">
34
+                            <span class="vote-count-post "><strong>2583</strong></span>
35
+                            <div class="viewcount">votes</div>
36
+                        </div>
37
+                    </div>
38
+                </div>
39
+            </div>
40
+            <div class="summary">
41
+                <div class="result-link">
42
+                    <span>
43
+                        <a href="/questions/this.is.the.url"
44
+                            data-searchsession="/questions"
45
+                            title="Checkout remote Git branch">
46
+                            This is the title
47
+                        </a>
48
+                    </span>
49
+                </div>
50
+                <div class="excerpt">
51
+                    This is the content
52
+                </div>
53
+                <div class="tags user-tags t-git t-git-checkout t-remote-branch">
54
+                </div>
55
+                <div class="started fr">
56
+                    answered <span title="2009-11-23 14:26:08Z" class="relativetime">nov 23 '09</span> by
57
+                    <a href="/users/214090/hallski">hallski</a>
58
+                </div>
59
+            </div>
60
+        </div>
61
+        """
62
+        response = mock.Mock(text=html)
63
+        results = stackoverflow.response(response)
64
+        self.assertEqual(type(results), list)
65
+        self.assertEqual(len(results), 1)
66
+        self.assertEqual(results[0]['title'], 'This is the title')
67
+        self.assertEqual(results[0]['url'], 'http://stackoverflow.com/questions/this.is.the.url')
68
+        self.assertEqual(results[0]['content'], 'This is the content')
69
+
70
+        html = """
71
+        <div class="statscontainer">
72
+            <div class="statsarrow"></div>
73
+            <div class="stats">
74
+                <div class="vote">
75
+                    <div class="votes answered">
76
+                        <span class="vote-count-post "><strong>2583</strong></span>
77
+                        <div class="viewcount">votes</div>
78
+                    </div>
79
+                </div>
80
+            </div>
81
+        </div>
82
+        <div class="summary">
83
+            <div class="result-link">
84
+                <span>
85
+                    <a href="/questions/this.is.the.url"
86
+                        data-searchsession="/questions"
87
+                        title="Checkout remote Git branch">
88
+                        This is the title
89
+                    </a>
90
+                </span>
91
+            </div>
92
+            <div class="excerpt">
93
+                This is the content
94
+            </div>
95
+            <div class="tags user-tags t-git t-git-checkout t-remote-branch">
96
+            </div>
97
+            <div class="started fr">
98
+                answered <span title="2009-11-23 14:26:08Z" class="relativetime">nov 23 '09</span> by
99
+                <a href="/users/214090/hallski">hallski</a>
100
+            </div>
101
+        </div>
102
+        """
103
+        response = mock.Mock(text=html)
104
+        results = stackoverflow.response(response)
105
+        self.assertEqual(type(results), list)
106
+        self.assertEqual(len(results), 0)

+ 1
- 0
searx/tests/test_engines.py View File

16
 from searx.tests.engines.test_searchcode_code import *  # noqa
16
 from searx.tests.engines.test_searchcode_code import *  # noqa
17
 from searx.tests.engines.test_searchcode_doc import *  # noqa
17
 from searx.tests.engines.test_searchcode_doc import *  # noqa
18
 from searx.tests.engines.test_soundcloud import *  # noqa
18
 from searx.tests.engines.test_soundcloud import *  # noqa
19
+from searx.tests.engines.test_stackoverflow import *  # noqa
19
 from searx.tests.engines.test_youtube import *  # noqa
20
 from searx.tests.engines.test_youtube import *  # noqa