Browse Source

Stackoverflow's unit test

Cqoicebordel 10 years ago
parent
commit
d20ddf9da1

+ 4
- 4
searx/engines/stackoverflow.py View File

@@ -12,6 +12,7 @@ from urlparse import urljoin
12 12
 from cgi import escape
13 13
 from urllib import urlencode
14 14
 from lxml import html
15
+from searx.engines.xpath import extract_text
15 16
 
16 17
 # engine dependent config
17 18
 categories = ['it']
@@ -24,8 +25,7 @@ search_url = url+'search?{query}&page={pageno}'
24 25
 # specific xpath variables
25 26
 results_xpath = '//div[contains(@class,"question-summary")]'
26 27
 link_xpath = './/div[@class="result-link"]//a|.//div[@class="summary"]//h3//a'
27
-title_xpath = './/text()'
28
-content_xpath = './/div[@class="excerpt"]//text()'
28
+content_xpath = './/div[@class="excerpt"]'
29 29
 
30 30
 
31 31
 # do search-request
@@ -46,8 +46,8 @@ def response(resp):
46 46
     for result in dom.xpath(results_xpath):
47 47
         link = result.xpath(link_xpath)[0]
48 48
         href = urljoin(url, link.attrib.get('href'))
49
-        title = escape(' '.join(link.xpath(title_xpath)))
50
-        content = escape(' '.join(result.xpath(content_xpath)))
49
+        title = escape(extract_text(link))
50
+        content = escape(extract_text(result.xpath(content_xpath)))
51 51
 
52 52
         # append result
53 53
         results.append({'url': href,

+ 106
- 0
searx/tests/engines/test_stackoverflow.py View File

@@ -0,0 +1,106 @@
1
+from collections import defaultdict
2
+import mock
3
+from searx.engines import stackoverflow
4
+from searx.testing import SearxTestCase
5
+
6
+
7
+class TestStackoverflowEngine(SearxTestCase):
8
+
9
+    def test_request(self):
10
+        query = 'test_query'
11
+        dicto = defaultdict(dict)
12
+        dicto['pageno'] = 0
13
+        params = stackoverflow.request(query, dicto)
14
+        self.assertTrue('url' in params)
15
+        self.assertTrue(query in params['url'])
16
+        self.assertTrue('stackoverflow.com' in params['url'])
17
+
18
+    def test_response(self):
19
+        self.assertRaises(AttributeError, stackoverflow.response, None)
20
+        self.assertRaises(AttributeError, stackoverflow.response, [])
21
+        self.assertRaises(AttributeError, stackoverflow.response, '')
22
+        self.assertRaises(AttributeError, stackoverflow.response, '[]')
23
+
24
+        response = mock.Mock(text='<html></html>')
25
+        self.assertEqual(stackoverflow.response(response), [])
26
+
27
+        html = """
28
+        <div class="question-summary search-result" id="answer-id-1783426">
29
+            <div class="statscontainer">
30
+                <div class="statsarrow"></div>
31
+                <div class="stats">
32
+                    <div class="vote">
33
+                        <div class="votes answered">
34
+                            <span class="vote-count-post "><strong>2583</strong></span>
35
+                            <div class="viewcount">votes</div>
36
+                        </div>
37
+                    </div>
38
+                </div>
39
+            </div>
40
+            <div class="summary">
41
+                <div class="result-link">
42
+                    <span>
43
+                        <a href="/questions/this.is.the.url"
44
+                            data-searchsession="/questions"
45
+                            title="Checkout remote Git branch">
46
+                            This is the title
47
+                        </a>
48
+                    </span>
49
+                </div>
50
+                <div class="excerpt">
51
+                    This is the content
52
+                </div>
53
+                <div class="tags user-tags t-git t-git-checkout t-remote-branch">
54
+                </div>
55
+                <div class="started fr">
56
+                    answered <span title="2009-11-23 14:26:08Z" class="relativetime">nov 23 '09</span> by
57
+                    <a href="/users/214090/hallski">hallski</a>
58
+                </div>
59
+            </div>
60
+        </div>
61
+        """
62
+        response = mock.Mock(text=html)
63
+        results = stackoverflow.response(response)
64
+        self.assertEqual(type(results), list)
65
+        self.assertEqual(len(results), 1)
66
+        self.assertEqual(results[0]['title'], 'This is the title')
67
+        self.assertEqual(results[0]['url'], 'http://stackoverflow.com/questions/this.is.the.url')
68
+        self.assertEqual(results[0]['content'], 'This is the content')
69
+
70
+        html = """
71
+        <div class="statscontainer">
72
+            <div class="statsarrow"></div>
73
+            <div class="stats">
74
+                <div class="vote">
75
+                    <div class="votes answered">
76
+                        <span class="vote-count-post "><strong>2583</strong></span>
77
+                        <div class="viewcount">votes</div>
78
+                    </div>
79
+                </div>
80
+            </div>
81
+        </div>
82
+        <div class="summary">
83
+            <div class="result-link">
84
+                <span>
85
+                    <a href="/questions/this.is.the.url"
86
+                        data-searchsession="/questions"
87
+                        title="Checkout remote Git branch">
88
+                        This is the title
89
+                    </a>
90
+                </span>
91
+            </div>
92
+            <div class="excerpt">
93
+                This is the content
94
+            </div>
95
+            <div class="tags user-tags t-git t-git-checkout t-remote-branch">
96
+            </div>
97
+            <div class="started fr">
98
+                answered <span title="2009-11-23 14:26:08Z" class="relativetime">nov 23 '09</span> by
99
+                <a href="/users/214090/hallski">hallski</a>
100
+            </div>
101
+        </div>
102
+        """
103
+        response = mock.Mock(text=html)
104
+        results = stackoverflow.response(response)
105
+        self.assertEqual(type(results), list)
106
+        self.assertEqual(len(results), 0)

+ 1
- 0
searx/tests/test_engines.py View File

@@ -16,4 +16,5 @@ from searx.tests.engines.test_mixcloud import *  # noqa
16 16
 from searx.tests.engines.test_searchcode_code import *  # noqa
17 17
 from searx.tests.engines.test_searchcode_doc import *  # noqa
18 18
 from searx.tests.engines.test_soundcloud import *  # noqa
19
+from searx.tests.engines.test_stackoverflow import *  # noqa
19 20
 from searx.tests.engines.test_youtube import *  # noqa