Browse Source

Startpage's unit test

Cqoicebordel 10 years ago
parent
commit
f1c10f4fe4
3 changed files with 146 additions and 8 deletions
  1. searx/engines/startpage.py (+5, -8)
  2. searx/tests/engines/test_startpage.py (+140, -0)
  3. searx/tests/test_engines.py (+1, -0)

+ 5
- 8
searx/engines/startpage.py View File

@@ -13,6 +13,7 @@
13 13
 from lxml import html
14 14
 from cgi import escape
15 15
 import re
16
+from searx.engines.xpath import extract_text
16 17
 
17 18
 # engine dependent config
18 19
 categories = ['general']
@@ -45,8 +46,7 @@ def request(query, params):
45 46
 
46 47
     # set language if specified
47 48
     if params['language'] != 'all':
48
-        params['data']['with_language'] = ('lang_' +
49
-                                           params['language'].split('_')[0])
49
+        params['data']['with_language'] = ('lang_' + params['language'].split('_')[0])
50 50
 
51 51
     return params
52 52
 
@@ -64,18 +64,15 @@ def response(resp):
64 64
             continue
65 65
         link = links[0]
66 66
         url = link.attrib.get('href')
67
-        try:
68
-            title = escape(link.text_content())
69
-        except UnicodeDecodeError:
70
-            continue
71 67
 
72 68
         # block google-ad url's
73 69
         if re.match("^http(s|)://www.google.[a-z]+/aclk.*$", url):
74 70
             continue
75 71
 
72
+        title = escape(extract_text(link))
73
+
76 74
         if result.xpath('./p[@class="desc"]'):
77
-            content = escape(result.xpath('./p[@class="desc"]')[0]
78
-                             .text_content())
75
+            content = escape(extract_text(result.xpath('./p[@class="desc"]')))
79 76
         else:
80 77
             content = ''
81 78
 

+ 140
- 0
searx/tests/engines/test_startpage.py View File

@@ -0,0 +1,140 @@
1
+# -*- coding: utf-8 -*-
2
+from collections import defaultdict
3
+import mock
4
+from searx.engines import startpage
5
+from searx.testing import SearxTestCase
6
+
7
+
8
+class TestStartpageEngine(SearxTestCase):
9
+
10
+    def test_request(self):
11
+        query = 'test_query'
12
+        dicto = defaultdict(dict)
13
+        dicto['pageno'] = 1
14
+        dicto['language'] = 'fr_FR'
15
+        params = startpage.request(query, dicto)
16
+        self.assertIn('url', params)
17
+        self.assertIn('startpage.com', params['url'])
18
+        self.assertIn('data', params)
19
+        self.assertIn('query', params['data'])
20
+        self.assertIn(query, params['data']['query'])
21
+        self.assertIn('with_language', params['data'])
22
+        self.assertIn('lang_fr', params['data']['with_language'])
23
+
24
+        dicto['language'] = 'all'
25
+        params = startpage.request(query, dicto)
26
+        self.assertNotIn('with_language', params['data'])
27
+
28
+    def test_response(self):
29
+        self.assertRaises(AttributeError, startpage.response, None)
30
+        self.assertRaises(AttributeError, startpage.response, [])
31
+        self.assertRaises(AttributeError, startpage.response, '')
32
+        self.assertRaises(AttributeError, startpage.response, '[]')
33
+
34
+        response = mock.Mock(content='<html></html>')
35
+        self.assertEqual(startpage.response(response), [])
36
+
37
+        html = """
38
+        <div class='result' style=' *width : auto; *margin-right : 10%;'>
39
+            <h3>
40
+                <a href='http://this.should.be.the.link/' id='title_2' name='title_2' >
41
+                    This should be the title
42
+                </a>
43
+                <span id='title_stars_2' name='title_stars_2'>  </span>
44
+            </h3>
45
+            <p class='desc'>
46
+                This should be the content.
47
+            </p>
48
+            <p>
49
+                <span class='url'>www.speed<b>test</b>.net/fr/
50
+                </span>
51
+                  -
52
+                <A class="proxy" id="proxy_link" HREF="https://ixquick-proxy.com/do/spg/proxy?ep=&edata=&ek=&ekdata="
53
+                    class='proxy'>
54
+                    Navigation avec Ixquick Proxy
55
+                </A>
56
+                    -
57
+                <A HREF="https://ixquick-proxy.com/do/spg/highlight.pl?l=francais&c=hf&cat=web&q=test&rl=NONE&rid=
58
+                    &hlq=https://startpage.com/do/search&mtabp=-1&mtcmd=process_search&mtlanguage=francais&mtengine0=
59
+                    &mtcat=web&u=http:%2F%2Fwww.speedtest.net%2Ffr%2F" class='proxy'>
60
+                    Mis en surbrillance
61
+                </A>
62
+            </p>
63
+        </div>
64
+        """
65
+        response = mock.Mock(content=html)
66
+        results = startpage.response(response)
67
+        self.assertEqual(type(results), list)
68
+        self.assertEqual(len(results), 1)
69
+        self.assertEqual(results[0]['title'], 'This should be the title')
70
+        self.assertEqual(results[0]['url'], 'http://this.should.be.the.link/')
71
+        self.assertEqual(results[0]['content'], 'This should be the content.')
72
+
73
+        html = """
74
+        <div class='result' style=' *width : auto; *margin-right : 10%;'>
75
+            <h3>
76
+                <a href='http://www.google.com/aclk?sa=l&ai=C' id='title_2' name='title_2' >
77
+                    This should be the title
78
+                </a>
79
+                <span id='title_stars_2' name='title_stars_2'>  </span>
80
+            </h3>
81
+            <p class='desc'>
82
+                This should be the content.
83
+            </p>
84
+            <p>
85
+                <span class='url'>www.speed<b>test</b>.net/fr/
86
+                </span>
87
+                  -
88
+                <A class="proxy" id="proxy_link" HREF="https://ixquick-proxy.com/do/spg/proxy?ep=&edata=&ek=&ekdata="
89
+                    class='proxy'>
90
+                    Navigation avec Ixquick Proxy
91
+                </A>
92
+                    -
93
+                <A HREF="https://ixquick-proxy.com/do/spg/highlight.pl?l=francais&c=hf&cat=web&q=test&rl=NONE&rid=
94
+                    &hlq=https://startpage.com/do/search&mtabp=-1&mtcmd=process_search&mtlanguage=francais&mtengine0=
95
+                    &mtcat=web&u=http:%2F%2Fwww.speedtest.net%2Ffr%2F" class='proxy'>
96
+                    Mis en surbrillance
97
+                </A>
98
+            </p>
99
+        </div>
100
+        <div class='result' style=' *width : auto; *margin-right : 10%;'>
101
+            <h3>
102
+                <span id='title_stars_2' name='title_stars_2'>  </span>
103
+            </h3>
104
+            <p class='desc'>
105
+                This should be the content.
106
+            </p>
107
+            <p>
108
+                <span class='url'>www.speed<b>test</b>.net/fr/
109
+                </span>
110
+            </p>
111
+        </div>
112
+        <div class='result' style=' *width : auto; *margin-right : 10%;'>
113
+            <h3>
114
+                <a href='http://this.should.be.the.link/' id='title_2' name='title_2' >
115
+                    This should be the title
116
+                </a>
117
+                <span id='title_stars_2' name='title_stars_2'>  </span>
118
+            </h3>
119
+            <p>
120
+                <span class='url'>www.speed<b>test</b>.net/fr/
121
+                </span>
122
+                  -
123
+                <A class="proxy" id="proxy_link" HREF="https://ixquick-proxy.com/do/spg/proxy?ep=&edata=&ek=&ekdata="
124
+                    class='proxy'>
125
+                    Navigation avec Ixquick Proxy
126
+                </A>
127
+                    -
128
+                <A HREF="https://ixquick-proxy.com/do/spg/highlight.pl?l=francais&c=hf&cat=web&q=test&rl=NONE&rid=
129
+                    &hlq=https://startpage.com/do/search&mtabp=-1&mtcmd=process_search&mtlanguage=francais&mtengine0=
130
+                    &mtcat=web&u=http:%2F%2Fwww.speedtest.net%2Ffr%2F" class='proxy'>
131
+                    Mis en surbrillance
132
+                </A>
133
+            </p>
134
+        </div>
135
+        """
136
+        response = mock.Mock(content=html)
137
+        results = startpage.response(response)
138
+        self.assertEqual(type(results), list)
139
+        self.assertEqual(len(results), 1)
140
+        self.assertEqual(results[0]['content'], '')

+ 1
- 0
searx/tests/test_engines.py View File

@@ -23,6 +23,7 @@ from searx.tests.engines.test_searchcode_code import *  # noqa
23 23
 from searx.tests.engines.test_searchcode_doc import *  # noqa
24 24
 from searx.tests.engines.test_soundcloud import *  # noqa
25 25
 from searx.tests.engines.test_stackoverflow import *  # noqa
26
+from searx.tests.engines.test_startpage import *  # noqa
26 27
 from searx.tests.engines.test_subtitleseeker import *  # noqa
27 28
 from searx.tests.engines.test_twitter import *  # noqa
28 29
 from searx.tests.engines.test_vimeo import *  # noqa