Browse Source

Duckduckgo unit test

Cqoicebordel 10 years ago
parent
commit
efe6dead55
3 changed files with 96 additions and 5 deletions
  1. searx/engines/duckduckgo.py (5 additions, 5 deletions)
  2. searx/tests/engines/test_duckduckgo.py (90 additions, 0 deletions)
  3. searx/tests/test_engines.py (1 addition, 0 deletions)

+ 5
- 5
searx/engines/duckduckgo.py View File

@@ -15,7 +15,7 @@
15 15
 
16 16
 from urllib import urlencode
17 17
 from lxml.html import fromstring
18
-from searx.utils import html_to_text
18
+from searx.engines.xpath import extract_text
19 19
 
20 20
 # engine dependent config
21 21
 categories = ['general']
@@ -28,8 +28,8 @@ url = 'https://duckduckgo.com/html?{query}&s={offset}'
28 28
 # specific xpath variables
29 29
 result_xpath = '//div[@class="results_links results_links_deep web-result"]'  # noqa
30 30
 url_xpath = './/a[@class="large"]/@href'
31
-title_xpath = './/a[@class="large"]//text()'
32
-content_xpath = './/div[@class="snippet"]//text()'
31
+title_xpath = './/a[@class="large"]'
32
+content_xpath = './/div[@class="snippet"]'
33 33
 
34 34
 
35 35
 # do search-request
@@ -64,8 +64,8 @@ def response(resp):
64 64
         if not res_url:
65 65
             continue
66 66
 
67
-        title = html_to_text(''.join(r.xpath(title_xpath)))
68
-        content = html_to_text(''.join(r.xpath(content_xpath)))
67
+        title = extract_text(r.xpath(title_xpath))
68
+        content = extract_text(r.xpath(content_xpath))
69 69
 
70 70
         # append result
71 71
         results.append({'title': title,

+ 90
- 0
searx/tests/engines/test_duckduckgo.py View File

@@ -0,0 +1,90 @@
1
+from collections import defaultdict
2
+import mock
3
+from searx.engines import duckduckgo
4
+from searx.testing import SearxTestCase
5
+
6
+
7
+class TestBingEngine(SearxTestCase):
8
+
9
+    def test_request(self):
10
+        query = 'test_query'
11
+        dicto = defaultdict(dict)
12
+        dicto['pageno'] = 1
13
+        dicto['language'] = 'fr_FR'
14
+        params = duckduckgo.request(query, dicto)
15
+        self.assertIn('url', params)
16
+        self.assertIn(query, params['url'])
17
+        self.assertIn('duckduckgo.com', params['url'])
18
+        self.assertIn('fr-fr', params['url'])
19
+
20
+        dicto['language'] = 'all'
21
+        params = duckduckgo.request(query, dicto)
22
+        self.assertIn('en-us', params['url'])
23
+
24
+    def test_response(self):
25
+        self.assertRaises(AttributeError, duckduckgo.response, None)
26
+        self.assertRaises(AttributeError, duckduckgo.response, [])
27
+        self.assertRaises(AttributeError, duckduckgo.response, '')
28
+        self.assertRaises(AttributeError, duckduckgo.response, '[]')
29
+
30
+        response = mock.Mock(text='<html></html>')
31
+        self.assertEqual(duckduckgo.response(response), [])
32
+
33
+        html = """
34
+        <div class="results_links results_links_deep web-result">
35
+            <div class="icon_fav" style="display: block;">
36
+                <a rel="nofollow" href="https://www.test.com/">
37
+                    <img width="16" height="16" alt=""
38
+                    src="/i/www.test.com.ico" style="visibility: visible;" name="i15" />
39
+                </a>
40
+            </div>
41
+            <div class="links_main links_deep"> <!-- This is the visible part -->
42
+                <a rel="nofollow" class="large" href="http://this.should.be.the.link/">
43
+                    This <b>is</b> <b>the</b> title
44
+                </a>
45
+                <div class="snippet"><b>This</b> should be the content.</div>
46
+                <div class="url">
47
+                    http://this.should.be.the.link/
48
+                </div>
49
+            </div>
50
+        </div>
51
+        """
52
+        response = mock.Mock(text=html)
53
+        results = duckduckgo.response(response)
54
+        self.assertEqual(type(results), list)
55
+        self.assertEqual(len(results), 1)
56
+        self.assertEqual(results[0]['title'], 'This is the title')
57
+        self.assertEqual(results[0]['url'], 'http://this.should.be.the.link/')
58
+        self.assertEqual(results[0]['content'], 'This should be the content.')
59
+
60
+        html = """
61
+        <div class="results_links results_links_deep web-result">
62
+            <div class="icon_fav" style="display: block;">
63
+            </div>
64
+            <div class="links_main links_deep"> <!-- This is the visible part -->
65
+                <div class="snippet"><b>This</b> should be the content.</div>
66
+                <div class="url">
67
+                    http://this.should.be.the.link/
68
+                </div>
69
+            </div>
70
+        </div>
71
+        <div class="results_links results_links_deep web-result">
72
+            <div class="icon_fav" style="display: block;">
73
+                <img width="16" height="16" alt=""
74
+                src="/i/www.test.com.ico" style="visibility: visible;" name="i15" />
75
+            </div>
76
+            <div class="links_main links_deep"> <!-- This is the visible part -->
77
+                <a rel="nofollow" class="large" href="">
78
+                    This <b>is</b> <b>the</b> title
79
+                </a>
80
+                <div class="snippet"><b>This</b> should be the content.</div>
81
+                <div class="url">
82
+                    http://this.should.be.the.link/
83
+                </div>
84
+            </div>
85
+        </div>
86
+        """
87
+        response = mock.Mock(text=html)
88
+        results = duckduckgo.response(response)
89
+        self.assertEqual(type(results), list)
90
+        self.assertEqual(len(results), 0)

+ 1
- 0
searx/tests/test_engines.py View File

@@ -6,6 +6,7 @@ from searx.tests.engines.test_dailymotion import *  # noqa
6 6
 from searx.tests.engines.test_deezer import *  # noqa
7 7
 from searx.tests.engines.test_deviantart import *  # noqa
8 8
 from searx.tests.engines.test_digg import *  # noqa
9
+from searx.tests.engines.test_duckduckgo import *  # noqa
9 10
 from searx.tests.engines.test_dummy import *  # noqa
10 11
 from searx.tests.engines.test_flickr import *  # noqa
11 12
 from searx.tests.engines.test_flickr_noapi import *  # noqa