瀏覽代碼

[fix] duckduckgo's xpaths changed

test_duckduckgo was updated to reflect the changes in DuckDuckGo's HTML
a01200356 9 年之前
父節點
當前提交
751c9a346e
共有 2 個檔案被更改,包括 26 行新增、46 行刪除
  1. 4
    4
      searx/engines/duckduckgo.py
  2. 22
    42
      tests/unit/engines/test_duckduckgo.py

+ 4
- 4
searx/engines/duckduckgo.py 查看文件

@@ -28,10 +28,10 @@ language_support = True
28 28
 url = 'https://duckduckgo.com/html?{query}&s={offset}'
29 29
 
30 30
 # specific xpath variables
31
-result_xpath = '//div[@class="results_links results_links_deep web-result"]'  # noqa
32
-url_xpath = './/a[@class="large"]/@href'
33
-title_xpath = './/a[@class="large"]'
34
-content_xpath = './/div[@class="snippet"]'
31
+result_xpath = '//div[@class="result results_links results_links_deep web-result "]'  # noqa
32
+url_xpath = './/a[@class="result__a"]/@href'
33
+title_xpath = './/a[@class="result__a"]'
34
+content_xpath = './/a[@class="result__snippet"]'
35 35
 
36 36
 
37 37
 # do search-request

+ 22
- 42
tests/unit/engines/test_duckduckgo.py 查看文件

@@ -32,55 +32,32 @@ class TestDuckduckgoEngine(SearxTestCase):
32 32
         self.assertEqual(duckduckgo.response(response), [])
33 33
 
34 34
         html = u"""
35
-        <div class="results_links results_links_deep web-result">
36
-            <div class="icon_fav" style="display: block;">
37
-                <a rel="nofollow" href="https://www.test.com/">
38
-                    <img width="16" height="16" alt=""
39
-                    src="/i/www.test.com.ico" style="visibility: visible;" name="i15" />
40
-                </a>
41
-            </div>
42
-            <div class="links_main links_deep"> <!-- This is the visible part -->
43
-                <a rel="nofollow" class="large" href="http://this.should.be.the.link/ű">
44
-                    This <b>is</b> <b>the</b> title
45
-                </a>
46
-                <div class="snippet"><b>This</b> should be the content.</div>
47
-                <div class="url">
48
-                    http://this.should.be.the.link/
35
+        <div class="result results_links results_links_deep web-result result--no-result">
36
+            <div class="links_main links_deep result__body">
37
+                <h2 class="result__title">
38
+                </h2>
39
+                <div class="no-results">No results</div>
40
+                <div class="result__extras">
49 41
                 </div>
50 42
             </div>
51 43
         </div>
52 44
         """
53 45
         response = mock.Mock(text=html)
54 46
         results = duckduckgo.response(response)
55
-        self.assertEqual(type(results), list)
56
-        self.assertEqual(len(results), 1)
57
-        self.assertEqual(results[0]['title'], 'This is the title')
58
-        self.assertEqual(results[0]['url'], u'http://this.should.be.the.link/ű')
59
-        self.assertEqual(results[0]['content'], 'This should be the content.')
47
+        self.assertEqual(duckduckgo.response(response), [])
60 48
 
61
-        html = """
62
-        <div class="results_links results_links_deep web-result">
63
-            <div class="icon_fav" style="display: block;">
64
-            </div>
65
-            <div class="links_main links_deep"> <!-- This is the visible part -->
66
-                <div class="snippet"><b>This</b> should be the content.</div>
67
-                <div class="url">
68
-                    http://this.should.be.the.link/
69
-                </div>
70
-            </div>
71
-        </div>
72
-        <div class="results_links results_links_deep web-result">
73
-            <div class="icon_fav" style="display: block;">
74
-                <img width="16" height="16" alt=""
75
-                src="/i/www.test.com.ico" style="visibility: visible;" name="i15" />
76
-            </div>
77
-            <div class="links_main links_deep"> <!-- This is the visible part -->
78
-                <a rel="nofollow" class="large" href="">
79
-                    This <b>is</b> <b>the</b> title
49
+        html = u"""
50
+        <div class="result results_links results_links_deep web-result ">
51
+            <div class="links_main links_deep result__body">
52
+                <h2 class="result__title">
53
+                    <a rel="nofollow" class="result__a" href="http://this.should.be.the.link/ű">
54
+                        This <b>is</b> <b>the</b> title
55
+                    </a>
56
+                </h2>
57
+                <a class="result__snippet" href="http://this.should.be.the.link/ű">
58
+                    <b>This</b> should be the content.
80 59
                 </a>
81
-                <div class="snippet"><b>This</b> should be the content.</div>
82
-                <div class="url">
83
-                    http://this.should.be.the.link/
60
+                <div class="result__extras">
84 61
                 </div>
85 62
             </div>
86 63
         </div>
@@ -88,4 +65,7 @@ class TestDuckduckgoEngine(SearxTestCase):
88 65
         response = mock.Mock(text=html)
89 66
         results = duckduckgo.response(response)
90 67
         self.assertEqual(type(results), list)
91
-        self.assertEqual(len(results), 0)
68
+        self.assertEqual(len(results), 1)
69
+        self.assertEqual(results[0]['title'], 'This is the title')
70
+        self.assertEqual(results[0]['url'], u'http://this.should.be.the.link/ű')
71
+        self.assertEqual(results[0]['content'], 'This should be the content.')