Cqoicebordel 10 anni fa
parent
commit
9f13af8d3c
3 file modificati con 157 aggiunte e 2 eliminazioni
  1. 2
    2
      searx/engines/yahoo.py
  2. 154
    0
      searx/tests/engines/test_yahoo.py
  3. 1
    0
      searx/tests/test_engines.py

+ 2
- 2
searx/engines/yahoo.py Vedi File

@@ -35,7 +35,7 @@ suggestion_xpath = '//div[@id="satat"]//a'
35 35
 def parse_url(url_string):
36 36
     endings = ['/RS', '/RK']
37 37
     endpositions = []
38
-    start = url_string.find('http', url_string.find('/RU=')+1)
38
+    start = url_string.find('http', url_string.find('/RU=') + 1)
39 39
 
40 40
     for ending in endings:
41 41
         endpos = url_string.rfind(ending)
@@ -91,7 +91,7 @@ def response(resp):
91 91
                         'content': content})
92 92
 
93 93
     # if no suggestion found, return results
94
-    if not suggestion_xpath:
94
+    if not dom.xpath(suggestion_xpath):
95 95
         return results
96 96
 
97 97
     # parse suggestion

+ 154
- 0
searx/tests/engines/test_yahoo.py Vedi File

@@ -0,0 +1,154 @@
1
+# -*- coding: utf-8 -*-
2
+from collections import defaultdict
3
+import mock
4
+from searx.engines import yahoo
5
+from searx.testing import SearxTestCase
6
+
7
+
8
+class TestYahooEngine(SearxTestCase):
9
+
10
+    def test_parse_url(self):
11
+        test_url = 'http://r.search.yahoo.com/_ylt=A0LEb9JUSKcAEGRXNyoA;_ylu=X3oDMTEzZm1qazYwBHNlYwNzcgRwb3MDMQRjb' +\
12
+                   '2xvA2Jm2dGlkA1NNRTcwM18x/RV=2/RE=1423106085/RO=10/RU=https%3a%2f%2fthis.is.the.url%2f/RK=0/RS=' +\
13
+                   'dtcJsfP4mEeBOjnVfUQ-'
14
+        url = yahoo.parse_url(test_url)
15
+        self.assertEqual('https://this.is.the.url/', url)
16
+
17
+        test_url = 'http://r.search.yahoo.com/_ylt=A0LElb9JUSKcAEGRXNyoA;_ylu=X3oDMTEzZm1qazYwBHNlYwNzcgRwb3MDMQRjb' +\
18
+                   '2xvA2Jm2dGlkA1NNRTcwM18x/RV=2/RE=1423106085/RO=10/RU=https%3a%2f%2fthis.is.the.url%2f/RS=' +\
19
+                   'dtcJsfP4mEeBOjnVfUQ-'
20
+        url = yahoo.parse_url(test_url)
21
+        self.assertEqual('https://this.is.the.url/', url)
22
+
23
+        test_url = 'https://this.is.the.url/'
24
+        url = yahoo.parse_url(test_url)
25
+        self.assertEqual('https://this.is.the.url/', url)
26
+
27
+    def test_request(self):
28
+        query = 'test_query'
29
+        dicto = defaultdict(dict)
30
+        dicto['pageno'] = 1
31
+        dicto['language'] = 'fr_FR'
32
+        params = yahoo.request(query, dicto)
33
+        self.assertIn('url', params)
34
+        self.assertIn(query, params['url'])
35
+        self.assertIn('search.yahoo.com', params['url'])
36
+        self.assertIn('fr', params['url'])
37
+        self.assertIn('cookies', params)
38
+        self.assertIn('sB', params['cookies'])
39
+        self.assertIn('fr', params['cookies']['sB'])
40
+
41
+        dicto['language'] = 'all'
42
+        params = yahoo.request(query, dicto)
43
+        self.assertIn('cookies', params)
44
+        self.assertIn('sB', params['cookies'])
45
+        self.assertIn('en', params['cookies']['sB'])
46
+        self.assertIn('en', params['url'])
47
+
48
+    def test_response(self):
49
+        self.assertRaises(AttributeError, yahoo.response, None)
50
+        self.assertRaises(AttributeError, yahoo.response, [])
51
+        self.assertRaises(AttributeError, yahoo.response, '')
52
+        self.assertRaises(AttributeError, yahoo.response, '[]')
53
+
54
+        response = mock.Mock(text='<html></html>')
55
+        self.assertEqual(yahoo.response(response), [])
56
+
57
+        html = """
58
+        <div class="res">
59
+            <div>
60
+                <h3>
61
+                <a id="link-1" class="yschttl spt" href="http://r.search.yahoo.com/_ylt=A0LEVzClb9JUSKcAEGRXNyoA;
62
+                    _ylu=X3oDMTEzZm1qazYwBHNlYwNzcgRwb3MDMQRjb2xvA2JmMQR2dGlkA1NNRTcwM18x/RV=2/RE=1423106085/RO=10
63
+                    /RU=https%3a%2f%2fthis.is.the.url%2f/RK=0/RS=dtcJsfP4mEeBOjnVfUQ-"target="_blank" data-bk="5063.1">
64
+                    <b>This</b> is the title
65
+                </a>
66
+                </h3>
67
+            </div>
68
+            <span class="url" dir="ltr">www.<b>test</b>.com</span>
69
+            <div class="abstr">
70
+                <b>This</b> is the content
71
+            </div>
72
+        </div>
73
+        <div id="satat"  data-bns="Yahoo" data-bk="124.1">
74
+            <h2>Also Try</h2>
75
+            <table>
76
+                <tbody>
77
+                    <tr>
78
+                        <td>
79
+                            <a id="srpnat0" class="" href="https://search.yahoo.com/search=rs-bottom" >
80
+                                <span>
81
+                                    <b></b>This is <b>the suggestion</b>
82
+                                </span>
83
+                            </a>
84
+                        </td>
85
+                    </tr>
86
+                </tbody>
87
+            </table>
88
+        </div>
89
+        """
90
+        response = mock.Mock(text=html)
91
+        results = yahoo.response(response)
92
+        self.assertEqual(type(results), list)
93
+        self.assertEqual(len(results), 2)
94
+        self.assertEqual(results[0]['title'], 'This is the title')
95
+        self.assertEqual(results[0]['url'], 'https://this.is.the.url/')
96
+        self.assertEqual(results[0]['content'], 'This is the content')
97
+        self.assertEqual(results[1]['suggestion'], 'This is the suggestion')
98
+
99
+        html = """
100
+        <div class="res">
101
+            <div>
102
+                <h3>
103
+                <a id="link-1" class="yschttl spt" href="http://r.search.yahoo.com/_ylt=A0LEVzClb9JUSKcAEGRXNyoA;
104
+                    _ylu=X3oDMTEzZm1qazYwBHNlYwNzcgRwb3MDMQRjb2xvA2JmMQR2dGlkA1NNRTcwM18x/RV=2/RE=1423106085/RO=10
105
+                    /RU=https%3a%2f%2fthis.is.the.url%2f/RK=0/RS=dtcJsfP4mEeBOjnVfUQ-"target="_blank" data-bk="5063.1">
106
+                    <b>This</b> is the title
107
+                </a>
108
+                </h3>
109
+            </div>
110
+            <span class="url" dir="ltr">www.<b>test</b>.com</span>
111
+            <div class="abstr">
112
+                <b>This</b> is the content
113
+            </div>
114
+        </div>
115
+        <div class="res">
116
+            <div>
117
+                <h3>
118
+                <a id="link-1" class="yschttl spt">
119
+                    <b>This</b> is the title
120
+                </a>
121
+                </h3>
122
+            </div>
123
+            <span class="url" dir="ltr">www.<b>test</b>.com</span>
124
+            <div class="abstr">
125
+                <b>This</b> is the content
126
+            </div>
127
+        </div>
128
+        <div class="res">
129
+            <div>
130
+                <h3>
131
+                </h3>
132
+            </div>
133
+            <span class="url" dir="ltr">www.<b>test</b>.com</span>
134
+            <div class="abstr">
135
+                <b>This</b> is the content
136
+            </div>
137
+        </div>
138
+        """
139
+        response = mock.Mock(text=html)
140
+        results = yahoo.response(response)
141
+        self.assertEqual(type(results), list)
142
+        self.assertEqual(len(results), 1)
143
+        self.assertEqual(results[0]['title'], 'This is the title')
144
+        self.assertEqual(results[0]['url'], 'https://this.is.the.url/')
145
+        self.assertEqual(results[0]['content'], 'This is the content')
146
+
147
+        html = """
148
+        <li class="b_algo" u="0|5109|4755453613245655|UAGjXgIrPH5yh-o5oNHRx_3Zta87f_QO">
149
+        </li>
150
+        """
151
+        response = mock.Mock(text=html)
152
+        results = yahoo.response(response)
153
+        self.assertEqual(type(results), list)
154
+        self.assertEqual(len(results), 0)

+ 1
- 0
searx/tests/test_engines.py Vedi File

@@ -27,4 +27,5 @@ from searx.tests.engines.test_twitter import *  # noqa
27 27
 from searx.tests.engines.test_vimeo import *  # noqa
28 28
 from searx.tests.engines.test_www500px import *  # noqa
29 29
 from searx.tests.engines.test_youtube import *  # noqa
30
+from searx.tests.engines.test_yahoo import *  # noqa
30 31
 from searx.tests.engines.test_yahoo_news import *  # noqa