Преглед изворног кода

Merge pull request #249 from dalf/master

[fix] update yahoo engine according to the web site changes
Adam Tauber пре 10 година
родитељ
комит
7f7f10bb6f
2 измењена фајла са 65 додатих и 67 уклоњених линија
  1. 6
    5
      searx/engines/yahoo.py
  2. 59
    62
      searx/tests/engines/test_yahoo.py

+ 6
- 5
searx/engines/yahoo.py Прегледај датотеку

@@ -24,11 +24,11 @@ base_url = 'https://search.yahoo.com/'
24 24
 search_url = 'search?{query}&b={offset}&fl=1&vl=lang_{lang}'
25 25
 
26 26
 # specific xpath variables
27
-results_xpath = '//div[@class="res"]'
27
+results_xpath = "//div[contains(concat(' ', normalize-space(@class), ' '), ' Sr ')]"
28 28
 url_xpath = './/h3/a/@href'
29 29
 title_xpath = './/h3/a'
30
-content_xpath = './/div[@class="abstr"]'
31
-suggestion_xpath = '//div[@id="satat"]//a'
30
+content_xpath = './/div[@class="compText aAbs"]'
31
+suggestion_xpath = "//div[contains(concat(' ', normalize-space(@class), ' '), ' AlsoTry ')]//a"
32 32
 
33 33
 
34 34
 # remove yahoo-specific tracking-url
@@ -91,11 +91,12 @@ def response(resp):
91 91
                         'content': content})
92 92
 
93 93
     # if no suggestion found, return results
94
-    if not dom.xpath(suggestion_xpath):
94
+    suggestions = dom.xpath(suggestion_xpath)
95
+    if not suggestions:
95 96
         return results
96 97
 
97 98
     # parse suggestion
98
-    for suggestion in dom.xpath(suggestion_xpath):
99
+    for suggestion in suggestions:
99 100
         # append suggestion
100 101
         results.append({'suggestion': extract_text(suggestion)})
101 102
 

+ 59
- 62
searx/tests/engines/test_yahoo.py Прегледај датотеку

@@ -55,86 +55,83 @@ class TestYahooEngine(SearxTestCase):
55 55
         self.assertEqual(yahoo.response(response), [])
56 56
 
57 57
         html = """
58
-        <div class="res">
59
-            <div>
60
-                <h3>
61
-                <a id="link-1" class="yschttl spt" href="http://r.search.yahoo.com/_ylt=A0LEVzClb9JUSKcAEGRXNyoA;
62
-                    _ylu=X3oDMTEzZm1qazYwBHNlYwNzcgRwb3MDMQRjb2xvA2JmMQR2dGlkA1NNRTcwM18x/RV=2/RE=1423106085/RO=10
63
-                    /RU=https%3a%2f%2fthis.is.the.url%2f/RK=0/RS=dtcJsfP4mEeBOjnVfUQ-"target="_blank" data-bk="5063.1">
64
-                    <b>This</b> is the title
65
-                </a>
58
+<ol class="reg mb-15 searchCenterMiddle">
59
+    <li class="first">
60
+        <div class="dd algo fst Sr">
61
+            <div class="compTitle">
62
+                <h3 class="title"><a class=" td-u" href="http://r.search.yahoo.com/_ylt=A0LEb9JUSKcAEGRXNyoA;
63
+                     _ylu=X3oDMTEzZm1qazYwBHNlYwNzcgRwb3MDMQRjb2xvA2Jm2dGlkA1NNRTcwM18x/RV=2/RE=1423106085/RO=10
64
+                     /RU=https%3a%2f%2fthis.is.the.url%2f/RK=0/RS=dtcJsfP4mEeBOjnVfUQ-"
65
+                     target="_blank" data-bid="54e712e13671c">
66
+                     <b><b>This is the title</b></b></a>
66 67
                 </h3>
67 68
             </div>
68
-            <span class="url" dir="ltr">www.<b>test</b>.com</span>
69
-            <div class="abstr">
70
-                <b>This</b> is the content
69
+            <div class="compText aAbs">
70
+                <p class="lh-18"><b><b>This is the </b>content</b>
71
+                </p>
71 72
             </div>
72 73
         </div>
73
-        <div id="satat"  data-bns="Yahoo" data-bk="124.1">
74
-            <h2>Also Try</h2>
75
-            <table>
76
-                <tbody>
77
-                    <tr>
78
-                        <td>
79
-                            <a id="srpnat0" class="" href="https://search.yahoo.com/search=rs-bottom" >
80
-                                <span>
81
-                                    <b></b>This is <b>the suggestion</b>
82
-                                </span>
83
-                            </a>
84
-                        </td>
85
-                    </tr>
86
-                </tbody>
87
-            </table>
74
+    </li>
75
+    <li>
76
+        <div class="dd algo lst Sr">
77
+            <div class="compTitle">
78
+                <h3 class="title"><a class=" td-u" href="http://r.search.yahoo.com/_ylt=AwrBT7zgEudUW.wAe2ZXNyoA;
79
+                     _ylu=X3oDMTBybGY3bmpvBGNvbG8DYmYxBHBvcwMyBHZ0aWQDBHNlYwNzcg--/RV=2\/RE=1424458593/RO=10
80
+                     /RU=https%3a%2f%2fthis.is.the.second.url%2f/RK=0/RS=jIctjj_cBH1Efj88GCgHKp3__Qk-"
81
+                     target="_blank" data-bid="54e712e136926">
82
+                     This is the second <b><b>title</b></b></a>
83
+                </h3>
84
+            </div>
85
+            <div class="compText aAbs">
86
+                <p class="lh-18">This is the second content</p>
87
+            </div>
88 88
         </div>
89
+    </li>
90
+</ol>
91
+<div class="dd assist fst lst AlsoTry" data-bid="54e712e138d04">
92
+    <div class="compTitle mb-4 h-17">
93
+        <h3 class="title">Also Try</h3> </div>
94
+    <table class="compTable m-0 ac-1st td-u fz-ms">
95
+        <tbody>
96
+            <tr>
97
+                <td class="w-50p pr-28"><a href="https://search.yahoo.com/"><B>This is the </B>suggestion<B></B></a>
98
+                </td>
99
+            </tr>
100
+    </table>
101
+</div>
89 102
         """
90 103
         response = mock.Mock(text=html)
91 104
         results = yahoo.response(response)
105
+        print results
92 106
         self.assertEqual(type(results), list)
93
-        self.assertEqual(len(results), 2)
107
+        self.assertEqual(len(results), 3)
94 108
         self.assertEqual(results[0]['title'], 'This is the title')
95 109
         self.assertEqual(results[0]['url'], 'https://this.is.the.url/')
96 110
         self.assertEqual(results[0]['content'], 'This is the content')
97
-        self.assertEqual(results[1]['suggestion'], 'This is the suggestion')
111
+        self.assertEqual(results[1]['title'], 'This is the second title')
112
+        self.assertEqual(results[1]['url'], 'https://this.is.the.second.url/')
113
+        self.assertEqual(results[1]['content'], 'This is the second content')
114
+        self.assertEqual(results[2]['suggestion'], 'This is the suggestion')
98 115
 
99 116
         html = """
100
-        <div class="res">
101
-            <div>
102
-                <h3>
103
-                <a id="link-1" class="yschttl spt" href="http://r.search.yahoo.com/_ylt=A0LEVzClb9JUSKcAEGRXNyoA;
104
-                    _ylu=X3oDMTEzZm1qazYwBHNlYwNzcgRwb3MDMQRjb2xvA2JmMQR2dGlkA1NNRTcwM18x/RV=2/RE=1423106085/RO=10
105
-                    /RU=https%3a%2f%2fthis.is.the.url%2f/RK=0/RS=dtcJsfP4mEeBOjnVfUQ-"target="_blank" data-bk="5063.1">
106
-                    <b>This</b> is the title
107
-                </a>
108
-                </h3>
109
-            </div>
110
-            <span class="url" dir="ltr">www.<b>test</b>.com</span>
111
-            <div class="abstr">
112
-                <b>This</b> is the content
113
-            </div>
114
-        </div>
115
-        <div class="res">
116
-            <div>
117
-                <h3>
118
-                <a id="link-1" class="yschttl spt">
119
-                    <b>This</b> is the title
120
-                </a>
121
-                </h3>
122
-            </div>
123
-            <span class="url" dir="ltr">www.<b>test</b>.com</span>
124
-            <div class="abstr">
125
-                <b>This</b> is the content
126
-            </div>
127
-        </div>
128
-        <div class="res">
129
-            <div>
130
-                <h3>
117
+<ol class="reg mb-15 searchCenterMiddle">
118
+    <li class="first">
119
+        <div class="dd algo fst Sr">
120
+            <div class="compTitle">
121
+                <h3 class="title"><a class=" td-u" href="http://r.search.yahoo.com/_ylt=A0LEb9JUSKcAEGRXNyoA;
122
+                     _ylu=X3oDMTEzZm1qazYwBHNlYwNzcgRwb3MDMQRjb2xvA2Jm2dGlkA1NNRTcwM18x/RV=2/RE=1423106085/RO=10
123
+                     /RU=https%3a%2f%2fthis.is.the.url%2f/RK=0/RS=dtcJsfP4mEeBOjnVfUQ-"
124
+                     target="_blank" data-bid="54e712e13671c">
125
+                  <b><b>This is the title</b></b></a>
131 126
                 </h3>
132 127
             </div>
133
-            <span class="url" dir="ltr">www.<b>test</b>.com</span>
134
-            <div class="abstr">
135
-                <b>This</b> is the content
128
+            <div class="compText aAbs">
129
+                <p class="lh-18"><b><b>This is the </b>content</b>
130
+                </p>
136 131
             </div>
137 132
         </div>
133
+    </li>
134
+</ol>
138 135
         """
139 136
         response = mock.Mock(text=html)
140 137
         results = yahoo.response(response)