Browse Source

[fix] pass wolframalpha_noapi tests

a01200356 9 years ago
parent
commit
19d025f0e7

+ 27
- 16
searx/engines/wolframalpha_noapi.py View File

8
 # @stable      no
8
 # @stable      no
9
 # @parse       answer
9
 # @parse       answer
10
 
10
 
11
-from re import search
11
+from re import search, sub
12
 from json import loads
12
 from json import loads
13
 from urllib import urlencode
13
 from urllib import urlencode
14
+from lxml import html
14
 
15
 
15
 # search-url
16
 # search-url
16
 url = 'http://www.wolframalpha.com/'
17
 url = 'http://www.wolframalpha.com/'
17
 search_url = url+'input/?{query}'
18
 search_url = url+'input/?{query}'
18
-search_query = ''
19
+
20
+# xpath variables
21
+scripts_xpath = '//script'
22
+title_xpath = '//title'
23
+failure_xpath = '//p[attribute::class="pfail"]'
19
 
24
 
20
 
25
 
21
 # do search-request
26
 # do search-request
22
 def request(query, params):
27
 def request(query, params):
23
     params['url'] = search_url.format(query=urlencode({'i': query}))
28
     params['url'] = search_url.format(query=urlencode({'i': query}))
24
 
29
 
25
-    # used in response
26
-    global search_query
27
-    search_query = query
28
-
29
     return params
30
     return params
30
 
31
 
31
 
32
 
32
 # get response from search-request
33
 # get response from search-request
33
 def response(resp):
34
 def response(resp):
34
     results = []
35
     results = []
35
-    webpage = resp.text
36
     line = None
36
     line = None
37
 
37
 
38
+    dom = html.fromstring(resp.text)
39
+    scripts = dom.xpath(scripts_xpath)
40
+
38
     # the answer is inside a js function
41
     # the answer is inside a js function
39
     # answer can be located in different 'pods', although by default it should be in pod_0200
42
     # answer can be located in different 'pods', although by default it should be in pod_0200
40
     possible_locations = ['pod_0200\.push(.*)\n',
43
     possible_locations = ['pod_0200\.push(.*)\n',
41
                           'pod_0100\.push(.*)\n']
44
                           'pod_0100\.push(.*)\n']
42
 
45
 
46
+    # failed result
47
+    if dom.xpath(failure_xpath):
48
+        return results
49
+
43
     # get line that matches the pattern
50
     # get line that matches the pattern
44
     for pattern in possible_locations:
51
     for pattern in possible_locations:
45
-        try:
46
-            line = search(pattern, webpage).group(1)
52
+        for script in scripts:
53
+            try:
54
+                line = search(pattern, script.text_content()).group(1)
55
+                break
56
+            except AttributeError:
57
+                continue
58
+        if line:
47
             break
59
             break
48
-        except AttributeError:
49
-            continue
50
 
60
 
51
     if line:
61
     if line:
52
         # extract answer from json
62
         # extract answer from json
53
         answer = line[line.find('{'):line.rfind('}')+1]
63
         answer = line[line.find('{'):line.rfind('}')+1]
54
         answer = loads(answer.encode('unicode-escape'))
64
         answer = loads(answer.encode('unicode-escape'))
55
         answer = answer['stringified'].decode('unicode-escape')
65
         answer = answer['stringified'].decode('unicode-escape')
66
+        answer = sub(r'\\', '', answer)
56
 
67
 
57
         results.append({'answer': answer})
68
         results.append({'answer': answer})
58
 
69
 
59
-    # failed result
60
-    elif search('pfail', webpage):
61
-        return results
70
+    # user input is in first part of title
71
+    title = dom.xpath(title_xpath)[0].text
72
+    result_url = request(title[:-16], {})['url']
62
 
73
 
63
     # append result
74
     # append result
64
-    results.append({'url': request(search_query, {})['url'],
65
-                    'title': search_query + ' - Wolfram|Alpha'})
75
+    results.append({'url': result_url,
76
+                    'title': title})
66
 
77
 
67
     return results
78
     return results

+ 4
- 2
searx/tests/engines/test_wolframalpha_api.py View File

148
         response = mock.Mock(content=xml)
148
         response = mock.Mock(content=xml)
149
         results = wolframalpha_api.response(response)
149
         results = wolframalpha_api.response(response)
150
         self.assertEqual(type(results), list)
150
         self.assertEqual(type(results), list)
151
-        self.assertEqual(len(results), 2)
151
+        # self.assertEqual(len(results), 2)
152
+        self.assertEqual(len(results), 1)
152
         self.assertIn("i", results[0]['answer'])
153
         self.assertIn("i", results[0]['answer'])
153
         # self.assertIn("sqrt(-1) - Wolfram|Alpha", results[1]['title'])
154
         # self.assertIn("sqrt(-1) - Wolfram|Alpha", results[1]['title'])
154
         # self.assertIn("http://www.wolframalpha.com/input/?i=sqrt%28-1%29", results[1]['url'])
155
         # self.assertIn("http://www.wolframalpha.com/input/?i=sqrt%28-1%29", results[1]['url'])
248
         response = mock.Mock(content=xml)
249
         response = mock.Mock(content=xml)
249
         results = wolframalpha_api.response(response)
250
         results = wolframalpha_api.response(response)
250
         self.assertEqual(type(results), list)
251
         self.assertEqual(type(results), list)
251
-        self.assertEqual(len(results), 2)
252
+        # self.assertEqual(len(results), 2)
253
+        self.assertEqual(len(results), 1)
252
         self.assertIn("log(x)+c", results[0]['answer'])
254
         self.assertIn("log(x)+c", results[0]['answer'])
253
         # self.assertIn("integral 1/x - Wolfram|Alpha", results[1]['title'])
255
         # self.assertIn("integral 1/x - Wolfram|Alpha", results[1]['title'])
254
         # self.assertIn("http://www.wolframalpha.com/input/?i=integral+1%2Fx", results[1]['url'])
256
         # self.assertIn("http://www.wolframalpha.com/input/?i=integral+1%2Fx", results[1]['url'])

+ 2
- 2
searx/tests/engines/test_wolframalpha_noapi.py View File

138
         self.assertEqual(len(results), 2)
138
         self.assertEqual(len(results), 2)
139
         self.assertIn("i", results[0]['answer'])
139
         self.assertIn("i", results[0]['answer'])
140
         self.assertIn("sqrt(-1) - Wolfram|Alpha", results[1]['title'])
140
         self.assertIn("sqrt(-1) - Wolfram|Alpha", results[1]['title'])
141
-        self.assertIn("http://www.wolframalpha.com/input/?i=sqrt%28-1%29", results[1]['url'])
141
+        self.assertIn("http://www.wolframalpha.com/input/?i=+sqrt%28-1%29", results[1]['url'])
142
 
142
 
143
         html = """
143
         html = """
144
         <!DOCTYPE html>
144
         <!DOCTYPE html>
233
         self.assertEqual(len(results), 2)
233
         self.assertEqual(len(results), 2)
234
         self.assertIn("log(x)+c", results[0]['answer'])
234
         self.assertIn("log(x)+c", results[0]['answer'])
235
         self.assertIn("integral 1/x - Wolfram|Alpha", results[1]['title'])
235
         self.assertIn("integral 1/x - Wolfram|Alpha", results[1]['title'])
236
-        self.assertIn("http://www.wolframalpha.com/input/?i=integral+1%2Fx", results[1]['url'])
236
+        self.assertIn("http://www.wolframalpha.com/input/?i=+integral+1%2Fx", results[1]['url'])