Browse Source

Merge pull request #609 from LuccoJ/betterwolfram

Improving Wolfram Alpha search hit content
Adam Tauber 8 years ago
parent
commit
8f48c518aa

+ 12
- 4
searx/engines/wolframalpha_api.py View File

18
 
18
 
19
 # xpath variables
19
 # xpath variables
20
 failure_xpath = '/queryresult[attribute::success="false"]'
20
 failure_xpath = '/queryresult[attribute::success="false"]'
21
-answer_xpath = '//pod[attribute::primary="true"]/subpod/plaintext'
22
 input_xpath = '//pod[starts-with(attribute::id, "Input")]/subpod/plaintext'
21
 input_xpath = '//pod[starts-with(attribute::id, "Input")]/subpod/plaintext'
23
 pods_xpath = '//pod'
22
 pods_xpath = '//pod'
24
 subpods_xpath = './subpod'
23
 subpods_xpath = './subpod'
24
+pod_primary_xpath = './@primary'
25
 pod_id_xpath = './@id'
25
 pod_id_xpath = './@id'
26
 pod_title_xpath = './@title'
26
 pod_title_xpath = './@title'
27
 plaintext_xpath = './plaintext'
27
 plaintext_xpath = './plaintext'
75
     try:
75
     try:
76
         infobox_title = search_results.xpath(input_xpath)[0].text
76
         infobox_title = search_results.xpath(input_xpath)[0].text
77
     except:
77
     except:
78
-        infobox_title = None
78
+        infobox_title = ""
79
 
79
 
80
     pods = search_results.xpath(pods_xpath)
80
     pods = search_results.xpath(pods_xpath)
81
     result_chunks = []
81
     result_chunks = []
82
+    result_content = ""
82
     for pod in pods:
83
     for pod in pods:
83
         pod_id = pod.xpath(pod_id_xpath)[0]
84
         pod_id = pod.xpath(pod_id_xpath)[0]
84
         pod_title = pod.xpath(pod_title_xpath)[0]
85
         pod_title = pod.xpath(pod_title_xpath)[0]
86
+        pod_is_result = pod.xpath(pod_primary_xpath)
85
 
87
 
86
         subpods = pod.xpath(subpods_xpath)
88
         subpods = pod.xpath(subpods_xpath)
87
         if not subpods:
89
         if not subpods:
94
 
96
 
95
             if content and pod_id not in image_pods:
97
             if content and pod_id not in image_pods:
96
 
98
 
99
+                if pod_is_result or not result_content:
100
+                    if pod_id != "Input":
101
+                        result_content = "%s: %s" % (pod_title, content)
102
+
97
                 # if no input pod was found, title is first plaintext pod
103
                 # if no input pod was found, title is first plaintext pod
98
                 if not infobox_title:
104
                 if not infobox_title:
99
                     infobox_title = content
105
                     infobox_title = content
109
     if not result_chunks:
115
     if not result_chunks:
110
         return []
116
         return []
111
 
117
 
118
+    title = "Wolfram|Alpha (%s)" % infobox_title
119
+
112
     # append infobox
120
     # append infobox
113
     results.append({'infobox': infobox_title,
121
     results.append({'infobox': infobox_title,
114
                     'attributes': result_chunks,
122
                     'attributes': result_chunks,
116
 
124
 
117
     # append link to site
125
     # append link to site
118
     results.append({'url': resp.request.headers['Referer'].decode('utf8'),
126
     results.append({'url': resp.request.headers['Referer'].decode('utf8'),
119
-                    'title': 'Wolfram|Alpha',
120
-                    'content': infobox_title})
127
+                    'title': title,
128
+                    'content': result_content})
121
 
129
 
122
     return results
130
     return results

+ 12
- 4
searx/engines/wolframalpha_noapi.py View File

8
 # @stable      no
8
 # @stable      no
9
 # @parse       url, infobox
9
 # @parse       url, infobox
10
 
10
 
11
+from cgi import escape
11
 from json import loads
12
 from json import loads
12
 from time import time
13
 from time import time
13
 from urllib import urlencode
14
 from urllib import urlencode
15
+from lxml.etree import XML
14
 
16
 
15
 from searx.poolrequests import get as http_get
17
 from searx.poolrequests import get as http_get
16
 
18
 
34
 referer_url = url + 'input/?{query}'
36
 referer_url = url + 'input/?{query}'
35
 
37
 
36
 token = {'value': '',
38
 token = {'value': '',
37
-         'last_updated': 0}
39
+         'last_updated': None}
38
 
40
 
39
 # pods to display as image in infobox
41
 # pods to display as image in infobox
40
 # these pods do return plaintext, but they look better and are more useful as images
42
 # these pods do return plaintext, but they look better and are more useful as images
80
 
82
 
81
     # TODO handle resp_json['queryresult']['assumptions']
83
     # TODO handle resp_json['queryresult']['assumptions']
82
     result_chunks = []
84
     result_chunks = []
83
-    infobox_title = None
85
+    infobox_title = ""
86
+    result_content = ""
84
     for pod in resp_json['queryresult']['pods']:
87
     for pod in resp_json['queryresult']['pods']:
85
         pod_id = pod.get('id', '')
88
         pod_id = pod.get('id', '')
86
         pod_title = pod.get('title', '')
89
         pod_title = pod.get('title', '')
90
+        pod_is_result = pod.get('primary', None)
87
 
91
 
88
         if 'subpods' not in pod:
92
         if 'subpods' not in pod:
89
             continue
93
             continue
97
                 if subpod['plaintext'] != '(requires interactivity)':
101
                 if subpod['plaintext'] != '(requires interactivity)':
98
                     result_chunks.append({'label': pod_title, 'value': subpod['plaintext']})
102
                     result_chunks.append({'label': pod_title, 'value': subpod['plaintext']})
99
 
103
 
104
+                if pod_is_result or not result_content:
105
+                    if pod_id != "Input":
106
+                        result_content = pod_title + ': ' + subpod['plaintext']
107
+
100
             elif 'img' in subpod:
108
             elif 'img' in subpod:
101
                 result_chunks.append({'label': pod_title, 'image': subpod['img']})
109
                 result_chunks.append({'label': pod_title, 'image': subpod['img']})
102
 
110
 
108
                     'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer'].decode('utf8')}]})
116
                     'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer'].decode('utf8')}]})
109
 
117
 
110
     results.append({'url': resp.request.headers['Referer'].decode('utf8'),
118
     results.append({'url': resp.request.headers['Referer'].decode('utf8'),
111
-                    'title': 'Wolfram|Alpha',
112
-                    'content': infobox_title})
119
+                    'title': 'Wolfram|Alpha (' + infobox_title + ')',
120
+                    'content': result_content})
113
 
121
 
114
     return results
122
     return results

+ 4
- 2
tests/unit/engines/test_wolframalpha_api.py View File

103
         self.assertEqual(referer_url, results[0]['urls'][0]['url'])
103
         self.assertEqual(referer_url, results[0]['urls'][0]['url'])
104
         self.assertEqual('Wolfram|Alpha', results[0]['urls'][0]['title'])
104
         self.assertEqual('Wolfram|Alpha', results[0]['urls'][0]['title'])
105
         self.assertEqual(referer_url, results[1]['url'])
105
         self.assertEqual(referer_url, results[1]['url'])
106
-        self.assertEqual('Wolfram|Alpha', results[1]['title'])
106
+        self.assertEqual('Wolfram|Alpha (input_plaintext)', results[1]['title'])
107
+        self.assertIn('result_plaintext', results[1]['content'])
107
 
108
 
108
         # test calc
109
         # test calc
109
         xml = """<?xml version='1.0' encoding='UTF-8'?>
110
         xml = """<?xml version='1.0' encoding='UTF-8'?>
161
         self.assertEqual(referer_url, results[0]['urls'][0]['url'])
162
         self.assertEqual(referer_url, results[0]['urls'][0]['url'])
162
         self.assertEqual('Wolfram|Alpha', results[0]['urls'][0]['title'])
163
         self.assertEqual('Wolfram|Alpha', results[0]['urls'][0]['title'])
163
         self.assertEqual(referer_url, results[1]['url'])
164
         self.assertEqual(referer_url, results[1]['url'])
164
-        self.assertEqual('Wolfram|Alpha', results[1]['title'])
165
+        self.assertEqual('Wolfram|Alpha (integral_plaintext)', results[1]['title'])
166
+        self.assertIn('integral_plaintext', results[1]['content'])

+ 4
- 2
tests/unit/engines/test_wolframalpha_noapi.py View File

140
         self.assertEqual(referer_url, results[0]['urls'][0]['url'])
140
         self.assertEqual(referer_url, results[0]['urls'][0]['url'])
141
         self.assertEqual('Wolfram|Alpha', results[0]['urls'][0]['title'])
141
         self.assertEqual('Wolfram|Alpha', results[0]['urls'][0]['title'])
142
         self.assertEqual(referer_url, results[1]['url'])
142
         self.assertEqual(referer_url, results[1]['url'])
143
-        self.assertEqual('Wolfram|Alpha', results[1]['title'])
143
+        self.assertEqual('Wolfram|Alpha (input_plaintext)', results[1]['title'])
144
+        self.assertIn('result_plaintext', results[1]['content'])
144
 
145
 
145
         # test calc
146
         # test calc
146
         json = r"""
147
         json = r"""
219
         self.assertEqual(referer_url, results[0]['urls'][0]['url'])
220
         self.assertEqual(referer_url, results[0]['urls'][0]['url'])
220
         self.assertEqual('Wolfram|Alpha', results[0]['urls'][0]['title'])
221
         self.assertEqual('Wolfram|Alpha', results[0]['urls'][0]['title'])
221
         self.assertEqual(referer_url, results[1]['url'])
222
         self.assertEqual(referer_url, results[1]['url'])
222
-        self.assertEqual('Wolfram|Alpha', results[1]['title'])
223
+        self.assertEqual('Wolfram|Alpha (integral_plaintext)', results[1]['title'])
224
+        self.assertIn('integral_plaintext', results[1]['content'])