浏览代码

Merge pull request #609 from LuccoJ/betterwolfram

Improving Wolfram Alpha search hit content
Adam Tauber 8 年前
父节点
当前提交
8f48c518aa

+ 12
- 4
searx/engines/wolframalpha_api.py 查看文件

@@ -18,10 +18,10 @@ api_key = ''  # defined in settings.yml
18 18
 
19 19
 # xpath variables
20 20
 failure_xpath = '/queryresult[attribute::success="false"]'
21
-answer_xpath = '//pod[attribute::primary="true"]/subpod/plaintext'
22 21
 input_xpath = '//pod[starts-with(attribute::id, "Input")]/subpod/plaintext'
23 22
 pods_xpath = '//pod'
24 23
 subpods_xpath = './subpod'
24
+pod_primary_xpath = './@primary'
25 25
 pod_id_xpath = './@id'
26 26
 pod_title_xpath = './@title'
27 27
 plaintext_xpath = './plaintext'
@@ -75,13 +75,15 @@ def response(resp):
75 75
     try:
76 76
         infobox_title = search_results.xpath(input_xpath)[0].text
77 77
     except:
78
-        infobox_title = None
78
+        infobox_title = ""
79 79
 
80 80
     pods = search_results.xpath(pods_xpath)
81 81
     result_chunks = []
82
+    result_content = ""
82 83
     for pod in pods:
83 84
         pod_id = pod.xpath(pod_id_xpath)[0]
84 85
         pod_title = pod.xpath(pod_title_xpath)[0]
86
+        pod_is_result = pod.xpath(pod_primary_xpath)
85 87
 
86 88
         subpods = pod.xpath(subpods_xpath)
87 89
         if not subpods:
@@ -94,6 +96,10 @@ def response(resp):
94 96
 
95 97
             if content and pod_id not in image_pods:
96 98
 
99
+                if pod_is_result or not result_content:
100
+                    if pod_id != "Input":
101
+                        result_content = "%s: %s" % (pod_title, content)
102
+
97 103
                 # if no input pod was found, title is first plaintext pod
98 104
                 if not infobox_title:
99 105
                     infobox_title = content
@@ -109,6 +115,8 @@ def response(resp):
109 115
     if not result_chunks:
110 116
         return []
111 117
 
118
+    title = "Wolfram|Alpha (%s)" % infobox_title
119
+
112 120
     # append infobox
113 121
     results.append({'infobox': infobox_title,
114 122
                     'attributes': result_chunks,
@@ -116,7 +124,7 @@ def response(resp):
116 124
 
117 125
     # append link to site
118 126
     results.append({'url': resp.request.headers['Referer'].decode('utf8'),
119
-                    'title': 'Wolfram|Alpha',
120
-                    'content': infobox_title})
127
+                    'title': title,
128
+                    'content': result_content})
121 129
 
122 130
     return results

+ 12
- 4
searx/engines/wolframalpha_noapi.py 查看文件

@@ -8,9 +8,11 @@
8 8
 # @stable      no
9 9
 # @parse       url, infobox
10 10
 
11
+from cgi import escape
11 12
 from json import loads
12 13
 from time import time
13 14
 from urllib import urlencode
15
+from lxml.etree import XML
14 16
 
15 17
 from searx.poolrequests import get as http_get
16 18
 
@@ -34,7 +36,7 @@ search_url = url + 'input/json.jsp'\
34 36
 referer_url = url + 'input/?{query}'
35 37
 
36 38
 token = {'value': '',
37
-         'last_updated': 0}
39
+         'last_updated': None}
38 40
 
39 41
 # pods to display as image in infobox
40 42
 # these pods do return plaintext, but they look better and are more useful as images
@@ -80,10 +82,12 @@ def response(resp):
80 82
 
81 83
     # TODO handle resp_json['queryresult']['assumptions']
82 84
     result_chunks = []
83
-    infobox_title = None
85
+    infobox_title = ""
86
+    result_content = ""
84 87
     for pod in resp_json['queryresult']['pods']:
85 88
         pod_id = pod.get('id', '')
86 89
         pod_title = pod.get('title', '')
90
+        pod_is_result = pod.get('primary', None)
87 91
 
88 92
         if 'subpods' not in pod:
89 93
             continue
@@ -97,6 +101,10 @@ def response(resp):
97 101
                 if subpod['plaintext'] != '(requires interactivity)':
98 102
                     result_chunks.append({'label': pod_title, 'value': subpod['plaintext']})
99 103
 
104
+                if pod_is_result or not result_content:
105
+                    if pod_id != "Input":
106
+                        result_content = pod_title + ': ' + subpod['plaintext']
107
+
100 108
             elif 'img' in subpod:
101 109
                 result_chunks.append({'label': pod_title, 'image': subpod['img']})
102 110
 
@@ -108,7 +116,7 @@ def response(resp):
108 116
                     'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer'].decode('utf8')}]})
109 117
 
110 118
     results.append({'url': resp.request.headers['Referer'].decode('utf8'),
111
-                    'title': 'Wolfram|Alpha',
112
-                    'content': infobox_title})
119
+                    'title': 'Wolfram|Alpha (' + infobox_title + ')',
120
+                    'content': result_content})
113 121
 
114 122
     return results

+ 4
- 2
tests/unit/engines/test_wolframalpha_api.py 查看文件

@@ -103,7 +103,8 @@ class TestWolframAlphaAPIEngine(SearxTestCase):
103 103
         self.assertEqual(referer_url, results[0]['urls'][0]['url'])
104 104
         self.assertEqual('Wolfram|Alpha', results[0]['urls'][0]['title'])
105 105
         self.assertEqual(referer_url, results[1]['url'])
106
-        self.assertEqual('Wolfram|Alpha', results[1]['title'])
106
+        self.assertEqual('Wolfram|Alpha (input_plaintext)', results[1]['title'])
107
+        self.assertIn('result_plaintext', results[1]['content'])
107 108
 
108 109
         # test calc
109 110
         xml = """<?xml version='1.0' encoding='UTF-8'?>
@@ -161,4 +162,5 @@ class TestWolframAlphaAPIEngine(SearxTestCase):
161 162
         self.assertEqual(referer_url, results[0]['urls'][0]['url'])
162 163
         self.assertEqual('Wolfram|Alpha', results[0]['urls'][0]['title'])
163 164
         self.assertEqual(referer_url, results[1]['url'])
164
-        self.assertEqual('Wolfram|Alpha', results[1]['title'])
165
+        self.assertEqual('Wolfram|Alpha (integral_plaintext)', results[1]['title'])
166
+        self.assertIn('integral_plaintext', results[1]['content'])

+ 4
- 2
tests/unit/engines/test_wolframalpha_noapi.py 查看文件

@@ -140,7 +140,8 @@ class TestWolframAlphaNoAPIEngine(SearxTestCase):
140 140
         self.assertEqual(referer_url, results[0]['urls'][0]['url'])
141 141
         self.assertEqual('Wolfram|Alpha', results[0]['urls'][0]['title'])
142 142
         self.assertEqual(referer_url, results[1]['url'])
143
-        self.assertEqual('Wolfram|Alpha', results[1]['title'])
143
+        self.assertEqual('Wolfram|Alpha (input_plaintext)', results[1]['title'])
144
+        self.assertIn('result_plaintext', results[1]['content'])
144 145
 
145 146
         # test calc
146 147
         json = r"""
@@ -219,4 +220,5 @@ class TestWolframAlphaNoAPIEngine(SearxTestCase):
219 220
         self.assertEqual(referer_url, results[0]['urls'][0]['url'])
220 221
         self.assertEqual('Wolfram|Alpha', results[0]['urls'][0]['title'])
221 222
         self.assertEqual(referer_url, results[1]['url'])
222
-        self.assertEqual('Wolfram|Alpha', results[1]['title'])
223
+        self.assertEqual('Wolfram|Alpha (integral_plaintext)', results[1]['title'])
224
+        self.assertIn('integral_plaintext', results[1]['content'])