Browse Source

Improving Wolfram Alpha search hit content

Make Wolfram|Alpha (WA) search hits contain:
- the (parsed) input inside the "title" instead of just "Wolfram|Alpha", to better match other hit titles and to confirm to the user that the input was parsed correctly
- as the "content", the first output field that contains any text (skipping fields that are only pictures; this is usually the most meaningful "result" field) instead of the raw input, which additionally makes it possible to obtain WA computations from JSON API calls
Lorenzo J. Lucchini 8 years ago
parent
commit
a8907224a1
1 changed files with 9 additions and 7 deletions
  1. 9
    7
      searx/engines/wolframalpha_api.py

+ 9
- 7
searx/engines/wolframalpha_api.py View File

18
 
18
 
19
 # xpath variables
19
 # xpath variables
20
 failure_xpath = '/queryresult[attribute::success="false"]'
20
 failure_xpath = '/queryresult[attribute::success="false"]'
21
-answer_xpath = '//pod[attribute::primary="true"]/subpod/plaintext'
22
 input_xpath = '//pod[starts-with(attribute::id, "Input")]/subpod/plaintext'
21
 input_xpath = '//pod[starts-with(attribute::id, "Input")]/subpod/plaintext'
23
 pods_xpath = '//pod'
22
 pods_xpath = '//pod'
24
 subpods_xpath = './subpod'
23
 subpods_xpath = './subpod'
76
     try:
75
     try:
77
         infobox_title = search_results.xpath(input_xpath)[0].text
76
         infobox_title = search_results.xpath(input_xpath)[0].text
78
     except:
77
     except:
79
-        infobox_title = None
78
+        infobox_title = ""
80
 
79
 
81
     pods = search_results.xpath(pods_xpath)
80
     pods = search_results.xpath(pods_xpath)
82
-    result = ""
83
     result_chunks = []
81
     result_chunks = []
82
+    result_content = ""
84
     for pod in pods:
83
     for pod in pods:
85
         pod_id = pod.xpath(pod_id_xpath)[0]
84
         pod_id = pod.xpath(pod_id_xpath)[0]
86
         pod_title = pod.xpath(pod_title_xpath)[0]
85
         pod_title = pod.xpath(pod_title_xpath)[0]
97
 
96
 
98
             if content and pod_id not in image_pods:
97
             if content and pod_id not in image_pods:
99
 
98
 
100
-                if pod_is_result:
101
-                    result = content
99
+                if pod_is_result or not result_content:
100
+                    if pod_id != "Input":
101
+                        result_content = "%s: %s" % (pod_title, content)
102
 
102
 
103
                 # if no input pod was found, title is first plaintext pod
103
                 # if no input pod was found, title is first plaintext pod
104
                 if not infobox_title:
104
                 if not infobox_title:
115
     if not result_chunks:
115
     if not result_chunks:
116
         return []
116
         return []
117
 
117
 
118
+    title = "Wolfram|Alpha (%s)" % infobox_title
119
+
118
     # append infobox
120
     # append infobox
119
     results.append({'infobox': infobox_title,
121
     results.append({'infobox': infobox_title,
120
                     'attributes': result_chunks,
122
                     'attributes': result_chunks,
122
 
124
 
123
     # append link to site
125
     # append link to site
124
     results.append({'url': resp.request.headers['Referer'].decode('utf8'),
126
     results.append({'url': resp.request.headers['Referer'].decode('utf8'),
125
-                    'title': infobox_title + ' - Wolfram|Alpha',
126
-                    'content': result})
127
+                    'title': title,
128
+                    'content': result_content})
127
 
129
 
128
     return results
130
     return results