|  | @@ -1,23 +1,23 @@
 | 
	
		
			
			| 1 |  | -# WolframAlpha (Maths)
 | 
	
		
			
			|  | 1 | +# Wolfram|Alpha (Science)
 | 
	
		
			
			| 2 | 2 |  #
 | 
	
		
			
			| 3 |  | -# @website     http://www.wolframalpha.com/
 | 
	
		
			
			| 4 |  | -# @provide-api yes (http://api.wolframalpha.com/v2/)
 | 
	
		
			
			|  | 3 | +# @website     https://www.wolframalpha.com/
 | 
	
		
			
			|  | 4 | +# @provide-api yes (https://api.wolframalpha.com/v2/)
 | 
	
		
			
			| 5 | 5 |  #
 | 
	
		
			
			| 6 | 6 |  # @using-api   no
 | 
	
		
			
			| 7 |  | -# @results     HTML
 | 
	
		
			
			|  | 7 | +# @results     JSON
 | 
	
		
			
			| 8 | 8 |  # @stable      no
 | 
	
		
			
			| 9 |  | -# @parse       answer
 | 
	
		
			
			|  | 9 | +# @parse       url, infobox
 | 
	
		
			
			| 10 | 10 |  
 | 
	
		
			
			| 11 | 11 |  from cgi import escape
 | 
	
		
			
			| 12 | 12 |  from json import loads
 | 
	
		
			
			| 13 | 13 |  from time import time
 | 
	
		
			
			| 14 | 14 |  from urllib import urlencode
 | 
	
		
			
			|  | 15 | +from lxml.etree import XML
 | 
	
		
			
			| 15 | 16 |  
 | 
	
		
			
			| 16 | 17 |  from searx.poolrequests import get as http_get
 | 
	
		
			
			| 17 | 18 |  
 | 
	
		
			
			| 18 | 19 |  # search-url
 | 
	
		
			
			| 19 | 20 |  url = 'https://www.wolframalpha.com/'
 | 
	
		
			
			| 20 |  | -search_url = url + 'input/?{query}'
 | 
	
		
			
			| 21 | 21 |  
 | 
	
		
			
			| 22 | 22 |  search_url = url + 'input/json.jsp'\
 | 
	
		
			
			| 23 | 23 |      '?async=true'\
 | 
	
	
		
			
			|  | @@ -33,13 +33,25 @@ search_url = url + 'input/json.jsp'\
 | 
	
		
			
			| 33 | 33 |      '&sponsorcategories=true'\
 | 
	
		
			
			| 34 | 34 |      '&statemethod=deploybutton'
 | 
	
		
			
			| 35 | 35 |  
 | 
	
		
			
			| 36 |  | -# xpath variables
 | 
	
		
			
			| 37 |  | -scripts_xpath = '//script'
 | 
	
		
			
			| 38 |  | -title_xpath = '//title'
 | 
	
		
			
			| 39 |  | -failure_xpath = '//p[attribute::class="pfail"]'
 | 
	
		
			
			|  | 36 | +referer_url = url + 'input/?{query}'
 | 
	
		
			
			|  | 37 | +
 | 
	
		
			
			| 40 | 38 |  token = {'value': '',
 | 
	
		
			
			| 41 | 39 |           'last_updated': None}
 | 
	
		
			
			| 42 | 40 |  
 | 
	
		
			
			|  | 41 | +# xpath variables
 | 
	
		
			
			|  | 42 | +success_xpath = '/pod[attribute::error="false"]'
 | 
	
		
			
			|  | 43 | +plaintext_xpath = './plaintext'
 | 
	
		
			
			|  | 44 | +title_xpath = './@title'
 | 
	
		
			
			|  | 45 | +image_xpath = './img'
 | 
	
		
			
			|  | 46 | +img_src_xpath = './img/@src'
 | 
	
		
			
			|  | 47 | +img_alt_xpath = './img/@alt'
 | 
	
		
			
			|  | 48 | +
 | 
	
		
			
			|  | 49 | +# pods to display as image in infobox
 | 
	
		
			
			|  | 50 | +# these pods do return plaintext, but they look better and are more useful as images
 | 
	
		
			
			|  | 51 | +image_pods = {'Visual representation',
 | 
	
		
			
			|  | 52 | +              'Manipulatives illustration',
 | 
	
		
			
			|  | 53 | +              'Symbol'}
 | 
	
		
			
			|  | 54 | +
 | 
	
		
			
			| 43 | 55 |  
 | 
	
		
			
			| 44 | 56 |  # it seems Wolfram|Alpha resets its token every hour
 | 
	
		
			
			| 45 | 57 |  def obtain_token():
 | 
	
	
		
			
			|  | @@ -62,13 +74,42 @@ def request(query, params):
 | 
	
		
			
			| 62 | 74 |      if time() - token['last_updated'] > 3600:
 | 
	
		
			
			| 63 | 75 |          obtain_token()
 | 
	
		
			
			| 64 | 76 |      params['url'] = search_url.format(query=urlencode({'input': query}), token=token['value'])
 | 
	
		
			
			| 65 |  | -    params['headers']['Referer'] = 'https://www.wolframalpha.com/input/?i=' + query
 | 
	
		
			
			|  | 77 | +    params['headers']['Referer'] = referer_url.format(query=urlencode({'i': query}))
 | 
	
		
			
			| 66 | 78 |  
 | 
	
		
			
			| 67 | 79 |      return params
 | 
	
		
			
			| 68 | 80 |  
 | 
	
		
			
			| 69 | 81 |  
 | 
	
		
			
			|  | 82 | +# get additional pod
 | 
	
		
			
			|  | 83 | +# NOTE: this makes an additional request to the server, so the response will take longer and might reach the timeout
 | 
	
		
			
			|  | 84 | +def get_async_pod(url):
 | 
	
		
			
			|  | 85 | +    pod = {'subpods': []}
 | 
	
		
			
			|  | 86 | +
 | 
	
		
			
			|  | 87 | +    try:
 | 
	
		
			
			|  | 88 | +        resp = http_get(url, timeout=2.0)
 | 
	
		
			
			|  | 89 | +
 | 
	
		
			
			|  | 90 | +        resp_pod = XML(resp.content)
 | 
	
		
			
			|  | 91 | +        if resp_pod.xpath(success_xpath):
 | 
	
		
			
			|  | 92 | +
 | 
	
		
			
			|  | 93 | +            for subpod in resp_pod:
 | 
	
		
			
			|  | 94 | +                plaintext = subpod.xpath(plaintext_xpath)[0].text
 | 
	
		
			
			|  | 95 | +                if plaintext:
 | 
	
		
			
			|  | 96 | +                    pod['subpods'].append({'title': subpod.xpath(title_xpath)[0],
 | 
	
		
			
			|  | 97 | +                                           'plaintext': plaintext})
 | 
	
		
			
			|  | 98 | +                elif subpod.xpath(image_xpath):
 | 
	
		
			
			|  | 99 | +                    pod['subpods'].append({'title': subpod.xpath(title_xpath)[0],
 | 
	
		
			
			|  | 100 | +                                           'plaintext': '',
 | 
	
		
			
			|  | 101 | +                                           'img': {'src': subpod.xpath(img_src_xpath)[0],
 | 
	
		
			
			|  | 102 | +                                                   'alt': subpod.xpath(img_alt_xpath)[0]}})
 | 
	
		
			
			|  | 103 | +    except:
 | 
	
		
			
			|  | 104 | +        pass
 | 
	
		
			
			|  | 105 | +
 | 
	
		
			
			|  | 106 | +    return pod
 | 
	
		
			
			|  | 107 | +
 | 
	
		
			
			|  | 108 | +
 | 
	
		
			
			| 70 | 109 |  # get response from search-request
 | 
	
		
			
			| 71 | 110 |  def response(resp):
 | 
	
		
			
			|  | 111 | +    results = []
 | 
	
		
			
			|  | 112 | +
 | 
	
		
			
			| 72 | 113 |      resp_json = loads(resp.text)
 | 
	
		
			
			| 73 | 114 |  
 | 
	
		
			
			| 74 | 115 |      if not resp_json['queryresult']['success']:
 | 
	
	
		
			
			|  | @@ -76,20 +117,45 @@ def response(resp):
 | 
	
		
			
			| 76 | 117 |  
 | 
	
		
			
			| 77 | 118 |      # TODO handle resp_json['queryresult']['assumptions']
 | 
	
		
			
			| 78 | 119 |      result_chunks = []
 | 
	
		
			
			|  | 120 | +    infobox_title = None
 | 
	
		
			
			| 79 | 121 |      for pod in resp_json['queryresult']['pods']:
 | 
	
		
			
			| 80 | 122 |          pod_title = pod.get('title', '')
 | 
	
		
			
			|  | 123 | +
 | 
	
		
			
			| 81 | 124 |          if 'subpods' not in pod:
 | 
	
		
			
			| 82 |  | -            continue
 | 
	
		
			
			|  | 125 | +            # comment this section if your requests always reach timeout
 | 
	
		
			
			|  | 126 | +            if pod['async']:
 | 
	
		
			
			|  | 127 | +                result = get_async_pod(pod['async'])
 | 
	
		
			
			|  | 128 | +                if result:
 | 
	
		
			
			|  | 129 | +                    pod = result
 | 
	
		
			
			|  | 130 | +            else:
 | 
	
		
			
			|  | 131 | +                continue
 | 
	
		
			
			|  | 132 | +
 | 
	
		
			
			|  | 133 | +        # infobox title is input or text content on first pod
 | 
	
		
			
			|  | 134 | +        if pod_title.startswith('Input') or not infobox_title:
 | 
	
		
			
			|  | 135 | +            try:
 | 
	
		
			
			|  | 136 | +                infobox_title = pod['subpods'][0]['plaintext']
 | 
	
		
			
			|  | 137 | +            except:
 | 
	
		
			
			|  | 138 | +                infobox_title = ''
 | 
	
		
			
			|  | 139 | +                pass
 | 
	
		
			
			|  | 140 | +
 | 
	
		
			
			| 83 | 141 |          for subpod in pod['subpods']:
 | 
	
		
			
			| 84 |  | -            if 'img' in subpod:
 | 
	
		
			
			| 85 |  | -                result_chunks.append(u'<p>{0}<br /><img src="{1}" alt="{2}" /></p>'
 | 
	
		
			
			| 86 |  | -                                     .format(escape(pod_title or subpod['img']['alt']),
 | 
	
		
			
			| 87 |  | -                                             escape(subpod['img']['src']),
 | 
	
		
			
			| 88 |  | -                                             escape(subpod['img']['alt'])))
 | 
	
		
			
			|  | 142 | +            if subpod['plaintext'] != '' and pod_title not in image_pods:
 | 
	
		
			
			|  | 143 | +                # append unless it's not an actual answer
 | 
	
		
			
			|  | 144 | +                if subpod['plaintext'] != '(requires interactivity)':
 | 
	
		
			
			|  | 145 | +                    result_chunks.append({'label': pod_title, 'value': subpod['plaintext']})
 | 
	
		
			
			|  | 146 | +
 | 
	
		
			
			|  | 147 | +            elif 'img' in subpod:
 | 
	
		
			
			|  | 148 | +                result_chunks.append({'label': pod_title, 'image': subpod['img']})
 | 
	
		
			
			| 89 | 149 |  
 | 
	
		
			
			| 90 | 150 |      if not result_chunks:
 | 
	
		
			
			| 91 | 151 |          return []
 | 
	
		
			
			| 92 | 152 |  
 | 
	
		
			
			| 93 |  | -    return [{'url': resp.request.headers['Referer'].decode('utf-8'),
 | 
	
		
			
			| 94 |  | -             'title': 'Wolframalpha',
 | 
	
		
			
			| 95 |  | -             'content': ''.join(result_chunks)}]
 | 
	
		
			
			|  | 153 | +    results.append({'infobox': infobox_title,
 | 
	
		
			
			|  | 154 | +                    'attributes': result_chunks,
 | 
	
		
			
			|  | 155 | +                    'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer']}]})
 | 
	
		
			
			|  | 156 | +
 | 
	
		
			
			|  | 157 | +    results.append({'url': resp.request.headers['Referer'],
 | 
	
		
			
			|  | 158 | +                    'title': 'Wolfram|Alpha',
 | 
	
		
			
			|  | 159 | +                    'content': infobox_title})
 | 
	
		
			
			|  | 160 | +
 | 
	
		
			
			|  | 161 | +    return results
 |