wolframalpha_noapi.py 4.8KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161
# Wolfram|Alpha (Science)
#
# @website https://www.wolframalpha.com/
# @provide-api yes (https://api.wolframalpha.com/v2/)
#
# @using-api no
# @results JSON
# @stable no
# @parse url, infobox
import logging
from cgi import escape
from json import loads
from time import time
from urllib import urlencode

from lxml.etree import XML

from searx.poolrequests import get as http_get
  16. # search-url
  17. url = 'https://www.wolframalpha.com/'
  18. search_url = url + 'input/json.jsp'\
  19. '?async=true'\
  20. '&banners=raw'\
  21. '&debuggingdata=false'\
  22. '&format=image,plaintext,imagemap,minput,moutput'\
  23. '&formattimeout=2'\
  24. '&{query}'\
  25. '&output=JSON'\
  26. '&parsetimeout=2'\
  27. '&proxycode={token}'\
  28. '&scantimeout=0.5'\
  29. '&sponsorcategories=true'\
  30. '&statemethod=deploybutton'
  31. referer_url = url + 'input/?{query}'
  32. token = {'value': '',
  33. 'last_updated': None}
  34. # xpath variables
  35. success_xpath = '/pod[attribute::error="false"]'
  36. plaintext_xpath = './plaintext'
  37. title_xpath = './@title'
  38. image_xpath = './img'
  39. img_src_xpath = './img/@src'
  40. img_alt_xpath = './img/@alt'
  41. # pods to display as image in infobox
  42. # this pods do return a plaintext, but they look better and are more useful as images
  43. image_pods = {'Visual representation',
  44. 'Manipulatives illustration',
  45. 'Symbol'}
  46. # seems, wolframalpha resets its token in every hour
  47. def obtain_token():
  48. update_time = time() - (time() % 3600)
  49. try:
  50. token_response = http_get('https://www.wolframalpha.com/input/api/v1/code?ts=9999999999999999999', timeout=2.0)
  51. token['value'] = loads(token_response.text)['code']
  52. token['last_updated'] = update_time
  53. except:
  54. pass
  55. return token
  56. obtain_token()
  57. # do search-request
  58. def request(query, params):
  59. # obtain token if last update was more than an hour
  60. if time() - token['last_updated'] > 3600:
  61. obtain_token()
  62. params['url'] = search_url.format(query=urlencode({'input': query}), token=token['value'])
  63. params['headers']['Referer'] = referer_url.format(query=urlencode({'i': query}))
  64. return params
  65. # get additional pod
  66. # NOTE: this makes an additional requests to server, so the response will take longer and might reach timeout
  67. def get_async_pod(url):
  68. pod = {'subpods': []}
  69. try:
  70. resp = http_get(url, timeout=2.0)
  71. resp_pod = XML(resp.content)
  72. if resp_pod.xpath(success_xpath):
  73. for subpod in resp_pod:
  74. plaintext = subpod.xpath(plaintext_xpath)[0].text
  75. if plaintext:
  76. pod['subpods'].append({'title': subpod.xpath(title_xpath)[0],
  77. 'plaintext': plaintext})
  78. elif subpod.xpath(image_xpath):
  79. pod['subpods'].append({'title': subpod.xpath(title_xpath)[0],
  80. 'plaintext': '',
  81. 'img': {'src': subpod.xpath(img_src_xpath)[0],
  82. 'alt': subpod.xpath(img_alt_xpath)[0]}})
  83. except:
  84. pass
  85. return pod
  86. # get response from search-request
  87. def response(resp):
  88. results = []
  89. resp_json = loads(resp.text)
  90. if not resp_json['queryresult']['success']:
  91. return []
  92. # TODO handle resp_json['queryresult']['assumptions']
  93. result_chunks = []
  94. infobox_title = None
  95. for pod in resp_json['queryresult']['pods']:
  96. pod_title = pod.get('title', '')
  97. if 'subpods' not in pod:
  98. # comment this section if your requests always reach timeout
  99. if pod['async']:
  100. result = get_async_pod(pod['async'])
  101. if result:
  102. pod = result
  103. else:
  104. continue
  105. # infobox title is input or text content on first pod
  106. if pod_title.startswith('Input') or not infobox_title:
  107. try:
  108. infobox_title = pod['subpods'][0]['plaintext']
  109. except:
  110. infobox_title = ''
  111. pass
  112. for subpod in pod['subpods']:
  113. if subpod['plaintext'] != '' and pod_title not in image_pods:
  114. # append unless it's not an actual answer
  115. if subpod['plaintext'] != '(requires interactivity)':
  116. result_chunks.append({'label': pod_title, 'value': subpod['plaintext']})
  117. elif 'img' in subpod:
  118. result_chunks.append({'label': pod_title, 'image': subpod['img']})
  119. if not result_chunks:
  120. return []
  121. results.append({'infobox': infobox_title,
  122. 'attributes': result_chunks,
  123. 'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer']}]})
  124. results.append({'url': resp.request.headers['Referer'],
  125. 'title': 'Wolfram|Alpha',
  126. 'content': infobox_title})
  127. return results