related issues: #508 #509

9 anos atrás · d06178139f
--- a/searx/engines/wolframalpha_noapi.py
+++ b/searx/engines/wolframalpha_noapi.py
@@ -8,79 +8,85 @@
 
				 # @stable      no
			
 
				 # @parse       answer
			
 
				 
			
 
				-from re import search, sub
			
 
				+from cgi import escape
			
 
				 from json import loads
			
 
				+from time import time
			
 
				 from urllib import urlencode
			
 
				-from lxml import html
			
 
				-import HTMLParser
			
 
				+
			
 
				+from searx.poolrequests import get as http_get
			
 
				 
			
 
				 # search-url
			
 
				-url = 'http://www.wolframalpha.com/'
			
 
				+url = 'https://www.wolframalpha.com/'
			
 
				 search_url = url + 'input/?{query}'
			
 
				 
			
 
				+search_url = url + 'input/json.jsp'\
			
 
				+    '?async=true'\
			
 
				+    '&banners=raw'\
			
 
				+    '&debuggingdata=false'\
			
 
				+    '&format=image,plaintext,imagemap,minput,moutput'\
			
 
				+    '&formattimeout=2'\
			
 
				+    '&{query}'\
			
 
				+    '&output=JSON'\
			
 
				+    '&parsetimeout=2'\
			
 
				+    '&proxycode={token}'\
			
 
				+    '&scantimeout=0.5'\
			
 
				+    '&sponsorcategories=true'\
			
 
				+    '&statemethod=deploybutton'
			
 
				+
			
 
				 # xpath variables
			
 
				 scripts_xpath = '//script'
			
 
				 title_xpath = '//title'
			
 
				 failure_xpath = '//p[attribute::class="pfail"]'
			
 
				+token = {'value': '',
			
 
				+         'last_updated': None}
			
 
				+
			
 
				+
			
 
				+# it seems that WolframAlpha resets its token every hour
			
 
				+def obtain_token():
			
 
				+    update_time = time() - (time() % 3600)
			
 
				+    token_response = http_get('https://www.wolframalpha.com/input/api/v1/code?ts=9999999999999999999', timeout=2.0)
			
 
				+    token['value'] = loads(token_response.text)['code']
			
 
				+    token['last_updated'] = update_time
			
 
				+    return token
			
 
				+
			
 
				+
			
 
				+obtain_token()
			
 
				 
			
 
				 
			
 
				 # do search-request
			
 
				 def request(query, params):
			
 
				-    params['url'] = search_url.format(query=urlencode({'i': query}))
			
 
				+    # obtain a new token if the last update was more than an hour ago
			
 
				+    if time() - token['last_updated'] > 3600:
			
 
				+        obtain_token()
			
 
				+    params['url'] = search_url.format(query=urlencode({'input': query}), token=token['value'])
			
 
				+    params['headers']['Referer'] = 'https://www.wolframalpha.com/input/?i=' + query
			
 
				 
			
 
				     return params
			
 
				 
			
 
				 
			
 
				 # get response from search-request
			
 
				 def response(resp):
			
 
				-    results = []
			
 
				-    line = None
			
 
				-
			
 
				-    dom = html.fromstring(resp.text)
			
 
				-    scripts = dom.xpath(scripts_xpath)
			
 
				-
			
 
				-    # the answer is inside a js function
			
 
				-    # answer can be located in different 'pods', although by default it should be in pod_0200
			
 
				-    possible_locations = ['pod_0200\.push\((.*)',
			
 
				-                          'pod_0100\.push\((.*)']
			
 
				-
			
 
				-    # failed result
			
 
				-    if dom.xpath(failure_xpath):
			
 
				-        return results
			
 
				-
			
 
				-    # get line that matches the pattern
			
 
				-    for pattern in possible_locations:
			
 
				-        for script in scripts:
			
 
				-            try:
			
 
				-                line = search(pattern, script.text_content()).group(1)
			
 
				-                break
			
 
				-            except AttributeError:
			
 
				-                continue
			
 
				-        if line:
			
 
				-            break
			
 
				-
			
 
				-    if line:
			
 
				-        # extract answer from json
			
 
				-        answer = line[line.find('{'):line.rfind('}') + 1]
			
 
				-        try:
			
 
				-            answer = loads(answer)
			
 
				-        except Exception:
			
 
				-            answer = loads(answer.encode('unicode-escape'))
			
 
				-        answer = answer['stringified']
			
 
				-
			
 
				-        # clean plaintext answer
			
 
				-        h = HTMLParser.HTMLParser()
			
 
				-        answer = h.unescape(answer.decode('unicode-escape'))
			
 
				-        answer = sub(r'\\', '', answer)
			
 
				-
			
 
				-        results.append({'answer': answer})
			
 
				-
			
 
				-    # user input is in first part of title
			
 
				-    title = dom.xpath(title_xpath)[0].text.encode('utf-8')
			
 
				-    result_url = request(title[:-16], {})['url']
			
 
				-
			
 
				-    # append result
			
 
				-    results.append({'url': result_url,
			
 
				-                    'title': title.decode('utf-8')})
			
 
				-
			
 
				-    return results
			
 
				+    resp_json = loads(resp.text)
			
 
				+
			
 
				+    if not resp_json['queryresult']['success']:
			
 
				+        return []
			
 
				+
			
 
				+    # TODO handle resp_json['queryresult']['assumptions']
			
 
				+    result_chunks = []
			
 
				+    for pod in resp_json['queryresult']['pods']:
			
 
				+        pod_title = pod.get('title', '')
			
 
				+        if 'subpods' not in pod:
			
 
				+            continue
			
 
				+        for subpod in pod['subpods']:
			
 
				+            if 'img' in subpod:
			
 
				+                result_chunks.append(u'<p>{0}<br /><img src="{1}" alt="{2}" /></p>'
			
 
				+                                     .format(escape(pod_title or subpod['img']['alt']),
			
 
				+                                             escape(subpod['img']['src']),
			
 
				+                                             escape(subpod['img']['alt'])))
			
 
				+
			
 
				+    if not result_chunks:
			
 
				+        return []
			
 
				+
			
 
				+    return [{'url': resp.request.headers['Referer'],
			
 
				+             'title': 'Wolframalpha',
			
 
				+             'content': ''.join(result_chunks)}]
			
--- a/tests/unit/engines/test_wolframalpha_noapi.py
+++ b/tests/unit/engines/test_wolframalpha_noapi.py
@@ -1,6 +1,5 @@
 
				 # -*- coding: utf-8 -*-
			
 
				 from collections import defaultdict
			
 
				-import mock
			
 
				 from searx.engines import wolframalpha_noapi
			
 
				 from searx.testing import SearxTestCase
			
 
				 
			
@@ -21,173 +20,4 @@ class TestWolframAlphaNoAPIEngine(SearxTestCase):
 
				         self.assertRaises(AttributeError, wolframalpha_noapi.response, [])
			
 
				         self.assertRaises(AttributeError, wolframalpha_noapi.response, '')
			
 
				         self.assertRaises(AttributeError, wolframalpha_noapi.response, '[]')
			
 
				-
			
 
				-        html = """
			
 
				-        <!DOCTYPE html>
			
 
				-            <title> Parangaricutirimícuaro - Wolfram|Alpha</title>
			
 
				-            <meta charset="utf-8" />
			
 
				-            <body>
			
 
				-                <div id="closest">
			
 
				-                    <p class="pfail">Wolfram|Alpha doesn't know how to interpret your input.</p>
			
 
				-                    <div id="dtips">
			
 
				-                        <div class="tip">
			
 
				-                            <span class="tip-title">Tip:&nbsp;</span>
			
 
				-                                Check your spelling, and use English
			
 
				-                            <span class="tip-extra"></span>
			
 
				-                        </div>
			
 
				-                    </div>
			
 
				-                </div>
			
 
				-            </body>
			
 
				-        </html>
			
 
				-        """
			
 
				-        # test failed query
			
 
				-        response = mock.Mock(text=html)
			
 
				-        self.assertEqual(wolframalpha_noapi.response(response), [])
			
 
				-
			
 
				-        html = """
			
 
				-        <!DOCTYPE html>
			
 
				-            <title> sqrt(-1) - Wolfram|Alpha</title>
			
 
				-            <meta charset="utf-8" />
			
 
				-            <body>
			
 
				-                <script type="text/javascript">
			
 
				-                  try {
			
 
				-                    if (typeof context.jsonArray.popups.pod_0100 == "undefined" ) {
			
 
				-                      context.jsonArray.popups.pod_0100 = [];
			
 
				-                    }
			
 
				-                    context.jsonArray.popups.pod_0100.push( {"stringified": "sqrt(-1)","mInput": "","mOutput": ""});
			
 
				-                  } catch(e) { }
			
 
				-
			
 
				-                  try {
			
 
				-                    if (typeof context.jsonArray.popups.pod_0200 == "undefined" ) {
			
 
				-                      context.jsonArray.popups.pod_0200 = [];
			
 
				-                    }
			
 
				-                    context.jsonArray.popups.pod_0200.push( {"stringified": "i","mInput": "","mOutput": ""});
			
 
				-                  } catch(e) { }
			
 
				-                </script>
			
 
				-            </body>
			
 
				-        </html>
			
 
				-        """
			
 
				-        # test plaintext
			
 
				-        response = mock.Mock(text=html)
			
 
				-        results = wolframalpha_noapi.response(response)
			
 
				-        self.assertEqual(type(results), list)
			
 
				-        self.assertEqual(len(results), 2)
			
 
				-        self.assertEquals('i', results[0]['answer'])
			
 
				-        self.assertIn('sqrt(-1) - Wolfram|Alpha', results[1]['title'])
			
 
				-        self.assertEquals('http://www.wolframalpha.com/input/?i=+sqrt%28-1%29', results[1]['url'])
			
 
				-
			
 
				-        html = """
			
 
				-        <!DOCTYPE html>
			
 
				-            <title> integral 1/x - Wolfram|Alpha</title>
			
 
				-            <meta charset="utf-8" />
			
 
				-            <body>
			
 
				-                <script type="text/javascript">
			
 
				-                  try {
			
 
				-                    if (typeof context.jsonArray.popups.pod_0100 == "undefined" ) {
			
 
				-                      context.jsonArray.popups.pod_0100 = [];
			
 
				-                    }
			
 
				-                    context.jsonArray.popups.pod_0100.push( {"stringified": "integral 1\/x dx = log(x)+constant"});
			
 
				-                  } catch(e) { }
			
 
				-                </script>
			
 
				-            </body>
			
 
				-        </html>
			
 
				-        """
			
 
				-        # test integral
			
 
				-        response = mock.Mock(text=html)
			
 
				-        results = wolframalpha_noapi.response(response)
			
 
				-        self.assertEqual(type(results), list)
			
 
				-        self.assertEqual(len(results), 2)
			
 
				-        self.assertIn('log(x)+c', results[0]['answer'])
			
 
				-        self.assertIn('integral 1/x - Wolfram|Alpha', results[1]['title'])
			
 
				-        self.assertEquals('http://www.wolframalpha.com/input/?i=+integral+1%2Fx', results[1]['url'])
			
 
				-
			
 
				-        html = """
			
 
				-        <!DOCTYPE html>
			
 
				-            <title> &int;1&#x2f;x &#xf74c;x - Wolfram|Alpha</title>
			
 
				-            <meta charset="utf-8" />
			
 
				-            <body>
			
 
				-                <script type="text/javascript">
			
 
				-                  try {
			
 
				-                    if (typeof context.jsonArray.popups.pod_0100 == "undefined" ) {
			
 
				-                      context.jsonArray.popups.pod_0100 = [];
			
 
				-                    }
			
 
				-                    context.jsonArray.popups.pod_0100.push( {"stringified": "integral 1\/x dx = log(x)+constant"});
			
 
				-                  } catch(e) { }
			
 
				-                </script>
			
 
				-            </body>
			
 
				-        </html>
			
 
				-        """
			
 
				-        # test input in mathematical notation
			
 
				-        response = mock.Mock(text=html)
			
 
				-        results = wolframalpha_noapi.response(response)
			
 
				-        self.assertEqual(type(results), list)
			
 
				-        self.assertEqual(len(results), 2)
			
 
				-        self.assertIn('log(x)+c', results[0]['answer'])
			
 
				-        self.assertIn('∫1/x x - Wolfram|Alpha'.decode('utf-8'), results[1]['title'])
			
 
				-        self.assertEquals('http://www.wolframalpha.com/input/?i=+%E2%88%AB1%2Fx+%EF%9D%8Cx', results[1]['url'])
			
 
				-
			
 
				-        html = """
			
 
				-        <!DOCTYPE html>
			
 
				-            <title> 1 euro to yen - Wolfram|Alpha</title>
			
 
				-            <meta charset="utf-8" />
			
 
				-            <body>
			
 
				-                <script type="text/javascript">
			
 
				-                  try {
			
 
				-                    if (typeof context.jsonArray.popups.pod_0100 == "undefined" ) {
			
 
				-                      context.jsonArray.popups.pod_0100 = [];
			
 
				-                    }
			
 
				-                  context.jsonArray.popups.pod_0100.push( {"stringified": "convert euro1  (euro) to Japanese yen"});
			
 
				-                  } catch(e) { }
			
 
				-
			
 
				-                  try {
			
 
				-                    if (typeof context.jsonArray.popups.pod_0200 == "undefined" ) {
			
 
				-                      context.jsonArray.popups.pod_0200 = [];
			
 
				-                    }
			
 
				-                    context.jsonArray.popups.pod_0200.push( {"stringified": "&yen;130.5  (Japanese yen)"});
			
 
				-                  } catch(e) { }
			
 
				-                </script>
			
 
				-            </body>
			
 
				-        </html>
			
 
				-        """
			
 
				-        # test output with htmlentity
			
 
				-        response = mock.Mock(text=html)
			
 
				-        results = wolframalpha_noapi.response(response)
			
 
				-        self.assertEqual(type(results), list)
			
 
				-        self.assertEqual(len(results), 2)
			
 
				-        self.assertIn('¥'.decode('utf-8'), results[0]['answer'])
			
 
				-        self.assertIn('1 euro to yen - Wolfram|Alpha', results[1]['title'])
			
 
				-        self.assertEquals('http://www.wolframalpha.com/input/?i=+1+euro+to+yen', results[1]['url'])
			
 
				-
			
 
				-        html = """
			
 
				-        <!DOCTYPE html>
			
 
				-            <title> distance from nairobi to kyoto in inches - Wolfram|Alpha</title>
			
 
				-            <meta charset="utf-8" />
			
 
				-            <body>
			
 
				-                <script type="text/javascript">
			
 
				-                  try {
			
 
				-                    if (typeof context.jsonArray.popups.pod_0100 == "undefined" ) {
			
 
				-                      context.jsonArray.popups.pod_0100 = [];
			
 
				-                    }
			
 
				-[...].pod_0100.push( {"stringified": "convert distance | from | Nairobi, Kenya\nto | Kyoto, Japan to inches"});
			
 
				-                  } catch(e) { }
			
 
				-
			
 
				-                  try {
			
 
				-                    if (typeof context.jsonArray.popups.pod_0200 == "undefined" ) {
			
 
				-                      context.jsonArray.popups.pod_0200 = [];
			
 
				-                    }
			
 
				-pod_0200.push({"stringified": "4.295&times;10^8 inches","mOutput": "Quantity[4.295×10^8,&amp;quot;Inches&amp;quot;]"});
			
 
				-
			
 
				-                  } catch(e) { }
			
 
				-                </script>
			
 
				-            </body>
			
 
				-        </html>
			
 
				-        """
			
 
				-        # test output with utf-8 character
			
 
				-        response = mock.Mock(text=html)
			
 
				-        results = wolframalpha_noapi.response(response)
			
 
				-        self.assertEqual(type(results), list)
			
 
				-        self.assertEqual(len(results), 2)
			
 
				-        self.assertIn('4.295×10^8 inches'.decode('utf-8'), results[0]['answer'])
			
 
				-        self.assertIn('distance from nairobi to kyoto in inches - Wolfram|Alpha', results[1]['title'])
			
 
				-        self.assertEquals('http://www.wolframalpha.com/input/?i=+distance+from+nairobi+to+kyoto+in+inches',
			
 
				-                          results[1]['url'])
			
 
				+        # TODO