Browse Source

Wolfram Alpha (no API needed now)

a01200356 9 years ago
parent
commit
b51ba32f61
2 changed files with 70 additions and 4 deletions
  1. 66
    0
      searx/engines/wolframalpha_noapi.py
  2. 4
    4
      searx/settings.yml

+ 66
- 0
searx/engines/wolframalpha_noapi.py View File

@@ -0,0 +1,66 @@
1
+# WolframAlpha (Maths)
2
+#
3
+# @website     http://www.wolframalpha.com/
4
+#
5
+# @using-api   no
6
+# @results     HTML, JS
7
+# @stable      no
8
+# @parse       answer
9
+
10
+import re
11
+import json
12
+from urllib import urlencode
13
+from lxml import html
14
+from searx.engines.xpath import extract_text
15
+
16
+# search-url
17
+url = 'http://www.wolframalpha.com/'
18
+search_url = url+'input/?{query}'
19
+
20
+
21
+# do search-request
22
def request(query, params):
    """Set the outgoing request URL for the given query.

    The query is URL-encoded as the ``i`` parameter and substituted into
    ``search_url``; the result is stored under ``params['url']`` and the
    same ``params`` dict is returned.
    """
    encoded_query = urlencode({'i': query})
    params['url'] = search_url.format(query=encoded_query)
    return params
26
+
27
+
28
+# tries to find answer under the pattern given
29
def extract_answer(script_list, pattern):
    """Return the answer embedded in one of the script elements, or None.

    Each element of ``script_list`` must provide ``text_content()``.  The
    script text is searched with ``pattern``; the first capture group of the
    match is expected to contain a JSON object with a ``stringified`` key,
    whose value is the answer.

    Scripts that do not match, or whose payload cannot be parsed, are
    skipped.  If several scripts yield an answer, the one from the last such
    script is returned.
    """
    # local import: codecs.decode accepts text on both Python 2 and Python 3,
    # whereas the str/unicode ``.decode`` method exists on Python 2 only
    import codecs

    answer = None

    for script in script_list:
        match = re.search(pattern, script.text_content())
        if match is None:
            continue
        line = match.group(1)

        # keep only the span from the first '{' to the last '}'
        payload = line[line.find('{'):line.rfind('}') + 1]

        try:
            # Escape-encoding makes the payload pure ASCII and doubles any
            # backslashes before it is parsed; the escapes are resolved
            # again once the value has been looked up.
            # NOTE(review): characters in U+0080..U+00FF appear to be encoded
            # as \xNN, which JSON rejects, so such payloads end up skipped.
            data = json.loads(payload.encode('unicode-escape'))
            answer = codecs.decode(data['stringified'], 'unicode-escape')
        except (ValueError, KeyError, TypeError):
            # malformed JSON, missing key or non-text value: ignore this
            # script instead of aborting the whole response
            continue

    return answer
45
+
46
+
47
+# get response from search-request
48
def response(resp):
    """Build the result list from a WolframAlpha HTML response.

    The answer is embedded in the page's ``<script>`` elements.  Returns a
    list holding a single ``{'answer': ...}`` dict, or an empty list when no
    answer could be extracted.
    """
    dom = html.fromstring(resp.text)

    # the answer is inside a js script
    scripts = dom.xpath('//script')

    # The answer can be located in different 'pods'; pod_0200 is tried first
    # and pod_0100 is the fallback.  Raw strings keep the regex escapes
    # intact instead of relying on Python passing '\.' through unchanged.
    pod_patterns = (
        r'pod_0200\.push(.*)\n',
        r'pod_0100\.push(.*)\n',
    )

    for pod_pattern in pod_patterns:
        answer = extract_answer(scripts, pod_pattern)
        if answer:
            return [{'answer': answer}]

    return []

+ 4
- 4
searx/settings.yml View File

@@ -302,11 +302,11 @@ engines:
302 302
 
303 303
 # You can use the engine using the official stable API, but you need an API key
304 304
 # See : http://products.wolframalpha.com/api/
305
-#  - name : wolframalpha
306
-#    shortcut : wa
307
-#    engine : wolframalpha_api
305
+  - name : wolframalpha
306
+    shortcut : wa
307
+    engine : wolframalpha_noapi
308 308
 #    api_key: 'apikey' # required!
309
-#    timeout: 6.0
309
+    timeout: 6.0
310 310
 
311 311
 #The blekko technology and team have joined IBM Watson! -> https://blekko.com/
312 312
 #  - name : blekko images