Browse Source

Remove unnecessary code in wolframalpha_noapi engine

The answer is scraped from a JS function in the raw response text, so
parsing the HTML tree doesn't achieve anything here.
a01200356 9 years ago
parent
commit
d827fc49a1
1 changed file with 18 additions and 31 deletions
  1. 18
    31
      searx/engines/wolframalpha_noapi.py

+ 18
- 31
searx/engines/wolframalpha_noapi.py View File

@@ -10,8 +10,6 @@
10 10
 import re
11 11
 import json
12 12
 from urllib import urlencode
13
-from lxml import html
14
-from searx.engines.xpath import extract_text
15 13
 
16 14
 # search-url
17 15
 url = 'http://www.wolframalpha.com/'
@@ -25,42 +23,31 @@ def request(query, params):
25 23
     return params
26 24
 
27 25
 
28
-# tries to find answer under the pattern given
29
-def extract_answer(script_list, pattern):
30
-    answer = None
26
+# get response from search-request
27
+def response(resp):
28
+    results = []
29
+    
30
+    # the answer is inside a js function
31
+    # answer can be located in different 'pods', although by default it should be in pod_0200
32
+    possible_locations = ['pod_0200\.push(.*)\n',
33
+                          'pod_0100\.push(.*)\n']
31 34
 
32 35
     # get line that matches the pattern
33
-    for script in script_list:
36
+    for pattern in possible_locations:
34 37
         try:
35
-            line = re.search(pattern, script.text_content()).group(1)
38
+            line = re.search(pattern, resp.text).group(1)
39
+            break
36 40
         except AttributeError:
37 41
             continue
38 42
 
39
-        # extract answer from json
40
-        answer = line[line.find('{') : line.rfind('}')+1]
41
-        answer = json.loads(answer.encode('unicode-escape'))
42
-        answer = answer['stringified'].decode('unicode-escape')
43
-
44
-    return answer
45
-
43
+    if not line:
44
+        return results
46 45
 
47
-# get response from search-request
48
-def response(resp):
49
-
50
-    dom = html.fromstring(resp.text)
51
-
52
-    # the answer is inside a js script
53
-    scripts = dom.xpath('//script')
46
+    # extract answer from json
47
+    answer = line[line.find('{') : line.rfind('}')+1]
48
+    answer = json.loads(answer.encode('unicode-escape'))
49
+    answer = answer['stringified'].decode('unicode-escape')
54 50
 
55
-    results = []
56
-
57
-    # answer can be located in different 'pods', although by default it should be in pod_0200
58
-    answer = extract_answer(scripts, 'pod_0200\.push(.*)\n')
59
-    if not answer:
60
-        answer = extract_answer(scripts, 'pod_0100\.push(.*)\n')
61
-        if answer:
62
-            results.append({'answer': answer})
63
-    else:
64
-        results.append({'answer': answer})
51
+    results.append({'answer': answer})
65 52
     
66 53
     return results