瀏覽代碼

Wolfie kinda works using API

a01200356 9 年之前
父節點
當前提交
4578575c28
共有 4 個檔案被更改，包括 137 行新增和 1 行删除
  1. 60
    0
      searx/engines/wolframalpha.py
  2. 70
    0
      searx/engines/wolframalpha_api.py
  3. 1
    1
      searx/search.py
  4. 6
    0
      searx/settings.yml

+ 60
- 0
searx/engines/wolframalpha.py 查看文件

@@ -0,0 +1,60 @@
1
+"""
2
+ WolframAlpha
3
+
4
+ @website     http://www.wolframalpha.com/
5
+
6
+ @using-api   yes
7
+ @results     no c
8
+ @stable      i guess so
9
+ @parse       result
10
+"""
11
+
12
+import wolframalpha
13
+
14
+# engine dependent config
15
+paging = False
16
+
17
+# search-url
18
+# url = 'http://www.wolframalpha.com/'
19
+# search_url = url+'input/?{query}'
20
+
21
+client_id = '5952JX-X52L3VKWT8'
22
+'''
23
+# do search-request
24
+def request(query, params):
25
+    params['url'] = search_url.format(query=urlencode({'i': query}))
26
+    print params
27
+
28
+    return params
29
+
30
+
31
+# get response from search-request
32
+def response(resp):
33
+    print resp
34
+
35
+    dom = html.fromstring(resp.text)
36
+    #resshit = dom.find_class('output pnt')
37
+    #for shit in resshit:
38
+        #print shit.text_content()
39
+    results = []
40
+    #results.append({'url': 'https://wikipedia.org', 'title': 'Wolfie, lol', 'content': 'es kwatro'})
41
+    #print results
42
+    #return results
43
+
44
+    # parse results
45
+    for result in dom.xpath(results_xpath):
46
+        print result
47
+        
48
+        link = result.xpath(link_xpath)[0]
49
+        href = urljoin(url, link.attrib.get('href'))
50
+        title = escape(extract_text(link))
51
+        content = escape(extract_text(result.xpath(content_xpath)))
52
+
53
+        # append result
54
+        results.append({'url': href,
55
+                        'title': title,
56
+                        'content': content})
57
+
58
+    print results
59
+    return results
60
+'''

+ 70
- 0
searx/engines/wolframalpha_api.py 查看文件

@@ -0,0 +1,70 @@
1
+# Wolfram Alpha (Maths)
2
+#
3
+# @website     http://www.wolframalpha.com
4
+# @provide-api yes (http://api.wolframalpha.com/v2/)
5
+#
6
+# @using-api   yes
7
+# @results     XML
8
+# @stable      yes
9
+# @parse       result
10
+
11
+from urllib import urlencode
12
+from lxml import etree
13
+from searx.engines.xpath import extract_text
14
+from searx.utils import html_to_text
15
+
16
+# search-url
17
+base_url = 'http://api.wolframalpha.com/v2/query'
18
+search_url = base_url + '?appid={api_key}&{query}&format=plaintext'
19
+site_url = 'http://wolframalpha.com/input/?{query}'
20
+
21
+#embedded_url = '<iframe width="540" height="304" ' +\
22
+#    'data-src="//www.youtube-nocookie.com/embed/{videoid}" ' +\
23
+#    'frameborder="0" allowfullscreen></iframe>'
24
+
25
+# do search-request
26
+def request(query, params):
27
+    params['url'] = search_url.format(query=urlencode({'input': query}),
28
+                                      api_key=api_key)
29
+
30
+    # remember the query so response() can build the result url for the site
31
+    global my_query
32
+    my_query = query
33
+
34
+    return params
35
+
36
+# replace Unicode Private Use Area (PUA) characters with legible equivalents
37
+def replace_pua_chars(text):
38
+    pua_chars = { u'\uf74c': 'd',
39
+                  u'\uf74d': u'\u212f',
40
+                  u'\uf74e': 'i',
41
+                  u'\uf7d9': '=' }
42
+
43
+    for k, v in pua_chars.iteritems():
44
+        text = text.replace(k, v)
45
+
46
+    return text
47
+
48
+# get response from search-request
49
+def response(resp):
50
+    results = []
51
+
52
+    search_results = etree.XML(resp.content)
53
+
54
+    # return an empty list if the API reports the query as unsuccessful
55
+    if search_results.xpath('/queryresult[attribute::success="false"]'):
56
+        return []
57
+
58
+    # parse result
59
+    result = search_results.xpath('//pod[attribute::primary="true"]/subpod/plaintext')[0].text
60
+    result = replace_pua_chars(result)
61
+
62
+    # build the url of the result page on the WolframAlpha site
63
+    result_url = site_url.format(query=urlencode({'i': my_query}))
64
+
65
+    # append result
66
+    results.append({'url': result_url,
67
+                    'title': result})
68
+
69
+    # return results
70
+    return results

+ 1
- 1
searx/search.py 查看文件

@@ -98,7 +98,7 @@ def make_callback(engine_name, callback, params, result_container):
98 98
         with threading.RLock():
99 99
             engines[engine_name].stats['page_load_time'] += search_duration
100 100
 
101
-        timeout_overhead = 0.2  # seconds
101
+        timeout_overhead = 0.5  # seconds
102 102
         timeout_limit = engines[engine_name].timeout + timeout_overhead
103 103
 
104 104
         if search_duration > timeout_limit:

+ 6
- 0
searx/settings.yml 查看文件

@@ -300,6 +300,12 @@ engines:
300 300
     engine : vimeo
301 301
     shortcut : vm
302 302
 
303
+  - name : wolframalpha
304
+    shortcut : wa
305
+    engine : wolframalpha_api
306
+    api_key: '5952JX-X52L3VKWT8'
307
+    timeout: 6.0
308
+
303 309
 #The blekko technology and team have joined IBM Watson! -> https://blekko.com/
304 310
 #  - name : blekko images
305 311
 #    engine : blekko_images