瀏覽代碼

Add 500px and Searchcode engines

Allows searching for images on 500px. It doesn't use the official API; it parses the HTML results page instead. That is less stable, but it means one fewer API key to obtain.

Two engines were necessary for Searchcode because there are two search modes: searching documentation and searching code examples. Both use open APIs.
Cqoicebordel 10 年之前
父節點
當前提交
56399cf1ea
共有 4 個文件被更改,包括 183 次插入0 次删除
  1. 57
    0
      searx/engines/500px.py
  2. 65
    0
      searx/engines/searchcode_code.py
  3. 49
    0
      searx/engines/searchcode_doc.py
  4. 12
    0
      searx/settings.yml

+ 57
- 0
searx/engines/500px.py 查看文件

@@ -0,0 +1,57 @@
1
+## 500px (Images)
2
+#
3
+# @website     https://500px.com
4
+# @provide-api yes (https://developers.500px.com/)
5
+#
6
+# @using-api   no
7
+# @results     HTML
8
+# @stable      no (HTML can change)
9
+# @parse       url, title, thumbnail, img_src, content
10
+#
11
+# @todo        rewrite to api
12
+
13
+
14
+from urllib import urlencode
15
+from urlparse import urljoin
16
+from lxml import html
17
+
18
+# engine dependent config
19
+categories = ['images']
20
+paging = True
21
+
22
+# search-url
23
+base_url = 'https://500px.com'
24
+search_url = base_url+'/search?search?page={pageno}&type=photos&{query}'
25
+
26
+
27
+# do search-request
28
+def request(query, params):
29
+    params['url'] = search_url.format(pageno=params['pageno'],
30
+                                      query=urlencode({'q': query}))
31
+
32
+    return params
33
+
34
+
35
+# get response from search-request
36
+def response(resp):
37
+    results = []
38
+    
39
+    dom = html.fromstring(resp.text)
40
+    
41
+    # parse results
42
+    for result in dom.xpath('//div[@class="photo"]'):
43
+        link = result.xpath('.//a')[0]
44
+        url = urljoin(base_url, link.attrib.get('href'))
45
+        title = result.xpath('.//div[@class="title"]//text()')[0]
46
+        img_src = link.xpath('.//img')[0].attrib['src']
47
+        content = result.xpath('.//div[@class="info"]//text()')[0]
48
+
49
+        # append result
50
+        results.append({'url': url,
51
+                        'title': title,
52
+                        'img_src': img_src,
53
+                        'content': content,
54
+                        'template': 'images.html'})
55
+
56
+    # return results
57
+    return results

+ 65
- 0
searx/engines/searchcode_code.py 查看文件

@@ -0,0 +1,65 @@
1
+## Searchcode (It)
2
+#
3
+# @website     https://searchcode.com/
4
+# @provide-api yes (https://searchcode.com/api/)
5
+#
6
+# @using-api   yes
7
+# @results     JSON
8
+# @stable      yes
9
+# @parse       url, title, content
10
+
11
+from urllib import urlencode
12
+from json import loads
13
+import cgi
14
+import re
15
+
16
+# engine dependent config
17
+categories = ['it']
18
+paging = True
19
+
20
+# search-url
21
+url = 'https://searchcode.com/'
22
+search_url = url+'api/codesearch_I/?{query}&p={pageno}'
23
+
24
+
25
+# do search-request
26
+def request(query, params):
27
+    params['url'] = search_url.format(query=urlencode({'q': query}),
28
+                                      pageno=params['pageno']-1)
29
+
30
+    return params
31
+
32
+
33
+# get response from search-request
34
+def response(resp):
35
+    results = []
36
+    
37
+    search_results = loads(resp.text)
38
+
39
+    # parse results
40
+    for result in search_results['results']:
41
+        href = result['url']
42
+        title = "" + result['name'] + " - " + result['filename']
43
+        content = result['repo'] + "<br />"
44
+        
45
+        lines = dict()
46
+        for line, code in result['lines'].items():
47
+            lines[int(line)] = code
48
+
49
+        content = content + '<pre class="code-formatter"><table class="code">'
50
+        for line, code in sorted(lines.items()):
51
+            content = content + '<tr><td class="line-number" style="padding-right:5px;">' 
52
+            content = content + str(line) + '</td><td class="code-snippet">' 
53
+            # Replace each pair of spaces with a space plus '&nbsp;' to keep formatting while allowing the browser to break the line if necessary
54
+            content = content + cgi.escape(code).replace('\t', '    ').replace('  ', '&nbsp; ').replace('  ', ' &nbsp;') 
55
+            content = content + "</td></tr>"
56
+            
57
+        content = content + "</table></pre>"
58
+        
59
+        # append result
60
+        results.append({'url': href,
61
+                        'title': title,
62
+                        'content': content})
63
+
64
+    # return results
65
+    return results

+ 49
- 0
searx/engines/searchcode_doc.py 查看文件

@@ -0,0 +1,49 @@
1
+## Searchcode (It)
2
+#
3
+# @website     https://searchcode.com/
4
+# @provide-api yes (https://searchcode.com/api/)
5
+#
6
+# @using-api   yes
7
+# @results     JSON
8
+# @stable      yes
9
+# @parse       url, title, content
10
+
11
+from urllib import urlencode
12
+from json import loads
13
+
14
+# engine dependent config
15
+categories = ['it']
16
+paging = True
17
+
18
+# search-url
19
+url = 'https://searchcode.com/'
20
+search_url = url+'api/search_IV/?{query}&p={pageno}'
21
+
22
+
23
+# do search-request
24
+def request(query, params):
25
+    params['url'] = search_url.format(query=urlencode({'q': query}),
26
+                                      pageno=params['pageno']-1)
27
+
28
+    return params
29
+
30
+
31
+# get response from search-request
32
+def response(resp):
33
+    results = []
34
+    
35
+    search_results = loads(resp.text)
36
+
37
+    # parse results
38
+    for result in search_results['results']:
39
+        href = result['url']
40
+        title = "[" + result['type'] + "] " + result['namespace'] + " " + result['name']
41
+        content = '<span class="highlight">[' + result['type'] + "] " + result['name'] + " " + result['synopsis'] + "</span><br />" + result['description']
42
+        
43
+        # append result
44
+        results.append({'url': href,
45
+                        'title': title,
46
+                        'content': content})
47
+
48
+    # return results
49
+    return results

+ 12
- 0
searx/settings.yml 查看文件

@@ -64,6 +64,10 @@ engines:
64 64
 #    engine : filecrop
65 65
 #    categories : files
66 66
 #    shortcut : fc
67
+    
68
+  - name : 500px
69
+    engine : 500px
70
+    shortcut : px
67 71
 
68 72
   - name : flickr
69 73
     engine : flickr
@@ -114,6 +118,14 @@ engines:
114 118
   - name : stackoverflow
115 119
     engine : stackoverflow
116 120
     shortcut : st
121
+    
122
+  - name : searchcode doc
123
+    engine : searchcode_doc
124
+    shortcut : scd
125
+    
126
+  - name : searchcode code
127
+    engine : searchcode_code
128
+    shortcut : scc
117 129
 
118 130
   - name : startpage
119 131
     engine : startpage