소스 검색

Merge pull request #158 from Cqoicebordel/Moar-Engines

Add 500px and Searchcode engines
Adam Tauber 10 년 전
부모
커밋
0b3d632cd0
4개의 변경된 파일, 183개의 추가 및 0개의 삭제
  1. 57
    0
      searx/engines/500px.py
  2. 65
    0
      searx/engines/searchcode_code.py
  3. 49
    0
      searx/engines/searchcode_doc.py
  4. 12
    0
      searx/settings.yml

+ 57
- 0
searx/engines/500px.py 파일 보기

@@ -0,0 +1,57 @@
1
+## 500px (Images)
2
+#
3
+# @website     https://500px.com
4
+# @provide-api yes (https://developers.500px.com/)
5
+#
6
+# @using-api   no
7
+# @results     HTML
8
+# @stable      no (HTML can change)
9
+# @parse       url, title, thumbnail, img_src, content
10
+#
11
+# @todo        rewrite to api
12
+
13
+
14
+from urllib import urlencode
15
+from urlparse import urljoin
16
+from lxml import html
17
+
18
+# engine dependent config
19
+categories = ['images']
20
+paging = True
21
+
22
+# search-url
23
+base_url = 'https://500px.com'
24
+search_url = base_url+'/search?search?page={pageno}&type=photos&{query}'
25
+
26
+
27
+# do search-request
28
+def request(query, params):
29
+    params['url'] = search_url.format(pageno=params['pageno'],
30
+                                      query=urlencode({'q': query}))
31
+
32
+    return params
33
+
34
+
35
+# get response from search-request
36
+def response(resp):
37
+    results = []
38
+    
39
+    dom = html.fromstring(resp.text)
40
+    
41
+    # parse results
42
+    for result in dom.xpath('//div[@class="photo"]'):
43
+        link = result.xpath('.//a')[0]
44
+        url = urljoin(base_url, link.attrib.get('href'))
45
+        title = result.xpath('.//div[@class="title"]//text()')[0]
46
+        img_src = link.xpath('.//img')[0].attrib['src']
47
+        content = result.xpath('.//div[@class="info"]//text()')[0]
48
+
49
+        # append result
50
+        results.append({'url': url,
51
+                        'title': title,
52
+                        'img_src': img_src,
53
+                        'content': content,
54
+                        'template': 'images.html'})
55
+
56
+    # return results
57
+    return results

+ 65
- 0
searx/engines/searchcode_code.py 파일 보기

@@ -0,0 +1,65 @@
1
+## Searchcode (It)
2
+#
3
+# @website     https://searchcode.com/
4
+# @provide-api yes (https://searchcode.com/api/)
5
+#
6
+# @using-api   yes
7
+# @results     JSON
8
+# @stable      yes
9
+# @parse       url, title, content
10
+
11
+from urllib import urlencode
12
+from json import loads
13
+import cgi
14
+import re
15
+
16
+# engine dependent config
17
+categories = ['it']
18
+paging = True
19
+
20
+# search-url
21
+url = 'https://searchcode.com/'
22
+search_url = url+'api/codesearch_I/?{query}&p={pageno}'
23
+
24
+
25
+# do search-request
26
+def request(query, params):
27
+    params['url'] = search_url.format(query=urlencode({'q': query}),
28
+                                      pageno=params['pageno']-1)
29
+
30
+    return params
31
+
32
+
33
+# get response from search-request
34
+def response(resp):
35
+    results = []
36
+    
37
+    search_results = loads(resp.text)
38
+
39
+    # parse results
40
+    for result in search_results['results']:
41
+        href = result['url']
42
+        title = "" + result['name'] + " - " + result['filename']
43
+        content = result['repo'] + "<br />"
44
+        
45
+        lines = dict()
46
+        for line, code in result['lines'].items():
47
+            lines[int(line)] = code
48
+
49
+        content = content + '<pre class="code-formatter"><table class="code">'
50
+        for line, code in sorted(lines.items()):
51
+            content = content + '<tr><td class="line-number" style="padding-right:5px;">' 
52
+            content = content + str(line) + '</td><td class="code-snippet">' 
53
+            # Replace every two spaces with ' &nbsp;' to keep formatting while allowing the browser to break the line if necessary
54
+            content = content + cgi.escape(code).replace('\t', '    ').replace('  ', '&nbsp; ').replace('  ', ' &nbsp;') 
55
+            content = content + "</td></tr>"
56
+            
57
+        content = content + "</table></pre>"
58
+        
59
+        # append result
60
+        results.append({'url': href,
61
+                        'title': title,
62
+                        'content': content})
63
+
64
+    # return results
65
+    return results

+ 49
- 0
searx/engines/searchcode_doc.py 파일 보기

@@ -0,0 +1,49 @@
1
+## Searchcode (It)
2
+#
3
+# @website     https://searchcode.com/
4
+# @provide-api yes (https://searchcode.com/api/)
5
+#
6
+# @using-api   yes
7
+# @results     JSON
8
+# @stable      yes
9
+# @parse       url, title, content
10
+
11
+from urllib import urlencode
12
+from json import loads
13
+
14
+# engine dependent config
15
+categories = ['it']
16
+paging = True
17
+
18
+# search-url
19
+url = 'https://searchcode.com/'
20
+search_url = url+'api/search_IV/?{query}&p={pageno}'
21
+
22
+
23
+# do search-request
24
+def request(query, params):
25
+    params['url'] = search_url.format(query=urlencode({'q': query}),
26
+                                      pageno=params['pageno']-1)
27
+
28
+    return params
29
+
30
+
31
+# get response from search-request
32
+def response(resp):
33
+    results = []
34
+    
35
+    search_results = loads(resp.text)
36
+
37
+    # parse results
38
+    for result in search_results['results']:
39
+        href = result['url']
40
+        title = "[" + result['type'] + "] " + result['namespace'] + " " + result['name']
41
+        content = '<span class="highlight">[' + result['type'] + "] " + result['name'] + " " + result['synopsis'] + "</span><br />" + result['description']
42
+        
43
+        # append result
44
+        results.append({'url': href,
45
+                        'title': title,
46
+                        'content': content})
47
+
48
+    # return results
49
+    return results

+ 12
- 0
searx/settings.yml 파일 보기

@@ -64,6 +64,10 @@ engines:
64 64
 #    engine : filecrop
65 65
 #    categories : files
66 66
 #    shortcut : fc
67
+    
68
+  - name : 500px
69
+    engine : 500px
70
+    shortcut : px
67 71
 
68 72
   - name : flickr
69 73
     engine : flickr
@@ -114,6 +118,14 @@ engines:
114 118
   - name : stackoverflow
115 119
     engine : stackoverflow
116 120
     shortcut : st
121
+    
122
+  - name : searchcode doc
123
+    engine : searchcode_doc
124
+    shortcut : scd
125
+    
126
+  - name : searchcode code
127
+    engine : searchcode_code
128
+    shortcut : scc
117 129
 
118 130
   - name : startpage
119 131
     engine : startpage