Browse Source

Merge pull request #194 from Cqoicebordel/moar-engines

Moar engines
Adam Tauber 10 years ago
parent
commit
c169fc3aa2
4 changed files with 178 additions and 0 deletions
  1. 109
    0
      searx/engines/btdigg.py
  2. 59
    0
      searx/engines/mixcloud.py
  3. 8
    0
      searx/settings.yml
  4. 2
    0
      searx/templates/oscar/macros.html

+ 109
- 0
searx/engines/btdigg.py View File

@@ -0,0 +1,109 @@
1
+## BTDigg (Videos, Music, Files)
2
+#
3
+# @website     https://btdigg.org
4
+# @provide-api yes (on demand)
5
+#
6
+# @using-api   no
7
+# @results     HTML (using search portal)
8
+# @stable      no (HTML can change)
9
+# @parse       url, title, content, seed, leech, filesize, files, magnetlink
10
+
11
+from urlparse import urljoin
12
+from cgi import escape
13
+from urllib import quote
14
+from lxml import html
15
+from operator import itemgetter
16
+from searx.engines.xpath import extract_text
17
+
18
# engine dependent config
categories = ['videos', 'music', 'files']
paging = True

# search-url
# request() fills the two placeholders through str.format():
#   {search_term} - the URL-quoted query
#   {pageno}      - the page index placed in the "p" parameter
# Without the placeholders the format() call is a no-op and every search
# would fetch the same fixed result page.
url = 'https://btdigg.org'
search_url = url + '/search?q={search_term}&p={pageno}'

# specific xpath variables
magnet_xpath = './/a[@title="Torrent magnet link"]'
torrent_xpath = './/a[@title="Download torrent file"]'
content_xpath = './/span[@class="font11px lightgrey block"]'
30
+
31
+
32
+# do search-request
33
def request(query, params):
    """Store the BTDigg search URL for ``query`` in ``params['url']``.

    ``query`` is URL-quoted and passed to ``search_url.format()`` as
    ``search_term``; ``params['pageno']`` minus one is passed as ``pageno``.
    The same ``params`` dict is returned after being updated.
    """
    quoted_query = quote(query)
    # presumably the site numbers its pages from zero -- confirm
    page_index = params['pageno'] - 1

    params['url'] = search_url.format(search_term=quoted_query,
                                      pageno=page_index)
    return params
38
+
39
+
40
+# get response from search-request
41
def response(resp):
    """Parse a BTDigg result page into a list of torrent results.

    ``resp.text`` is parsed as HTML and every row found under
    ``//div[@id="search_res"]/table/tr`` is turned into a dict with the keys
    ``url``, ``title``, ``content``, ``seed``, ``leech``, ``filesize``,
    ``files``, ``magnetlink`` and ``template``.

    Returns the dicts sorted by ``seed`` in descending order, or an empty
    list when no result rows are present.
    """
    results = []

    dom = html.fromstring(resp.text)

    search_res = dom.xpath('//div[@id="search_res"]/table/tr')

    # return empty array if nothing is found
    if not search_res:
        return []

    # parse results
    for result in search_res:
        links = result.xpath('.//td[@class="torrent_name"]//a')
        if not links:
            # a row without a title link is not a result row; skip it
            # instead of letting an IndexError discard every other result
            continue
        link = links[0]

        href = urljoin(url, link.attrib['href'])
        title = escape(extract_text(link.xpath('.//text()')))
        content = escape(extract_text(result.xpath('.//pre[@class="snippet"]')[0]))
        content = "<br />".join(content.split("\n"))

        # evaluate the attribute xpath once per row: the values are read
        # positionally as "<size> <unit>", file count, seed count
        attr_values = result.xpath('.//span[@class="attr_val"]/text()')
        size_parts = attr_values[0].split()
        files = attr_values[1]
        seed = attr_values[2]

        # convert seed to int if possible
        seed = int(seed) if seed.isdigit() else 0

        # no leecher count is parsed from the page
        leech = 0

        # convert filesize to byte if possible
        filesize = _filesize_to_bytes(size_parts[0], size_parts[1])

        # convert files to int if possible
        files = int(files) if files.isdigit() else None

        magnetlink = result.xpath('.//td[@class="ttth"]//a')[0].attrib['href']

        # append result
        results.append({'url': href,
                        'title': title,
                        'content': content,
                        'seed': seed,
                        'leech': leech,
                        'filesize': filesize,
                        'files': files,
                        'magnetlink': magnetlink,
                        'template': 'torrent.html'})

    # return results sorted by seeder
    return sorted(results, key=itemgetter('seed'), reverse=True)


# byte multipliers for the recognised size units, keyed by upper-cased unit
_SIZE_MULTIPLIERS = {'TB': 1024 ** 4,
                     'GB': 1024 ** 3,
                     'MB': 1024 ** 2,
                     'KB': 1024}


def _filesize_to_bytes(value, unit):
    """Convert a textual size such as ``('1.5', 'GB')`` to a byte count.

    Returns ``None`` when ``value`` is not a number.  The unit is matched
    case-insensitively so that ``kb`` and ``KB`` are treated alike.  For an
    unrecognised unit the plain float value is returned unscaled.
    """
    try:
        size = float(value)
    except ValueError:
        # only a non-numeric size is expected here; anything else should
        # surface instead of being silently swallowed
        return None

    multiplier = _SIZE_MULTIPLIERS.get(unit.upper())
    if multiplier is None:
        return size
    return int(size * multiplier)

+ 59
- 0
searx/engines/mixcloud.py View File

@@ -0,0 +1,59 @@
1
+## Mixcloud (Music)
2
+#
3
+# @website     https://www.mixcloud.com/
4
+# @provide-api yes (http://www.mixcloud.com/developers/)
5
+#
6
+# @using-api   yes
7
+# @results     JSON
8
+# @stable      yes
9
+# @parse       url, title, content, embedded, publishedDate
10
+
11
+from json import loads
12
+from urllib import urlencode
13
+from dateutil import parser
14
+
15
# engine dependent config
categories = ['music']
paging = True

# search-url
# The API is addressed over https so that search queries are not sent in
# clear text; the embed widget below already uses https.
# {query} receives an url-encoded "q=..." pair and {offset} the index of the
# first result; the page size is fixed at 10 by the "limit" parameter.
url = 'https://api.mixcloud.com/'
search_url = url + 'search/?{query}&type=cloudcast&limit=10&offset={offset}'

# iframe markup for the embedded player; {url} is the cloudcast URL
embedded_url = (
    '<iframe scrolling="no" frameborder="0" allowTransparency="true" '
    'data-src="https://www.mixcloud.com/widget/iframe/?feed={url}" width="300" height="300"></iframe>'
)
25
+
26
+
27
+# do search-request
28
def request(query, params):
    """Store the Mixcloud API search URL for ``query`` in ``params['url']``.

    The result offset is ``(params['pageno'] - 1) * 10`` and the query is
    url-encoded as the ``q`` parameter.  The same ``params`` dict is
    returned after being updated.
    """
    # each page spans 10 results, matching limit=10 in search_url
    first_result = 10 * (params['pageno'] - 1)
    query_string = urlencode({'q': query})

    params['url'] = search_url.format(query=query_string, offset=first_result)
    return params
35
+
36
+
37
+# get response from search-request
38
def response(resp):
    """Turn a Mixcloud API JSON reply into a list of result dicts.

    ``resp.text`` is decoded as JSON and each entry of its ``data`` list
    (an empty list when the key is absent) becomes a dict with the keys
    ``url``, ``title``, ``embedded``, ``publishedDate`` and ``content``.
    """
    payload = loads(resp.text)
    entries = []

    for item in payload.get('data', []):
        name = item['name']
        link = item['url']
        uploader = item['user']['name']
        # player iframe markup pointing at this cloudcast
        player = embedded_url.format(url=link)
        created = parser.parse(item['created_time'])

        entries.append({
            'url': link,
            'title': name,
            'embedded': player,
            'publishedDate': created,
            'content': uploader,
        })

    return entries

+ 8
- 0
searx/settings.yml View File

@@ -33,6 +33,10 @@ engines:
33 33
     locale : en-US
34 34
     shortcut : bin
35 35
 
36
+  - name : btdigg
37
+    engine : btdigg
38
+    shortcut : bt
39
+
36 40
   - name : currency
37 41
     engine : currency_convert
38 42
     categories : general
@@ -136,6 +140,10 @@ engines:
136 140
     categories : music
137 141
     shortcut : gps
138 142
 
143
+  - name : mixcloud
144
+    engine : mixcloud
145
+    shortcut : mc
146
+
139 147
   - name : openstreetmap
140 148
     engine : openstreetmap
141 149
     shortcut : osm

+ 2
- 0
searx/templates/oscar/macros.html View File

@@ -18,6 +18,8 @@
18 18
 {% macro result_sub_header(result) -%}
19 19
     {% if result.publishedDate %}<time class="text-muted" datetime="{{ result.pubdate }}" >{{ result.publishedDate }}</time>{% endif %}
20 20
     <small><a class="text-info" href="https://web.archive.org/web/{{ result.url }}">{{ icon('link') }} {{ _('cached') }}</a></small>
21
+    {% if result.magnetlink %}<small> &bull; <a href="{{ result.magnetlink }}" class="magnetlink">{{ icon('magnet') }} {{ _('magnet link') }}</a></small>{% endif %}
22
+    {% if result.torrentfile %}<small> &bull; <a href="{{ result.torrentfile }}" class="torrentfile">{{ icon('download-alt') }} {{ _('torrent file') }}</a></small>{% endif %}
21 23
 {%- endmacro %}
22 24
 
23 25
 <!-- Draw result footer -->