Browse Source

Added seedpeer to config and added seedpeer search parser

Pydo 8 years ago
parent
commit
2c2123b2e8
2 changed files with 82 additions and 0 deletions
  1. 78
    0
      searx/engines/seedpeer.py
  2. 4
    0
      searx/settings.yml

+ 78
- 0
searx/engines/seedpeer.py View File

@@ -0,0 +1,78 @@
1
+#  Seedpeer (Videos, Music, Files)
2
+#
3
+# @website     http://seedpeer.eu
4
+# @provide-api no (nothing found)
5
+#
6
+# @using-api   no
7
+# @results     HTML (using search portal)
8
+# @stable      yes (HTML can change)
9
+# @parse       url, title, content, seed, leech, magnetlink
10
+
11
+from urlparse import urljoin
12
+from cgi import escape
13
+from urllib import quote
14
+from lxml import html
15
+from operator import itemgetter
16
+from searx.engines.xpath import extract_text
17
+
18
+
19
# Base address of the site; also used to resolve relative result links.
url = 'http://www.seedpeer.eu/'
# request() fills {search_term} with the quoted query and {page_no} with
# params['pageno'] - 1.
search_url = url + 'search/{search_term}/7/{page_no}.html'

# specific xpath variables
#
# Each column has two expressions: the primary one targets the second table
# inside the body, the ``alternative_*`` one is what response() falls back to
# when the primary torrent xpath matches nothing (pages with only a few
# results use a different layout).
torrent_xpath = '//*[@id="body"]/center/center/table[2]/tr/td/a'
alternative_torrent_xpath = '//*[@id="body"]/center/center/table[1]/tr/td/a'
# Select the anchor *elements* (not their text() nodes): response() calls
# .text_content() on every title, which only exists on elements. This also
# matches what alternative_title_xpath already selects.
title_xpath = '//*[@id="body"]/center/center/table[2]/tr/td/a'
alternative_title_xpath = '//*[@id="body"]/center/center/table/tr/td/a'
seeds_xpath = '//*[@id="body"]/center/center/table[2]/tr/td[4]/font/text()'
alternative_seeds_xpath = '//*[@id="body"]/center/center/table/tr/td[4]/font/text()'
peers_xpath = '//*[@id="body"]/center/center/table[2]/tr/td[5]/font/text()'
alternative_peers_xpath = '//*[@id="body"]/center/center/table/tr/td[5]/font/text()'
age_xpath = '//*[@id="body"]/center/center/table[2]/tr/td[2]/text()'
alternative_age_xpath = '//*[@id="body"]/center/center/table/tr/td[2]/text()'
size_xpath = '//*[@id="body"]/center/center/table[2]/tr/td[3]/text()'
alternative_size_xpath = '//*[@id="body"]/center/center/table/tr/td[3]/text()'
34
+
35
+
36
+# do search-request
37
def request(query, params):
    """Store the search URL for *query* in ``params`` and return ``params``.

    The query is percent-quoted and substituted into ``search_url`` together
    with ``params['pageno'] - 1`` as the page number; the resulting URL is
    written to ``params['url']``.
    """
    quoted_term = quote(query)
    page_index = params['pageno'] - 1
    params['url'] = search_url.format(search_term=quoted_term,
                                      page_no=page_index)
    return params
41
+
42
+
43
+# get response from search-request
44
def response(resp):
    """Parse a search result page into a list of torrent result dicts.

    The page in ``resp.text`` is queried with the primary xpath expressions;
    when the primary torrent xpath matches nothing, the ``alternative_*``
    expressions are used instead (pages with only a few results use a
    different layout).

    Returns a list of dicts with the keys ``url``, ``title``, ``content``,
    ``seed``, ``leech`` and ``template``, sorted by ``seed`` in descending
    numeric order. Returns an empty list when no torrent link is found.
    """
    dom = html.fromstring(resp.text)

    torrent_links = dom.xpath(torrent_xpath)
    if torrent_links:
        seeds = dom.xpath(seeds_xpath)
        peers = dom.xpath(peers_xpath)
        titles = dom.xpath(title_xpath)
        sizes = dom.xpath(size_xpath)
        ages = dom.xpath(age_xpath)
    else:  # under ~5 results uses a different xpath
        torrent_links = dom.xpath(alternative_torrent_xpath)
        seeds = dom.xpath(alternative_seeds_xpath)
        peers = dom.xpath(alternative_peers_xpath)
        titles = dom.xpath(alternative_title_xpath)
        sizes = dom.xpath(alternative_size_xpath)
        ages = dom.xpath(alternative_age_xpath)

    # return empty array if nothing is found
    if not torrent_links:
        return []

    # parse results
    results = []
    # zip() stops at the shortest column, so a row with a missing cell can no
    # longer raise IndexError while the result dicts are being built.
    columns = zip(torrent_links, titles, sizes, ages, seeds, peers)
    for link, title, size, age, seed, peer in columns:
        results.append({'url': urljoin(url, link.attrib.get('href')),
                        'title': _node_text(title),
                        'content': '{}, {}'.format(size, age),
                        # numeric, so the sort below orders 100 above 9
                        'seed': _to_count(seed),
                        'leech': _to_count(peer),
                        'template': 'torrent.html'})

    # return results sorted by seeder
    return sorted(results, key=itemgetter('seed'), reverse=True)


def _node_text(node):
    """Return the text of an xpath match that is an element or a string."""
    text_content = getattr(node, 'text_content', None)
    if text_content is not None:
        return text_content()
    # text() matches are already strings and have no text_content()
    return node


def _to_count(value):
    """Convert a scraped seeder/leecher cell to an int; 0 if not numeric."""
    try:
        return int(_node_text(value).strip().replace(',', ''))
    except ValueError:
        return 0

+ 4
- 0
searx/settings.yml View File

@@ -495,6 +495,10 @@ engines:
495 495
     timeout: 6.0
496 496
     categories : science
497 497
 
498
+  - name : seedpeer
499
+    engine : seedpeer
500
+    shortcut: speu
501
+
498 502
 #The blekko technology and team have joined IBM Watson! -> https://blekko.com/
499 503
 #  - name : blekko images
500 504
 #    engine : blekko_images