|  | @@ -1,39 +1,61 @@
 | 
	
		
			
			|  | 1 | +## Piratebay (Videos, Music, Files)
 | 
	
		
			
			|  | 2 | +# 
 | 
	
		
			
			|  | 3 | +# @website     https://thepiratebay.se
 | 
	
		
			
			|  | 4 | +# @provide-api no (nothing found)
 | 
	
		
			
			|  | 5 | +# 
 | 
	
		
			
			|  | 6 | +# @using-api   no
 | 
	
		
			
			|  | 7 | +# @results     HTML (using search portal)
 | 
	
		
			
			|  | 8 | +# @stable      yes (HTML can change)
 | 
	
		
			
			|  | 9 | +# @parse       url, title, content, seed, leech, magnetlink
 | 
	
		
			
			|  | 10 | +
 | 
	
		
			
			| 1 | 11 |  from urlparse import urljoin
 | 
	
		
			
			| 2 | 12 |  from cgi import escape
 | 
	
		
			
			| 3 | 13 |  from urllib import quote
 | 
	
		
			
			| 4 | 14 |  from lxml import html
 | 
	
		
			
			| 5 | 15 |  from operator import itemgetter
 | 
	
		
			
			| 6 | 16 |  
 | 
	
		
			
			| 7 |  | -categories = ['videos', 'music']
 | 
	
		
			
			|  | 17 | +# engine dependent config
 | 
	
		
			
			|  | 18 | +categories = ['videos', 'music', 'files']
 | 
	
		
			
			|  | 19 | +paging = True
 | 
	
		
			
			| 8 | 20 |  
 | 
	
		
			
			|  | 21 | +# search-url
 | 
	
		
			
			| 9 | 22 |  url = 'https://thepiratebay.se/'
 | 
	
		
			
			| 10 | 23 |  search_url = url + 'search/{search_term}/{pageno}/99/{search_type}'
 | 
	
		
			
			| 11 |  | -search_types = {'videos': '200',
 | 
	
		
			
			|  | 24 | +
 | 
	
		
			
			|  | 25 | +# piratebay specific type-definitions
 | 
	
		
			
			|  | 26 | +search_types = {'files': '0',                
 | 
	
		
			
			| 12 | 27 |                  'music': '100',
 | 
	
		
			
			| 13 |  | -                'files': '0'}
 | 
	
		
			
			|  | 28 | +                'videos': '200'}
 | 
	
		
			
			| 14 | 29 |  
 | 
	
		
			
			|  | 30 | +# specific xpath variables
 | 
	
		
			
			| 15 | 31 |  magnet_xpath = './/a[@title="Download this torrent using magnet"]'
 | 
	
		
			
			| 16 | 32 |  content_xpath = './/font[@class="detDesc"]//text()'
 | 
	
		
			
			| 17 | 33 |  
 | 
	
		
			
			| 18 |  | -paging = True
 | 
	
		
			
			| 19 |  | -
 | 
	
		
			
			| 20 | 34 |  
 | 
	
		
			
			|  | 35 | +# do search-request
 | 
	
		
			
			def request(query, params):
			    """Build the search URL for a query and store it in ``params['url']``.

			    query  -- search term; percent-encoded before it is placed in the URL
			    params -- request parameters; 'category' and 'pageno' are read and
			              'url' is written

			    Returns the same ``params`` dict that was passed in.
			    """
			    # categories without an entry in search_types fall back to '0',
			    # the code that search_types assigns to 'files'
			    search_type = search_types.get(params['category'], '0')

			    # The term occupies exactly one path segment of search_url. quote()
			    # leaves '/' unescaped by default, so a query such as "AC/DC" would
			    # shift the page and type segments; safe='' encodes it as %2F.
			    search_term = quote(query, safe='')

			    # the URL carries pageno - 1 -- presumably the site counts pages from 0
			    # while params['pageno'] starts at 1 (TODO confirm against caller)
			    params['url'] = search_url.format(search_term=search_term,
			                                      search_type=search_type,
			                                      pageno=params['pageno'] - 1)

			    return params
 | 
	
		
			
			| 27 | 44 |  
 | 
	
		
			
			| 28 | 45 |  
 | 
	
		
			
			|  | 46 | +# get response from search-request
 | 
	
		
			
			| 29 | 47 |  def response(resp):
 | 
	
		
			
			| 30 | 48 |      results = []
 | 
	
		
			
			|  | 49 | +
 | 
	
		
			
			| 31 | 50 |      dom = html.fromstring(resp.text)
 | 
	
		
			
			|  | 51 | +
 | 
	
		
			
			| 32 | 52 |      search_res = dom.xpath('//table[@id="searchResult"]//tr')
 | 
	
		
			
			| 33 | 53 |  
 | 
	
		
			
			|  | 54 | +    # return an empty list if no result rows are found
 | 
	
		
			
			| 34 | 55 |      if not search_res:
 | 
	
		
			
			| 35 |  | -        return results
 | 
	
		
			
			|  | 56 | +        return []
 | 
	
		
			
			| 36 | 57 |  
 | 
	
		
			
			|  | 58 | +    # parse results
 | 
	
		
			
			| 37 | 59 |      for result in search_res[1:]:
 | 
	
		
			
			| 38 | 60 |          link = result.xpath('.//div[@class="detName"]//a')[0]
 | 
	
		
			
			| 39 | 61 |          href = urljoin(url, link.attrib.get('href'))
 | 
	
	
		
			
			|  | @@ -41,17 +63,21 @@ def response(resp):
 | 
	
		
			
			| 41 | 63 |          content = escape(' '.join(result.xpath(content_xpath)))
 | 
	
		
			
			| 42 | 64 |          seed, leech = result.xpath('.//td[@align="right"]/text()')[:2]
 | 
	
		
			
			| 43 | 65 |  
 | 
	
		
			
			|  | 66 | +        # convert seed to int, falling back to 0 if it is not numeric
 | 
	
		
			
			| 44 | 67 |          if seed.isdigit():
 | 
	
		
			
			| 45 | 68 |              seed = int(seed)
 | 
	
		
			
			| 46 | 69 |          else:
 | 
	
		
			
			| 47 | 70 |              seed = 0
 | 
	
		
			
			| 48 | 71 |  
 | 
	
		
			
			|  | 72 | +        # convert leech to int, falling back to 0 if it is not numeric
 | 
	
		
			
			| 49 | 73 |          if leech.isdigit():
 | 
	
		
			
			| 50 | 74 |              leech = int(leech)
 | 
	
		
			
			| 51 | 75 |          else:
 | 
	
		
			
			| 52 | 76 |              leech = 0
 | 
	
		
			
			| 53 | 77 |  
 | 
	
		
			
			| 54 | 78 |          magnetlink = result.xpath(magnet_xpath)[0]
 | 
	
		
			
			|  | 79 | +
 | 
	
		
			
			|  | 80 | +        # append result
 | 
	
		
			
			| 55 | 81 |          results.append({'url': href,
 | 
	
		
			
			| 56 | 82 |                          'title': title,
 | 
	
		
			
			| 57 | 83 |                          'content': content,
 | 
	
	
		
			
			|  | @@ -60,4 +86,5 @@ def response(resp):
 | 
	
		
			
			| 60 | 86 |                          'magnetlink': magnetlink.attrib['href'],
 | 
	
		
			
			| 61 | 87 |                          'template': 'torrent.html'})
 | 
	
		
			
			| 62 | 88 |  
 | 
	
		
			
			|  | 89 | +    # return results sorted by seeder count, highest first
 | 
	
		
			
			| 63 | 90 |      return sorted(results, key=itemgetter('seed'), reverse=True)
 |