|  | @@ -1,35 +1,60 @@
 | 
	
		
			
			|  | 1 | +## General Files (Files)
 | 
	
		
			
			|  | 2 | +# 
 | 
	
		
			
			|  | 3 | +# @website     http://www.general-files.org
 | 
	
		
			
			|  | 4 | +# @provide-api no (nothing found)
 | 
	
		
			
			|  | 5 | +# 
 | 
	
		
			
			|  | 6 | +# @using-api   no (because nothing found)
 | 
	
		
			
			|  | 7 | +# @results     HTML (using search portal)
 | 
	
		
			
			|  | 8 | +# @stable      no (HTML can change)
 | 
	
		
			
			|  | 9 | +# @parse       url, title, content
 | 
	
		
			
			|  | 10 | +#
 | 
	
		
			
			|  | 11 | +# @todo        detect torrents?
 | 
	
		
			
			|  | 12 | +
 | 
	
		
			
			| 1 | 13 |  from lxml import html
 | 
	
		
			
			| 2 | 14 |  
 | 
	
		
			
			|  | 15 | +# engine dependent config
 | 
	
		
			
			|  | 16 | +categories = ['files']
 | 
	
		
			
			|  | 17 | +paging = True
 | 
	
		
			
			| 3 | 18 |  
 | 
	
		
			
			|  | 19 | +# search-url
 | 
	
		
			
			| 4 | 20 |  base_url = 'http://www.general-file.com'
 | 
	
		
			
			| 5 | 21 |  search_url = base_url + '/files-{letter}/{query}/{pageno}'
 | 
	
		
			
			| 6 | 22 |  
 | 
	
		
			
			|  | 23 | +# specific xpath variables
 | 
	
		
			
			| 7 | 24 |  result_xpath = '//table[@class="block-file"]'
 | 
	
		
			
			| 8 | 25 |  title_xpath = './/h2/a//text()'
 | 
	
		
			
			| 9 | 26 |  url_xpath = './/h2/a/@href'
 | 
	
		
			
			| 10 | 27 |  content_xpath = './/p//text()'
 | 
	
		
			
			| 11 | 28 |  
 | 
	
		
			
			| 12 |  | -paging = True
 | 
	
		
			
			| 13 |  | -
 | 
	
		
			
			| 14 | 29 |  
 | 
	
		
			
			|  | 30 | +# do search-request
 | 
	
		
			
			| 15 | 31 |  def request(query, params):
 | 
	
		
			
			|  | 32 | +
 | 
	
		
			
			| 16 | 33 |      params['url'] = search_url.format(query=query,
 | 
	
		
			
			| 17 | 34 |                                        letter=query[0],
 | 
	
		
			
			| 18 | 35 |                                        pageno=params['pageno'])
 | 
	
		
			
			|  | 36 | +
 | 
	
		
			
			| 19 | 37 |      return params
 | 
	
		
			
			| 20 | 38 |  
 | 
	
		
			
			| 21 | 39 |  
 | 
	
		
			
			|  | 40 | +# get response from search-request
 | 
	
		
			
			| 22 | 41 |  def response(resp):
 | 
	
		
			
			| 23 |  | -
 | 
	
		
			
			| 24 | 42 |      results = []
 | 
	
		
			
			|  | 43 | +
 | 
	
		
			
			| 25 | 44 |      dom = html.fromstring(resp.text)
 | 
	
		
			
			|  | 45 | +
 | 
	
		
			
			|  | 46 | +    # parse results
 | 
	
		
			
			| 26 | 47 |      for result in dom.xpath(result_xpath):
 | 
	
		
			
			| 27 | 48 |          url = result.xpath(url_xpath)[0]
 | 
	
		
			
			|  | 49 | +
 | 
	
		
			
			| 28 | 50 |          # skip fast download links
 | 
	
		
			
			| 29 | 51 |          if not url.startswith('/'):
 | 
	
		
			
			| 30 | 52 |              continue
 | 
	
		
			
			|  | 53 | +
 | 
	
		
			
			|  | 54 | +        # append result
 | 
	
		
			
			| 31 | 55 |          results.append({'url': base_url + url,
 | 
	
		
			
			| 32 | 56 |                          'title': ''.join(result.xpath(title_xpath)),
 | 
	
		
			
			| 33 | 57 |                          'content': ''.join(result.xpath(content_xpath))})
 | 
	
		
			
			| 34 | 58 |  
 | 
	
		
			
			|  | 59 | +    # return results
 | 
	
		
			
			| 35 | 60 |      return results
 |