|  | @@ -0,0 +1,35 @@
 | 
	
		
			
			|  | 1 | +from lxml import html
 | 
	
		
			
			|  | 2 | +
 | 
	
		
			
			|  | 3 | +
 | 
	
		
			
			|  | 4 | +base_url = 'http://www.general-file.com'
 | 
	
		
			
			|  | 5 | +search_url = base_url + '/files-{letter}/{query}/{pageno}'
 | 
	
		
			
			|  | 6 | +
 | 
	
		
			
			|  | 7 | +result_xpath = '//table[@class="block-file"]'
 | 
	
		
			
			|  | 8 | +title_xpath = './/h2/a//text()'
 | 
	
		
			
			|  | 9 | +url_xpath = './/h2/a/@href'
 | 
	
		
			
			|  | 10 | +content_xpath = './/p//text()'
 | 
	
		
			
			|  | 11 | +
 | 
	
		
			
			|  | 12 | +paging = True
 | 
	
		
			
			|  | 13 | +
 | 
	
		
			
			|  | 14 | +
 | 
	
		
			
			|  | 15 | +def request(query, params):
 | 
	
		
			
			|  | 16 | +    params['url'] = search_url.format(query=query,
 | 
	
		
			
			|  | 17 | +                                      letter=query[0],
 | 
	
		
			
			|  | 18 | +                                      pageno=params['pageno'])
 | 
	
		
			
			|  | 19 | +    return params
 | 
	
		
			
			|  | 20 | +
 | 
	
		
			
			|  | 21 | +
 | 
	
		
			
			|  | 22 | +def response(resp):
 | 
	
		
			
			|  | 23 | +
 | 
	
		
			
			|  | 24 | +    results = []
 | 
	
		
			
			|  | 25 | +    dom = html.fromstring(resp.text)
 | 
	
		
			
			|  | 26 | +    for result in dom.xpath(result_xpath):
 | 
	
		
			
			|  | 27 | +        url = result.xpath(url_xpath)[0]
 | 
	
		
			
			|  | 28 | +        # skip fast download links
 | 
	
		
			
			|  | 29 | +        if not url.startswith('/'):
 | 
	
		
			
			|  | 30 | +            continue
 | 
	
		
			
			|  | 31 | +        results.append({'url': base_url + url,
 | 
	
		
			
			|  | 32 | +                        'title': ''.join(result.xpath(title_xpath)),
 | 
	
		
			
			|  | 33 | +                        'content': ''.join(result.xpath(content_xpath))})
 | 
	
		
			
			|  | 34 | +
 | 
	
		
			
			|  | 35 | +    return results
 |