|  | @@ -0,0 +1,81 @@
 | 
	
		
			
			|  | 1 | +## 1x (Images)
 | 
	
		
			
			|  | 2 | +#
 | 
	
		
			
			|  | 3 | +# @website     http://1x.com/
 | 
	
		
			
			|  | 4 | +# @provide-api no
 | 
	
		
			
			|  | 5 | +#
 | 
	
		
			
			|  | 6 | +# @using-api   no
 | 
	
		
			
			|  | 7 | +# @results     HTML
 | 
	
		
			
			|  | 8 | +# @stable      no (HTML can change)
 | 
	
		
			
			|  | 9 | +# @parse       url, title, thumbnail, img_src, content
 | 
	
		
			
			|  | 10 | +
 | 
	
		
			
			|  | 11 | +
 | 
	
		
			
			|  | 12 | +from urllib import urlencode
 | 
	
		
			
			|  | 13 | +from urlparse import urljoin
 | 
	
		
			
			|  | 14 | +from lxml import html
 | 
	
		
			
			|  | 15 | +import string
 | 
	
		
			
			|  | 16 | +import re
 | 
	
		
			
			|  | 17 | +
 | 
	
		
			
			# engine dependent config: this engine only serves the image
			# category and does not page through results
			categories = ['images']
			paging = False

			# search-url: the query string is substituted for {query}
			base_url = 'http://1x.com'
			search_url = base_url + '/backend/search.php?{query}'
 | 
	
		
			
			|  | 25 | +
 | 
	
		
			
			|  | 26 | +
 | 
	
		
			
			|  | 27 | +# do search-request
 | 
	
		
			
			def request(query, params):
			    """Store the search URL for ``query`` in ``params`` and return it.

			    :param query: search terms; url-encoded as the ``q`` parameter.
			    :param params: request parameter dict; its ``'url'`` key is set.
			    :returns: the same ``params`` dict.
			    """
			    encoded_query = urlencode({'q': query})
			    params['url'] = search_url.format(query=encoded_query)
			    return params
 | 
	
		
			
			|  | 32 | +
 | 
	
		
			
			|  | 33 | +
 | 
	
		
			
			|  | 34 | +# get response from search-request
 | 
	
		
			
			def response(resp):
			    """Build image results from the text of a search response.

			    ``resp.text`` is split on ``<a`` / ``</a>`` and every anchor fragment
			    is parsed on its own. Anchors that do not point to a ``/photo/`` url,
			    or that carry no usable ``<img src=...>``, are skipped.

			    :param resp: response object; only ``resp.text`` is read.
			    :returns: list of result dicts for the ``images.html`` template.
			    """
			    results = []

			    # the capture group keeps the delimiters in the list, so each anchor
			    # can be reassembled piece by piece
			    results_parts = re.split(r'(</a>|<a)', resp.text)

			    cur_element = ''

			    # iterate over link parts
			    for result_part in results_parts:
			        # start of a link: begin a fresh fragment
			        if result_part == '<a':
			            cur_element = result_part
			            continue
			        # inside (or between) links: keep collecting text
			        elif result_part != '</a>':
			            cur_element += result_part
			            continue

			        # end of a link: take the fragment and reset the buffer so a
			        # stray '</a>' cannot re-emit the previous anchor
			        fragment = cur_element + result_part
			        cur_element = ''

			        # a closing tag without a matching opening one is not a link
			        if not fragment.startswith('<a'):
			            continue

			        # fix xml-error: close the tag that directly precedes '</a>'
			        fragment = fragment.replace('"></a>', '"/></a>')

			        dom = html.fromstring(fragment)
			        links = dom.xpath('//a')
			        if not links:
			            continue
			        link = links[0]

			        url = urljoin(base_url, link.attrib.get('href'))

			        # check that the url points to a photo before touching the
			        # image, so unrelated links without <img> are simply ignored
			        if '/photo/' not in url:
			            continue

			        title = link.attrib.get('title', '')

			        images = link.xpath('.//img')
			        src = images[0].attrib.get('src') if images else None
			        if not src:
			            # photo link without a thumbnail: nothing to display
			            continue

			        thumbnail_src = urljoin(base_url, src)
			        # TODO: get image with higher resolution
			        img_src = thumbnail_src

			        # append result
			        results.append({'url': url,
			                        'title': title,
			                        'img_src': img_src,
			                        'content': '',
			                        'thumbnail_src': thumbnail_src,
			                        'template': 'images.html'})

			    # return results
			    return results
 |