|  | @@ -0,0 +1,73 @@
 | 
	
		
			
			|  | 1 | +from json import loads
 | 
	
		
			
			|  | 2 | +from urllib import urlencode
 | 
	
		
			
			|  | 3 | +from searx.utils import html_to_text
 | 
	
		
			
			|  | 4 | +from HTMLParser import HTMLParser
 | 
	
		
			
			|  | 5 | +
 | 
	
		
			
			|  | 6 | +url = 'http://www.filecrop.com/'
 | 
	
		
			
			|  | 7 | +search_url = url + '/search.php?w={query}&size_i=0&size_f=100000000&engine_r=1&engine_d=1&engine_e=1&engine_4=1&engine_m=1'
 | 
	
		
			
			|  | 8 | +
 | 
	
		
			
			|  | 9 | +class FilecropResultParser(HTMLParser):
 | 
	
		
			
			|  | 10 | +    def __init__(self):
 | 
	
		
			
			|  | 11 | +        HTMLParser.__init__(self)
 | 
	
		
			
			|  | 12 | +        self.__start_processing = False
 | 
	
		
			
			|  | 13 | +        
 | 
	
		
			
			|  | 14 | +        self.results = []
 | 
	
		
			
			|  | 15 | +        self.result = {}
 | 
	
		
			
			|  | 16 | +
 | 
	
		
			
			|  | 17 | +        self.tr_counter = 0
 | 
	
		
			
			|  | 18 | +        self.data_counter = 0
 | 
	
		
			
			|  | 19 | +
 | 
	
		
			
			|  | 20 | +    def handle_starttag(self, tag, attrs):
 | 
	
		
			
			|  | 21 | +
 | 
	
		
			
			|  | 22 | +        if tag == 'tr':
 | 
	
		
			
			|  | 23 | +            if ('bgcolor', '#edeff5') in attrs or ('bgcolor', '#ffffff') in attrs:
 | 
	
		
			
			|  | 24 | +                self.__start_processing = True
 | 
	
		
			
			|  | 25 | +                
 | 
	
		
			
			|  | 26 | +        if not self.__start_processing:
 | 
	
		
			
			|  | 27 | +            return
 | 
	
		
			
			|  | 28 | +
 | 
	
		
			
			|  | 29 | +        if tag == 'label':
 | 
	
		
			
			|  | 30 | +            self.result['title'] = [attr[1] for attr in attrs if attr[0] == 'title'][0]
 | 
	
		
			
			|  | 31 | +        elif tag == 'a' and ('rel', 'nofollow') in attrs and ('class', 'sourcelink') in attrs:
 | 
	
		
			
			|  | 32 | +            if 'content' in self.result:
 | 
	
		
			
			|  | 33 | +                self.result['content'] += [attr[1] for attr in attrs if attr[0] == 'title'][0]
 | 
	
		
			
			|  | 34 | +            else:
 | 
	
		
			
			|  | 35 | +                self.result['content'] = [attr[1] for attr in attrs if attr[0] == 'title'][0]
 | 
	
		
			
			|  | 36 | +            self.result['content'] += ' '
 | 
	
		
			
			|  | 37 | +        elif tag == 'a':
 | 
	
		
			
			|  | 38 | +            self.result['url'] = url + [attr[1] for attr in attrs if attr[0] == 'href'][0]
 | 
	
		
			
			|  | 39 | +
 | 
	
		
			
			|  | 40 | +    def handle_endtag(self, tag):
 | 
	
		
			
			|  | 41 | +        if self.__start_processing is False:
 | 
	
		
			
			|  | 42 | +            return
 | 
	
		
			
			|  | 43 | +
 | 
	
		
			
			|  | 44 | +        if tag == 'tr':
 | 
	
		
			
			|  | 45 | +            self.tr_counter += 1
 | 
	
		
			
			|  | 46 | +
 | 
	
		
			
			|  | 47 | +            if self.tr_counter == 2:
 | 
	
		
			
			|  | 48 | +                self.__start_processing = False
 | 
	
		
			
			|  | 49 | +                self.tr_counter = 0
 | 
	
		
			
			|  | 50 | +                self.data_counter = 0
 | 
	
		
			
			|  | 51 | +                self.results.append(self.result)
 | 
	
		
			
			|  | 52 | +                self.result = {}
 | 
	
		
			
			|  | 53 | +                                
 | 
	
		
			
			|  | 54 | +    def handle_data(self, data):
 | 
	
		
			
			|  | 55 | +        if not self.__start_processing:
 | 
	
		
			
			|  | 56 | +            return
 | 
	
		
			
			|  | 57 | +
 | 
	
		
			
			|  | 58 | +        if 'content' in self.result:
 | 
	
		
			
			|  | 59 | +            self.result['content'] += data + ' '
 | 
	
		
			
			|  | 60 | +        else:
 | 
	
		
			
			|  | 61 | +            self.result['content'] = data + ' '
 | 
	
		
			
			|  | 62 | +        
 | 
	
		
			
			|  | 63 | +        self.data_counter += 1
 | 
	
		
			
			|  | 64 | +
 | 
	
		
			
			|  | 65 | +def request(query, params):
 | 
	
		
			
			|  | 66 | +    params['url'] = search_url.format(query=urlencode({'q': query}))
 | 
	
		
			
			|  | 67 | +    return params
 | 
	
		
			
			|  | 68 | +
 | 
	
		
			
			|  | 69 | +def response(resp):
 | 
	
		
			
			|  | 70 | +    parser = FilecropResultParser()
 | 
	
		
			
			|  | 71 | +    parser.feed(resp.text)
 | 
	
		
			
			|  | 72 | +
 | 
	
		
			
			|  | 73 | +    return parser.results
 |