| 
				
			 | 
			
			
				@@ -1,48 +1,38 @@ 
			 | 
		
	
		
			
			| 
				1
			 | 
			
				1
			 | 
			
			
				 from urllib import urlencode 
			 | 
		
	
		
			
			| 
				2
			 | 
			
				2
			 | 
			
			
				 from HTMLParser import HTMLParser 
			 | 
		
	
		
			
			| 
				3
			 | 
			
				
			 | 
			
			
				-from xpath import * 
			 | 
		
	
		
			
			| 
				
			 | 
			
				3
			 | 
			
			
				+from xpath import extract_text 
			 | 
		
	
		
			
			| 
				
			 | 
			
				4
			 | 
			
			
				+from lxml import html 
			 | 
		
	
		
			
			| 
				4
			 | 
			
				5
			 | 
			
			
				  
			 | 
		
	
		
			
			| 
				5
			 | 
			
				
			 | 
			
			
				-categories = ['dev'] 
			 | 
		
	
		
			
			| 
				6
			 | 
			
				
			 | 
			
			
				- 
			 | 
		
	
		
			
			| 
				7
			 | 
			
				
			 | 
			
			
				-search_url = 'http://vimeo.com/search?{query}' 
			 | 
		
	
		
			
			| 
				
			 | 
			
				6
			 | 
			
			
				+base_url = 'http://vimeo.com' 
			 | 
		
	
		
			
			| 
				
			 | 
			
				7
			 | 
			
			
				+search_url = base_url + '/search?{query}' 
			 | 
		
	
		
			
			| 
				8
			 | 
			
				8
			 | 
			
			
				  
			 | 
		
	
		
			
			| 
				
			 | 
			
				9
			 | 
			
			
				+# the cookie set by vimeo contains all of the following values, but only __utma seems to be required 
			 | 
		
	
		
			
			| 
				9
			 | 
			
				10
			 | 
			
			
				 Cookie = { 
			 | 
		
	
		
			
			| 
				10
			 | 
			
				
			 | 
			
			
				-    'vuid':'918282893.1027205400' 
			 | 
		
	
		
			
			| 
				11
			 | 
			
				
			 | 
			
			
				-    , 'ab_bs':'%7B%223%22%3A279%7D' 
			 | 
		
	
		
			
			| 
				12
			 | 
			
				
			 | 
			
			
				-    , '__utma':'18302654.101#6140782.1388942090.1388942090.1388942090.1' 
			 | 
		
	
		
			
			| 
				13
			 | 
			
				
			 | 
			
			
				-    , '__utmb':'18302654.1.10.1388942090' 
			 | 
		
	
		
			
			| 
				14
			 | 
			
				
			 | 
			
			
				-    , '__utmc':'18302654' 
			 | 
		
	
		
			
			| 
				15
			 | 
			
				
			 | 
			
			
				-    , '__utmz':'18#302654.1388942090.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)' 
			 | 
		
	
		
			
			| 
				16
			 | 
			
				
			 | 
			
			
				-    , '__utml':'search' 
			 | 
		
	
		
			
			| 
				
			 | 
			
				11
			 | 
			
			
				+    #'vuid':'918282893.1027205400' 
			 | 
		
	
		
			
			| 
				
			 | 
			
				12
			 | 
			
			
				+    # 'ab_bs':'%7B%223%22%3A279%7D' 
			 | 
		
	
		
			
			| 
				
			 | 
			
				13
			 | 
			
			
				+     '__utma':'00000000.000#0000000.0000000000.0000000000.0000000000.0' 
			 | 
		
	
		
			
			| 
				
			 | 
			
				14
			 | 
			
			
				+    # '__utmb':'18302654.1.10.1388942090' 
			 | 
		
	
		
			
			| 
				
			 | 
			
				15
			 | 
			
			
				+    #, '__utmc':'18302654' 
			 | 
		
	
		
			
			| 
				
			 | 
			
				16
			 | 
			
			
				+    #, '__utmz':'18#302654.1388942090.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)' 
			 | 
		
	
		
			
			| 
				
			 | 
			
				17
			 | 
			
			
				+    #, '__utml':'search' 
			 | 
		
	
		
			
			| 
				17
			 | 
			
				18
			 | 
			
			
				 } 
			 | 
		
	
		
			
			| 
				18
			 | 
			
				19
			 | 
			
			
				  
			 | 
		
	
		
			
			| 
				19
			 | 
			
				
			 | 
			
			
				-#'vuid=918282893.1027205400& ab_bs=%7B%223%22%3A279%7D& player="scaling=1&volume=1"& __utma=18302654.101#6140782.1388942090.1388942090.1388942090.1& __utmb=18302654.1.10.1388942090& __utmc=18302654& __utmz=18#302654.1388942090.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)& __utmli=search' 
			 | 
		
	
		
			
			| 
				20
			 | 
			
				
			 | 
			
			
				- 
			 | 
		
	
		
			
			| 
				21
			 | 
			
				20
			 | 
			
			
				 def request(query, params): 
			 | 
		
	
		
			
			| 
				22
			 | 
			
				21
			 | 
			
			
				     params['url'] = search_url.format(query=urlencode({'q' :query})) 
			 | 
		
	
		
			
			| 
				23
			 | 
			
				22
			 | 
			
			
				     print params['url'] 
			 | 
		
	
		
			
			| 
				24
			 | 
			
				23
			 | 
			
			
				     params['cookies'] = Cookie 
			 | 
		
	
		
			
			| 
				25
			 | 
			
				24
			 | 
			
			
				     return params 
			 | 
		
	
		
			
			| 
				26
			 | 
			
				25
			 | 
			
			
				  
			 | 
		
	
		
			
			| 
				27
			 | 
			
				
			 | 
			
			
				- 
			 | 
		
	
		
			
			| 
				28
			 | 
			
				26
			 | 
			
			
				 def response(resp): 
			 | 
		
	
		
			
			| 
				29
			 | 
			
				27
			 | 
			
			
				     results = [] 
			 | 
		
	
		
			
			| 
				30
			 | 
			
				
			 | 
			
			
				-     
			 | 
		
	
		
			
			| 
				31
			 | 
			
				28
			 | 
			
			
				     dom = html.fromstring(resp.text) 
			 | 
		
	
		
			
			| 
				
			 | 
			
				29
			 | 
			
			
				+     
			 | 
		
	
		
			
			| 
				
			 | 
			
				30
			 | 
			
			
				+    p = HTMLParser() 
			 | 
		
	
		
			
			| 
				32
			 | 
			
				31
			 | 
			
			
				  
			 | 
		
	
		
			
			| 
				33
			 | 
			
				
			 | 
			
			
				-    if results_xpath: 
			 | 
		
	
		
			
			| 
				34
			 | 
			
				
			 | 
			
			
				-        for result in dom.xpath(results_xpath): 
			 | 
		
	
		
			
			| 
				35
			 | 
			
				
			 | 
			
			
				-            url = extract_url(result.xpath(url_xpath)) 
			 | 
		
	
		
			
			| 
				36
			 | 
			
				
			 | 
			
			
				- 
			 | 
		
	
		
			
			| 
				37
			 | 
			
				
			 | 
			
			
				-            title = extract_text(result.xpath(title_xpath)[0 ]) 
			 | 
		
	
		
			
			| 
				38
			 | 
			
				
			 | 
			
			
				-            content = extract_text(result.xpath(content_xpath)[0]) 
			 | 
		
	
		
			
			| 
				39
			 | 
			
				
			 | 
			
			
				-            results.append({'url': url, 'title': title, 'content': content}) 
			 | 
		
	
		
			
			| 
				40
			 | 
			
				
			 | 
			
			
				-    else: 
			 | 
		
	
		
			
			| 
				41
			 | 
			
				
			 | 
			
			
				-        for url, title, content in zip(     
			 | 
		
	
		
			
			| 
				42
			 | 
			
				
			 | 
			
			
				-            map(extract_url, dom.xpath(url_xpath)), \ 
			 | 
		
	
		
			
			| 
				43
			 | 
			
				
			 | 
			
			
				-            map(extract_text, dom.xpath(title_xpath)), \ 
			 | 
		
	
		
			
			| 
				44
			 | 
			
				
			 | 
			
			
				-            map(extract_text, dom.xpath(content_xpath)), \ 
			 | 
		
	
		
			
			| 
				45
			 | 
			
				
			 | 
			
			
				-                ): 
			 | 
		
	
		
			
			| 
				46
			 | 
			
				
			 | 
			
			
				-            results.append({'url': url, 'title': title, 'content': content}) 
			 | 
		
	
		
			
			| 
				
			 | 
			
				32
			 | 
			
			
				+    for result in dom.xpath(results_xpath): 
			 | 
		
	
		
			
			| 
				
			 | 
			
				33
			 | 
			
			
				+        url = base_url + result.xpath(url_xpath)[0] 
			 | 
		
	
		
			
			| 
				
			 | 
			
				34
			 | 
			
			
				+        title = p.unescape(extract_text(result.xpath(title_xpath))) 
			 | 
		
	
		
			
			| 
				
			 | 
			
				35
			 | 
			
			
				+        content = '<a href="{0}">  <img src="{2}"/> </a>'.format(url, title, extract_text(result.xpath(content_xpath)[0])) 
			 | 
		
	
		
			
			| 
				
			 | 
			
				36
			 | 
			
			
				+        results.append({'url': url, 'title': title, 'content': content}) 
			 | 
		
	
		
			
			| 
				47
			 | 
			
				37
			 | 
			
			
				  
			 | 
		
	
		
			
			| 
				48
			 | 
			
				38
			 | 
			
			
				     return results 
			 |