| 
				
			 | 
			
			
				@@ -10,17 +10,19 @@ 
			 | 
		
	
		
			
			| 
				10
			 | 
			
				10
			 | 
			
			
				  @parse       url, title, content, publishedDate, embedded 
			 | 
		
	
		
			
			| 
				11
			 | 
			
				11
			 | 
			
			
				 """ 
			 | 
		
	
		
			
			| 
				12
			 | 
			
				12
			 | 
			
			
				  
			 | 
		
	
		
			
			| 
				
			 | 
			
				13
			 | 
			
			
				+import re 
			 | 
		
	
		
			
			| 
				
			 | 
			
				14
			 | 
			
			
				+from StringIO import StringIO 
			 | 
		
	
		
			
			| 
				13
			 | 
			
				15
			 | 
			
			
				 from json import loads 
			 | 
		
	
		
			
			| 
				
			 | 
			
				16
			 | 
			
			
				+from lxml import etree 
			 | 
		
	
		
			
			| 
				14
			 | 
			
				17
			 | 
			
			
				 from urllib import urlencode, quote_plus 
			 | 
		
	
		
			
			| 
				15
			 | 
			
				18
			 | 
			
			
				 from dateutil import parser 
			 | 
		
	
		
			
			| 
				
			 | 
			
				19
			 | 
			
			
				+from searx import logger 
			 | 
		
	
		
			
			| 
				
			 | 
			
				20
			 | 
			
			
				+from searx.poolrequests import get as http_get 
			 | 
		
	
		
			
			| 
				16
			 | 
			
				21
			 | 
			
			
				  
			 | 
		
	
		
			
			| 
				17
			 | 
			
				22
			 | 
			
			
				 # engine dependent config 
			 | 
		
	
		
			
			| 
				18
			 | 
			
				23
			 | 
			
			
				 categories = ['music'] 
			 | 
		
	
		
			
			| 
				19
			 | 
			
				24
			 | 
			
			
				 paging = True 
			 | 
		
	
		
			
			| 
				20
			 | 
			
				25
			 | 
			
			
				  
			 | 
		
	
		
			
			| 
				21
			 | 
			
				
			 | 
			
			
				-# api-key 
			 | 
		
	
		
			
			| 
				22
			 | 
			
				
			 | 
			
			
				-guest_client_id = 'b45b1aa10f1ac2941910a7f0d10f8e28' 
			 | 
		
	
		
			
			| 
				23
			 | 
			
				
			 | 
			
			
				- 
			 | 
		
	
		
			
			| 
				24
			 | 
			
				26
			 | 
			
			
				 # search-url 
			 | 
		
	
		
			
			| 
				25
			 | 
			
				27
			 | 
			
			
				 url = 'https://api.soundcloud.com/' 
			 | 
		
	
		
			
			| 
				26
			 | 
			
				28
			 | 
			
			
				 search_url = url + 'search?{query}'\ 
			 | 
		
	
	
		
			
			| 
				
			 | 
			
			
				@@ -35,6 +37,30 @@ embedded_url = '<iframe width="100%" height="166" ' +\ 
			 | 
		
	
		
			
			| 
				35
			 | 
			
				37
			 | 
			
			
				     'data-src="https://w.soundcloud.com/player/?url={uri}"></iframe>' 
			 | 
		
	
		
			
			| 
				36
			 | 
			
				38
			 | 
			
			
				  
			 | 
		
	
		
			
			| 
				37
			 | 
			
				39
			 | 
			
			
				  
			 | 
		
	
		
			
			| 
				
			 | 
			
				40
			 | 
			
			
				+def get_client_id(): 
			 | 
		
	
		
			
			| 
				
			 | 
			
				41
			 | 
			
			
				+    response = http_get("https://soundcloud.com") 
			 | 
		
	
		
			
			| 
				
			 | 
			
				42
			 | 
			
			
				+    rx_namespace = {"re": "http://exslt.org/regular-expressions"} 
			 | 
		
	
		
			
			| 
				
			 | 
			
				43
			 | 
			
			
				+ 
			 | 
		
	
		
			
			| 
				
			 | 
			
				44
			 | 
			
			
				+    if response.ok: 
			 | 
		
	
		
			
			| 
				
			 | 
			
				45
			 | 
			
			
				+        tree = etree.parse(StringIO(response.content), etree.HTMLParser()) 
			 | 
		
	
		
			
			| 
				
			 | 
			
				46
			 | 
			
			
				+        script_tags = tree.xpath("//script[re:match(@src, '(.*app.*js)')]", namespaces=rx_namespace) 
			 | 
		
	
		
			
			| 
				
			 | 
			
				47
			 | 
			
			
				+        app_js_urls = [script_tag.get('src') for script_tag in script_tags if script_tag is not None] 
			 | 
		
	
		
			
			| 
				
			 | 
			
				48
			 | 
			
			
				+ 
			 | 
		
	
		
			
			| 
				
			 | 
			
				49
			 | 
			
			
				+        # extracts valid app_js urls from soundcloud.com content 
			 | 
		
	
		
			
			| 
				
			 | 
			
				50
			 | 
			
			
				+        for app_js_url in app_js_urls: 
			 | 
		
	
		
			
			| 
				
			 | 
			
				51
			 | 
			
			
				+            # gets app_js and searches for the clientid 
			 | 
		
	
		
			
			| 
				
			 | 
			
				52
			 | 
			
			
				+            response = http_get(app_js_url) 
			 | 
		
	
		
			
			| 
				
			 | 
			
				53
			 | 
			
			
				+            if response.ok: 
			 | 
		
	
		
			
			| 
				
			 | 
			
				54
			 | 
			
			
				+                cids = re.search(r'client_id:"([^"]*)"', response.content, re.M | re.I) 
			 | 
		
	
		
			
			| 
				
			 | 
			
				55
			 | 
			
			
				+                if cids is not None and len(cids.groups()): 
			 | 
		
	
		
			
			| 
				
			 | 
			
				56
			 | 
			
			
				+                    return cids.groups()[0] 
			 | 
		
	
		
			
			| 
				
			 | 
			
				57
			 | 
			
			
				+    logger.warning("Unable to fetch guest client_id from SoundCloud, check parser!") 
			 | 
		
	
		
			
			| 
				
			 | 
			
				58
			 | 
			
			
				+    return "" 
			 | 
		
	
		
			
			| 
				
			 | 
			
				59
			 | 
			
			
				+ 
			 | 
		
	
		
			
			| 
				
			 | 
			
				60
			 | 
			
			
				+# api-key 
			 | 
		
	
		
			
			| 
				
			 | 
			
				61
			 | 
			
			
				+guest_client_id = get_client_id() 
			 | 
		
	
		
			
			| 
				
			 | 
			
				62
			 | 
			
			
				+ 
			 | 
		
	
		
			
			| 
				
			 | 
			
				63
			 | 
			
			
				+ 
			 | 
		
	
		
			
			| 
				38
			 | 
			
				64
			 | 
			
			
				 # do search-request 
			 | 
		
	
		
			
			| 
				39
			 | 
			
				65
			 | 
			
			
				 def request(query, params): 
			 | 
		
	
		
			
			| 
				40
			 | 
			
				66
			 | 
			
			
				     offset = (params['pageno'] - 1) * 20 
			 |