|  | @@ -10,17 +10,19 @@
 | 
	
		
			
			| 10 | 10 |   @parse       url, title, content, publishedDate, embedded
 | 
	
		
			
			| 11 | 11 |  """
 | 
	
		
			
			| 12 | 12 |  
 | 
	
		
			
			|  | 13 | +import re
 | 
	
		
			
			|  | 14 | +from StringIO import StringIO
 | 
	
		
			
			| 13 | 15 |  from json import loads
 | 
	
		
			
			|  | 16 | +from lxml import etree
 | 
	
		
			
			| 14 | 17 |  from urllib import urlencode, quote_plus
 | 
	
		
			
			| 15 | 18 |  from dateutil import parser
 | 
	
		
			
			|  | 19 | +from searx import logger
 | 
	
		
			
			|  | 20 | +from searx.poolrequests import get as http_get
 | 
	
		
			
			| 16 | 21 |  
 | 
	
		
			
			| 17 | 22 |  # engine dependent config
 | 
	
		
			
			| 18 | 23 |  categories = ['music']
 | 
	
		
			
			| 19 | 24 |  paging = True
 | 
	
		
			
			| 20 | 25 |  
 | 
	
		
			
			| 21 |  | -# api-key
 | 
	
		
			
			| 22 |  | -guest_client_id = 'b45b1aa10f1ac2941910a7f0d10f8e28'
 | 
	
		
			
			| 23 |  | -
 | 
	
		
			
			| 24 | 26 |  # search-url
 | 
	
		
			
			| 25 | 27 |  url = 'https://api.soundcloud.com/'
 | 
	
		
			
			| 26 | 28 |  search_url = url + 'search?{query}'\
 | 
	
	
		
			
			|  | @@ -35,6 +37,30 @@ embedded_url = '<iframe width="100%" height="166" ' +\
 | 
	
		
			
			| 35 | 37 |      'data-src="https://w.soundcloud.com/player/?url={uri}"></iframe>'
 | 
	
		
			
			| 36 | 38 |  
 | 
	
		
			
			| 37 | 39 |  
 | 
	
		
			
			def get_client_id():
			    """Scrape a guest ``client_id`` from SoundCloud's public web pages.

			    Downloads https://soundcloud.com, collects every ``<script>`` tag whose
			    ``src`` attribute matches ``(.*app.*js)`` and fetches those scripts one
			    by one, in document order, looking for a ``client_id:"..."`` assignment.

			    Returns:
			        The first captured client id, or an empty string when the landing
			        page or every candidate script could not be fetched, or when none
			        of the scripts contained a client id. A warning is logged in the
			        empty-string case.
			    """
			    landing_page = http_get("https://soundcloud.com")
			    # EXSLT namespace that makes ``re:match`` available inside the XPath query
			    exslt_ns = {"re": "http://exslt.org/regular-expressions"}

			    if landing_page.ok:
			        document = etree.parse(StringIO(landing_page.content), etree.HTMLParser())
			        script_nodes = document.xpath("//script[re:match(@src, '(.*app.*js)')]", namespaces=exslt_ns)

			        # try every candidate app script until one yields a client id
			        for script_node in script_nodes:
			            script = http_get(script_node.get('src'))
			            if not script.ok:
			                continue

			            found = re.search(r'client_id:"([^"]*)"', script.content, re.M | re.I)
			            if found is not None:
			                return found.group(1)

			    logger.warning("Unable to fetch guest client_id from SoundCloud, check parser!")
			    return ""
 | 
	
		
			
			|  | 59 | +
 | 
	
		
			
			|  | 60 | +# api-key
 | 
	
		
			
			|  | 61 | +guest_client_id = get_client_id()
 | 
	
		
			
			|  | 62 | +
 | 
	
		
			
			|  | 63 | +
 | 
	
		
			
			| 38 | 64 |  # do search-request
 | 
	
		
			
			| 39 | 65 |  def request(query, params):
 | 
	
		
			
			| 40 | 66 |      offset = (params['pageno'] - 1) * 20
 |