|  | @@ -0,0 +1,108 @@
 | 
	
		
			
## Faroo (Web, News)
#
# @website     http://www.faroo.com
# @provide-api yes (http://www.faroo.com/hp/api/api.html), requires an API key
#
# @using-api   yes
# @results     JSON
# @stable      yes
# @parse       url, title, content, publishedDate, img_src
 | 
	
		
			
			|  | 10 | +
 | 
	
		
			
			|  | 11 | +from urllib import urlencode
 | 
	
		
			
			|  | 12 | +from json import loads
 | 
	
		
			
			|  | 13 | +import datetime
 | 
	
		
			
			|  | 14 | +from searx.utils import searx_useragent
 | 
	
		
			
			|  | 15 | +
 | 
	
		
			
			|  | 16 | +# engine dependent config
 | 
	
		
			
			|  | 17 | +categories = ['general', 'news']
 | 
	
		
			
			|  | 18 | +paging = True
 | 
	
		
			
			|  | 19 | +language_support = True
 | 
	
		
			
			|  | 20 | +number_of_results = 10
 | 
	
		
			
			|  | 21 | +api_key = None
 | 
	
		
			
			|  | 22 | +
 | 
	
		
			
			|  | 23 | +# search-url
 | 
	
		
			
			|  | 24 | +url = 'http://www.faroo.com/'
 | 
	
		
			
			|  | 25 | +search_url = url + 'api?{query}&start={offset}&length={number_of_results}&l={language}&src={categorie}&i=false&f=json&key={api_key}'
 | 
	
		
			
			|  | 26 | +
 | 
	
		
			
			|  | 27 | +search_category = {'general': 'web',                
 | 
	
		
			
			|  | 28 | +                'news': 'news'}
 | 
	
		
			
			|  | 29 | +
 | 
	
		
			
			|  | 30 | +# do search-request
 | 
	
		
			
			|  | 31 | +def request(query, params):
 | 
	
		
			
			|  | 32 | +    offset = (params['pageno'] - 1) * number_of_results
 | 
	
		
			
			|  | 33 | +    categorie = search_category.get(params['category'], 'web')
 | 
	
		
			
			|  | 34 | +
 | 
	
		
			
			|  | 35 | +    if params['language'] == 'all':
 | 
	
		
			
			|  | 36 | +        language = 'en'
 | 
	
		
			
			|  | 37 | +    else:
 | 
	
		
			
			|  | 38 | +        language = params['language'].split('_')[0]
 | 
	
		
			
			|  | 39 | +
 | 
	
		
			
			|  | 40 | +    # skip, if language is not supported
 | 
	
		
			
			|  | 41 | +    if language != 'en' and\
 | 
	
		
			
			|  | 42 | +       language != 'de' and\
 | 
	
		
			
			|  | 43 | +       language != 'zh':
 | 
	
		
			
			|  | 44 | +        return params
 | 
	
		
			
			|  | 45 | +
 | 
	
		
			
			|  | 46 | +    params['url'] = search_url.format(offset=offset,
 | 
	
		
			
			|  | 47 | +                                      number_of_results=number_of_results,
 | 
	
		
			
			|  | 48 | +                                      query=urlencode({'q': query}),
 | 
	
		
			
			|  | 49 | +                                      language=language,
 | 
	
		
			
			|  | 50 | +                                      categorie=categorie,
 | 
	
		
			
			|  | 51 | +                                      api_key=api_key )
 | 
	
		
			
			|  | 52 | +
 | 
	
		
			
			|  | 53 | +    # using searx User-Agent
 | 
	
		
			
			|  | 54 | +    params['headers']['User-Agent'] = searx_useragent()
 | 
	
		
			
			|  | 55 | +
 | 
	
		
			
			|  | 56 | +    return params
 | 
	
		
			
			|  | 57 | +
 | 
	
		
			
			|  | 58 | +
 | 
	
		
			
			|  | 59 | +# get response from search-request
 | 
	
		
			
			|  | 60 | +def response(resp):
 | 
	
		
			
			|  | 61 | +    # HTTP-Code 401: api-key is not valide
 | 
	
		
			
			|  | 62 | +    if resp.status_code == 401:
 | 
	
		
			
			|  | 63 | +        raise Exception("API key is not valide")
 | 
	
		
			
			|  | 64 | +        return []
 | 
	
		
			
			|  | 65 | +
 | 
	
		
			
			|  | 66 | +    # HTTP-Code 429: rate limit exceeded
 | 
	
		
			
			|  | 67 | +    if resp.status_code == 429:
 | 
	
		
			
			|  | 68 | +        raise Exception("rate limit has been exceeded!")
 | 
	
		
			
			|  | 69 | +        return []
 | 
	
		
			
			|  | 70 | +
 | 
	
		
			
			|  | 71 | +    results = []
 | 
	
		
			
			|  | 72 | +
 | 
	
		
			
			|  | 73 | +    search_res = loads(resp.text)
 | 
	
		
			
			|  | 74 | +
 | 
	
		
			
			|  | 75 | +    # return empty array if there are no results
 | 
	
		
			
			|  | 76 | +    if not search_res.get('results', {}):
 | 
	
		
			
			|  | 77 | +        return []
 | 
	
		
			
			|  | 78 | +
 | 
	
		
			
			|  | 79 | +    # parse results
 | 
	
		
			
			|  | 80 | +    for result in search_res['results']:
 | 
	
		
			
			|  | 81 | +        if result['news'] == 'true':
 | 
	
		
			
			|  | 82 | +            # timestamp (how many milliseconds have passed between now and the beginning of 1970)
 | 
	
		
			
			|  | 83 | +            publishedDate = datetime.datetime.fromtimestamp(result['date']/1000.0)
 | 
	
		
			
			|  | 84 | +
 | 
	
		
			
			|  | 85 | +            # append news result
 | 
	
		
			
			|  | 86 | +            results.append({'url': result['url'],
 | 
	
		
			
			|  | 87 | +                            'title': result['title'],
 | 
	
		
			
			|  | 88 | +                            'publishedDate': publishedDate,
 | 
	
		
			
			|  | 89 | +                            'content': result['kwic']})
 | 
	
		
			
			|  | 90 | +
 | 
	
		
			
			|  | 91 | +        else:
 | 
	
		
			
			|  | 92 | +            # append general result
 | 
	
		
			
			|  | 93 | +            # TODO, publishedDate correct?
 | 
	
		
			
			|  | 94 | +            results.append({'url': result['url'],
 | 
	
		
			
			|  | 95 | +                            'title': result['title'],
 | 
	
		
			
			|  | 96 | +                            'content': result['kwic']})
 | 
	
		
			
			|  | 97 | +
 | 
	
		
			
			|  | 98 | +            # append image result if image url is set
 | 
	
		
			
			|  | 99 | +            # TODO, show results with an image like in faroo
 | 
	
		
			
			|  | 100 | +            if result['iurl']:
 | 
	
		
			
			|  | 101 | +                results.append({'template': 'images.html',
 | 
	
		
			
			|  | 102 | +                                'url': result['url'],
 | 
	
		
			
			|  | 103 | +                                'title': result['title'],
 | 
	
		
			
			|  | 104 | +                                'content': result['kwic'],  
 | 
	
		
			
			|  | 105 | +                                'img_src': result['iurl']})
 | 
	
		
			
			|  | 106 | +
 | 
	
		
			
			|  | 107 | +    # return results
 | 
	
		
			
			|  | 108 | +    return results
 |