faroo.py 3.3KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109
  1. ## Faroo (Web, News)
  2. #
  3. # @website http://www.faroo.com
  4. # @provide-api yes (http://www.faroo.com/hp/api/api.html), require API-key
  5. #
  6. # @using-api yes
  7. # @results JSON
  8. # @stable yes
  9. # @parse url, title, content, publishedDate, img_src
  10. from urllib import urlencode
  11. from json import loads
  12. import datetime
  13. from searx.utils import searx_useragent
  14. # engine dependent config
  15. categories = ['general', 'news']
  16. paging = True
  17. language_support = True
  18. number_of_results = 10
  19. api_key = None
  20. # search-url
  21. url = 'http://www.faroo.com/'
  22. search_url = url + 'api?{query}&start={offset}&length={number_of_results}&l={language}&src={categorie}&i=false&f=json&key={api_key}'
  23. search_category = {'general': 'web',
  24. 'news': 'news'}
  25. # do search-request
  26. def request(query, params):
  27. offset = (params['pageno']-1) * number_of_results + 1
  28. categorie = search_category.get(params['category'], 'web')
  29. if params['language'] == 'all':
  30. language = 'en'
  31. else:
  32. language = params['language'].split('_')[0]
  33. # skip, if language is not supported
  34. if language != 'en' and\
  35. language != 'de' and\
  36. language != 'zh':
  37. return params
  38. params['url'] = search_url.format(offset=offset,
  39. number_of_results=number_of_results,
  40. query=urlencode({'q': query}),
  41. language=language,
  42. categorie=categorie,
  43. api_key=api_key)
  44. # using searx User-Agent
  45. params['headers']['User-Agent'] = searx_useragent()
  46. return params
  47. # get response from search-request
  48. def response(resp):
  49. # HTTP-Code 401: api-key is not valide
  50. if resp.status_code == 401:
  51. raise Exception("API key is not valide")
  52. return []
  53. # HTTP-Code 429: rate limit exceeded
  54. if resp.status_code == 429:
  55. raise Exception("rate limit has been exceeded!")
  56. return []
  57. results = []
  58. search_res = loads(resp.text)
  59. # return empty array if there are no results
  60. if not search_res.get('results', {}):
  61. return []
  62. # parse results
  63. for result in search_res['results']:
  64. if result['news']:
  65. # timestamp (how many milliseconds have passed between now and the beginning of 1970)
  66. publishedDate = datetime.datetime.fromtimestamp(result['date']/1000.0)
  67. # append news result
  68. results.append({'url': result['url'],
  69. 'title': result['title'],
  70. 'publishedDate': publishedDate,
  71. 'content': result['kwic']})
  72. else:
  73. # append general result
  74. # TODO, publishedDate correct?
  75. results.append({'url': result['url'],
  76. 'title': result['title'],
  77. 'content': result['kwic']})
  78. # append image result if image url is set
  79. # TODO, show results with an image like in faroo
  80. if result['iurl']:
  81. results.append({'template': 'images.html',
  82. 'url': result['url'],
  83. 'title': result['title'],
  84. 'content': result['kwic'],
  85. 'img_src': result['iurl']})
  86. # return results
  87. return results