reddit.py 2.1KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778
  1. """
  2. Reddit
  3. @website https://www.reddit.com/
  4. @provide-api yes (https://www.reddit.com/dev/api)
  5. @using-api yes
  6. @results JSON
  7. @stable yes
  8. @parse url, title, content, thumbnail, publishedDate
  9. """
  10. import json
  11. from urllib import urlencode
  12. from urlparse import urlparse, urljoin
  13. from datetime import datetime
  14. # engine dependent config
  15. categories = ['general', 'images', 'news', 'social media']
  16. page_size = 25
  17. # search-url
  18. base_url = 'https://www.reddit.com/'
  19. search_url = base_url + 'search.json?{query}'
  20. # do search-request
  21. def request(query, params):
  22. query = urlencode({'q': query,
  23. 'limit': page_size})
  24. params['url'] = search_url.format(query=query)
  25. return params
  26. # get response from search-request
  27. def response(resp):
  28. img_results = []
  29. text_results = []
  30. search_results = json.loads(resp.text)
  31. # return empty array if there are no results
  32. if 'data' not in search_results:
  33. return []
  34. posts = search_results.get('data', {}).get('children', [])
  35. # process results
  36. for post in posts:
  37. data = post['data']
  38. # extract post information
  39. params = {
  40. 'url': urljoin(base_url, data['permalink']),
  41. 'title': data['title']
  42. }
  43. # if thumbnail field contains a valid URL, we need to change template
  44. thumbnail = data['thumbnail']
  45. url_info = urlparse(thumbnail)
  46. # netloc & path
  47. if url_info[1] != '' and url_info[2] != '':
  48. params['img_src'] = data['url']
  49. params['thumbnail_src'] = thumbnail
  50. params['template'] = 'images.html'
  51. img_results.append(params)
  52. else:
  53. created = datetime.fromtimestamp(data['created_utc'])
  54. content = data['selftext']
  55. if len(content) > 500:
  56. content = content[:500] + '...'
  57. params['content'] = content
  58. params['publishedDate'] = created
  59. text_results.append(params)
  60. # show images first and text results second
  61. return img_results + text_results