reddit.py 2.1KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879
  1. """
  2. Reddit
  3. @website https://www.reddit.com/
  4. @provide-api yes (https://www.reddit.com/dev/api)
  5. @using-api yes
  6. @results JSON
  7. @stable yes
  8. @parse url, title, content, thumbnail, publishedDate
  9. """
  10. import json
  11. from cgi import escape
  12. from urllib import urlencode
  13. from urlparse import urlparse, urljoin
  14. from datetime import datetime
  15. # engine dependent config
  16. categories = ['general', 'images', 'news', 'social media']
  17. page_size = 25
  18. # search-url
  19. base_url = 'https://www.reddit.com/'
  20. search_url = base_url + 'search.json?{query}'
  21. # do search-request
  22. def request(query, params):
  23. query = urlencode({'q': query,
  24. 'limit': page_size})
  25. params['url'] = search_url.format(query=query)
  26. return params
  27. # get response from search-request
  28. def response(resp):
  29. img_results = []
  30. text_results = []
  31. search_results = json.loads(resp.text)
  32. # return empty array if there are no results
  33. if 'data' not in search_results:
  34. return []
  35. posts = search_results.get('data', {}).get('children', [])
  36. # process results
  37. for post in posts:
  38. data = post['data']
  39. # extract post information
  40. params = {
  41. 'url': urljoin(base_url, data['permalink']),
  42. 'title': data['title']
  43. }
  44. # if thumbnail field contains a valid URL, we need to change template
  45. thumbnail = data['thumbnail']
  46. url_info = urlparse(thumbnail)
  47. # netloc & path
  48. if url_info[1] != '' and url_info[2] != '':
  49. params['img_src'] = data['url']
  50. params['thumbnail_src'] = thumbnail
  51. params['template'] = 'images.html'
  52. img_results.append(params)
  53. else:
  54. created = datetime.fromtimestamp(data['created_utc'])
  55. content = escape(data['selftext'])
  56. if len(content) > 500:
  57. content = content[:500] + '...'
  58. params['content'] = content
  59. params['publishedDate'] = created
  60. text_results.append(params)
  61. # show images first and text results second
  62. return img_results + text_results