# google_news.py
  1. ## Google (News)
  2. #
  3. # @website https://www.google.com
  4. # @provide-api yes (https://developers.google.com/web-search/docs/), deprecated!
  5. #
  6. # @using-api yes
  7. # @results JSON
  8. # @stable yes (but deprecated)
  9. # @parse url, title, content, publishedDate
  10. from urllib import urlencode
  11. from json import loads
  12. from dateutil import parser
  13. # search-url
  14. categories = ['news']
  15. paging = True
  16. language_support = True
  17. # engine dependent config
  18. url = 'https://ajax.googleapis.com/'
  19. search_url = url + 'ajax/services/search/news?v=2.0&start={offset}&rsz=large&safe=off&filter=off&{query}&hl={language}' # noqa
  20. # do search-request
  21. def request(query, params):
  22. offset = (params['pageno'] - 1) * 8
  23. language = 'en-US'
  24. if params['language'] != 'all':
  25. language = params['language'].replace('_', '-')
  26. params['url'] = search_url.format(offset=offset,
  27. query=urlencode({'q': query}),
  28. language=language)
  29. return params
  30. # get response from search-request
  31. def response(resp):
  32. results = []
  33. search_res = loads(resp.text)
  34. # return empty array if there are no results
  35. if not search_res.get('responseData', {}).get('results'):
  36. return []
  37. # parse results
  38. for result in search_res['responseData']['results']:
  39. # parse publishedDate
  40. publishedDate = parser.parse(result['publishedDate'])
  41. # append result
  42. results.append({'url': result['unescapedUrl'],
  43. 'title': result['titleNoFormatting'],
  44. 'publishedDate': publishedDate,
  45. 'content': result['content']})
  46. # return results
  47. return results