gigablast.py 1.6KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869
  1. """
  2. Gigablast (Web)
  3. @website http://gigablast.com
  4. @provide-api yes (http://gigablast.com/api.html)
  5. @using-api yes
  6. @results XML
  7. @stable yes
  8. @parse url, title, content
  9. """
  10. from urllib import urlencode
  11. from cgi import escape
  12. from lxml import etree
  13. from random import randint
  14. from time import time
  15. # engine dependent config
  16. categories = ['general']
  17. paging = True
  18. number_of_results = 5
  19. # search-url, invalid HTTPS certificate
  20. base_url = 'http://gigablast.com/'
  21. search_string = 'search?{query}&n={number_of_results}&s={offset}&xml=1&qh=0&uxid={uxid}&rand={rand}'
  22. # specific xpath variables
  23. results_xpath = '//response//result'
  24. url_xpath = './/url'
  25. title_xpath = './/title'
  26. content_xpath = './/sum'
  27. # do search-request
  28. def request(query, params):
  29. offset = (params['pageno'] - 1) * number_of_results
  30. search_path = search_string.format(
  31. query=urlencode({'q': query}),
  32. offset=offset,
  33. number_of_results=number_of_results,
  34. uxid=randint(10000, 10000000),
  35. rand=int(time()))
  36. params['url'] = base_url + search_path
  37. return params
  38. # get response from search-request
  39. def response(resp):
  40. results = []
  41. dom = etree.fromstring(resp.content)
  42. # parse results
  43. for result in dom.xpath(results_xpath):
  44. url = result.xpath(url_xpath)[0].text
  45. title = result.xpath(title_xpath)[0].text
  46. content = escape(result.xpath(content_xpath)[0].text)
  47. # append result
  48. results.append({'url': url,
  49. 'title': title,
  50. 'content': content})
  51. # return results
  52. return results