subtitleseeker.py 1.9KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374
  1. ## Subtitleseeker (Video)
  2. #
  3. # @website http://www.subtitleseeker.com
  4. # @provide-api no
  5. #
  6. # @using-api no
  7. # @results HTML
  8. # @stable no (HTML can change)
  9. # @parse url, title, content
  10. from cgi import escape
  11. from urllib import quote_plus
  12. from lxml import html
  13. from searx.languages import language_codes
  14. # engine dependent config
  15. categories = ['videos']
  16. paging = True
  17. language = ""
  18. # search-url
  19. url = 'http://www.subtitleseeker.com/'
  20. search_url = url+'search/TITLES/{query}&p={pageno}'
  21. # specific xpath variables
  22. results_xpath = '//div[@class="boxRows"]'
  23. # do search-request
  24. def request(query, params):
  25. params['url'] = search_url.format(query=quote_plus(query),
  26. pageno=params['pageno'])
  27. return params
  28. # get response from search-request
  29. def response(resp):
  30. results = []
  31. dom = html.fromstring(resp.text)
  32. search_lang = ""
  33. if resp.search_params['language'] != 'all':
  34. search_lang = [lc[1]
  35. for lc in language_codes
  36. if lc[0][:2] == resp.search_params['language']][0]
  37. # parse results
  38. for result in dom.xpath(results_xpath):
  39. link = result.xpath(".//a")[0]
  40. href = link.attrib.get('href')
  41. if language is not "":
  42. href = href + language + '/'
  43. elif search_lang:
  44. href = href + search_lang + '/'
  45. title = escape(link.xpath(".//text()")[0])
  46. content = result.xpath('.//div[contains(@class,"red")]//text()')[0]
  47. content = content + " - "
  48. content = content + html.tostring(result.xpath('.//div[contains(@class,"grey-web")]')[0], method='text')
  49. if result.xpath(".//span") != []:
  50. content = content + " - (" + result.xpath(".//span//text()")[0].strip() + ")"
  51. # append result
  52. results.append({'url': href,
  53. 'title': title,
  54. 'content': escape(content)})
  55. # return results
  56. return results