# dictzone.py -- searx engine that queries dictzone.com dictionary pages

  1. import re
  2. from urlparse import urljoin
  3. from lxml import html
  4. from searx.engines.xpath import extract_text
  5. from searx.languages import language_codes
  6. categories = ['general']
  7. url = 'http://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}'
  8. weight = 100
  9. parser_re = re.compile(u'.*?([a-z]+)-([a-z]+) (.+)', re.I)
  10. results_xpath = './/table[@id="r"]/tr'
  11. def request(query, params):
  12. m = parser_re.match(unicode(query, 'utf8'))
  13. if not m:
  14. return params
  15. from_lang, to_lang, query = m.groups()
  16. if len(from_lang) == 2:
  17. lan = filter(lambda x: x[0][:2] == from_lang, language_codes)
  18. if lan:
  19. from_lang = lan[0][1].lower()
  20. else:
  21. return params
  22. elif from_lang.lower() not in [x[1].lower() for x in language_codes]:
  23. return params
  24. if len(to_lang) == 2:
  25. lan = filter(lambda x: x[0][:2] == to_lang, language_codes)
  26. if lan:
  27. to_lang = lan[0][1].lower()
  28. else:
  29. return params
  30. elif to_lang.lower() not in [x[1].lower() for x in language_codes]:
  31. return params
  32. params['url'] = url.format(from_lang=from_lang, to_lang=to_lang,query=query)
  33. params['from_lang'] = from_lang
  34. params['to_lang'] = to_lang
  35. params['query'] = query
  36. return params
  37. def response(resp):
  38. results = []
  39. dom = html.fromstring(resp.text)
  40. for k, result in enumerate(dom.xpath(results_xpath)[1:]):
  41. try:
  42. from_result, to_results_raw = result.xpath('./td')
  43. except:
  44. continue
  45. to_results = []
  46. for to_result in to_results_raw.xpath('./p/a'):
  47. t = to_result.text_content()
  48. if t.strip():
  49. to_results.append(to_result.text_content())
  50. results.append({
  51. 'url': urljoin(resp.url, '?%d' % k),
  52. 'title': from_result.text_content(),
  53. 'content': '; '.join(to_results)
  54. })
  55. return results