|
@@ -1,6 +1,7 @@
|
1
|
1
|
#!/usr/bin/env python
|
2
|
2
|
|
3
|
3
|
from urllib import urlencode
|
|
4
|
+from urlparse import unquote
|
4
|
5
|
from lxml import html
|
5
|
6
|
from searx.engines.xpath import extract_text, extract_url
|
6
|
7
|
|
|
@@ -33,7 +34,10 @@ def response(resp):
|
33
|
34
|
dom = html.fromstring(resp.text)
|
34
|
35
|
|
35
|
36
|
for result in dom.xpath(results_xpath):
|
36
|
|
- url = extract_url(result.xpath(url_xpath), search_url)
|
|
37
|
+ url_string = extract_url(result.xpath(url_xpath), search_url)
|
|
38
|
+ start = url_string.find('/RU=')+4
|
|
39
|
+ end = url_string.rfind('/RS')
|
|
40
|
+ url = unquote(url_string[start:end])
|
37
|
41
|
title = extract_text(result.xpath(title_xpath)[0])
|
38
|
42
|
content = extract_text(result.xpath(content_xpath)[0])
|
39
|
43
|
results.append({'url': url, 'title': title, 'content': content})
|