|
@@ -2,6 +2,7 @@ from lxml import html
|
2
|
2
|
from urllib import urlencode, unquote
|
3
|
3
|
from urlparse import urlparse, urljoin
|
4
|
4
|
from lxml.etree import _ElementStringResult
|
|
5
|
+from searx.utils import html_to_text
|
5
|
6
|
|
6
|
7
|
search_url = None
|
7
|
8
|
url_xpath = None
|
|
@@ -33,7 +34,7 @@ def extract_text(xpath_results):
|
33
|
34
|
return ''.join(xpath_results)
|
34
|
35
|
else:
|
35
|
36
|
# it's a element
|
36
|
|
- return xpath_results.text_content()
|
|
37
|
+ return html_to_text(xpath_results.text_content())
|
37
|
38
|
|
38
|
39
|
|
39
|
40
|
def extract_url(xpath_results):
|