Browse Source

[fix] html tag removal

asciimoo 11 years ago
parent
commit
59eeeaab87
1 changed file with 2 additions and 1 deletion
  1. 2
    1
      searx/engines/xpath.py

+ 2
- 1
searx/engines/xpath.py View File

@@ -2,6 +2,7 @@ from lxml import html
2 2
 from urllib import urlencode, unquote
3 3
 from urlparse import urlparse, urljoin
4 4
 from lxml.etree import _ElementStringResult
5
+from searx.utils import html_to_text
5 6
 
6 7
 search_url = None
7 8
 url_xpath = None
@@ -33,7 +34,7 @@ def extract_text(xpath_results):
33 34
         return ''.join(xpath_results)
34 35
     else:
35 36
         # it's a element
36
-        return xpath_results.text_content()
37
+        return html_to_text(xpath_results.text_content())
37 38
 
38 39
 
39 40
 def extract_url(xpath_results):