|
@@ -10,9 +10,11 @@
|
10
|
10
|
@parse url, title, content
|
11
|
11
|
"""
|
12
|
12
|
|
13
|
|
-from urllib import urlencode
|
14
|
|
-from json import loads
|
15
|
13
|
from datetime import datetime
|
|
14
|
+from json import loads
|
|
15
|
+from urllib import urlencode
|
|
16
|
+
|
|
17
|
+from searx.utils import html_to_text
|
16
|
18
|
|
17
|
19
|
# engine dependent config
|
18
|
20
|
categories = None
|
|
@@ -66,9 +68,9 @@ def response(resp):
|
66
|
68
|
# parse results
|
67
|
69
|
for result in res.get('items', {}):
|
68
|
70
|
|
69
|
|
- title = result['title']
|
|
71
|
+ title = html_to_text(result['title'])
|
70
|
72
|
res_url = result['url']
|
71
|
|
- content = result['desc']
|
|
73
|
+ content = html_to_text(result['desc'])
|
72
|
74
|
|
73
|
75
|
if category_to_keyword.get(categories[0], '') == 'web':
|
74
|
76
|
results.append({'title': title,
|