
[enh] removing result html tags

asciimoo committed 11 years ago · commit 17bf00ee42
4 files changed, 7 insertions(+), 6 deletions(-)
  1. searx/engines/duckduckgo.py  (+2 -1)
  2. searx/engines/startpage.py  (+2 -2)
  3. searx/engines/twitter.py  (+2 -1)
  4. searx/engines/xpath.py  (+1 -2)

searx/engines/duckduckgo.py  (+2 -1)

@@ -1,5 +1,6 @@
 from json import loads
 from urllib import urlencode
+from searx.utils import html_to_text
 
 url = 'https://duckduckgo.com/'
 search_url = url + 'd.js?{query}&l=us-en&p=1&s=0'
@@ -16,7 +17,7 @@
         if not r.get('t'):
             continue
         results.append({'title': r['t']
-                       ,'content': r['a']
+                       ,'content': html_to_text(r['a'])
                        ,'url': r['u']
                        })
     return results
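
The helper used above, html_to_text, comes from searx.utils. As a rough sketch of what such a tag-stripping helper does (a minimal illustration, not searx's actual implementation; Python 2, matching the engine code):

from HTMLParser import HTMLParser

class TextExtractor(HTMLParser):
    # Collects character data and drops all tags.
    def __init__(self):
        HTMLParser.__init__(self)
        self.parts = []

    def handle_data(self, data):
        self.parts.append(data)

    def handle_entityref(self, name):
        # Keep entity references instead of silently dropping them.
        self.parts.append('&%s;' % name)

def html_to_text(html_str):
    parser = TextExtractor()
    parser.feed(html_str)
    return ''.join(parser.parts)

# html_to_text('a <b>bold</b> snippet') -> 'a bold snippet'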

searx/engines/startpage.py  (+2 -2)

@@ -1,4 +1,4 @@
-from urllib import quote
+from urllib import urlencode
 from lxml import html
 from urlparse import urlparse
 from cgi import escape
@@ -8,7 +8,7 @@
 
 def request(query, params):
     global search_url
-    query = quote(query.replace(' ', '+'), safe='+')
+    query = urlencode({'q': query})[2:]
     params['url'] = search_url
     params['method'] = 'POST'
     params['data'] = {'query': query}
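
Why the [2:] slice: urlencode({'q': query}) returns the string 'q=<encoded value>', so dropping the first two characters leaves just the encoded query. Unlike the old quote() call, urlencode also percent-encodes reserved characters such as '+' inside the query. A quick illustration (Python 2):

from urllib import urlencode, quote

query = 'foo bar+baz'
print urlencode({'q': query})                    # q=foo+bar%2Bbaz
print urlencode({'q': query})[2:]                # foo+bar%2Bbaz
print quote(query.replace(' ', '+'), safe='+')   # foo+bar+baz (old way: a literal '+' is ambiguous)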

searx/engines/twitter.py  (+2 -1)

@@ -1,6 +1,7 @@
 from urlparse import urljoin
 from urllib import urlencode
 from lxml import html
+from cgi import escape
 
 categories = ['social media']
 
@@ -21,6 +22,6 @@
         link = tweet.xpath('.//small[@class="time"]//a')[0]
         url = urljoin(base_url, link.attrib.get('href'))
         title = ''.join(tweet.xpath('.//span[@class="username js-action-profile-name"]//text()'))
-        content = ''.join(map(html.tostring, tweet.xpath('.//p[@class="js-tweet-text tweet-text"]//*')))
+        content = escape(''.join(tweet.xpath('.//p[@class="js-tweet-text tweet-text"]//text()')))
         results.append({'url': url, 'title': title, 'content': content})
     return results
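
The old line serialized each child element of the tweet back to HTML with html.tostring, so raw markup leaked into the result content and any text outside child elements was lost; the new line collects only the text nodes and HTML-escapes them. An illustration with made-up tweet markup (Python 2):

from lxml import html
from cgi import escape

p = html.fromstring('<p>Hello <a href="/searx">@searx</a> &amp; world</p>')

# Old approach: raw tags leak through and the leading 'Hello ' is dropped.
print ''.join(map(html.tostring, p.xpath('.//*')))
# <a href="/searx">@searx</a> &amp; world

# New approach: plain, escaped text.
print escape(''.join(p.xpath('.//text()')))
# Hello @searx &amp; world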

searx/engines/xpath.py  (+1 -2)

@@ -46,12 +46,11 @@
 def response(resp):
     results = []
     dom = html.fromstring(resp.text)
-    query = resp.search_params['query']
     if results_xpath:
         for result in dom.xpath(results_xpath):
             url = extract_url(result.xpath(url_xpath))
             title = ' '.join(result.xpath(title_xpath))
-            content = escape(' '.join(result.xpath(content_xpath))).replace(query, '<b>{0}</b>'.format(query))
+            content = escape(' '.join(result.xpath(content_xpath)))
             results.append({'url': url, 'title': title, 'content': content})
     else:
         for content, url, title in zip(dom.xpath(content_xpath), map(extract_url, dom.xpath(url_xpath)), dom.xpath(title_xpath)):
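
The dropped .replace() call highlighted the query by injecting raw <b> tags into content that had just been escaped, re-introducing exactly the kind of result HTML this commit removes. A sketch of the old behaviour (Python 2):

from cgi import escape

query = 'tags'
content = escape('no more <b>result</b> tags')
print content                                          # no more &lt;b&gt;result&lt;/b&gt; tags
print content.replace(query, '<b>{0}</b>'.format(query))
# no more &lt;b&gt;result&lt;/b&gt; <b>tags</b>  <- raw markup back in the output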