Merge pull request #913 from asciimoo/py3

Add Python3 compatibility
Adam Tauber, 8 years ago
commit 4cffd78650
100 changed files with 442 additions and 440 deletions
  1. .travis.yml (+3 −2)
  2. requirements-dev.txt (+1 −2)
  3. searx/answerers/__init__.py (+8 −4)
  4. searx/answerers/random/answerer.py (+9 −4)
  5. searx/answerers/statistics/answerer.py (+10 −6)
  6. searx/autocomplete.py (+5 −1)
  7. searx/engines/1337x.py (+1 −2)
  8. searx/engines/__init__.py (+2 −3)
  9. searx/engines/archlinux.py (+1 −2)
  10. searx/engines/base.py (+3 −3)
  11. searx/engines/bing.py (+1 −1)
  12. searx/engines/bing_images.py (+1 −1)
  13. searx/engines/bing_news.py (+2 −3)
  14. searx/engines/blekko_images.py (+1 −1)
  15. searx/engines/btdigg.py (+2 −3)
  16. searx/engines/currency_convert.py (+9 −5)
  17. searx/engines/dailymotion.py (+1 −2)
  18. searx/engines/deezer.py (+2 −3)
  19. searx/engines/deviantart.py (+1 −1)
  20. searx/engines/dictzone.py (+3 −3)
  21. searx/engines/digbt.py (+6 −2)
  22. searx/engines/digg.py (+2 −2)
  23. searx/engines/doku.py (+1 −1)
  24. searx/engines/duckduckgo.py (+1 −1)
  25. searx/engines/duckduckgo_definitions.py (+3 −3)
  26. searx/engines/faroo.py (+1 −1)
  27. searx/engines/fdroid.py (+3 −4)
  28. searx/engines/filecrop.py (+7 −4)
  29. searx/engines/flickr.py (+1 −1)
  30. searx/engines/flickr_noapi.py (+1 −1)
  31. searx/engines/framalibre.py (+1 −3)
  32. searx/engines/frinkiac.py (+1 −1)
  33. searx/engines/gigablast.py (+1 −2)
  34. searx/engines/github.py (+1 −1)
  35. searx/engines/google.py (+2 −3)
  36. searx/engines/google_images.py (+1 −1)
  37. searx/engines/google_news.py (+1 −2)
  38. searx/engines/ina.py (+7 −3)
  39. searx/engines/json_engine.py (+8 −3)
  40. searx/engines/kickass.py (+1 −2)
  41. searx/engines/mediawiki.py (+1 −1)
  42. searx/engines/mixcloud.py (+1 −1)
  43. searx/engines/nyaa.py (+1 −1)
  44. searx/engines/openstreetmap.py (+0 −4)
  45. searx/engines/photon.py (+1 −1)
  46. searx/engines/piratebay.py (+1 −2)
  47. searx/engines/qwant.py (+1 −2)
  48. searx/engines/reddit.py (+2 −4)
  49. searx/engines/scanr_structures.py (+1 −3)
  50. searx/engines/searchcode_code.py (+2 −3)
  51. searx/engines/searchcode_doc.py (+2 −3)
  52. searx/engines/seedpeer.py (+1 −3)
  53. searx/engines/soundcloud.py (+12 −7)
  54. searx/engines/spotify.py (+2 −3)
  55. searx/engines/stackoverflow.py (+2 −4)
  56. searx/engines/startpage.py (+1 −1)
  57. searx/engines/subtitleseeker.py (+1 −1)
  58. searx/engines/swisscows.py (+13 −14)
  59. searx/engines/tokyotoshokan.py (+5 −6)
  60. searx/engines/torrentz.py (+4 −4)
  61. searx/engines/translated.py (+4 −0)
  62. searx/engines/twitter.py (+1 −2)
  63. searx/engines/vimeo.py (+1 −1)
  64. searx/engines/wikidata.py (+5 −8)
  65. searx/engines/wikipedia.py (+9 −12)
  66. searx/engines/wolframalpha_api.py (+6 −7)
  67. searx/engines/wolframalpha_noapi.py (+4 −5)
  68. searx/engines/www1x.py (+2 −4)
  69. searx/engines/www500px.py (+1 −2)
  70. searx/engines/xpath.py (+2 −2)
  71. searx/engines/yacy.py (+1 −1)
  72. searx/engines/yahoo.py (+1 −2)
  73. searx/engines/yahoo_news.py (+3 −3)
  74. searx/engines/yandex.py (+2 −2)
  75. searx/engines/youtube_api.py (+1 −1)
  76. searx/engines/youtube_noapi.py (+1 −1)
  77. searx/plugins/__init__.py (+4 −1)
  78. searx/plugins/doai_rewrite.py (+1 −1)
  79. searx/plugins/https_rewrite.py (+4 −1)
  80. searx/plugins/self_info.py (+2 −2)
  81. searx/plugins/tracker_url_remover.py (+1 −1)
  82. searx/preferences.py (+9 −9)
  83. searx/query.py (+6 −2)
  84. searx/results.py (+5 −1)
  85. searx/search.py (+10 −2)
  86. searx/settings_robot.yml (+1 −1)
  87. searx/templates/courgette/404.html (+1 −1)
  88. searx/templates/legacy/404.html (+1 −1)
  89. searx/templates/oscar/404.html (+1 −1)
  90. searx/templates/pix-art/404.html (+1 −1)
  91. searx/testing.py (+26 −16)
  92. searx/url_utils.py (+28 −0)
  93. searx/utils.py (+18 −8)
  94. searx/webapp.py (+23 −13)
  95. tests/robot/__init__.py (+75 −0)
  96. tests/robot/test_basic.robot (+0 −153)
  97. tests/unit/engines/test_archlinux.py (+2 −2)
  98. tests/unit/engines/test_bing.py (+3 −3)
  99. tests/unit/engines/test_bing_news.py (+6 −6)
  100. tests/unit/engines/test_btdigg.py (+0 −0)
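
Most of the churn in the engine files below is one mechanical change: the Python 2 urllib/urlparse imports are replaced by imports from a new compatibility module, searx/url_utils.py (+28 −0 in the list above), whose own diff is not included in this excerpt. A minimal sketch of such a shim, assuming it only re-exports the helpers the engines import from it (urlencode, quote, quote_plus, unquote, urljoin, urlparse, parse_qsl), could look like this:

from sys import version_info

if version_info[0] == 2:
    # Python 2: the helpers live in urllib and urlparse
    from urllib import quote, quote_plus, unquote, urlencode
    from urlparse import parse_qsl, urljoin, urlparse
else:
    # Python 3: the same helpers moved to urllib.parse
    from urllib.parse import (parse_qsl, quote, quote_plus, unquote,
                              urlencode, urljoin, urlparse)

__all__ = ['parse_qsl', 'quote', 'quote_plus', 'unquote',
           'urlencode', 'urljoin', 'urlparse']

Each engine then imports these names from one place (from searx.url_utils import urlencode, urljoin, ...) instead of branching on the interpreter version in every module.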

.travis.yml (+3 −2)

 language: python
 python:
   - "2.7"
+  - "3.6"
 before_install:
   - "export DISPLAY=:99.0"
   - "sh -e /etc/init.d/xvfb start"
...
   - ./manage.sh styles
   - ./manage.sh grunt_build
   - ./manage.sh tests
-  - ./manage.sh py_test_coverage
 after_success:
-  coveralls
+  - ./manage.sh py_test_coverage
+  - coveralls
 notifications:
   irc:
     channels:

requirements-dev.txt (+1 −2)

 nose2[coverage-plugin]
 pep8==1.7.0
 plone.testing==5.0.0
-robotframework-selenium2library==1.8.0
-robotsuite==1.7.0
+splinter==0.7.5
 transifex-client==0.12.2
 unittest2==1.1.0
 zope.testrunner==4.5.1

searx/answerers/__init__.py (+8 −4)

 from os import listdir
 from os.path import realpath, dirname, join, isdir
+from sys import version_info
 from searx.utils import load_module
 from collections import defaultdict
 
+if version_info[0] == 3:
+    unicode = str
+
 
 answerers_dir = dirname(realpath(__file__))
 
...
 def load_answerers():
     answerers = []
     for filename in listdir(answerers_dir):
-        if not isdir(join(answerers_dir, filename)):
+        if not isdir(join(answerers_dir, filename)) or filename.startswith('_'):
             continue
         module = load_module('answerer.py', join(answerers_dir, filename))
         if not hasattr(module, 'keywords') or not isinstance(module.keywords, tuple) or not len(module.keywords):
...
 
 def ask(query):
     results = []
-    query_parts = filter(None, query.query.split())
+    query_parts = list(filter(None, query.query.split()))
 
-    if query_parts[0] not in answerers_by_keywords:
+    if query_parts[0].decode('utf-8') not in answerers_by_keywords:
         return results
 
-    for answerer in answerers_by_keywords[query_parts[0]]:
+    for answerer in answerers_by_keywords[query_parts[0].decode('utf-8')]:
         result = answerer(query)
         if result:
             results.append(result)

searx/answerers/random/answerer.py (+9 −4)

 import random
 import string
+import sys
 from flask_babel import gettext
 
 # required answerer attribute
...
 
 random_int_max = 2**31
 
-random_string_letters = string.lowercase + string.digits + string.uppercase
+if sys.version_info[0] == 2:
+    random_string_letters = string.lowercase + string.digits + string.uppercase
+else:
+    unicode = str
+    random_string_letters = string.ascii_lowercase + string.digits + string.ascii_uppercase
 
 
 def random_string():
...
     return unicode(random.randint(-random_int_max, random_int_max))
 
 
-random_types = {u'string': random_string,
-                u'int': random_int,
-                u'float': random_float}
+random_types = {b'string': random_string,
+                b'int': random_int,
+                b'float': random_float}
 
 
 # required answerer function

searx/answerers/statistics/answerer.py (+10 −6)

+from sys import version_info
 from functools import reduce
 from operator import mul
 
 from flask_babel import gettext
 
+if version_info[0] == 3:
+    unicode = str
+
 keywords = ('min',
             'max',
             'avg',
...
         return []
 
     try:
-        args = map(float, parts[1:])
+        args = list(map(float, parts[1:]))
     except:
         return []
 
     func = parts[0]
     answer = None
 
-    if func == 'min':
+    if func == b'min':
         answer = min(args)
-    elif func == 'max':
+    elif func == b'max':
         answer = max(args)
-    elif func == 'avg':
+    elif func == b'avg':
         answer = sum(args) / len(args)
-    elif func == 'sum':
+    elif func == b'sum':
         answer = sum(args)
-    elif func == 'prod':
+    elif func == b'prod':
         answer = reduce(mul, args, 1)
 
     if answer is None:
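
A second pattern repeated in the answerers (and in several engines further down, e.g. dictzone.py and currency_convert.py) is that the raw query is handled as a UTF-8 byte string on both interpreters: literals compared against it become b'...' and an explicit .decode('utf-8') appears wherever a text key is needed, while unicode = str keeps the remaining unicode(...) calls working on Python 3. A small illustration of the assumption behind the b'min' / b'avg' keys above (hypothetical values, not part of the diff):

# Assuming the incoming query arrives as UTF-8 encoded bytes, a dict keyed
# on text strings would never match its tokens on Python 3:
parts = b'avg 1 2 3'.split()
handlers = {b'min': min, b'max': max, b'avg': lambda a: sum(a) / len(a)}
args = list(map(float, parts[1:]))
print(handlers[parts[0]](args))            # 2.0 -- bytes key matches bytes token
print(parts[0].decode('utf-8') == 'avg')   # True -- decode where a text key is needed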

searx/autocomplete.py (+5 −1)

 
 from lxml import etree
 from json import loads
-from urllib import urlencode
 from searx import settings
 from searx.languages import language_codes
 from searx.engines import (
...
 )
 from searx.poolrequests import get as http_get
 
+try:
+    from urllib import urlencode
+except:
+    from urllib.parse import urlencode
+
 
 def get(*args, **kwargs):
     if 'timeout' not in kwargs:

searx/engines/1337x.py (+1 −2)

-from urllib import quote
 from lxml import html
 from searx.engines.xpath import extract_text
 from searx.utils import get_torrent_size
-from urlparse import urljoin
+from searx.url_utils import quote, urljoin
 
 url = 'https://1337x.to/'
 search_url = url + 'search/{search_term}/{pageno}/'

searx/engines/__init__.py (+2 −3)

             if engine_data['categories'] == 'none':
                 engine.categories = []
             else:
-                engine.categories = map(
-                    str.strip, engine_data['categories'].split(','))
+                engine.categories = list(map(str.strip, engine_data['categories'].split(',')))
             continue
         setattr(engine, param_name, engine_data[param_name])
 
-    for arg_name, arg_value in engine_default_args.iteritems():
+    for arg_name, arg_value in engine_default_args.items():
         if not hasattr(engine, arg_name):
             setattr(engine, arg_name, arg_value)
 

searx/engines/archlinux.py (+1 −2)

  @parse        url, title
 """
 
-from urlparse import urljoin
-from urllib import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text
+from searx.url_utils import urlencode, urljoin
 
 # engine dependent config
 categories = ['it']

searx/engines/base.py (+3 −3)

 """
 
 from lxml import etree
-from urllib import urlencode
-from searx.utils import searx_useragent
 from datetime import datetime
 import re
+from searx.url_utils import urlencode
+from searx.utils import searx_useragent
 
 
 categories = ['science']
...
 def response(resp):
     results = []
 
-    search_results = etree.XML(resp.content)
+    search_results = etree.XML(resp.text)
 
     for entry in search_results.xpath('./result/doc'):
         content = "No description available"

searx/engines/bing.py (+1 −1)

  @todo        publishedDate
 """
 
-from urllib import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['general']

searx/engines/bing_images.py (+1 −1)

               limited response to 10 images
 """
 
-from urllib import urlencode
 from lxml import html
 from json import loads
 import re
 from searx.engines.bing import _fetch_supported_languages, supported_languages_url
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['images']

searx/engines/bing_news.py (+2 −3)

  @parse       url, title, content, publishedDate, thumbnail
 """
 
-from urllib import urlencode
-from urlparse import urlparse, parse_qsl
 from datetime import datetime
 from dateutil import parser
 from lxml import etree
 from searx.utils import list_get
 from searx.engines.bing import _fetch_supported_languages, supported_languages_url
+from searx.url_utils import urlencode, urlparse, parse_qsl
 
 # engine dependent config
 categories = ['news']
...
 def response(resp):
     results = []
 
-    rss = etree.fromstring(resp.content)
+    rss = etree.fromstring(resp.text)
 
     ns = rss.nsmap
 

searx/engines/blekko_images.py (+1 −1)

 """
 
 from json import loads
-from urllib import urlencode
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['images']

searx/engines/btdigg.py (+2 −3)

  @parse       url, title, content, seed, leech, magnetlink
 """
 
-from urlparse import urljoin
-from urllib import quote
 from lxml import html
 from operator import itemgetter
 from searx.engines.xpath import extract_text
+from searx.url_utils import quote, urljoin
 from searx.utils import get_torrent_size
 
 # engine dependent config
...
 def response(resp):
     results = []
 
-    dom = html.fromstring(resp.content)
+    dom = html.fromstring(resp.text)
 
     search_res = dom.xpath('//div[@id="search_res"]/table/tr')
 

searx/engines/currency_convert.py (+9 −5)

-from datetime import datetime
+import json
 import re
 import os
-import json
+import sys
 import unicodedata
 
+from datetime import datetime
+
+if sys.version_info[0] == 3:
+    unicode = str
 
 categories = []
 url = 'https://download.finance.yahoo.com/d/quotes.csv?e=.csv&f=sl1d1t1&s={query}=X'
 weight = 100
 
-parser_re = re.compile(u'.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I)  # noqa
+parser_re = re.compile(b'.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I)
 
 db = 1
 
 
 def normalize_name(name):
-    name = name.lower().replace('-', ' ').rstrip('s')
+    name = name.decode('utf-8').lower().replace('-', ' ').rstrip('s')
     name = re.sub(' +', ' ', name)
     return unicodedata.normalize('NFKD', name).lower()
 
...
 
 
 def request(query, params):
-    m = parser_re.match(unicode(query, 'utf8'))
+    m = parser_re.match(query)
     if not m:
         # wrong query
         return params

searx/engines/dailymotion.py (+1 −2)

  @todo        set content-parameter with correct data
 """
 
-from urllib import urlencode
 from json import loads
 from datetime import datetime
-from requests import get
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['videos']

searx/engines/deezer.py (+2 −3)

 """
 
 from json import loads
-from urllib import urlencode
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['music']
...
 def request(query, params):
     offset = (params['pageno'] - 1) * 25
 
-    params['url'] = search_url.format(query=urlencode({'q': query}),
-                                      offset=offset)
+    params['url'] = search_url.format(query=urlencode({'q': query}), offset=offset)
 
     return params
 

searx/engines/deviantart.py (+1 −1)

  @todo        rewrite to api
 """
 
-from urllib import urlencode
 from lxml import html
 import re
 from searx.engines.xpath import extract_text
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['images']

searx/engines/dictzone.py (+3 −3)

 """
 
 import re
-from urlparse import urljoin
 from lxml import html
 from searx.utils import is_valid_lang
+from searx.url_utils import urljoin
 
 categories = ['general']
 url = u'http://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}'
 weight = 100
 
-parser_re = re.compile(u'.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I)
+parser_re = re.compile(b'.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I)
 results_xpath = './/table[@id="r"]/tr'
 
 
 def request(query, params):
-    m = parser_re.match(unicode(query, 'utf8'))
+    m = parser_re.match(query)
     if not m:
         return params
 

searx/engines/digbt.py (+6 −2)

  @parse       url, title, content, magnetlink
 """
 
-from urlparse import urljoin
+from sys import version_info
 from lxml import html
 from searx.engines.xpath import extract_text
 from searx.utils import get_torrent_size
+from searx.url_utils import urljoin
+
+if version_info[0] == 3:
+    unicode = str
 
 categories = ['videos', 'music', 'files']
 paging = True
...
 
 
 def response(resp):
-    dom = html.fromstring(resp.content)
+    dom = html.fromstring(resp.text)
     search_res = dom.xpath('.//td[@class="x-item"]')
 
     if not search_res:

searx/engines/digg.py (+2 −2)

  @parse       url, title, content, publishedDate, thumbnail
 """
 
-from urllib import quote_plus
+from dateutil import parser
 from json import loads
 from lxml import html
-from dateutil import parser
+from searx.url_utils import quote_plus
 
 # engine dependent config
 categories = ['news', 'social media']

searx/engines/doku.py (+1 −1)

 # @stable      yes
 # @parse       (general)    url, title, content
 
-from urllib import urlencode
 from lxml.html import fromstring
 from searx.engines.xpath import extract_text
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['general']  # TODO , 'images', 'music', 'videos', 'files'

searx/engines/duckduckgo.py (+1 −1)

  @todo        rewrite to api
 """
 
-from urllib import urlencode
 from lxml.html import fromstring
 from requests import get
 from json import loads
 from searx.engines.xpath import extract_text
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['general']

searx/engines/duckduckgo_definitions.py (+3 −3)

 import json
-from urllib import urlencode
-from re import compile, sub
 from lxml import html
-from searx.utils import html_to_text
+from re import compile
 from searx.engines.xpath import extract_text
 from searx.engines.duckduckgo import _fetch_supported_languages, supported_languages_url
+from searx.url_utils import urlencode
+from searx.utils import html_to_text
 
 url = 'https://api.duckduckgo.com/'\
     + '?{query}&format=json&pretty=0&no_redirect=1&d=1'

searx/engines/faroo.py (+1 −1)

  @parse       url, title, content, publishedDate, img_src
 """
 
-from urllib import urlencode
 from json import loads
 import datetime
 from searx.utils import searx_useragent
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['general', 'news']

searx/engines/fdroid.py (+3 −4)

  @parse        url, title, content
 """
 
-from urllib import urlencode
-from searx.engines.xpath import extract_text
 from lxml import html
+from searx.engines.xpath import extract_text
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['files']
...
 
 # do search-request
 def request(query, params):
-    query = urlencode({'fdfilter': query,
-                       'fdpage': params['pageno']})
+    query = urlencode({'fdfilter': query, 'fdpage': params['pageno']})
     params['url'] = search_url.format(query=query)
     return params
 

searx/engines/filecrop.py (+7 −4)

-from urllib import urlencode
-from HTMLParser import HTMLParser
+from searx.url_utils import urlencode
+
+try:
+    from HTMLParser import HTMLParser
+except:
+    from html.parser import HTMLParser
 
 url = 'http://www.filecrop.com/'
 search_url = url + '/search.php?{query}&size_i=0&size_f=100000000&engine_r=1&engine_d=1&engine_e=1&engine_4=1&engine_m=1&pos={index}'  # noqa
...
 
 def request(query, params):
     index = 1 + (params['pageno'] - 1) * 30
-    params['url'] = search_url.format(query=urlencode({'w': query}),
-                                      index=index)
+    params['url'] = search_url.format(query=urlencode({'w': query}), index=index)
     return params
 
 

searx/engines/flickr.py (+1 −1)

  More info on api-key : https://www.flickr.com/services/apps/create/
 """
 
-from urllib import urlencode
 from json import loads
+from searx.url_utils import urlencode
 
 categories = ['images']
 

searx/engines/flickr_noapi.py (+1 −1)

  @parse       url, title, thumbnail, img_src
 """
 
-from urllib import urlencode
 from json import loads
 from time import time
 import re
 from searx.engines import logger
+from searx.url_utils import urlencode
 
 
 logger = logger.getChild('flickr-noapi')

searx/engines/framalibre.py (+1 −3)

  @parse       url, title, content, thumbnail, img_src
 """
 
-from urlparse import urljoin
 from cgi import escape
-from urllib import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text
-from dateutil import parser
+from searx.url_utils import urljoin, urlencode
 
 # engine dependent config
 categories = ['it']

searx/engines/frinkiac.py (+1 −1)

 """
 
 from json import loads
-from urllib import urlencode
+from searx.url_utils import urlencode
 
 categories = ['images']
 

searx/engines/gigablast.py (+1 −2)

 """
 
 from json import loads
-from random import randint
 from time import time
-from urllib import urlencode
 from lxml.html import fromstring
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['general']

searx/engines/github.py (+1 −1)

  @parse       url, title, content
 """
 
-from urllib import urlencode
 from json import loads
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['it']

searx/engines/google.py (+2 −3)

 # @parse       url, title, content, suggestion
 
 import re
-from urllib import urlencode
-from urlparse import urlparse, parse_qsl
 from lxml import html, etree
 from searx.engines.xpath import extract_text, extract_url
-from searx.search import logger
+from searx import logger
+from searx.url_utils import urlencode, urlparse, parse_qsl
 
 logger = logger.getChild('google engine')
 

searx/engines/google_images.py (+1 −1)

 """
 
 from datetime import date, timedelta
-from urllib import urlencode
 from json import loads
 from lxml import html
+from searx.url_utils import urlencode
 
 
 # engine dependent config

searx/engines/google_news.py (+1 −2)

 """
 
 from lxml import html
-from urllib import urlencode
-from json import loads
 from searx.engines.google import _fetch_supported_languages, supported_languages_url
+from searx.url_utils import urlencode
 
 # search-url
 categories = ['news']

searx/engines/ina.py (+7 −3)

 # @todo        embedded (needs some md5 from video page)
 
 from json import loads
-from urllib import urlencode
 from lxml import html
-from HTMLParser import HTMLParser
-from searx.engines.xpath import extract_text
 from dateutil import parser
+from searx.engines.xpath import extract_text
+from searx.url_utils import urlencode
+
+try:
+    from HTMLParser import HTMLParser
+except:
+    from html.parser import HTMLParser
 
 # engine dependent config
 categories = ['videos']

searx/engines/json_engine.py (+8 −3)

-from urllib import urlencode
-from json import loads
 from collections import Iterable
+from json import loads
+from sys import version_info
+from searx.url_utils import urlencode
+
+if version_info[0] == 3:
+    unicode = str
 
 search_url = None
 url_query = None
 content_query = None
 title_query = None
+paging = False
 suggestion_query = ''
 results_query = ''
 
...
 
 def iterate(iterable):
     if type(iterable) == dict:
-        it = iterable.iteritems()
+        it = iterable.items()
 
     else:
         it = enumerate(iterable)

searx/engines/kickass.py (+1 −2)

  @parse       url, title, content, seed, leech, magnetlink
 """
 
-from urlparse import urljoin
-from urllib import quote
 from lxml import html
 from operator import itemgetter
 from searx.engines.xpath import extract_text
 from searx.utils import get_torrent_size, convert_str_to_int
+from searx.url_utils import quote, urljoin
 
 # engine dependent config
 categories = ['videos', 'music', 'files']

searx/engines/mediawiki.py (+1 −1)

 
 from json import loads
 from string import Formatter
-from urllib import urlencode, quote
+from searx.url_utils import urlencode, quote
 
 # engine dependent config
 categories = ['general']

searx/engines/mixcloud.py (+1 −1)

 """
 
 from json import loads
-from urllib import urlencode
 from dateutil import parser
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['music']

searx/engines/nyaa.py (+1 −1)

  @parse        url, title, content, seed, leech, torrentfile
 """
 
-from urllib import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['files', 'images', 'videos', 'music']

searx/engines/openstreetmap.py (+0 −4)

 """
 
 from json import loads
-from searx.utils import searx_useragent
 
 # engine dependent config
 categories = ['map']
...
 def request(query, params):
     params['url'] = base_url + search_string.format(query=query)
 
-    # using searx User-Agent
-    params['headers']['User-Agent'] = searx_useragent()
-
     return params
 
 

searx/engines/photon.py (+1 −1)

  @parse       url, title
 """
 
-from urllib import urlencode
 from json import loads
 from searx.utils import searx_useragent
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['map']

searx/engines/piratebay.py (+1 −2)

 # @stable      yes (HTML can change)
 # @parse       url, title, content, seed, leech, magnetlink
 
-from urlparse import urljoin
-from urllib import quote
 from lxml import html
 from operator import itemgetter
 from searx.engines.xpath import extract_text
+from searx.url_utils import quote, urljoin
 
 # engine dependent config
 categories = ['videos', 'music', 'files']

searx/engines/qwant.py (+1 −2)

 
 from datetime import datetime
 from json import loads
-from urllib import urlencode
-
 from searx.utils import html_to_text
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = None

searx/engines/reddit.py (+2 −4)

 """
 
 import json
-from urllib import urlencode
-from urlparse import urlparse, urljoin
 from datetime import datetime
+from searx.url_utils import urlencode, urljoin, urlparse
 
 # engine dependent config
 categories = ['general', 'images', 'news', 'social media']
...
 
 # do search-request
 def request(query, params):
-    query = urlencode({'q': query,
-                       'limit': page_size})
+    query = urlencode({'q': query, 'limit': page_size})
     params['url'] = search_url.format(query=query)
 
     return params

searx/engines/scanr_structures.py (+1 −3)

  @parse       url, title, content, img_src
 """
 
-from urllib import urlencode
 from json import loads, dumps
-from dateutil import parser
 from searx.utils import html_to_text
 
 # engine dependent config
...
     search_res = loads(resp.text)
 
     # return empty array if there are no results
-    if search_res.get('total') < 1:
+    if search_res.get('total', 0) < 1:
         return []
 
     # parse results

searx/engines/searchcode_code.py (+2 −3)

  @parse       url, title, content
 """
 
-from urllib import urlencode
 from json import loads
+from searx.url_utils import urlencode
 
 
 # engine dependent config
...
 
 # do search-request
 def request(query, params):
-    params['url'] = search_url.format(query=urlencode({'q': query}),
-                                      pageno=params['pageno'] - 1)
+    params['url'] = search_url.format(query=urlencode({'q': query}), pageno=params['pageno'] - 1)
 
     return params
 

searx/engines/searchcode_doc.py (+2 −3)

  @parse       url, title, content
 """
 
-from urllib import urlencode
 from json import loads
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['it']
...
 
 # do search-request
 def request(query, params):
-    params['url'] = search_url.format(query=urlencode({'q': query}),
-                                      pageno=params['pageno'] - 1)
+    params['url'] = search_url.format(query=urlencode({'q': query}), pageno=params['pageno'] - 1)
 
     return params
 

searx/engines/seedpeer.py (+1 −3)

 # @stable      yes (HTML can change)
 # @parse       url, title, content, seed, leech, magnetlink
 
-from urlparse import urljoin
-from urllib import quote
 from lxml import html
 from operator import itemgetter
-from searx.engines.xpath import extract_text
+from searx.url_utils import quote, urljoin
 
 
 url = 'http://www.seedpeer.eu/'

searx/engines/soundcloud.py (+12 −7)

 """
 
 import re
-from StringIO import StringIO
 from json import loads
-from lxml import etree
-from urllib import urlencode, quote_plus
+from lxml import html
 from dateutil import parser
 from searx import logger
 from searx.poolrequests import get as http_get
+from searx.url_utils import quote_plus, urlencode
+
+try:
+    from cStringIO import StringIO
+except:
+    from io import StringIO
 
 # engine dependent config
 categories = ['music']
...
     'scrolling="no" frameborder="no" ' +\
     'data-src="https://w.soundcloud.com/player/?url={uri}"></iframe>'
 
+cid_re = re.compile(r'client_id:"([^"]*)"', re.I | re.U)
+
 
 def get_client_id():
     response = http_get("https://soundcloud.com")
-    rx_namespace = {"re": "http://exslt.org/regular-expressions"}
 
     if response.ok:
-        tree = etree.parse(StringIO(response.content), etree.HTMLParser())
-        script_tags = tree.xpath("//script[re:match(@src, '(.*app.*js)')]", namespaces=rx_namespace)
+        tree = html.fromstring(response.content)
+        script_tags = tree.xpath("//script[contains(@src, '/assets/app')]")
         app_js_urls = [script_tag.get('src') for script_tag in script_tags if script_tag is not None]
 
         # extracts valid app_js urls from soundcloud.com content
...
             # gets app_js and searches for the clientid
             response = http_get(app_js_url)
             if response.ok:
-                cids = re.search(r'client_id:"([^"]*)"', response.content, re.M | re.I)
+                cids = cid_re.search(response.text)
                 if cids is not None and len(cids.groups()):
                     return cids.groups()[0]
     logger.warning("Unable to fetch guest client_id from SoundCloud, check parser!")

searx/engines/spotify.py (+2 −3)

 """
 
 from json import loads
-from urllib import urlencode
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['music']
...
 def request(query, params):
     offset = (params['pageno'] - 1) * 20
 
-    params['url'] = search_url.format(query=urlencode({'q': query}),
-                                      offset=offset)
+    params['url'] = search_url.format(query=urlencode({'q': query}), offset=offset)
 
     return params
 

searx/engines/stackoverflow.py (+2 −4)

  @parse       url, title, content
 """
 
-from urlparse import urljoin
-from urllib import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text
+from searx.url_utils import urlencode, urljoin
 
 # engine dependent config
 categories = ['it']
...
 
 # do search-request
 def request(query, params):
-    params['url'] = search_url.format(query=urlencode({'q': query}),
-                                      pageno=params['pageno'])
+    params['url'] = search_url.format(query=urlencode({'q': query}), pageno=params['pageno'])
 
     return params
 

searx/engines/startpage.py (+1 −1)

 def response(resp):
     results = []
 
-    dom = html.fromstring(resp.content)
+    dom = html.fromstring(resp.text)
 
     # parse results
     for result in dom.xpath(results_xpath):

searx/engines/subtitleseeker.py (+1 −1)

  @parse       url, title, content
 """
 
-from urllib import quote_plus
 from lxml import html
 from searx.languages import language_codes
 from searx.engines.xpath import extract_text
+from searx.url_utils import quote_plus
 
 # engine dependent config
 categories = ['videos']

searx/engines/swisscows.py (+13 −14)

 """
 
 from json import loads
-from urllib import urlencode, unquote
 import re
 from lxml.html import fromstring
+from searx.url_utils import unquote, urlencode
 
 # engine dependent config
 categories = ['general', 'images']
...
 supported_languages_url = base_url
 
 # regex
-regex_json = re.compile(r'initialData: {"Request":(.|\n)*},\s*environment')
-regex_json_remove_start = re.compile(r'^initialData:\s*')
-regex_json_remove_end = re.compile(r',\s*environment$')
-regex_img_url_remove_start = re.compile(r'^https?://i\.swisscows\.ch/\?link=')
+regex_json = re.compile(b'initialData: {"Request":(.|\n)*},\s*environment')
+regex_json_remove_start = re.compile(b'^initialData:\s*')
+regex_json_remove_end = re.compile(b',\s*environment$')
+regex_img_url_remove_start = re.compile(b'^https?://i\.swisscows\.ch/\?link=')
 
 
 # do search-request
...
         ui_language = params['language'].split('-')[0]
 
     search_path = search_string.format(
-        query=urlencode({'query': query,
-                         'uiLanguage': ui_language,
-                         'region': region}),
-        page=params['pageno'])
+        query=urlencode({'query': query, 'uiLanguage': ui_language, 'region': region}),
+        page=params['pageno']
+    )
 
     # image search query is something like 'image?{query}&page={page}'
     if params['category'] == 'images':
...
 def response(resp):
     results = []
 
-    json_regex = regex_json.search(resp.content)
+    json_regex = regex_json.search(resp.text)
 
     # check if results are returned
     if not json_regex:
         return []
 
-    json_raw = regex_json_remove_end.sub('', regex_json_remove_start.sub('', json_regex.group()))
-    json = loads(json_raw)
+    json_raw = regex_json_remove_end.sub(b'', regex_json_remove_start.sub(b'', json_regex.group()))
+    json = loads(json_raw.decode('utf-8'))
 
     # parse results
     for result in json['Results'].get('items', []):
...
 
         # parse image results
         if result.get('ContentType', '').startswith('image'):
-            img_url = unquote(regex_img_url_remove_start.sub('', result['Url']))
+            img_url = unquote(regex_img_url_remove_start.sub(b'', result['Url'].encode('utf-8')).decode('utf-8'))
 
             # append result
             results.append({'url': result['SourceUrl'],
...
     # parse images
     for result in json.get('Images', []):
         # decode image url
-        img_url = unquote(regex_img_url_remove_start.sub('', result['Url']))
+        img_url = unquote(regex_img_url_remove_start.sub(b'', result['Url'].encode('utf-8')).decode('utf-8'))
 
         # append result
         results.append({'url': result['SourceUrl'],

searx/engines/tokyotoshokan.py (+5 −6)

 """
 
 import re
-from urllib import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text
 from datetime import datetime
 from searx.engines.nyaa import int_or_zero, get_filesize_mul
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['files', 'videos', 'music']
...
 
 # do search-request
 def request(query, params):
-    query = urlencode({'page': params['pageno'],
-                       'terms': query})
+    query = urlencode({'page': params['pageno'], 'terms': query})
     params['url'] = search_url.format(query=query)
     return params
 
...
     size_re = re.compile(r'Size:\s*([\d.]+)(TB|GB|MB|B)', re.IGNORECASE)
 
     # processing the results, two rows at a time
-    for i in xrange(0, len(rows), 2):
+    for i in range(0, len(rows), 2):
         # parse the first row
         name_row = rows[i]
 
...
                     groups = size_re.match(item).groups()
                     multiplier = get_filesize_mul(groups[1])
                     params['filesize'] = int(multiplier * float(groups[0]))
-                except Exception as e:
+                except:
                     pass
             elif item.startswith('Date:'):
                 try:
                     # Date: 2016-02-21 21:44 UTC
                     date = datetime.strptime(item, 'Date: %Y-%m-%d %H:%M UTC')
                     params['publishedDate'] = date
-                except Exception as e:
+                except:
                     pass
             elif item.startswith('Comment:'):
                 params['content'] = item

searx/engines/torrentz.py (+4 −4)

 """
 
 import re
-from urllib import urlencode
 from lxml import html
-from searx.engines.xpath import extract_text
 from datetime import datetime
 from searx.engines.nyaa import int_or_zero, get_filesize_mul
+from searx.engines.xpath import extract_text
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['files', 'videos', 'music']
...
             size_str = result.xpath('./dd/span[@class="s"]/text()')[0]
             size, suffix = size_str.split()
             params['filesize'] = int(size) * get_filesize_mul(suffix)
-        except Exception as e:
+        except:
             pass
 
         # does our link contain a valid SHA1 sum?
...
             # Fri, 25 Mar 2016 16:29:01
             date = datetime.strptime(date_str, '%a, %d %b %Y %H:%M:%S')
             params['publishedDate'] = date
-        except Exception as e:
+        except:
             pass
 
         results.append(params)

searx/engines/translated.py  (+4, -0)

  @parse       url, title, content
 """
 import re
+from sys import version_info
 from searx.utils import is_valid_lang

+if version_info[0] == 3:
+    unicode = str
+
 categories = ['general']
 url = u'http://api.mymemory.translated.net/get?q={query}&langpair={from_lang}|{to_lang}{key}'
 web_url = u'http://mymemory.translated.net/en/{from_lang}/{to_lang}/{query}'

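The `version_info` guard above is the first of several identical shims in this patch: Python 3 has no `unicode` built-in, so aliasing it to `str` lets the remaining 2.x-style code run unchanged on both interpreters. A minimal, self-contained sketch of the idiom (the `to_text` helper is illustrative only, not part of the patch):

from sys import version_info

if version_info[0] == 3:
    # Python 3 has no ``unicode`` built-in; alias it to the native text type.
    unicode = str


def to_text(value):
    # Illustrative helper: decodes bytes, passes text through unchanged.
    if isinstance(value, bytes):
        return value.decode('utf-8')
    return unicode(value)


print(to_text(b'caf\xc3\xa9'))  # prints the decoded text on both 2.7 and 3.x
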
searx/engines/twitter.py  (+1, -2)

  @todo        publishedDate
 """

-from urlparse import urljoin
-from urllib import urlencode
 from lxml import html
 from datetime import datetime
 from searx.engines.xpath import extract_text
+from searx.url_utils import urlencode, urljoin

 # engine dependent config
 categories = ['social media']

searx/engines/vimeo.py  (+1, -1)

 # @todo        set content-parameter with correct data

 from json import loads
-from urllib import urlencode
 from dateutil import parser
+from searx.url_utils import urlencode

 # engine dependent config
 categories = ['videos']

searx/engines/wikidata.py  (+5, -8)

 from searx import logger
 from searx.poolrequests import get
 from searx.engines.xpath import extract_text
-from searx.utils import format_date_by_locale
 from searx.engines.wikipedia import _fetch_supported_languages, supported_languages_url
+from searx.url_utils import urlencode

 from json import loads
 from lxml.html import fromstring
-from urllib import urlencode

 logger = logger.getChild('wikidata')
 result_count = 1
@@
         language = 'en'

     params['url'] = url_search.format(
-        query=urlencode({'label': query,
-                        'language': language}))
+        query=urlencode({'label': query, 'language': language}))
     return params


 def response(resp):
     results = []
-    html = fromstring(resp.content)
+    html = fromstring(resp.text)
     wikidata_ids = html.xpath(wikidata_ids_xpath)

     language = resp.search_params['language'].split('-')[0]
@@
     # TODO: make requests asynchronous to avoid timeout when result_count > 1
     for wikidata_id in wikidata_ids[:result_count]:
-        url = url_detail.format(query=urlencode({'page': wikidata_id,
-                                                'uselang': language}))
+        url = url_detail.format(query=urlencode({'page': wikidata_id, 'uselang': language}))
         htmlresponse = get(url)
-        jsonresponse = loads(htmlresponse.content)
+        jsonresponse = loads(htmlresponse.text)
         results += getDetail(jsonresponse, wikidata_id, language, resp.search_params['language'])

     return results

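The switch from `resp.content` to `resp.text` here (and in several engines below) matters because, in the requests library, `.content` is the raw byte string while `.text` is the body decoded to text, and `json.loads` on Python 3 versions before 3.6 refuses bytes input. A small sketch, assuming any reachable JSON endpoint:

import requests
from json import loads

resp = requests.get('https://httpbin.org/json')  # illustrative endpoint only

print(type(resp.content))  # raw bytes, exactly as received
print(type(resp.text))     # decoded text (unicode on 2.x, str on 3.x)

data = loads(resp.text)    # safe on both interpreters
print(sorted(data.keys()))
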
searx/engines/wikipedia.py  (+9, -12)

 """

 from json import loads
-from urllib import urlencode, quote
 from lxml.html import fromstring
-
+from searx.url_utils import quote, urlencode

 # search-url
-base_url = 'https://{language}.wikipedia.org/'
-search_postfix = 'w/api.php?'\
+base_url = u'https://{language}.wikipedia.org/'
+search_url = base_url + u'w/api.php?'\
     'action=query'\
     '&format=json'\
     '&{query}'\
@@
     else:
         language = lang

-    return base_url.format(language=language)
+    return language


 # do search-request
 def request(query, params):
     if query.islower():
-        query += '|' + query.title()
+        query = u'{0}|{1}'.format(query.decode('utf-8'), query.decode('utf-8').title()).encode('utf-8')

-    params['url'] = url_lang(params['language']) \
-        + search_postfix.format(query=urlencode({'titles': query}))
+    params['url'] = search_url.format(query=urlencode({'titles': query}),
+                                      language=url_lang(params['language']))

     return params
@@
 def response(resp):
     results = []

-    search_result = loads(resp.content)
+    search_result = loads(resp.text)

     # wikipedia article's unique id
     # first valid id is assumed to be the requested article
@@
     extract = page.get('extract')

     summary = extract_first_paragraph(extract, title, image)
-    if not summary:
-        return []

     # link to wikipedia article
-    wikipedia_link = url_lang(resp.search_params['language']) \
+    wikipedia_link = base_url.format(language=url_lang(resp.search_params['language'])) \
         + 'wiki/' + quote(title.replace(' ', '_').encode('utf8'))

     results.append({'url': wikipedia_link, 'title': title})

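The new `request()` line looks noisy because the query now arrives as UTF-8 bytes (see the `searx/query.py` change further down): it has to be decoded before `.title()` so that non-ASCII letters are cased correctly, then re-encoded before `urlencode`. A short sketch of why the round-trip is needed (the sample query is illustrative):

# -*- coding: utf-8 -*-
query = u'északi fény'.encode('utf-8')   # the raw query as the engine receives it

if query.islower():
    query = u'{0}|{1}'.format(query.decode('utf-8'),
                              query.decode('utf-8').title()).encode('utf-8')

print(query.decode('utf-8'))  # -> északi fény|Északi Fény
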
searx/engines/wolframalpha_api.py  (+6, -7)

 # @stable      yes
 # @parse       url, infobox

-from urllib import urlencode
 from lxml import etree
+from searx.url_utils import urlencode

 # search-url
 search_url = 'https://api.wolframalpha.com/v2/query?appid={api_key}&{query}'
@@
 # do search-request
 def request(query, params):
-    params['url'] = search_url.format(query=urlencode({'input': query}),
-                                      api_key=api_key)
+    params['url'] = search_url.format(query=urlencode({'input': query}), api_key=api_key)
     params['headers']['Referer'] = site_url.format(query=urlencode({'i': query}))

     return params
@@
                  u'\uf74e': 'i',        # imaginary number
                  u'\uf7d9': '='}        # equals sign

-    for k, v in pua_chars.iteritems():
+    for k, v in pua_chars.items():
         text = text.replace(k, v)

     return text
@@
 def response(resp):
     results = []

-    search_results = etree.XML(resp.content)
+    search_results = etree.XML(resp.text)

     # return empty array if there are no results
     if search_results.xpath(failure_xpath):
@@
     # append infobox
     results.append({'infobox': infobox_title,
                     'attributes': result_chunks,
-                    'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer'].decode('utf8')}]})
+                    'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer']}]})

     # append link to site
-    results.append({'url': resp.request.headers['Referer'].decode('utf8'),
+    results.append({'url': resp.request.headers['Referer'],
                     'title': title,
                     'content': result_content})

searx/engines/wolframalpha_noapi.py  (+4, -5)

 from json import loads
 from time import time
-from urllib import urlencode
-from lxml.etree import XML

 from searx.poolrequests import get as http_get
+from searx.url_utils import urlencode

 # search-url
 url = 'https://www.wolframalpha.com/'
@@
 # do search-request
 def request(query, params):
     # obtain token if last update was more than an hour
-    if time() - token['last_updated'] > 3600:
+    if time() - (token['last_updated'] or 0) > 3600:
         obtain_token()
     params['url'] = search_url.format(query=urlencode({'input': query}), token=token['value'])
     params['headers']['Referer'] = referer_url.format(query=urlencode({'i': query}))
@@
     results.append({'infobox': infobox_title,
                     'attributes': result_chunks,
-                    'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer'].decode('utf8')}]})
+                    'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer']}]})

-    results.append({'url': resp.request.headers['Referer'].decode('utf8'),
+    results.append({'url': resp.request.headers['Referer'],
                     'title': 'Wolfram|Alpha (' + infobox_title + ')',
                     'content': result_content})

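The `(token['last_updated'] or 0)` guard makes the age check safe when the token has never been fetched: `None or 0` evaluates to 0, so the very first request triggers `obtain_token()` instead of raising a `TypeError` on `time() - None`. A sketch, assuming the token cache starts out empty:

from time import time

token = {'value': None, 'last_updated': None}   # assumed initial shape


def token_is_stale():
    # An unset timestamp counts as "epoch 0", i.e. always stale.
    return time() - (token['last_updated'] or 0) > 3600


print(token_is_stale())          # True before the first fetch
token['last_updated'] = time()
print(token_is_stale())          # False right after a refresh
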
searx/engines/www1x.py  (+2, -4)

  @parse       url, title, thumbnail, img_src, content
 """

-from urllib import urlencode
-from urlparse import urljoin
 from lxml import html
-import string
 import re
+from searx.url_utils import urlencode, urljoin

 # engine dependent config
 categories = ['images']
@@
         cur_element += result_part

         # fix xml-error
-        cur_element = string.replace(cur_element, '"></a>', '"/></a>')
+        cur_element = cur_element.replace('"></a>', '"/></a>')

         dom = html.fromstring(cur_element)
         link = dom.xpath('//a')[0]

searx/engines/www500px.py  (+1, -2)

 """

 from json import loads
-from urllib import urlencode
-from urlparse import urljoin
+from searx.url_utils import urlencode, urljoin

 # engine dependent config
 categories = ['images']

searx/engines/xpath.py  (+2, -2)

 from lxml import html
-from urllib import urlencode, unquote
-from urlparse import urlparse, urljoin
 from lxml.etree import _ElementStringResult, _ElementUnicodeResult
 from searx.utils import html_to_text
+from searx.url_utils import unquote, urlencode, urljoin, urlparse

 search_url = None
 url_xpath = None
 content_xpath = None
 title_xpath = None
+paging = False
 suggestion_xpath = ''
 results_xpath = ''

searx/engines/yacy.py  (+1, -1)

 # @todo        parse video, audio and file results

 from json import loads
-from urllib import urlencode
 from dateutil import parser
+from searx.url_utils import urlencode

 from searx.utils import html_to_text

searx/engines/yahoo.py  (+1, -2)

  @parse       url, title, content, suggestion
 """

-from urllib import urlencode
-from urlparse import unquote
 from lxml import html
 from searx.engines.xpath import extract_text, extract_url
+from searx.url_utils import unquote, urlencode

 # engine dependent config
 categories = ['general']

searx/engines/yahoo_news.py  (+3, -3)

 # @stable      no (HTML can change)
 # @parse       url, title, content, publishedDate

-from urllib import urlencode
+import re
+from datetime import datetime, timedelta
 from lxml import html
 from searx.engines.xpath import extract_text, extract_url
 from searx.engines.yahoo import parse_url, _fetch_supported_languages, supported_languages_url
-from datetime import datetime, timedelta
-import re
 from dateutil import parser
+from searx.url_utils import urlencode

 # engine dependent config
 categories = ['news']

searx/engines/yandex.py  (+2, -2)

  @parse       url, title, content
 """

-from urllib import urlencode
 from lxml import html
-from searx.search import logger
+from searx import logger
+from searx.url_utils import urlencode

 logger = logger.getChild('yandex engine')

searx/engines/youtube_api.py  (+1, -1)

 # @parse       url, title, content, publishedDate, thumbnail, embedded

 from json import loads
-from urllib import urlencode
 from dateutil import parser
+from searx.url_utils import urlencode

 # engine dependent config
 categories = ['videos', 'music']

searx/engines/youtube_noapi.py  (+1, -1)

 # @stable      no
 # @parse       url, title, content, publishedDate, thumbnail, embedded

-from urllib import quote_plus
 from lxml import html
 from searx.engines.xpath import extract_text
 from searx.utils import list_get
+from searx.url_utils import quote_plus

 # engine dependent config
 categories = ['videos', 'music']

searx/plugins/__init__.py  (+4, -1)

 (C) 2015 by Adam Tauber, <asciimoo@gmail.com>
 '''
-from sys import exit
+from sys import exit, version_info
 from searx import logger

+if version_info[0] == 3:
+    unicode = str
+
 logger = logger.getChild('plugins')

 from searx.plugins import (doai_rewrite,

searx/plugins/doai_rewrite.py  (+1, -1)

 from flask_babel import gettext
 import re
-from urlparse import urlparse, parse_qsl
+from searx.url_utils import urlparse, parse_qsl

 regex = re.compile(r'10\.\d{4,9}/[^\s]+')

searx/plugins/https_rewrite.py  (+4, -1)

 '''

 import re
-from urlparse import urlparse
+import sys
 from lxml import etree
 from os import listdir, environ
 from os.path import isfile, isdir, join
 from searx.plugins import logger
 from flask_babel import gettext
 from searx import searx_dir
+from searx.url_utils import urlparse

+if sys.version_info[0] == 3:
+    unicode = str

 name = "HTTPS rewrite"
 description = gettext('Rewrite HTTP links to HTTPS if possible')

searx/plugins/self_info.py  (+2, -2)

 # Self User Agent regex
-p = re.compile('.*user[ -]agent.*', re.IGNORECASE)
+p = re.compile(b'.*user[ -]agent.*', re.IGNORECASE)


 # attach callback to the post search hook
@@
 def post_search(request, search):
     if search.search_query.pageno > 1:
         return True
-    if search.search_query.query == 'ip':
+    if search.search_query.query == b'ip':
         x_forwarded_for = request.headers.getlist("X-Forwarded-For")
         if x_forwarded_for:
             ip = x_forwarded_for[0]

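Both changes follow from the query now being kept as UTF-8 bytes: on Python 3 a text pattern cannot be matched against a bytes string, so the regex is compiled from a bytes literal and the literal comparison uses `b'ip'`. A minimal sketch:

import re

p = re.compile(b'.*user[ -]agent.*', re.IGNORECASE)

query = u'what is my user agent'.encode('utf-8')

print(bool(p.match(query)))   # True: bytes pattern against a bytes query
print(query == b'ip')         # False here, but the comparison stays type-consistent
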
searx/plugins/tracker_url_remover.py  (+1, -1)

 from flask_babel import gettext
 import re
-from urlparse import urlunparse
+from searx.url_utils import urlunparse

 regexes = {re.compile(r'utm_[^&]+&?'),
            re.compile(r'(wkey|wemail)[^&]+&?'),

searx/preferences.py  (+9, -9)

     def __init__(self, default_value, **kwargs):
         super(Setting, self).__init__()
         self.value = default_value
-        for key, value in kwargs.iteritems():
+        for key, value in kwargs.items():
             setattr(self, key, value)

         self._post_init()
@@
         return self.value

     def save(self, name, resp):
-        resp.set_cookie(name, bytes(self.value), max_age=COOKIE_MAX_AGE)
+        resp.set_cookie(name, self.value, max_age=COOKIE_MAX_AGE)


 class StringSetting(Setting):
@@
     def save(self, name, resp):
         if hasattr(self, 'key'):
-            resp.set_cookie(name, bytes(self.key), max_age=COOKIE_MAX_AGE)
+            resp.set_cookie(name, self.key, max_age=COOKIE_MAX_AGE)


 class SwitchableSetting(Setting):
@@
     def _post_init(self):
         super(EnginesSetting, self)._post_init()
         transformed_choices = []
-        for engine_name, engine in self.choices.iteritems():
+        for engine_name, engine in self.choices.items():
             for category in engine.categories:
                 transformed_choice = dict()
                 transformed_choice['default_on'] = not engine.disabled
@@
                                    'language': SearchLanguageSetting(settings['search']['language'],
                                                                      choices=LANGUAGE_CODES),
                                    'locale': EnumStringSetting(settings['ui']['default_locale'],
-                                                               choices=settings['locales'].keys() + ['']),
+                                                               choices=list(settings['locales'].keys()) + ['']),
                                    'autocomplete': EnumStringSetting(settings['search']['autocomplete'],
-                                                                     choices=autocomplete.backends.keys() + ['']),
+                                                                     choices=list(autocomplete.backends.keys()) + ['']),
                                    'image_proxy': MapSetting(settings['server']['image_proxy'],
                                                              map={'': settings['server']['image_proxy'],
                                                                   '0': False,
@@
         self.unknown_params = {}

     def parse_cookies(self, input_data):
-        for user_setting_name, user_setting in input_data.iteritems():
+        for user_setting_name, user_setting in input_data.items():
             if user_setting_name in self.key_value_settings:
                 self.key_value_settings[user_setting_name].parse(user_setting)
             elif user_setting_name == 'disabled_engines':
@@
         disabled_engines = []
         enabled_categories = []
         disabled_plugins = []
-        for user_setting_name, user_setting in input_data.iteritems():
+        for user_setting_name, user_setting in input_data.items():
             if user_setting_name in self.key_value_settings:
                 self.key_value_settings[user_setting_name].parse(user_setting)
             elif user_setting_name.startswith('engine_'):
@@
             return self.key_value_settings[user_setting_name].get_value()

     def save(self, resp):
-        for user_setting_name, user_setting in self.key_value_settings.iteritems():
+        for user_setting_name, user_setting in self.key_value_settings.items():
             user_setting.save(user_setting_name, resp)
         self.engines.save(resp)
         self.plugins.save(resp)

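`dict.iteritems()` does not exist on Python 3; `items()` works on both interpreters (a list on 2.x, a view on 3.x), and the explicit `list(...)` wrapper is only needed where the result is concatenated or mutated, as in the `choices=` arguments above. For example:

from __future__ import print_function

settings = {'language': 'en-US', 'autocomplete': ''}

for name, value in settings.items():      # portable iteration
    print(name, value)

choices = list(settings.keys()) + ['']    # a keys() view must become a list before '+'
print(choices)
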
searx/query.py  (+6, -2)

 from searx.engines import (
     categories, engines, engine_shortcuts
 )
-import string
 import re
+import string
+import sys
+
+if sys.version_info[0] == 3:
+    unicode = str

 VALID_LANGUAGE_CODE = re.compile(r'^[a-z]{2,3}(-[a-zA-Z]{2})?$')
@@
     """container for all the search parameters (query, language, etc...)"""

     def __init__(self, query, engines, categories, lang, safesearch, pageno, time_range):
-        self.query = query
+        self.query = query.encode('utf-8')
         self.engines = engines
         self.categories = categories
         self.lang = lang

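Encoding the query in the constructor establishes the convention used throughout the rest of the patch: internally `SearchQuery.query` is always UTF-8 bytes, and callers decode it only at the output boundaries (the JSON response and HTML highlighting in webapp.py, the engine-specific casing in wikipedia.py). A heavily simplified sketch of that contract (the real class takes more arguments):

# -*- coding: utf-8 -*-
class SearchQuery(object):
    """Simplified stand-in for searx.query.SearchQuery."""

    def __init__(self, query):
        self.query = query.encode('utf-8')   # stored as UTF-8 bytes on 2.x and 3.x


sq = SearchQuery(u'北京 天氣')
print(type(sq.query))              # bytes on Python 3, str on Python 2
print(sq.query.decode('utf-8'))    # decoded only where text is actually needed
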
searx/results.py  (+5, -1)

 import re
+import sys
 from collections import defaultdict
 from operator import itemgetter
 from threading import RLock
-from urlparse import urlparse, unquote
 from searx.engines import engines
+from searx.url_utils import urlparse, unquote
+
+if sys.version_info[0] == 3:
+    basestring = str

 CONTENT_LEN_IGNORED_CHARS_REGEX = re.compile(r'[,;:!?\./\\\\ ()-_]', re.M | re.U)
 WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U)

searx/search.py  (+10, -2)

 '''

 import gc
+import sys
 import threading
-from thread import start_new_thread
 from time import time
 from uuid import uuid4
 import requests.exceptions
@@
 from searx.plugins import plugins
 from searx.exceptions import SearxParameterException

+try:
+    from thread import start_new_thread
+except:
+    from _thread import start_new_thread
+
+if sys.version_info[0] == 3:
+    unicode = str
+
 logger = logger.getChild('search')

 number_of_searches = 0
@@
             request_params['time_range'] = search_query.time_range

             # append request to list
-            requests.append((selected_engine['name'], search_query.query.encode('utf-8'), request_params))
+            requests.append((selected_engine['name'], search_query.query, request_params))

             # update timeout_limit
             timeout_limit = max(timeout_limit, engine.timeout)

searx/settings_robot.yml  (+1, -1)

 ui:
     themes_path : ""
-    default_theme : legacy
+    default_theme : oscar
     default_locale : ""

 outgoing:

searx/templates/courgette/404.html  (+1, -1)

 <div class="center">
     <h1>{{ _('Page not found') }}</h1>
     {% autoescape false %}
-    <p>{{ _('Go to %(search_page)s.', search_page='<a href="{}">{}</a>'.decode('utf-8').format(url_for('index'), _('search page'))) }}</p>
+    <p>{{ _('Go to %(search_page)s.', search_page=unicode('<a href="{}">{}</a>').format(url_for('index'), _('search page'))) }}</p>
     {% endautoescape %}
 </div>
 {% endblock %}

searx/templates/legacy/404.html  (+1, -1)

 <div class="center">
     <h1>{{ _('Page not found') }}</h1>
     {% autoescape false %}
-    <p>{{ _('Go to %(search_page)s.', search_page='<a href="{}">{}</a>'.decode('utf-8').format(url_for('index'), _('search page'))) }}</p>
+    <p>{{ _('Go to %(search_page)s.', search_page=unicode('<a href="{}">{}</a>').format(url_for('index'), _('search page'))) }}</p>
     {% endautoescape %}
 </div>
 {% endblock %}

searx/templates/oscar/404.html  (+1, -1)

 <div class="text-center">
     <h1>{{ _('Page not found') }}</h1>
     {% autoescape false %}
-    <p>{{ _('Go to %(search_page)s.', search_page='<a href="{}">{}</a>'.decode('utf-8').format(url_for('index'), _('search page'))) }}</p>
+    <p>{{ _('Go to %(search_page)s.', search_page=unicode('<a href="{}">{}</a>').format(url_for('index'), _('search page'))) }}</p>
     {% endautoescape %}
 </div>
 {% endblock %}

searx/templates/pix-art/404.html  (+1, -1)

 <div class="center">
     <h1>{{ _('Page not found') }}</h1>
     {% autoescape false %}
-    <p>{{ _('Go to %(search_page)s.', search_page='<a href="{}">{}</a>'.decode('utf-8').format(url_for('index'), _('search page'))) }}</p>
+    <p>{{ _('Go to %(search_page)s.', search_page=unicode('<a href="{}">{}</a>').format(url_for('index'), _('search page'))) }}</p>
     {% endautoescape %}
 </div>
 {% endblock %}

searx/testing.py  (+26, -16)

 # -*- coding: utf-8 -*-
 """Shared testing code."""

-from plone.testing import Layer
-from unittest2 import TestCase
-from os.path import dirname, join, abspath
-

 import os
 import subprocess
+import traceback
+
+
+from os.path import dirname, join, abspath
+
+from splinter import Browser
+from unittest2 import TestCase


 class SearxTestLayer:
@@
     testTearDown = classmethod(testTearDown)


-class SearxRobotLayer(Layer):
+class SearxRobotLayer():
     """Searx Robot Test Layer"""

     def setUp(self):
@@
         del os.environ['SEARX_SETTINGS_PATH']


-SEARXROBOTLAYER = SearxRobotLayer()
+# SEARXROBOTLAYER = SearxRobotLayer()
+def run_robot_tests(tests):
+    print('Running {0} tests'.format(len(tests)))
+    for test in tests:
+        with Browser() as browser:
+            test(browser)


 class SearxTestCase(TestCase):
@@
 if __name__ == '__main__':
-    from tests.test_robot import test_suite
     import sys
-    from zope.testrunner.runner import Runner
+    # test cases
+    from tests import robot

     base_dir = abspath(join(dirname(__file__), '../tests'))
     if sys.argv[1] == 'robot':
-        r = Runner(['--color',
-                    '--auto-progress',
-                    '--stop-on-error',
-                    '--path',
-                    base_dir],
-                   found_suites=[test_suite()])
-        r.run()
-        sys.exit(int(r.failed))
+        test_layer = SearxRobotLayer()
+        errors = False
+        try:
+            test_layer.setUp()
+            run_robot_tests([getattr(robot, x) for x in dir(robot) if x.startswith('test_')])
+        except Exception:
+            errors = True
+            print('Error occured: {0}'.format(traceback.format_exc()))
+        test_layer.tearDown()
+        sys.exit(1 if errors else 0)

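The zope/plone test runner is replaced by a plain function: every `test_*` callable found in `tests.robot` is executed with its own splinter `Browser`. A trimmed-down usage sketch (it assumes a searx instance listening on localhost:11111 and a working browser driver):

from splinter import Browser


def run_robot_tests(tests):
    print('Running {0} tests'.format(len(tests)))
    for test in tests:
        with Browser() as browser:   # fresh browser per test function
            test(browser)


def test_index(browser):             # mirrors tests/robot/__init__.py below
    browser.visit('http://localhost:11111/')
    assert browser.is_text_present('about')


if __name__ == '__main__':
    run_robot_tests([test_index])
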
searx/url_utils.py  (new file, +28)

+from sys import version_info
+
+if version_info[0] == 2:
+    from urllib import quote, quote_plus, unquote, urlencode
+    from urlparse import parse_qsl, urljoin, urlparse, urlunparse, ParseResult
+else:
+    from urllib.parse import (
+        parse_qsl,
+        quote,
+        quote_plus,
+        unquote,
+        urlencode,
+        urljoin,
+        urlparse,
+        urlunparse,
+        ParseResult
+    )
+
+
+__export__ = (parse_qsl,
+              quote,
+              quote_plus,
+              unquote,
+              urlencode,
+              urljoin,
+              urlparse,
+              urlunparse,
+              ParseResult)

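With this module in place, every engine and plugin imports its URL helpers from one location instead of branching on `urllib`/`urlparse` versus `urllib.parse` itself, which is exactly what the import rewrites above do. A small usage sketch (parameter order in the query string may vary before Python 3.7):

from searx.url_utils import urlencode, urljoin, urlparse

base = 'https://example.com/search'
url = base + '?' + urlencode({'q': u'searx', 'pageno': 2})

print(url)                       # e.g. https://example.com/search?q=searx&pageno=2
print(urlparse(url).netloc)      # example.com
print(urljoin(base, '/about'))   # https://example.com/about
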
searx/utils.py  (+18, -8)

-import cStringIO
 import csv
 import os
 import re

 from babel.dates import format_date
 from codecs import getincrementalencoder
-from HTMLParser import HTMLParser
 from imp import load_source
 from os.path import splitext, join
 from random import choice
@@
 from searx import settings
 from searx import logger

+try:
+    from cStringIO import StringIO
+except:
+    from io import StringIO
+
+try:
+    from HTMLParser import HTMLParser
+except:
+    from html.parser import HTMLParser
+
+if sys.version_info[0] == 3:
+    unichr = chr
+    unicode = str

 logger = logger.getChild('utils')
@@
     def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
         # Redirect output to a queue
-        self.queue = cStringIO.StringIO()
+        self.queue = StringIO()
         self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
         self.stream = f
         self.encoder = getincrementalencoder(encoding)()
@@
                 unicode_row.append(col.encode('utf-8').strip())
             else:
                 unicode_row.append(col)
-        self.writer.writerow(unicode_row)
+        self.writer.writerow([x.decode('utf-8') if hasattr(x, 'decode') else x for x in unicode_row])
         # Fetch UTF-8 output from the queue ...
-        data = self.queue.getvalue()
-        data = data.decode("utf-8")
+        data = self.queue.getvalue().strip('\x00')
         # ... and reencode it into the target encoding
         data = self.encoder.encode(data)
         # write to the target stream
-        self.stream.write(data)
+        self.stream.write(data.decode('utf-8'))
         # empty queue
         self.queue.truncate(0)
@@
 def prettify_url(url, max_length=74):
     if len(url) > max_length:
-        chunk_len = max_length / 2 + 1
+        chunk_len = int(max_length / 2 + 1)
         return u'{0}[...]{1}'.format(url[:chunk_len], url[-chunk_len:])
     else:
         return url

searx/webapp.py  (+23, -13)

     from os.path import realpath, dirname
     path.append(realpath(dirname(realpath(__file__)) + '/../'))

-import cStringIO
 import hashlib
 import hmac
 import json
 import os
+import sys
+
 import requests

 from searx import logger
@@
     exit(1)
 from cgi import escape
 from datetime import datetime, timedelta
-from urllib import urlencode
-from urlparse import urlparse, urljoin
 from werkzeug.contrib.fixers import ProxyFix
 from flask import (
     Flask, request, render_template, url_for, Response, make_response,
@@
 from flask_babel import Babel, gettext, format_date, format_decimal
 from flask.json import jsonify
 from searx import settings, searx_dir, searx_debug
-from searx.exceptions import SearxException, SearxParameterException
+from searx.exceptions import SearxParameterException
 from searx.engines import (
     categories, engines, engine_shortcuts, get_engines_stats, initialize_engines
 )
@@
 from searx.plugins import plugins
 from searx.preferences import Preferences, ValidationException
 from searx.answerers import answerers
+from searx.url_utils import urlencode, urlparse, urljoin

 # check if the pyopenssl package is installed.
 # It is needed for SSL connection without trouble, see #298
@@
     logger.critical("The pyopenssl package has to be installed.\n"
                     "Some HTTPS connections will fail")

+try:
+    from cStringIO import StringIO
+except:
+    from io import StringIO
+
+
+if sys.version_info[0] == 3:
+    unicode = str
+
 # serve pages with HTTP/1.1
 from werkzeug.serving import WSGIRequestHandler
 WSGIRequestHandler.protocol_version = "HTTP/{}".format(settings['server'].get('http_protocol_version', '1.0'))
@@
     kwargs['results_on_new_tab'] = request.preferences.get_value('results_on_new_tab')

+    kwargs['unicode'] = unicode
+
     kwargs['scripts'] = set()
     for plugin in request.user_plugins:
         for script in plugin.js_dependencies:
@@
 def pre_request():
     request.errors = []

-    preferences = Preferences(themes, categories.keys(), engines, plugins)
+    preferences = Preferences(themes, list(categories.keys()), engines, plugins)
     request.preferences = preferences
     try:
         preferences.parse_cookies(request.cookies)
@@
     for result in results:
         if output_format == 'html':
             if 'content' in result and result['content']:
-                result['content'] = highlight_content(escape(result['content'][:1024]),
-                                                      search_query.query.encode('utf-8'))
-            result['title'] = highlight_content(escape(result['title'] or u''),
-                                                search_query.query.encode('utf-8'))
+                result['content'] = highlight_content(escape(result['content'][:1024]), search_query.query)
+            result['title'] = highlight_content(escape(result['title'] or u''), search_query.query)
         else:
             if result.get('content'):
                 result['content'] = html_to_text(result['content']).strip()
@@
                     result['publishedDate'] = format_date(result['publishedDate'])

     if output_format == 'json':
-        return Response(json.dumps({'query': search_query.query,
+        return Response(json.dumps({'query': search_query.query.decode('utf-8'),
                                     'number_of_results': number_of_results,
                                     'results': results,
                                     'answers': list(result_container.answers),
@@
                                     'suggestions': list(result_container.suggestions)}),
                         mimetype='application/json')
     elif output_format == 'csv':
-        csv = UnicodeWriter(cStringIO.StringIO())
+        csv = UnicodeWriter(StringIO())
         keys = ('title', 'url', 'content', 'host', 'engine', 'score')
         csv.writerow(keys)
         for row in results:
@@
             csv.writerow([row.get(key, '') for key in keys])
         csv.stream.seek(0)
         response = Response(csv.stream.read(), mimetype='application/csv')
-        cont_disp = 'attachment;Filename=searx_-_{0}.csv'.format(search_query.query.encode('utf-8'))
+        cont_disp = 'attachment;Filename=searx_-_{0}.csv'.format(search_query.query)
         response.headers.add('Content-Disposition', cont_disp)
         return response
     elif output_format == 'rss':
@@
     disabled_engines = request.preferences.engines.get_disabled()

     # parse query
-    raw_text_query = RawTextQuery(request.form.get('q', '').encode('utf-8'), disabled_engines)
+    raw_text_query = RawTextQuery(request.form.get('q', u'').encode('utf-8'), disabled_engines)
     raw_text_query.parse_query()

     # check if search query is set
@@
 def run():
+    logger.debug('starting webserver on %s:%s', settings['server']['port'], settings['server']['bind_address'])
     app.run(
         debug=searx_debug,
         use_debugger=searx_debug,

tests/robot/__init__.py  (new file, +75)

+# -*- coding: utf-8 -*-
+
+from time import sleep
+
+url = "http://localhost:11111/"
+
+
+def test_index(browser):
+    # Visit URL
+    browser.visit(url)
+    assert browser.is_text_present('about')
+
+
+def test_404(browser):
+    # Visit URL
+    browser.visit(url + 'missing_link')
+    assert browser.is_text_present('Page not found')
+
+
+def test_about(browser):
+    browser.visit(url)
+    browser.click_link_by_text('about')
+    assert browser.is_text_present('Why use searx?')
+
+
+def test_preferences(browser):
+    browser.visit(url)
+    browser.click_link_by_text('preferences')
+    assert browser.is_text_present('Preferences')
+    assert browser.is_text_present('Cookies')
+
+    assert browser.is_element_present_by_xpath('//label[@for="checkbox_dummy"]')
+
+
+def test_preferences_engine_select(browser):
+    browser.visit(url)
+    browser.click_link_by_text('preferences')
+
+    assert browser.is_element_present_by_xpath('//a[@href="#tab_engine"]')
+    browser.find_by_xpath('//a[@href="#tab_engine"]').first.click()
+
+    assert not browser.find_by_xpath('//input[@id="engine_general_dummy__general"]').first.checked
+    browser.find_by_xpath('//label[@for="engine_general_dummy__general"]').first.check()
+    browser.find_by_xpath('//input[@value="save"]').first.click()
+
+    # waiting for the redirect - without this the test is flaky..
+    sleep(1)
+
+    browser.visit(url)
+    browser.click_link_by_text('preferences')
+    browser.find_by_xpath('//a[@href="#tab_engine"]').first.click()
+
+    assert browser.find_by_xpath('//input[@id="engine_general_dummy__general"]').first.checked
+
+
+def test_preferences_locale(browser):
+    browser.visit(url)
+    browser.click_link_by_text('preferences')
+
+    browser.select('locale', 'hu')
+    browser.find_by_xpath('//input[@value="save"]').first.click()
+
+    # waiting for the redirect - without this the test is flaky..
+    sleep(1)
+
+    browser.visit(url)
+    browser.click_link_by_text('beállítások')
+    browser.is_text_present('Beállítások')
+
+
+def test_search(browser):
+    browser.visit(url)
+    browser.fill('q', 'test search query')
+    browser.find_by_xpath('//button[@type="submit"]').first.click()
+    assert browser.is_text_present('didn\'t find any results')

+ 0
- 153
tests/robot/test_basic.robot 查看文件

1
-*** Settings ***
2
-Library         Selenium2Library  timeout=10  implicit_wait=0.5
3
-Test Setup      Open Browser  http://localhost:11111/
4
-Test Teardown   Close All Browsers
5
-
6
-
7
-*** Keywords ***
8
-Submit Preferences
9
-    Set Selenium Speed  2 seconds
10
-    Submit Form  id=search_form
11
-    Location Should Be  http://localhost:11111/
12
-    Set Selenium Speed  0 seconds
13
-
14
-
-*** Test Cases ***
-Front page
-    Page Should Contain  about
-    Page Should Contain  preferences
-
-404 page
-    Go To  http://localhost:11111/no-such-page
-    Page Should Contain  Page not found
-    Page Should Contain  Go to search page
-
-About page
-    Click Element  link=about
-    Page Should Contain  Why use searx?
-    Page Should Contain Element  link=search engines
-
-Preferences page
-    Click Element  link=preferences
-    Page Should Contain  Preferences
-    Page Should Contain  Default categories
-    Page Should Contain  Currently used search engines
-    Page Should Contain  dummy dummy
-    Page Should Contain  general dummy
-
-Switch category
-    Go To  http://localhost:11111/preferences
-    Page Should Contain Checkbox  category_general
-    Page Should Contain Checkbox  category_dummy
-    Click Element  xpath=//*[.="general"]
-    Click Element  xpath=//*[.="dummy"]
-    Submit Preferences
-    Checkbox Should Not Be Selected  category_general
-    Checkbox Should Be Selected  category_dummy
-
-Change language
-    Page Should Contain  about
-    Page Should Contain  preferences
-    Go To  http://localhost:11111/preferences
-    Select From List  locale  hu
-    Submit Preferences
-    Page Should Contain  rólunk
-    Page Should Contain  beállítások
-
-Change method
-    Page Should Contain  about
-    Page Should Contain  preferences
-    Go To  http://localhost:11111/preferences
-    Select From List  method  GET
-    Submit Preferences
-    Go To  http://localhost:11111/preferences
-    List Selection Should Be  method  GET
-    Select From List  method  POST
-    Submit Preferences
-    Go To  http://localhost:11111/preferences
-    List Selection Should Be  method  POST
-
-Change theme
-    Page Should Contain  about
-    Page Should Contain  preferences
-    Go To  http://localhost:11111/preferences
-    List Selection Should Be  theme  legacy
-    Select From List  theme  oscar
-    Submit Preferences
-    Go To  http://localhost:11111/preferences
-    List Selection Should Be  theme  oscar
-
-Change safesearch
-    Page Should Contain  about
-    Page Should Contain  preferences
-    Go To  http://localhost:11111/preferences
-    List Selection Should Be  safesearch  None
-    Select From List  safesearch  Strict
-    Submit Preferences
-    Go To  http://localhost:11111/preferences
-    List Selection Should Be  safesearch  Strict
-
-Change image proxy
-    Page Should Contain  about
-    Page Should Contain  preferences
-    Go To  http://localhost:11111/preferences
-    List Selection Should Be  image_proxy  Disabled
-    Select From List  image_proxy  Enabled
-    Submit Preferences
-    Go To  http://localhost:11111/preferences
-    List Selection Should Be  image_proxy  Enabled
-
-Change search language
-    Page Should Contain  about
-    Page Should Contain  preferences
-    Go To  http://localhost:11111/preferences
-    List Selection Should Be  language  Default language
-    Select From List  language  Türkçe - tr-TR
-    Submit Preferences
-    Go To  http://localhost:11111/preferences
-    List Selection Should Be  language  Türkçe - tr-TR
-
-Change autocomplete
-    Page Should Contain  about
-    Page Should Contain  preferences
-    Go To  http://localhost:11111/preferences
-    List Selection Should Be  autocomplete  -
-    Select From List  autocomplete  google
-    Submit Preferences
-    Go To  http://localhost:11111/preferences
-    List Selection Should Be  autocomplete  google
-
-Change allowed/disabled engines
-    Page Should Contain  about
-    Page Should Contain  preferences
-    Go To  http://localhost:11111/preferences
-    Page Should Contain  Engine name
-    Element Should Contain  xpath=//label[@class="deny"][@for='engine_dummy_dummy_dummy']  Block
-    Element Should Contain  xpath=//label[@class="deny"][@for='engine_general_general_dummy']  Block
-    Click Element  xpath=//label[@class="deny"][@for='engine_general_general_dummy']
-    Submit Preferences
-    Page Should Contain  about
-    Page Should Contain  preferences
-    Go To  http://localhost:11111/preferences
-    Page Should Contain  Engine name
-    Element Should Contain  xpath=//label[@class="deny"][@for='engine_dummy_dummy_dummy']  Block
-    Element Should Contain  xpath=//label[@class="deny"][@for='engine_general_general_dummy']  \
-
-Block a plugin
-    Page Should Contain  about
-    Page Should Contain  preferences
-    Go To  http://localhost:11111/preferences
-    List Selection Should Be  theme  legacy
-    Select From List  theme  oscar
-    Submit Preferences
-    Go To  http://localhost:11111/preferences
-    List Selection Should Be  theme  oscar
-    Page Should Contain  Plugins
-    Click Link  Plugins
-    Checkbox Should Not Be Selected  id=plugin_HTTPS_rewrite
-    Click Element  xpath=//label[@for='plugin_HTTPS_rewrite']
-    Submit Preferences
-    Go To  http://localhost:11111/preferences
-    Page Should Contain  Plugins
-    Click Link  Plugins
-    Checkbox Should Be Selected  id=plugin_HTTPS_rewrite
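The block above is the removed Robot Framework/Selenium suite that drove the searx UI (front page, preferences, engine and plugin toggles) through a live browser on localhost:11111. For orientation only, a rough, hypothetical equivalent of the first "Front page" case using Flask's test client might look like the sketch below; it assumes searx.webapp exposes the Flask app object and is not part of this pull request.

# Hypothetical sketch, not searx code: approximate the removed "Front page"
# Robot case with Flask's built-in test client instead of a real browser.
from searx import webapp

client = webapp.app.test_client()
response = client.get('/')

# the front page should link to the about and preferences pages
assert b'about' in response.data
assert b'preferences' in response.data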

+ 2
- 2
tests/unit/engines/test_archlinux.py View File

@@ -25,7 +25,7 @@
         self.assertTrue(query in params['url'])
         self.assertTrue('wiki.archlinux.org' in params['url'])
 
-        for lang, domain in domains.iteritems():
+        for lang, domain in domains.items():
             dic['language'] = lang
             params = archlinux.request(query, dic)
             self.assertTrue(domain in params['url'])
@@ -102,5 +102,5 @@
         for exp in expected:
             res = results[i]
             i += 1
-            for key, value in exp.iteritems():
+            for key, value in exp.items():
                 self.assertEqual(res[key], value)
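The only change in this file is the switch from dict.iteritems(), which no longer exists in Python 3, to dict.items(), which works on both interpreters (on Python 2 it returns a list rather than an iterator, which is harmless for a test that only iterates once). A minimal sketch with made-up dictionary contents, not the engine's real domain table:

# Minimal sketch with made-up values; dict.iteritems() is Python 2 only,
# while dict.items() is available on both Python 2 and Python 3.
domains = {'de': 'example.de', 'tr': 'example.tr'}
for lang, domain in domains.items():
    print(lang, domain)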

+ 3
- 3
tests/unit/engines/test_bing.py View File

@@ -7,18 +7,18 @@
 class TestBingEngine(SearxTestCase):
 
     def test_request(self):
-        query = 'test_query'
+        query = u'test_query'
         dicto = defaultdict(dict)
         dicto['pageno'] = 0
         dicto['language'] = 'fr_FR'
-        params = bing.request(query, dicto)
+        params = bing.request(query.encode('utf-8'), dicto)
         self.assertTrue('url' in params)
         self.assertTrue(query in params['url'])
         self.assertTrue('language%3AFR' in params['url'])
         self.assertTrue('bing.com' in params['url'])
 
         dicto['language'] = 'all'
-        params = bing.request(query, dicto)
+        params = bing.request(query.encode('utf-8'), dicto)
         self.assertTrue('language' in params['url'])
 
     def test_response(self):
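Here the test query becomes an explicit unicode literal and is UTF-8 encoded before being handed to bing.request(), reflecting that the engine now receives the raw query as bytes while the assertions keep comparing against the unicode value. A minimal sketch of that pattern, using a stand-in function rather than the real searx engine:

# Minimal sketch; fake_request is a stand-in, not the searx bing engine.
def fake_request(query, params):
    # the engine receives the raw query as UTF-8 encoded bytes
    params['url'] = 'https://www.bing.com/search?q=' + query.decode('utf-8')
    return params

query = u'test_query'
params = fake_request(query.encode('utf-8'), {'pageno': 0})
assert query in params['url']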

+ 6
- 6
tests/unit/engines/test_bing_news.py View File

@@ -36,10 +36,10 @@
         self.assertRaises(AttributeError, bing_news.response, '')
         self.assertRaises(AttributeError, bing_news.response, '[]')
 
-        response = mock.Mock(content='<html></html>')
+        response = mock.Mock(text='<html></html>')
         self.assertEqual(bing_news.response(response), [])
 
-        response = mock.Mock(content='<html></html>')
+        response = mock.Mock(text='<html></html>')
         self.assertEqual(bing_news.response(response), [])
 
         html = """<?xml version="1.0" encoding="utf-8" ?>
@@ -74,7 +74,7 @@
         </item>
     </channel>
 </rss>"""  # noqa
-        response = mock.Mock(content=html)
+        response = mock.Mock(text=html.encode('utf-8'))
         results = bing_news.response(response)
         self.assertEqual(type(results), list)
         self.assertEqual(len(results), 2)
@@ -113,7 +113,7 @@
         </item>
     </channel>
 </rss>"""  # noqa
-        response = mock.Mock(content=html)
+        response = mock.Mock(text=html.encode('utf-8'))
         results = bing_news.response(response)
         self.assertEqual(type(results), list)
         self.assertEqual(len(results), 1)
@@ -136,11 +136,11 @@
     </channel>
 </rss>"""  # noqa
 
-        response = mock.Mock(content=html)
+        response = mock.Mock(text=html.encode('utf-8'))
         results = bing_news.response(response)
         self.assertEqual(type(results), list)
         self.assertEqual(len(results), 0)
 
         html = """<?xml version="1.0" encoding="utf-8" ?>gabarge"""
-        response = mock.Mock(content=html)
+        response = mock.Mock(text=html.encode('utf-8'))
         self.assertRaises(lxml.etree.XMLSyntaxError, bing_news.response, response)
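The mocks now populate response.text instead of response.content, matching the attribute the updated engine reads, and the XML fixtures are encoded to UTF-8 bytes before parsing, presumably because lxml refuses unicode strings that carry an XML encoding declaration. A minimal standalone sketch of that behaviour, with a hypothetical fixture rather than the engine's real parser:

# Minimal sketch; the mock exposes only the attribute the parser reads.
import mock
from lxml import etree

xml = u"""<?xml version="1.0" encoding="utf-8" ?><rss><channel></channel></rss>"""

# lxml raises ValueError for unicode strings with an encoding declaration,
# so the fixture is handed to the parser as UTF-8 bytes.
response = mock.Mock(text=xml.encode('utf-8'))
root = etree.fromstring(response.text)
assert root.tag == 'rss'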

+ 0
- 0
tests/unit/engines/test_btdigg.py View File


Some files were not shown because too many files changed in this diff