Merge pull request #913 from asciimoo/py3

Add Python3 compatibility
Adam Tauber, 8 years ago
commit 4cffd78650
100 changed files with 442 additions and 440 deletions
  1. .travis.yml (+3 −2)
  2. requirements-dev.txt (+1 −2)
  3. searx/answerers/__init__.py (+8 −4)
  4. searx/answerers/random/answerer.py (+9 −4)
  5. searx/answerers/statistics/answerer.py (+10 −6)
  6. searx/autocomplete.py (+5 −1)
  7. searx/engines/1337x.py (+1 −2)
  8. searx/engines/__init__.py (+2 −3)
  9. searx/engines/archlinux.py (+1 −2)
  10. searx/engines/base.py (+3 −3)
  11. searx/engines/bing.py (+1 −1)
  12. searx/engines/bing_images.py (+1 −1)
  13. searx/engines/bing_news.py (+2 −3)
  14. searx/engines/blekko_images.py (+1 −1)
  15. searx/engines/btdigg.py (+2 −3)
  16. searx/engines/currency_convert.py (+9 −5)
  17. searx/engines/dailymotion.py (+1 −2)
  18. searx/engines/deezer.py (+2 −3)
  19. searx/engines/deviantart.py (+1 −1)
  20. searx/engines/dictzone.py (+3 −3)
  21. searx/engines/digbt.py (+6 −2)
  22. searx/engines/digg.py (+2 −2)
  23. searx/engines/doku.py (+1 −1)
  24. searx/engines/duckduckgo.py (+1 −1)
  25. searx/engines/duckduckgo_definitions.py (+3 −3)
  26. searx/engines/faroo.py (+1 −1)
  27. searx/engines/fdroid.py (+3 −4)
  28. searx/engines/filecrop.py (+7 −4)
  29. searx/engines/flickr.py (+1 −1)
  30. searx/engines/flickr_noapi.py (+1 −1)
  31. searx/engines/framalibre.py (+1 −3)
  32. searx/engines/frinkiac.py (+1 −1)
  33. searx/engines/gigablast.py (+1 −2)
  34. searx/engines/github.py (+1 −1)
  35. searx/engines/google.py (+2 −3)
  36. searx/engines/google_images.py (+1 −1)
  37. searx/engines/google_news.py (+1 −2)
  38. searx/engines/ina.py (+7 −3)
  39. searx/engines/json_engine.py (+8 −3)
  40. searx/engines/kickass.py (+1 −2)
  41. searx/engines/mediawiki.py (+1 −1)
  42. searx/engines/mixcloud.py (+1 −1)
  43. searx/engines/nyaa.py (+1 −1)
  44. searx/engines/openstreetmap.py (+0 −4)
  45. searx/engines/photon.py (+1 −1)
  46. searx/engines/piratebay.py (+1 −2)
  47. searx/engines/qwant.py (+1 −2)
  48. searx/engines/reddit.py (+2 −4)
  49. searx/engines/scanr_structures.py (+1 −3)
  50. searx/engines/searchcode_code.py (+2 −3)
  51. searx/engines/searchcode_doc.py (+2 −3)
  52. searx/engines/seedpeer.py (+1 −3)
  53. searx/engines/soundcloud.py (+12 −7)
  54. searx/engines/spotify.py (+2 −3)
  55. searx/engines/stackoverflow.py (+2 −4)
  56. searx/engines/startpage.py (+1 −1)
  57. searx/engines/subtitleseeker.py (+1 −1)
  58. searx/engines/swisscows.py (+13 −14)
  59. searx/engines/tokyotoshokan.py (+5 −6)
  60. searx/engines/torrentz.py (+4 −4)
  61. searx/engines/translated.py (+4 −0)
  62. searx/engines/twitter.py (+1 −2)
  63. searx/engines/vimeo.py (+1 −1)
  64. searx/engines/wikidata.py (+5 −8)
  65. searx/engines/wikipedia.py (+9 −12)
  66. searx/engines/wolframalpha_api.py (+6 −7)
  67. searx/engines/wolframalpha_noapi.py (+4 −5)
  68. searx/engines/www1x.py (+2 −4)
  69. searx/engines/www500px.py (+1 −2)
  70. searx/engines/xpath.py (+2 −2)
  71. searx/engines/yacy.py (+1 −1)
  72. searx/engines/yahoo.py (+1 −2)
  73. searx/engines/yahoo_news.py (+3 −3)
  74. searx/engines/yandex.py (+2 −2)
  75. searx/engines/youtube_api.py (+1 −1)
  76. searx/engines/youtube_noapi.py (+1 −1)
  77. searx/plugins/__init__.py (+4 −1)
  78. searx/plugins/doai_rewrite.py (+1 −1)
  79. searx/plugins/https_rewrite.py (+4 −1)
  80. searx/plugins/self_info.py (+2 −2)
  81. searx/plugins/tracker_url_remover.py (+1 −1)
  82. searx/preferences.py (+9 −9)
  83. searx/query.py (+6 −2)
  84. searx/results.py (+5 −1)
  85. searx/search.py (+10 −2)
  86. searx/settings_robot.yml (+1 −1)
  87. searx/templates/courgette/404.html (+1 −1)
  88. searx/templates/legacy/404.html (+1 −1)
  89. searx/templates/oscar/404.html (+1 −1)
  90. searx/templates/pix-art/404.html (+1 −1)
  91. searx/testing.py (+26 −16)
  92. searx/url_utils.py (+28 −0)
  93. searx/utils.py (+18 −8)
  94. searx/webapp.py (+23 −13)
  95. tests/robot/__init__.py (+75 −0)
  96. tests/robot/test_basic.robot (+0 −153)
  97. tests/unit/engines/test_archlinux.py (+2 −2)
  98. tests/unit/engines/test_bing.py (+3 −3)
  99. tests/unit/engines/test_bing_news.py (+6 −6)
  100. tests/unit/engines/test_btdigg.py (+0 −0)
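
Most of the churn in the engine files below is one mechanical change: the Python 2 urllib/urlparse imports are replaced by imports from a new compatibility module, searx/url_utils.py (+28 −0 in the list above), whose own diff is not included in this excerpt. A minimal sketch of such a shim, assuming it only re-exports the helpers the engines import from it (urlencode, quote, quote_plus, unquote, urljoin, urlparse, parse_qsl), could look like this:

from sys import version_info

if version_info[0] == 2:
    # Python 2: the helpers live in urllib and urlparse
    from urllib import quote, quote_plus, unquote, urlencode
    from urlparse import parse_qsl, urljoin, urlparse
else:
    # Python 3: the same helpers moved to urllib.parse
    from urllib.parse import (parse_qsl, quote, quote_plus, unquote,
                              urlencode, urljoin, urlparse)

__all__ = ['parse_qsl', 'quote', 'quote_plus', 'unquote',
           'urlencode', 'urljoin', 'urlparse']

Each engine then imports these names from one place (from searx.url_utils import urlencode, urljoin, ...) instead of branching on the interpreter version in every module.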

.travis.yml (+3 −2)

 language: python
 python:
   - "2.7"
+  - "3.6"
 before_install:
   - "export DISPLAY=:99.0"
   - "sh -e /etc/init.d/xvfb start"
...
   - ./manage.sh styles
   - ./manage.sh grunt_build
   - ./manage.sh tests
-  - ./manage.sh py_test_coverage
 after_success:
-  coveralls
+  - ./manage.sh py_test_coverage
+  - coveralls
 notifications:
   irc:
     channels:

requirements-dev.txt (+1 −2)

 nose2[coverage-plugin]
 pep8==1.7.0
 plone.testing==5.0.0
-robotframework-selenium2library==1.8.0
-robotsuite==1.7.0
+splinter==0.7.5
 transifex-client==0.12.2
 unittest2==1.1.0
 zope.testrunner==4.5.1

searx/answerers/__init__.py (+8 −4)

 from os import listdir
 from os.path import realpath, dirname, join, isdir
+from sys import version_info
 from searx.utils import load_module
 from collections import defaultdict
 
+if version_info[0] == 3:
+    unicode = str
+
 
 answerers_dir = dirname(realpath(__file__))
 
...
 def load_answerers():
     answerers = []
     for filename in listdir(answerers_dir):
-        if not isdir(join(answerers_dir, filename)):
+        if not isdir(join(answerers_dir, filename)) or filename.startswith('_'):
             continue
         module = load_module('answerer.py', join(answerers_dir, filename))
         if not hasattr(module, 'keywords') or not isinstance(module.keywords, tuple) or not len(module.keywords):
...
 
 def ask(query):
     results = []
-    query_parts = filter(None, query.query.split())
+    query_parts = list(filter(None, query.query.split()))
 
-    if query_parts[0] not in answerers_by_keywords:
+    if query_parts[0].decode('utf-8') not in answerers_by_keywords:
         return results
 
-    for answerer in answerers_by_keywords[query_parts[0]]:
+    for answerer in answerers_by_keywords[query_parts[0].decode('utf-8')]:
         result = answerer(query)
         if result:
             results.append(result)

searx/answerers/random/answerer.py (+9 −4)

 import random
 import string
+import sys
 from flask_babel import gettext
 
 # required answerer attribute
...
 
 random_int_max = 2**31
 
-random_string_letters = string.lowercase + string.digits + string.uppercase
+if sys.version_info[0] == 2:
+    random_string_letters = string.lowercase + string.digits + string.uppercase
+else:
+    unicode = str
+    random_string_letters = string.ascii_lowercase + string.digits + string.ascii_uppercase
 
 
 def random_string():
...
     return unicode(random.randint(-random_int_max, random_int_max))
 
 
-random_types = {u'string': random_string,
-                u'int': random_int,
-                u'float': random_float}
+random_types = {b'string': random_string,
+                b'int': random_int,
+                b'float': random_float}
 
 
 # required answerer function

searx/answerers/statistics/answerer.py (+10 −6)

+from sys import version_info
 from functools import reduce
 from operator import mul
 
 from flask_babel import gettext
 
+if version_info[0] == 3:
+    unicode = str
+
 keywords = ('min',
             'max',
             'avg',
...
         return []
 
     try:
-        args = map(float, parts[1:])
+        args = list(map(float, parts[1:]))
     except:
         return []
 
     func = parts[0]
     answer = None
 
-    if func == 'min':
+    if func == b'min':
         answer = min(args)
-    elif func == 'max':
+    elif func == b'max':
         answer = max(args)
-    elif func == 'avg':
+    elif func == b'avg':
         answer = sum(args) / len(args)
-    elif func == 'sum':
+    elif func == b'sum':
         answer = sum(args)
-    elif func == 'prod':
+    elif func == b'prod':
         answer = reduce(mul, args, 1)
 
     if answer is None:
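
A second pattern repeated in the answerers (and in several engines further down, e.g. dictzone.py and currency_convert.py) is that the raw query is handled as a UTF-8 byte string on both interpreters: literals compared against it become b'...' and an explicit .decode('utf-8') appears wherever a text key is needed, while unicode = str keeps the remaining unicode(...) calls working on Python 3. A small illustration of the assumption behind the b'min' / b'avg' keys above (hypothetical values, not part of the diff):

# Assuming the incoming query arrives as UTF-8 encoded bytes, a dict keyed
# on text strings would never match its tokens on Python 3:
parts = b'avg 1 2 3'.split()
handlers = {b'min': min, b'max': max, b'avg': lambda a: sum(a) / len(a)}
args = list(map(float, parts[1:]))
print(handlers[parts[0]](args))            # 2.0 -- bytes key matches bytes token
print(parts[0].decode('utf-8') == 'avg')   # True -- decode where a text key is needed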

searx/autocomplete.py (+5 −1)

 
 from lxml import etree
 from json import loads
-from urllib import urlencode
 from searx import settings
 from searx.languages import language_codes
 from searx.engines import (
...
 )
 from searx.poolrequests import get as http_get
 
+try:
+    from urllib import urlencode
+except:
+    from urllib.parse import urlencode
+
 
 def get(*args, **kwargs):
     if 'timeout' not in kwargs:

searx/engines/1337x.py (+1 −2)

-from urllib import quote
 from lxml import html
 from searx.engines.xpath import extract_text
 from searx.utils import get_torrent_size
-from urlparse import urljoin
+from searx.url_utils import quote, urljoin
 
 url = 'https://1337x.to/'
 search_url = url + 'search/{search_term}/{pageno}/'

searx/engines/__init__.py (+2 −3)

             if engine_data['categories'] == 'none':
                 engine.categories = []
             else:
-                engine.categories = map(
-                    str.strip, engine_data['categories'].split(','))
+                engine.categories = list(map(str.strip, engine_data['categories'].split(',')))
             continue
         setattr(engine, param_name, engine_data[param_name])
 
-    for arg_name, arg_value in engine_default_args.iteritems():
+    for arg_name, arg_value in engine_default_args.items():
         if not hasattr(engine, arg_name):
             setattr(engine, arg_name, arg_value)
 

searx/engines/archlinux.py (+1 −2)

  @parse        url, title
 """
 
-from urlparse import urljoin
-from urllib import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text
+from searx.url_utils import urlencode, urljoin
 
 # engine dependent config
 categories = ['it']

searx/engines/base.py (+3 −3)

 """
 
 from lxml import etree
-from urllib import urlencode
-from searx.utils import searx_useragent
 from datetime import datetime
 import re
+from searx.url_utils import urlencode
+from searx.utils import searx_useragent
 
 
 categories = ['science']
...
 def response(resp):
     results = []
 
-    search_results = etree.XML(resp.content)
+    search_results = etree.XML(resp.text)
 
     for entry in search_results.xpath('./result/doc'):
         content = "No description available"

searx/engines/bing.py (+1 −1)

  @todo        publishedDate
 """
 
-from urllib import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['general']

searx/engines/bing_images.py (+1 −1)

               limited response to 10 images
 """
 
-from urllib import urlencode
 from lxml import html
 from json import loads
 import re
 from searx.engines.bing import _fetch_supported_languages, supported_languages_url
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['images']

searx/engines/bing_news.py (+2 −3)

  @parse       url, title, content, publishedDate, thumbnail
 """
 
-from urllib import urlencode
-from urlparse import urlparse, parse_qsl
 from datetime import datetime
 from dateutil import parser
 from lxml import etree
 from searx.utils import list_get
 from searx.engines.bing import _fetch_supported_languages, supported_languages_url
+from searx.url_utils import urlencode, urlparse, parse_qsl
 
 # engine dependent config
 categories = ['news']
...
 def response(resp):
     results = []
 
-    rss = etree.fromstring(resp.content)
+    rss = etree.fromstring(resp.text)
 
     ns = rss.nsmap
 

searx/engines/blekko_images.py (+1 −1)

 """
 
 from json import loads
-from urllib import urlencode
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['images']

searx/engines/btdigg.py (+2 −3)

  @parse       url, title, content, seed, leech, magnetlink
 """
 
-from urlparse import urljoin
-from urllib import quote
 from lxml import html
 from operator import itemgetter
 from searx.engines.xpath import extract_text
+from searx.url_utils import quote, urljoin
 from searx.utils import get_torrent_size
 
 # engine dependent config
...
 def response(resp):
     results = []
 
-    dom = html.fromstring(resp.content)
+    dom = html.fromstring(resp.text)
 
     search_res = dom.xpath('//div[@id="search_res"]/table/tr')
 

searx/engines/currency_convert.py (+9 −5)

-from datetime import datetime
+import json
 import re
 import os
-import json
+import sys
 import unicodedata
 
+from datetime import datetime
+
+if sys.version_info[0] == 3:
+    unicode = str
 
 categories = []
 url = 'https://download.finance.yahoo.com/d/quotes.csv?e=.csv&f=sl1d1t1&s={query}=X'
 weight = 100
 
-parser_re = re.compile(u'.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I)  # noqa
+parser_re = re.compile(b'.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I)
 
 db = 1
 
 
 def normalize_name(name):
-    name = name.lower().replace('-', ' ').rstrip('s')
+    name = name.decode('utf-8').lower().replace('-', ' ').rstrip('s')
     name = re.sub(' +', ' ', name)
     return unicodedata.normalize('NFKD', name).lower()
 
...
 
 
 def request(query, params):
-    m = parser_re.match(unicode(query, 'utf8'))
+    m = parser_re.match(query)
     if not m:
         # wrong query
         return params

searx/engines/dailymotion.py (+1 −2)

  @todo        set content-parameter with correct data
 """
 
-from urllib import urlencode
 from json import loads
 from datetime import datetime
-from requests import get
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['videos']

searx/engines/deezer.py (+2 −3)

 """
 
 from json import loads
-from urllib import urlencode
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['music']
...
 def request(query, params):
     offset = (params['pageno'] - 1) * 25
 
-    params['url'] = search_url.format(query=urlencode({'q': query}),
-                                      offset=offset)
+    params['url'] = search_url.format(query=urlencode({'q': query}), offset=offset)
 
     return params
 

searx/engines/deviantart.py (+1 −1)

  @todo        rewrite to api
 """
 
-from urllib import urlencode
 from lxml import html
 import re
 from searx.engines.xpath import extract_text
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['images']

searx/engines/dictzone.py (+3 −3)

 """
 
 import re
-from urlparse import urljoin
 from lxml import html
 from searx.utils import is_valid_lang
+from searx.url_utils import urljoin
 
 categories = ['general']
 url = u'http://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}'
 weight = 100
 
-parser_re = re.compile(u'.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I)
+parser_re = re.compile(b'.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I)
 results_xpath = './/table[@id="r"]/tr'
 
 
 def request(query, params):
-    m = parser_re.match(unicode(query, 'utf8'))
+    m = parser_re.match(query)
     if not m:
         return params
 

searx/engines/digbt.py (+6 −2)

  @parse       url, title, content, magnetlink
 """
 
-from urlparse import urljoin
+from sys import version_info
 from lxml import html
 from searx.engines.xpath import extract_text
 from searx.utils import get_torrent_size
+from searx.url_utils import urljoin
+
+if version_info[0] == 3:
+    unicode = str
 
 categories = ['videos', 'music', 'files']
 paging = True
...
 
 
 def response(resp):
-    dom = html.fromstring(resp.content)
+    dom = html.fromstring(resp.text)
     search_res = dom.xpath('.//td[@class="x-item"]')
 
     if not search_res:

searx/engines/digg.py (+2 −2)

  @parse       url, title, content, publishedDate, thumbnail
 """
 
-from urllib import quote_plus
+from dateutil import parser
 from json import loads
 from lxml import html
-from dateutil import parser
+from searx.url_utils import quote_plus
 
 # engine dependent config
 categories = ['news', 'social media']

searx/engines/doku.py (+1 −1)

 # @stable      yes
 # @parse       (general)    url, title, content
 
-from urllib import urlencode
 from lxml.html import fromstring
 from searx.engines.xpath import extract_text
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['general']  # TODO , 'images', 'music', 'videos', 'files'

searx/engines/duckduckgo.py (+1 −1)

  @todo        rewrite to api
 """
 
-from urllib import urlencode
 from lxml.html import fromstring
 from requests import get
 from json import loads
 from searx.engines.xpath import extract_text
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['general']

searx/engines/duckduckgo_definitions.py (+3 −3)

 import json
-from urllib import urlencode
-from re import compile, sub
 from lxml import html
-from searx.utils import html_to_text
+from re import compile
 from searx.engines.xpath import extract_text
 from searx.engines.duckduckgo import _fetch_supported_languages, supported_languages_url
+from searx.url_utils import urlencode
+from searx.utils import html_to_text
 
 url = 'https://api.duckduckgo.com/'\
     + '?{query}&format=json&pretty=0&no_redirect=1&d=1'

searx/engines/faroo.py (+1 −1)

  @parse       url, title, content, publishedDate, img_src
 """
 
-from urllib import urlencode
 from json import loads
 import datetime
 from searx.utils import searx_useragent
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['general', 'news']

searx/engines/fdroid.py (+3 −4)

  @parse        url, title, content
 """
 
-from urllib import urlencode
-from searx.engines.xpath import extract_text
 from lxml import html
+from searx.engines.xpath import extract_text
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['files']
...
 
 # do search-request
 def request(query, params):
-    query = urlencode({'fdfilter': query,
-                       'fdpage': params['pageno']})
+    query = urlencode({'fdfilter': query, 'fdpage': params['pageno']})
     params['url'] = search_url.format(query=query)
     return params
 

searx/engines/filecrop.py (+7 −4)

-from urllib import urlencode
-from HTMLParser import HTMLParser
+from searx.url_utils import urlencode
+
+try:
+    from HTMLParser import HTMLParser
+except:
+    from html.parser import HTMLParser
 
 url = 'http://www.filecrop.com/'
 search_url = url + '/search.php?{query}&size_i=0&size_f=100000000&engine_r=1&engine_d=1&engine_e=1&engine_4=1&engine_m=1&pos={index}'  # noqa
...
 
 def request(query, params):
     index = 1 + (params['pageno'] - 1) * 30
-    params['url'] = search_url.format(query=urlencode({'w': query}),
-                                      index=index)
+    params['url'] = search_url.format(query=urlencode({'w': query}), index=index)
     return params
 
 

searx/engines/flickr.py (+1 −1)

  More info on api-key : https://www.flickr.com/services/apps/create/
 """
 
-from urllib import urlencode
 from json import loads
+from searx.url_utils import urlencode
 
 categories = ['images']
 

searx/engines/flickr_noapi.py (+1 −1)

  @parse       url, title, thumbnail, img_src
 """
 
-from urllib import urlencode
 from json import loads
 from time import time
 import re
 from searx.engines import logger
+from searx.url_utils import urlencode
 
 
 logger = logger.getChild('flickr-noapi')

searx/engines/framalibre.py (+1 −3)

  @parse       url, title, content, thumbnail, img_src
 """
 
-from urlparse import urljoin
 from cgi import escape
-from urllib import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text
-from dateutil import parser
+from searx.url_utils import urljoin, urlencode
 
 # engine dependent config
 categories = ['it']

searx/engines/frinkiac.py (+1 −1)

 """
 
 from json import loads
-from urllib import urlencode
+from searx.url_utils import urlencode
 
 categories = ['images']
 

searx/engines/gigablast.py (+1 −2)

 """
 
 from json import loads
-from random import randint
 from time import time
-from urllib import urlencode
 from lxml.html import fromstring
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['general']

searx/engines/github.py (+1 −1)

  @parse       url, title, content
 """
 
-from urllib import urlencode
 from json import loads
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['it']

searx/engines/google.py (+2 −3)

 # @parse       url, title, content, suggestion
 
 import re
-from urllib import urlencode
-from urlparse import urlparse, parse_qsl
 from lxml import html, etree
 from searx.engines.xpath import extract_text, extract_url
-from searx.search import logger
+from searx import logger
+from searx.url_utils import urlencode, urlparse, parse_qsl
 
 logger = logger.getChild('google engine')
 

searx/engines/google_images.py (+1 −1)

 """
 
 from datetime import date, timedelta
-from urllib import urlencode
 from json import loads
 from lxml import html
+from searx.url_utils import urlencode
 
 
 # engine dependent config

searx/engines/google_news.py (+1 −2)

 """
 
 from lxml import html
-from urllib import urlencode
-from json import loads
 from searx.engines.google import _fetch_supported_languages, supported_languages_url
+from searx.url_utils import urlencode
 
 # search-url
 categories = ['news']

searx/engines/ina.py (+7 −3)

 # @todo        embedded (needs some md5 from video page)
 
 from json import loads
-from urllib import urlencode
 from lxml import html
-from HTMLParser import HTMLParser
-from searx.engines.xpath import extract_text
 from dateutil import parser
+from searx.engines.xpath import extract_text
+from searx.url_utils import urlencode
+
+try:
+    from HTMLParser import HTMLParser
+except:
+    from html.parser import HTMLParser
 
 # engine dependent config
 categories = ['videos']

searx/engines/json_engine.py (+8 −3)

-from urllib import urlencode
-from json import loads
 from collections import Iterable
+from json import loads
+from sys import version_info
+from searx.url_utils import urlencode
+
+if version_info[0] == 3:
+    unicode = str
 
 search_url = None
 url_query = None
 content_query = None
 title_query = None
+paging = False
 suggestion_query = ''
 results_query = ''
 
...
 
 def iterate(iterable):
     if type(iterable) == dict:
-        it = iterable.iteritems()
+        it = iterable.items()
 
     else:
         it = enumerate(iterable)

searx/engines/kickass.py (+1 −2)

  @parse       url, title, content, seed, leech, magnetlink
 """
 
-from urlparse import urljoin
-from urllib import quote
 from lxml import html
 from operator import itemgetter
 from searx.engines.xpath import extract_text
 from searx.utils import get_torrent_size, convert_str_to_int
+from searx.url_utils import quote, urljoin
 
 # engine dependent config
 categories = ['videos', 'music', 'files']

searx/engines/mediawiki.py (+1 −1)

 
 from json import loads
 from string import Formatter
-from urllib import urlencode, quote
+from searx.url_utils import urlencode, quote
 
 # engine dependent config
 categories = ['general']

searx/engines/mixcloud.py (+1 −1)

 """
 
 from json import loads
-from urllib import urlencode
 from dateutil import parser
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['music']

searx/engines/nyaa.py (+1 −1)

  @parse        url, title, content, seed, leech, torrentfile
 """
 
-from urllib import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['files', 'images', 'videos', 'music']

searx/engines/openstreetmap.py (+0 −4)

 """
 
 from json import loads
-from searx.utils import searx_useragent
 
 # engine dependent config
 categories = ['map']
...
 def request(query, params):
     params['url'] = base_url + search_string.format(query=query)
 
-    # using searx User-Agent
-    params['headers']['User-Agent'] = searx_useragent()
-
     return params
 
 

searx/engines/photon.py (+1 −1)

  @parse       url, title
 """
 
-from urllib import urlencode
 from json import loads
 from searx.utils import searx_useragent
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['map']

searx/engines/piratebay.py (+1 −2)

 # @stable      yes (HTML can change)
 # @parse       url, title, content, seed, leech, magnetlink
 
-from urlparse import urljoin
-from urllib import quote
 from lxml import html
 from operator import itemgetter
 from searx.engines.xpath import extract_text
+from searx.url_utils import quote, urljoin
 
 # engine dependent config
 categories = ['videos', 'music', 'files']

searx/engines/qwant.py (+1 −2)

 
 from datetime import datetime
 from json import loads
-from urllib import urlencode
-
 from searx.utils import html_to_text
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = None

searx/engines/reddit.py (+2 −4)

 """
 
 import json
-from urllib import urlencode
-from urlparse import urlparse, urljoin
 from datetime import datetime
+from searx.url_utils import urlencode, urljoin, urlparse
 
 # engine dependent config
 categories = ['general', 'images', 'news', 'social media']
...
 
 # do search-request
 def request(query, params):
-    query = urlencode({'q': query,
-                       'limit': page_size})
+    query = urlencode({'q': query, 'limit': page_size})
     params['url'] = search_url.format(query=query)
 
     return params

searx/engines/scanr_structures.py (+1 −3)

  @parse       url, title, content, img_src
 """
 
-from urllib import urlencode
 from json import loads, dumps
-from dateutil import parser
 from searx.utils import html_to_text
 
 # engine dependent config
...
     search_res = loads(resp.text)
 
     # return empty array if there are no results
-    if search_res.get('total') < 1:
+    if search_res.get('total', 0) < 1:
         return []
 
     # parse results

searx/engines/searchcode_code.py (+2 −3)

  @parse       url, title, content
 """
 
-from urllib import urlencode
 from json import loads
+from searx.url_utils import urlencode
 
 
 # engine dependent config
...
 
 # do search-request
 def request(query, params):
-    params['url'] = search_url.format(query=urlencode({'q': query}),
-                                      pageno=params['pageno'] - 1)
+    params['url'] = search_url.format(query=urlencode({'q': query}), pageno=params['pageno'] - 1)
 
     return params
 

searx/engines/searchcode_doc.py (+2 −3)

  @parse       url, title, content
 """
 
-from urllib import urlencode
 from json import loads
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['it']
...
 
 # do search-request
 def request(query, params):
-    params['url'] = search_url.format(query=urlencode({'q': query}),
-                                      pageno=params['pageno'] - 1)
+    params['url'] = search_url.format(query=urlencode({'q': query}), pageno=params['pageno'] - 1)
 
     return params
 

searx/engines/seedpeer.py (+1 −3)

 # @stable      yes (HTML can change)
 # @parse       url, title, content, seed, leech, magnetlink
 
-from urlparse import urljoin
-from urllib import quote
 from lxml import html
 from operator import itemgetter
-from searx.engines.xpath import extract_text
+from searx.url_utils import quote, urljoin
 
 
 url = 'http://www.seedpeer.eu/'

searx/engines/soundcloud.py (+12 −7)

 """
 
 import re
-from StringIO import StringIO
 from json import loads
-from lxml import etree
-from urllib import urlencode, quote_plus
+from lxml import html
 from dateutil import parser
 from searx import logger
 from searx.poolrequests import get as http_get
+from searx.url_utils import quote_plus, urlencode
+
+try:
+    from cStringIO import StringIO
+except:
+    from io import StringIO
 
 # engine dependent config
 categories = ['music']
...
     'scrolling="no" frameborder="no" ' +\
     'data-src="https://w.soundcloud.com/player/?url={uri}"></iframe>'
 
+cid_re = re.compile(r'client_id:"([^"]*)"', re.I | re.U)
+
 
 def get_client_id():
     response = http_get("https://soundcloud.com")
-    rx_namespace = {"re": "http://exslt.org/regular-expressions"}
 
     if response.ok:
-        tree = etree.parse(StringIO(response.content), etree.HTMLParser())
-        script_tags = tree.xpath("//script[re:match(@src, '(.*app.*js)')]", namespaces=rx_namespace)
+        tree = html.fromstring(response.content)
+        script_tags = tree.xpath("//script[contains(@src, '/assets/app')]")
         app_js_urls = [script_tag.get('src') for script_tag in script_tags if script_tag is not None]
 
         # extracts valid app_js urls from soundcloud.com content
...
             # gets app_js and searches for the clientid
             response = http_get(app_js_url)
             if response.ok:
-                cids = re.search(r'client_id:"([^"]*)"', response.content, re.M | re.I)
+                cids = cid_re.search(response.text)
                 if cids is not None and len(cids.groups()):
                     return cids.groups()[0]
     logger.warning("Unable to fetch guest client_id from SoundCloud, check parser!")

searx/engines/spotify.py (+2 −3)

 """
 
 from json import loads
-from urllib import urlencode
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['music']
...
 def request(query, params):
     offset = (params['pageno'] - 1) * 20
 
-    params['url'] = search_url.format(query=urlencode({'q': query}),
-                                      offset=offset)
+    params['url'] = search_url.format(query=urlencode({'q': query}), offset=offset)
 
     return params
 

searx/engines/stackoverflow.py (+2 −4)

  @parse       url, title, content
 """
 
-from urlparse import urljoin
-from urllib import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text
+from searx.url_utils import urlencode, urljoin
 
 # engine dependent config
 categories = ['it']
...
 
 # do search-request
 def request(query, params):
-    params['url'] = search_url.format(query=urlencode({'q': query}),
-                                      pageno=params['pageno'])
+    params['url'] = search_url.format(query=urlencode({'q': query}), pageno=params['pageno'])
 
     return params
 

searx/engines/startpage.py (+1 −1)

 def response(resp):
     results = []
 
-    dom = html.fromstring(resp.content)
+    dom = html.fromstring(resp.text)
 
     # parse results
     for result in dom.xpath(results_xpath):

searx/engines/subtitleseeker.py (+1 −1)

  @parse       url, title, content
 """
 
-from urllib import quote_plus
 from lxml import html
 from searx.languages import language_codes
 from searx.engines.xpath import extract_text
+from searx.url_utils import quote_plus
 
 # engine dependent config
 categories = ['videos']

searx/engines/swisscows.py (+13 −14)

 """
 
 from json import loads
-from urllib import urlencode, unquote
 import re
 from lxml.html import fromstring
+from searx.url_utils import unquote, urlencode
 
 # engine dependent config
 categories = ['general', 'images']
...
 supported_languages_url = base_url
 
 # regex
-regex_json = re.compile(r'initialData: {"Request":(.|\n)*},\s*environment')
-regex_json_remove_start = re.compile(r'^initialData:\s*')
-regex_json_remove_end = re.compile(r',\s*environment$')
-regex_img_url_remove_start = re.compile(r'^https?://i\.swisscows\.ch/\?link=')
+regex_json = re.compile(b'initialData: {"Request":(.|\n)*},\s*environment')
+regex_json_remove_start = re.compile(b'^initialData:\s*')
+regex_json_remove_end = re.compile(b',\s*environment$')
+regex_img_url_remove_start = re.compile(b'^https?://i\.swisscows\.ch/\?link=')
 
 
 # do search-request
...
         ui_language = params['language'].split('-')[0]
 
     search_path = search_string.format(
-        query=urlencode({'query': query,
-                         'uiLanguage': ui_language,
-                         'region': region}),
-        page=params['pageno'])
+        query=urlencode({'query': query, 'uiLanguage': ui_language, 'region': region}),
+        page=params['pageno']
+    )
 
     # image search query is something like 'image?{query}&page={page}'
     if params['category'] == 'images':
...
 def response(resp):
     results = []
 
-    json_regex = regex_json.search(resp.content)
+    json_regex = regex_json.search(resp.text)
 
     # check if results are returned
     if not json_regex:
         return []
 
-    json_raw = regex_json_remove_end.sub('', regex_json_remove_start.sub('', json_regex.group()))
-    json = loads(json_raw)
+    json_raw = regex_json_remove_end.sub(b'', regex_json_remove_start.sub(b'', json_regex.group()))
+    json = loads(json_raw.decode('utf-8'))
 
     # parse results
     for result in json['Results'].get('items', []):
...
 
         # parse image results
         if result.get('ContentType', '').startswith('image'):
-            img_url = unquote(regex_img_url_remove_start.sub('', result['Url']))
+            img_url = unquote(regex_img_url_remove_start.sub(b'', result['Url'].encode('utf-8')).decode('utf-8'))
 
             # append result
             results.append({'url': result['SourceUrl'],
...
     # parse images
     for result in json.get('Images', []):
         # decode image url
-        img_url = unquote(regex_img_url_remove_start.sub('', result['Url']))
+        img_url = unquote(regex_img_url_remove_start.sub(b'', result['Url'].encode('utf-8')).decode('utf-8'))
 
         # append result
         results.append({'url': result['SourceUrl'],

searx/engines/tokyotoshokan.py (+5 −6)

 """
 
 import re
-from urllib import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text
 from datetime import datetime
 from searx.engines.nyaa import int_or_zero, get_filesize_mul
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['files', 'videos', 'music']
...
 
 # do search-request
 def request(query, params):
-    query = urlencode({'page': params['pageno'],
-                       'terms': query})
+    query = urlencode({'page': params['pageno'], 'terms': query})
     params['url'] = search_url.format(query=query)
     return params
 
...
     size_re = re.compile(r'Size:\s*([\d.]+)(TB|GB|MB|B)', re.IGNORECASE)
 
     # processing the results, two rows at a time
-    for i in xrange(0, len(rows), 2):
+    for i in range(0, len(rows), 2):
         # parse the first row
         name_row = rows[i]
 
...
                     groups = size_re.match(item).groups()
                     multiplier = get_filesize_mul(groups[1])
                     params['filesize'] = int(multiplier * float(groups[0]))
-                except Exception as e:
+                except:
                     pass
             elif item.startswith('Date:'):
                 try:
                     # Date: 2016-02-21 21:44 UTC
                     date = datetime.strptime(item, 'Date: %Y-%m-%d %H:%M UTC')
                     params['publishedDate'] = date
-                except Exception as e:
+                except:
                     pass
             elif item.startswith('Comment:'):
                 params['content'] = item

searx/engines/torrentz.py (+4 −4)

 """
 
 import re
-from urllib import urlencode
 from lxml import html
-from searx.engines.xpath import extract_text
 from datetime import datetime
 from searx.engines.nyaa import int_or_zero, get_filesize_mul
+from searx.engines.xpath import extract_text
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['files', 'videos', 'music']
...
             size_str = result.xpath('./dd/span[@class="s"]/text()')[0]
             size, suffix = size_str.split()
             params['filesize'] = int(size) * get_filesize_mul(suffix)
-        except Exception as e:
+        except:
             pass
 
         # does our link contain a valid SHA1 sum?
...
             # Fri, 25 Mar 2016 16:29:01
             date = datetime.strptime(date_str, '%a, %d %b %Y %H:%M:%S')
             params['publishedDate'] = date
-        except Exception as e:
+        except:
             pass
 
         results.append(params)

searx/engines/translated.py  (+4, -0)

  @parse       url, title, content
 """
 import re
+from sys import version_info
 from searx.utils import is_valid_lang

+if version_info[0] == 3:
+    unicode = str
+
 categories = ['general']
 url = u'http://api.mymemory.translated.net/get?q={query}&langpair={from_lang}|{to_lang}{key}'
 web_url = u'http://mymemory.translated.net/en/{from_lang}/{to_lang}/{query}'

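The `version_info` guard above is the first of several identical shims in this patch: Python 3 has no `unicode` built-in, so aliasing it to `str` lets the remaining 2.x-style code run unchanged on both interpreters. A minimal, self-contained sketch of the idiom (the `to_text` helper is illustrative only, not part of the patch):

from sys import version_info

if version_info[0] == 3:
    # Python 3 has no ``unicode`` built-in; alias it to the native text type.
    unicode = str


def to_text(value):
    # Illustrative helper: decodes bytes, passes text through unchanged.
    if isinstance(value, bytes):
        return value.decode('utf-8')
    return unicode(value)


print(to_text(b'caf\xc3\xa9'))  # prints the decoded text on both 2.7 and 3.x
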
searx/engines/twitter.py  (+1, -2)

  @todo        publishedDate
 """

-from urlparse import urljoin
-from urllib import urlencode
 from lxml import html
 from datetime import datetime
 from searx.engines.xpath import extract_text
+from searx.url_utils import urlencode, urljoin

 # engine dependent config
 categories = ['social media']

searx/engines/vimeo.py  (+1, -1)

 # @todo        set content-parameter with correct data

 from json import loads
-from urllib import urlencode
 from dateutil import parser
+from searx.url_utils import urlencode

 # engine dependent config
 categories = ['videos']

searx/engines/wikidata.py  (+5, -8)

 from searx import logger
 from searx.poolrequests import get
 from searx.engines.xpath import extract_text
-from searx.utils import format_date_by_locale
 from searx.engines.wikipedia import _fetch_supported_languages, supported_languages_url
+from searx.url_utils import urlencode

 from json import loads
 from lxml.html import fromstring
-from urllib import urlencode

 logger = logger.getChild('wikidata')
 result_count = 1
@@
         language = 'en'

     params['url'] = url_search.format(
-        query=urlencode({'label': query,
-                        'language': language}))
+        query=urlencode({'label': query, 'language': language}))
     return params


 def response(resp):
     results = []
-    html = fromstring(resp.content)
+    html = fromstring(resp.text)
     wikidata_ids = html.xpath(wikidata_ids_xpath)

     language = resp.search_params['language'].split('-')[0]
@@
     # TODO: make requests asynchronous to avoid timeout when result_count > 1
     for wikidata_id in wikidata_ids[:result_count]:
-        url = url_detail.format(query=urlencode({'page': wikidata_id,
-                                                'uselang': language}))
+        url = url_detail.format(query=urlencode({'page': wikidata_id, 'uselang': language}))
         htmlresponse = get(url)
-        jsonresponse = loads(htmlresponse.content)
+        jsonresponse = loads(htmlresponse.text)
         results += getDetail(jsonresponse, wikidata_id, language, resp.search_params['language'])

     return results

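The switch from `resp.content` to `resp.text` here (and in several engines below) matters because, in the requests library, `.content` is the raw byte string while `.text` is the body decoded to text, and `json.loads` on Python 3 versions before 3.6 refuses bytes input. A small sketch, assuming any reachable JSON endpoint:

import requests
from json import loads

resp = requests.get('https://httpbin.org/json')  # illustrative endpoint only

print(type(resp.content))  # raw bytes, exactly as received
print(type(resp.text))     # decoded text (unicode on 2.x, str on 3.x)

data = loads(resp.text)    # safe on both interpreters
print(sorted(data.keys()))
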
searx/engines/wikipedia.py  (+9, -12)

 """

 from json import loads
-from urllib import urlencode, quote
 from lxml.html import fromstring
-
+from searx.url_utils import quote, urlencode

 # search-url
-base_url = 'https://{language}.wikipedia.org/'
-search_postfix = 'w/api.php?'\
+base_url = u'https://{language}.wikipedia.org/'
+search_url = base_url + u'w/api.php?'\
     'action=query'\
     '&format=json'\
     '&{query}'\
@@
     else:
         language = lang

-    return base_url.format(language=language)
+    return language


 # do search-request
 def request(query, params):
     if query.islower():
-        query += '|' + query.title()
+        query = u'{0}|{1}'.format(query.decode('utf-8'), query.decode('utf-8').title()).encode('utf-8')

-    params['url'] = url_lang(params['language']) \
-        + search_postfix.format(query=urlencode({'titles': query}))
+    params['url'] = search_url.format(query=urlencode({'titles': query}),
+                                      language=url_lang(params['language']))

     return params
@@
 def response(resp):
     results = []

-    search_result = loads(resp.content)
+    search_result = loads(resp.text)

     # wikipedia article's unique id
     # first valid id is assumed to be the requested article
@@
     extract = page.get('extract')

     summary = extract_first_paragraph(extract, title, image)
-    if not summary:
-        return []

     # link to wikipedia article
-    wikipedia_link = url_lang(resp.search_params['language']) \
+    wikipedia_link = base_url.format(language=url_lang(resp.search_params['language'])) \
         + 'wiki/' + quote(title.replace(' ', '_').encode('utf8'))

     results.append({'url': wikipedia_link, 'title': title})

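The new `request()` line looks noisy because the query now arrives as UTF-8 bytes (see the `searx/query.py` change further down): it has to be decoded before `.title()` so that non-ASCII letters are cased correctly, then re-encoded before `urlencode`. A short sketch of why the round-trip is needed (the sample query is illustrative):

# -*- coding: utf-8 -*-
query = u'északi fény'.encode('utf-8')   # the raw query as the engine receives it

if query.islower():
    query = u'{0}|{1}'.format(query.decode('utf-8'),
                              query.decode('utf-8').title()).encode('utf-8')

print(query.decode('utf-8'))  # -> északi fény|Északi Fény
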
searx/engines/wolframalpha_api.py  (+6, -7)

 # @stable      yes
 # @parse       url, infobox

-from urllib import urlencode
 from lxml import etree
+from searx.url_utils import urlencode

 # search-url
 search_url = 'https://api.wolframalpha.com/v2/query?appid={api_key}&{query}'
@@
 # do search-request
 def request(query, params):
-    params['url'] = search_url.format(query=urlencode({'input': query}),
-                                      api_key=api_key)
+    params['url'] = search_url.format(query=urlencode({'input': query}), api_key=api_key)
     params['headers']['Referer'] = site_url.format(query=urlencode({'i': query}))

     return params
@@
                  u'\uf74e': 'i',        # imaginary number
                  u'\uf7d9': '='}        # equals sign

-    for k, v in pua_chars.iteritems():
+    for k, v in pua_chars.items():
         text = text.replace(k, v)

     return text
@@
 def response(resp):
     results = []

-    search_results = etree.XML(resp.content)
+    search_results = etree.XML(resp.text)

     # return empty array if there are no results
     if search_results.xpath(failure_xpath):
@@
     # append infobox
     results.append({'infobox': infobox_title,
                     'attributes': result_chunks,
-                    'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer'].decode('utf8')}]})
+                    'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer']}]})

     # append link to site
-    results.append({'url': resp.request.headers['Referer'].decode('utf8'),
+    results.append({'url': resp.request.headers['Referer'],
                     'title': title,
                     'content': result_content})

searx/engines/wolframalpha_noapi.py  (+4, -5)

 from json import loads
 from time import time
-from urllib import urlencode
-from lxml.etree import XML

 from searx.poolrequests import get as http_get
+from searx.url_utils import urlencode

 # search-url
 url = 'https://www.wolframalpha.com/'
@@
 # do search-request
 def request(query, params):
     # obtain token if last update was more than an hour
-    if time() - token['last_updated'] > 3600:
+    if time() - (token['last_updated'] or 0) > 3600:
         obtain_token()
     params['url'] = search_url.format(query=urlencode({'input': query}), token=token['value'])
     params['headers']['Referer'] = referer_url.format(query=urlencode({'i': query}))
@@
     results.append({'infobox': infobox_title,
                     'attributes': result_chunks,
-                    'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer'].decode('utf8')}]})
+                    'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer']}]})

-    results.append({'url': resp.request.headers['Referer'].decode('utf8'),
+    results.append({'url': resp.request.headers['Referer'],
                     'title': 'Wolfram|Alpha (' + infobox_title + ')',
                     'content': result_content})

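The `(token['last_updated'] or 0)` guard makes the age check safe when the token has never been fetched: `None or 0` evaluates to 0, so the very first request triggers `obtain_token()` instead of raising a `TypeError` on `time() - None`. A sketch, assuming the token cache starts out empty:

from time import time

token = {'value': None, 'last_updated': None}   # assumed initial shape


def token_is_stale():
    # An unset timestamp counts as "epoch 0", i.e. always stale.
    return time() - (token['last_updated'] or 0) > 3600


print(token_is_stale())          # True before the first fetch
token['last_updated'] = time()
print(token_is_stale())          # False right after a refresh
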
searx/engines/www1x.py  (+2, -4)

  @parse       url, title, thumbnail, img_src, content
 """

-from urllib import urlencode
-from urlparse import urljoin
 from lxml import html
-import string
 import re
+from searx.url_utils import urlencode, urljoin

 # engine dependent config
 categories = ['images']
@@
         cur_element += result_part

         # fix xml-error
-        cur_element = string.replace(cur_element, '"></a>', '"/></a>')
+        cur_element = cur_element.replace('"></a>', '"/></a>')

         dom = html.fromstring(cur_element)
         link = dom.xpath('//a')[0]

searx/engines/www500px.py  (+1, -2)

 """

 from json import loads
-from urllib import urlencode
-from urlparse import urljoin
+from searx.url_utils import urlencode, urljoin

 # engine dependent config
 categories = ['images']

searx/engines/xpath.py  (+2, -2)

 from lxml import html
-from urllib import urlencode, unquote
-from urlparse import urlparse, urljoin
 from lxml.etree import _ElementStringResult, _ElementUnicodeResult
 from searx.utils import html_to_text
+from searx.url_utils import unquote, urlencode, urljoin, urlparse

 search_url = None
 url_xpath = None
 content_xpath = None
 title_xpath = None
+paging = False
 suggestion_xpath = ''
 results_xpath = ''

searx/engines/yacy.py  (+1, -1)

 # @todo        parse video, audio and file results

 from json import loads
-from urllib import urlencode
 from dateutil import parser
+from searx.url_utils import urlencode

 from searx.utils import html_to_text

searx/engines/yahoo.py  (+1, -2)

  @parse       url, title, content, suggestion
 """

-from urllib import urlencode
-from urlparse import unquote
 from lxml import html
 from searx.engines.xpath import extract_text, extract_url
+from searx.url_utils import unquote, urlencode

 # engine dependent config
 categories = ['general']

searx/engines/yahoo_news.py  (+3, -3)

 # @stable      no (HTML can change)
 # @parse       url, title, content, publishedDate

-from urllib import urlencode
+import re
+from datetime import datetime, timedelta
 from lxml import html
 from searx.engines.xpath import extract_text, extract_url
 from searx.engines.yahoo import parse_url, _fetch_supported_languages, supported_languages_url
-from datetime import datetime, timedelta
-import re
 from dateutil import parser
+from searx.url_utils import urlencode

 # engine dependent config
 categories = ['news']

searx/engines/yandex.py  (+2, -2)

  @parse       url, title, content
 """

-from urllib import urlencode
 from lxml import html
-from searx.search import logger
+from searx import logger
+from searx.url_utils import urlencode

 logger = logger.getChild('yandex engine')

searx/engines/youtube_api.py  (+1, -1)

 # @parse       url, title, content, publishedDate, thumbnail, embedded

 from json import loads
-from urllib import urlencode
 from dateutil import parser
+from searx.url_utils import urlencode

 # engine dependent config
 categories = ['videos', 'music']

searx/engines/youtube_noapi.py  (+1, -1)

 # @stable      no
 # @parse       url, title, content, publishedDate, thumbnail, embedded

-from urllib import quote_plus
 from lxml import html
 from searx.engines.xpath import extract_text
 from searx.utils import list_get
+from searx.url_utils import quote_plus

 # engine dependent config
 categories = ['videos', 'music']

searx/plugins/__init__.py  (+4, -1)

 (C) 2015 by Adam Tauber, <asciimoo@gmail.com>
 '''
-from sys import exit
+from sys import exit, version_info
 from searx import logger

+if version_info[0] == 3:
+    unicode = str
+
 logger = logger.getChild('plugins')

 from searx.plugins import (doai_rewrite,

searx/plugins/doai_rewrite.py  (+1, -1)

 from flask_babel import gettext
 import re
-from urlparse import urlparse, parse_qsl
+from searx.url_utils import urlparse, parse_qsl

 regex = re.compile(r'10\.\d{4,9}/[^\s]+')

searx/plugins/https_rewrite.py  (+4, -1)

 '''

 import re
-from urlparse import urlparse
+import sys
 from lxml import etree
 from os import listdir, environ
 from os.path import isfile, isdir, join
 from searx.plugins import logger
 from flask_babel import gettext
 from searx import searx_dir
+from searx.url_utils import urlparse

+if sys.version_info[0] == 3:
+    unicode = str

 name = "HTTPS rewrite"
 description = gettext('Rewrite HTTP links to HTTPS if possible')

searx/plugins/self_info.py  (+2, -2)

 # Self User Agent regex
-p = re.compile('.*user[ -]agent.*', re.IGNORECASE)
+p = re.compile(b'.*user[ -]agent.*', re.IGNORECASE)


 # attach callback to the post search hook
@@
 def post_search(request, search):
     if search.search_query.pageno > 1:
         return True
-    if search.search_query.query == 'ip':
+    if search.search_query.query == b'ip':
         x_forwarded_for = request.headers.getlist("X-Forwarded-For")
         if x_forwarded_for:
             ip = x_forwarded_for[0]

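Both changes follow from the query now being kept as UTF-8 bytes: on Python 3 a text pattern cannot be matched against a bytes string, so the regex is compiled from a bytes literal and the literal comparison uses `b'ip'`. A minimal sketch:

import re

p = re.compile(b'.*user[ -]agent.*', re.IGNORECASE)

query = u'what is my user agent'.encode('utf-8')

print(bool(p.match(query)))   # True: bytes pattern against a bytes query
print(query == b'ip')         # False here, but the comparison stays type-consistent
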
searx/plugins/tracker_url_remover.py  (+1, -1)

 from flask_babel import gettext
 import re
-from urlparse import urlunparse
+from searx.url_utils import urlunparse

 regexes = {re.compile(r'utm_[^&]+&?'),
            re.compile(r'(wkey|wemail)[^&]+&?'),

searx/preferences.py  (+9, -9)

     def __init__(self, default_value, **kwargs):
         super(Setting, self).__init__()
         self.value = default_value
-        for key, value in kwargs.iteritems():
+        for key, value in kwargs.items():
             setattr(self, key, value)

         self._post_init()
@@
         return self.value

     def save(self, name, resp):
-        resp.set_cookie(name, bytes(self.value), max_age=COOKIE_MAX_AGE)
+        resp.set_cookie(name, self.value, max_age=COOKIE_MAX_AGE)


 class StringSetting(Setting):
@@
     def save(self, name, resp):
         if hasattr(self, 'key'):
-            resp.set_cookie(name, bytes(self.key), max_age=COOKIE_MAX_AGE)
+            resp.set_cookie(name, self.key, max_age=COOKIE_MAX_AGE)


 class SwitchableSetting(Setting):
@@
     def _post_init(self):
         super(EnginesSetting, self)._post_init()
         transformed_choices = []
-        for engine_name, engine in self.choices.iteritems():
+        for engine_name, engine in self.choices.items():
             for category in engine.categories:
                 transformed_choice = dict()
                 transformed_choice['default_on'] = not engine.disabled
@@
                                    'language': SearchLanguageSetting(settings['search']['language'],
                                                                      choices=LANGUAGE_CODES),
                                    'locale': EnumStringSetting(settings['ui']['default_locale'],
-                                                               choices=settings['locales'].keys() + ['']),
+                                                               choices=list(settings['locales'].keys()) + ['']),
                                    'autocomplete': EnumStringSetting(settings['search']['autocomplete'],
-                                                                     choices=autocomplete.backends.keys() + ['']),
+                                                                     choices=list(autocomplete.backends.keys()) + ['']),
                                    'image_proxy': MapSetting(settings['server']['image_proxy'],
                                                              map={'': settings['server']['image_proxy'],
                                                                   '0': False,
@@
         self.unknown_params = {}

     def parse_cookies(self, input_data):
-        for user_setting_name, user_setting in input_data.iteritems():
+        for user_setting_name, user_setting in input_data.items():
             if user_setting_name in self.key_value_settings:
                 self.key_value_settings[user_setting_name].parse(user_setting)
             elif user_setting_name == 'disabled_engines':
@@
         disabled_engines = []
         enabled_categories = []
         disabled_plugins = []
-        for user_setting_name, user_setting in input_data.iteritems():
+        for user_setting_name, user_setting in input_data.items():
             if user_setting_name in self.key_value_settings:
                 self.key_value_settings[user_setting_name].parse(user_setting)
             elif user_setting_name.startswith('engine_'):
@@
             return self.key_value_settings[user_setting_name].get_value()

     def save(self, resp):
-        for user_setting_name, user_setting in self.key_value_settings.iteritems():
+        for user_setting_name, user_setting in self.key_value_settings.items():
             user_setting.save(user_setting_name, resp)
         self.engines.save(resp)
         self.plugins.save(resp)

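`dict.iteritems()` does not exist on Python 3; `items()` works on both interpreters (a list on 2.x, a view on 3.x), and the explicit `list(...)` wrapper is only needed where the result is concatenated or mutated, as in the `choices=` arguments above. For example:

from __future__ import print_function

settings = {'language': 'en-US', 'autocomplete': ''}

for name, value in settings.items():      # portable iteration
    print(name, value)

choices = list(settings.keys()) + ['']    # a keys() view must become a list before '+'
print(choices)
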
searx/query.py  (+6, -2)

 from searx.engines import (
     categories, engines, engine_shortcuts
 )
-import string
 import re
+import string
+import sys
+
+if sys.version_info[0] == 3:
+    unicode = str

 VALID_LANGUAGE_CODE = re.compile(r'^[a-z]{2,3}(-[a-zA-Z]{2})?$')
@@
     """container for all the search parameters (query, language, etc...)"""

     def __init__(self, query, engines, categories, lang, safesearch, pageno, time_range):
-        self.query = query
+        self.query = query.encode('utf-8')
         self.engines = engines
         self.categories = categories
         self.lang = lang

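Encoding the query in the constructor establishes the convention used throughout the rest of the patch: internally `SearchQuery.query` is always UTF-8 bytes, and callers decode it only at the output boundaries (the JSON response and HTML highlighting in webapp.py, the engine-specific casing in wikipedia.py). A heavily simplified sketch of that contract (the real class takes more arguments):

# -*- coding: utf-8 -*-
class SearchQuery(object):
    """Simplified stand-in for searx.query.SearchQuery."""

    def __init__(self, query):
        self.query = query.encode('utf-8')   # stored as UTF-8 bytes on 2.x and 3.x


sq = SearchQuery(u'北京 天氣')
print(type(sq.query))              # bytes on Python 3, str on Python 2
print(sq.query.decode('utf-8'))    # decoded only where text is actually needed
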
searx/results.py  (+5, -1)

 import re
+import sys
 from collections import defaultdict
 from operator import itemgetter
 from threading import RLock
-from urlparse import urlparse, unquote
 from searx.engines import engines
+from searx.url_utils import urlparse, unquote
+
+if sys.version_info[0] == 3:
+    basestring = str

 CONTENT_LEN_IGNORED_CHARS_REGEX = re.compile(r'[,;:!?\./\\\\ ()-_]', re.M | re.U)
 WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U)

searx/search.py  (+10, -2)

 '''

 import gc
+import sys
 import threading
-from thread import start_new_thread
 from time import time
 from uuid import uuid4
 import requests.exceptions
@@
 from searx.plugins import plugins
 from searx.exceptions import SearxParameterException

+try:
+    from thread import start_new_thread
+except:
+    from _thread import start_new_thread
+
+if sys.version_info[0] == 3:
+    unicode = str
+
 logger = logger.getChild('search')

 number_of_searches = 0
@@
             request_params['time_range'] = search_query.time_range

             # append request to list
-            requests.append((selected_engine['name'], search_query.query.encode('utf-8'), request_params))
+            requests.append((selected_engine['name'], search_query.query, request_params))

             # update timeout_limit
             timeout_limit = max(timeout_limit, engine.timeout)

searx/settings_robot.yml  (+1, -1)

 ui:
     themes_path : ""
-    default_theme : legacy
+    default_theme : oscar
     default_locale : ""

 outgoing:

searx/templates/courgette/404.html  (+1, -1)

 <div class="center">
     <h1>{{ _('Page not found') }}</h1>
     {% autoescape false %}
-    <p>{{ _('Go to %(search_page)s.', search_page='<a href="{}">{}</a>'.decode('utf-8').format(url_for('index'), _('search page'))) }}</p>
+    <p>{{ _('Go to %(search_page)s.', search_page=unicode('<a href="{}">{}</a>').format(url_for('index'), _('search page'))) }}</p>
     {% endautoescape %}
 </div>
 {% endblock %}

searx/templates/legacy/404.html  (+1, -1)

 <div class="center">
     <h1>{{ _('Page not found') }}</h1>
     {% autoescape false %}
-    <p>{{ _('Go to %(search_page)s.', search_page='<a href="{}">{}</a>'.decode('utf-8').format(url_for('index'), _('search page'))) }}</p>
+    <p>{{ _('Go to %(search_page)s.', search_page=unicode('<a href="{}">{}</a>').format(url_for('index'), _('search page'))) }}</p>
     {% endautoescape %}
 </div>
 {% endblock %}

searx/templates/oscar/404.html  (+1, -1)

 <div class="text-center">
     <h1>{{ _('Page not found') }}</h1>
     {% autoescape false %}
-    <p>{{ _('Go to %(search_page)s.', search_page='<a href="{}">{}</a>'.decode('utf-8').format(url_for('index'), _('search page'))) }}</p>
+    <p>{{ _('Go to %(search_page)s.', search_page=unicode('<a href="{}">{}</a>').format(url_for('index'), _('search page'))) }}</p>
     {% endautoescape %}
 </div>
 {% endblock %}

searx/templates/pix-art/404.html  (+1, -1)

 <div class="center">
     <h1>{{ _('Page not found') }}</h1>
     {% autoescape false %}
-    <p>{{ _('Go to %(search_page)s.', search_page='<a href="{}">{}</a>'.decode('utf-8').format(url_for('index'), _('search page'))) }}</p>
+    <p>{{ _('Go to %(search_page)s.', search_page=unicode('<a href="{}">{}</a>').format(url_for('index'), _('search page'))) }}</p>
     {% endautoescape %}
 </div>
 {% endblock %}

searx/testing.py  (+26, -16)

 # -*- coding: utf-8 -*-
 """Shared testing code."""

-from plone.testing import Layer
-from unittest2 import TestCase
-from os.path import dirname, join, abspath
-

 import os
 import subprocess
+import traceback
+
+
+from os.path import dirname, join, abspath
+
+from splinter import Browser
+from unittest2 import TestCase


 class SearxTestLayer:
@@
     testTearDown = classmethod(testTearDown)


-class SearxRobotLayer(Layer):
+class SearxRobotLayer():
     """Searx Robot Test Layer"""

     def setUp(self):
@@
         del os.environ['SEARX_SETTINGS_PATH']


-SEARXROBOTLAYER = SearxRobotLayer()
+# SEARXROBOTLAYER = SearxRobotLayer()
+def run_robot_tests(tests):
+    print('Running {0} tests'.format(len(tests)))
+    for test in tests:
+        with Browser() as browser:
+            test(browser)


 class SearxTestCase(TestCase):
@@
 if __name__ == '__main__':
-    from tests.test_robot import test_suite
     import sys
-    from zope.testrunner.runner import Runner
+    # test cases
+    from tests import robot

     base_dir = abspath(join(dirname(__file__), '../tests'))
     if sys.argv[1] == 'robot':
-        r = Runner(['--color',
-                    '--auto-progress',
-                    '--stop-on-error',
-                    '--path',
-                    base_dir],
-                   found_suites=[test_suite()])
-        r.run()
-        sys.exit(int(r.failed))
+        test_layer = SearxRobotLayer()
+        errors = False
+        try:
+            test_layer.setUp()
+            run_robot_tests([getattr(robot, x) for x in dir(robot) if x.startswith('test_')])
+        except Exception:
+            errors = True
+            print('Error occured: {0}'.format(traceback.format_exc()))
+        test_layer.tearDown()
+        sys.exit(1 if errors else 0)

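The zope/plone test runner is replaced by a plain function: every `test_*` callable found in `tests.robot` is executed with its own splinter `Browser`. A trimmed-down usage sketch (it assumes a searx instance listening on localhost:11111 and a working browser driver):

from splinter import Browser


def run_robot_tests(tests):
    print('Running {0} tests'.format(len(tests)))
    for test in tests:
        with Browser() as browser:   # fresh browser per test function
            test(browser)


def test_index(browser):             # mirrors tests/robot/__init__.py below
    browser.visit('http://localhost:11111/')
    assert browser.is_text_present('about')


if __name__ == '__main__':
    run_robot_tests([test_index])
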
searx/url_utils.py  (new file, +28)

+from sys import version_info
+
+if version_info[0] == 2:
+    from urllib import quote, quote_plus, unquote, urlencode
+    from urlparse import parse_qsl, urljoin, urlparse, urlunparse, ParseResult
+else:
+    from urllib.parse import (
+        parse_qsl,
+        quote,
+        quote_plus,
+        unquote,
+        urlencode,
+        urljoin,
+        urlparse,
+        urlunparse,
+        ParseResult
+    )
+
+
+__export__ = (parse_qsl,
+              quote,
+              quote_plus,
+              unquote,
+              urlencode,
+              urljoin,
+              urlparse,
+              urlunparse,
+              ParseResult)

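With this module in place, every engine and plugin imports its URL helpers from one location instead of branching on `urllib`/`urlparse` versus `urllib.parse` itself, which is exactly what the import rewrites above do. A small usage sketch (parameter order in the query string may vary before Python 3.7):

from searx.url_utils import urlencode, urljoin, urlparse

base = 'https://example.com/search'
url = base + '?' + urlencode({'q': u'searx', 'pageno': 2})

print(url)                       # e.g. https://example.com/search?q=searx&pageno=2
print(urlparse(url).netloc)      # example.com
print(urljoin(base, '/about'))   # https://example.com/about
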
searx/utils.py  (+18, -8)

-import cStringIO
 import csv
 import os
 import re

 from babel.dates import format_date
 from codecs import getincrementalencoder
-from HTMLParser import HTMLParser
 from imp import load_source
 from os.path import splitext, join
 from random import choice
@@
 from searx import settings
 from searx import logger

+try:
+    from cStringIO import StringIO
+except:
+    from io import StringIO
+
+try:
+    from HTMLParser import HTMLParser
+except:
+    from html.parser import HTMLParser
+
+if sys.version_info[0] == 3:
+    unichr = chr
+    unicode = str

 logger = logger.getChild('utils')
@@
     def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
         # Redirect output to a queue
-        self.queue = cStringIO.StringIO()
+        self.queue = StringIO()
         self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
         self.stream = f
         self.encoder = getincrementalencoder(encoding)()
@@
                 unicode_row.append(col.encode('utf-8').strip())
             else:
                 unicode_row.append(col)
-        self.writer.writerow(unicode_row)
+        self.writer.writerow([x.decode('utf-8') if hasattr(x, 'decode') else x for x in unicode_row])
         # Fetch UTF-8 output from the queue ...
-        data = self.queue.getvalue()
-        data = data.decode("utf-8")
+        data = self.queue.getvalue().strip('\x00')
         # ... and reencode it into the target encoding
         data = self.encoder.encode(data)
         # write to the target stream
-        self.stream.write(data)
+        self.stream.write(data.decode('utf-8'))
         # empty queue
         self.queue.truncate(0)
@@
 def prettify_url(url, max_length=74):
     if len(url) > max_length:
-        chunk_len = max_length / 2 + 1
+        chunk_len = int(max_length / 2 + 1)
         return u'{0}[...]{1}'.format(url[:chunk_len], url[-chunk_len:])
     else:
         return url

searx/webapp.py  (+23, -13)

     from os.path import realpath, dirname
     path.append(realpath(dirname(realpath(__file__)) + '/../'))

-import cStringIO
 import hashlib
 import hmac
 import json
 import os
+import sys
+
 import requests

 from searx import logger
@@
     exit(1)
 from cgi import escape
 from datetime import datetime, timedelta
-from urllib import urlencode
-from urlparse import urlparse, urljoin
 from werkzeug.contrib.fixers import ProxyFix
 from flask import (
     Flask, request, render_template, url_for, Response, make_response,
@@
 from flask_babel import Babel, gettext, format_date, format_decimal
 from flask.json import jsonify
 from searx import settings, searx_dir, searx_debug
-from searx.exceptions import SearxException, SearxParameterException
+from searx.exceptions import SearxParameterException
 from searx.engines import (
     categories, engines, engine_shortcuts, get_engines_stats, initialize_engines
 )
@@
 from searx.plugins import plugins
 from searx.preferences import Preferences, ValidationException
 from searx.answerers import answerers
+from searx.url_utils import urlencode, urlparse, urljoin

 # check if the pyopenssl package is installed.
 # It is needed for SSL connection without trouble, see #298
@@
     logger.critical("The pyopenssl package has to be installed.\n"
                     "Some HTTPS connections will fail")

+try:
+    from cStringIO import StringIO
+except:
+    from io import StringIO
+
+
+if sys.version_info[0] == 3:
+    unicode = str
+
 # serve pages with HTTP/1.1
 from werkzeug.serving import WSGIRequestHandler
 WSGIRequestHandler.protocol_version = "HTTP/{}".format(settings['server'].get('http_protocol_version', '1.0'))
@@
     kwargs['results_on_new_tab'] = request.preferences.get_value('results_on_new_tab')

+    kwargs['unicode'] = unicode
+
     kwargs['scripts'] = set()
     for plugin in request.user_plugins:
         for script in plugin.js_dependencies:
@@
 def pre_request():
     request.errors = []

-    preferences = Preferences(themes, categories.keys(), engines, plugins)
+    preferences = Preferences(themes, list(categories.keys()), engines, plugins)
     request.preferences = preferences
     try:
         preferences.parse_cookies(request.cookies)
@@
     for result in results:
         if output_format == 'html':
             if 'content' in result and result['content']:
-                result['content'] = highlight_content(escape(result['content'][:1024]),
-                                                      search_query.query.encode('utf-8'))
-            result['title'] = highlight_content(escape(result['title'] or u''),
-                                                search_query.query.encode('utf-8'))
+                result['content'] = highlight_content(escape(result['content'][:1024]), search_query.query)
+            result['title'] = highlight_content(escape(result['title'] or u''), search_query.query)
         else:
             if result.get('content'):
                 result['content'] = html_to_text(result['content']).strip()
@@
                     result['publishedDate'] = format_date(result['publishedDate'])

     if output_format == 'json':
-        return Response(json.dumps({'query': search_query.query,
+        return Response(json.dumps({'query': search_query.query.decode('utf-8'),
                                     'number_of_results': number_of_results,
                                     'results': results,
                                     'answers': list(result_container.answers),
@@
                                     'suggestions': list(result_container.suggestions)}),
                         mimetype='application/json')
     elif output_format == 'csv':
-        csv = UnicodeWriter(cStringIO.StringIO())
+        csv = UnicodeWriter(StringIO())
         keys = ('title', 'url', 'content', 'host', 'engine', 'score')
         csv.writerow(keys)
         for row in results:
@@
             csv.writerow([row.get(key, '') for key in keys])
         csv.stream.seek(0)
         response = Response(csv.stream.read(), mimetype='application/csv')
-        cont_disp = 'attachment;Filename=searx_-_{0}.csv'.format(search_query.query.encode('utf-8'))
+        cont_disp = 'attachment;Filename=searx_-_{0}.csv'.format(search_query.query)
         response.headers.add('Content-Disposition', cont_disp)
         return response
     elif output_format == 'rss':
@@
     disabled_engines = request.preferences.engines.get_disabled()

     # parse query
-    raw_text_query = RawTextQuery(request.form.get('q', '').encode('utf-8'), disabled_engines)
+    raw_text_query = RawTextQuery(request.form.get('q', u'').encode('utf-8'), disabled_engines)
     raw_text_query.parse_query()

     # check if search query is set
@@
 def run():
+    logger.debug('starting webserver on %s:%s', settings['server']['port'], settings['server']['bind_address'])
     app.run(
         debug=searx_debug,
         use_debugger=searx_debug,

tests/robot/__init__.py  (new file, +75)

+# -*- coding: utf-8 -*-
+
+from time import sleep
+
+url = "http://localhost:11111/"
+
+
+def test_index(browser):
+    # Visit URL
+    browser.visit(url)
+    assert browser.is_text_present('about')
+
+
+def test_404(browser):
+    # Visit URL
+    browser.visit(url + 'missing_link')
+    assert browser.is_text_present('Page not found')
+
+
+def test_about(browser):
+    browser.visit(url)
+    browser.click_link_by_text('about')
+    assert browser.is_text_present('Why use searx?')
+
+
+def test_preferences(browser):
+    browser.visit(url)
+    browser.click_link_by_text('preferences')
+    assert browser.is_text_present('Preferences')
+    assert browser.is_text_present('Cookies')
+
+    assert browser.is_element_present_by_xpath('//label[@for="checkbox_dummy"]')
+
+
+def test_preferences_engine_select(browser):
+    browser.visit(url)
+    browser.click_link_by_text('preferences')
+
+    assert browser.is_element_present_by_xpath('//a[@href="#tab_engine"]')
+    browser.find_by_xpath('//a[@href="#tab_engine"]').first.click()
+
+    assert not browser.find_by_xpath('//input[@id="engine_general_dummy__general"]').first.checked
+    browser.find_by_xpath('//label[@for="engine_general_dummy__general"]').first.check()
+    browser.find_by_xpath('//input[@value="save"]').first.click()
+
+    # waiting for the redirect - without this the test is flaky..
+    sleep(1)
+
+    browser.visit(url)
+    browser.click_link_by_text('preferences')
+    browser.find_by_xpath('//a[@href="#tab_engine"]').first.click()
+
+    assert browser.find_by_xpath('//input[@id="engine_general_dummy__general"]').first.checked
+
+
+def test_preferences_locale(browser):
+    browser.visit(url)
+    browser.click_link_by_text('preferences')
+
+    browser.select('locale', 'hu')
+    browser.find_by_xpath('//input[@value="save"]').first.click()
+
+    # waiting for the redirect - without this the test is flaky..
+    sleep(1)
+
+    browser.visit(url)
+    browser.click_link_by_text('beállítások')
+    browser.is_text_present('Beállítások')
+
+
+def test_search(browser):
+    browser.visit(url)
+    browser.fill('q', 'test search query')
+    browser.find_by_xpath('//button[@type="submit"]').first.click()
+    assert browser.is_text_present('didn\'t find any results')

+ 0
- 153
tests/robot/test_basic.robot 查看文件

1
-*** Settings ***
2
-Library         Selenium2Library  timeout=10  implicit_wait=0.5
3
-Test Setup      Open Browser  http://localhost:11111/
4
-Test Teardown   Close All Browsers
5
-
6
-
7
-*** Keywords ***
8
-Submit Preferences
9
-    Set Selenium Speed  2 seconds
10
-    Submit Form  id=search_form
11
-    Location Should Be  http://localhost:11111/
12
-    Set Selenium Speed  0 seconds
13
-
14
-
-*** Test Cases ***
-Front page
-    Page Should Contain  about
-    Page Should Contain  preferences
-
-404 page
-    Go To  http://localhost:11111/no-such-page
-    Page Should Contain  Page not found
-    Page Should Contain  Go to search page
-
-About page
-    Click Element  link=about
-    Page Should Contain  Why use searx?
-    Page Should Contain Element  link=search engines
-
-Preferences page
-    Click Element  link=preferences
-    Page Should Contain  Preferences
-    Page Should Contain  Default categories
-    Page Should Contain  Currently used search engines
-    Page Should Contain  dummy dummy
-    Page Should Contain  general dummy
-
-Switch category
-    Go To  http://localhost:11111/preferences
-    Page Should Contain Checkbox  category_general
-    Page Should Contain Checkbox  category_dummy
-    Click Element  xpath=//*[.="general"]
-    Click Element  xpath=//*[.="dummy"]
-    Submit Preferences
-    Checkbox Should Not Be Selected  category_general
-    Checkbox Should Be Selected  category_dummy
-
-Change language
-    Page Should Contain  about
-    Page Should Contain  preferences
-    Go To  http://localhost:11111/preferences
-    Select From List  locale  hu
-    Submit Preferences
-    Page Should Contain  rólunk
-    Page Should Contain  beállítások
-
-Change method
-    Page Should Contain  about
-    Page Should Contain  preferences
-    Go To  http://localhost:11111/preferences
-    Select From List  method  GET
-    Submit Preferences
-    Go To  http://localhost:11111/preferences
-    List Selection Should Be  method  GET
-    Select From List  method  POST
-    Submit Preferences
-    Go To  http://localhost:11111/preferences
-    List Selection Should Be  method  POST
-
-Change theme
-    Page Should Contain  about
-    Page Should Contain  preferences
-    Go To  http://localhost:11111/preferences
-    List Selection Should Be  theme  legacy
-    Select From List  theme  oscar
-    Submit Preferences
-    Go To  http://localhost:11111/preferences
-    List Selection Should Be  theme  oscar
-
-Change safesearch
-    Page Should Contain  about
-    Page Should Contain  preferences
-    Go To  http://localhost:11111/preferences
-    List Selection Should Be  safesearch  None
-    Select From List  safesearch  Strict
-    Submit Preferences
-    Go To  http://localhost:11111/preferences
-    List Selection Should Be  safesearch  Strict
-
-Change image proxy
-    Page Should Contain  about
-    Page Should Contain  preferences
-    Go To  http://localhost:11111/preferences
-    List Selection Should Be  image_proxy  Disabled
-    Select From List  image_proxy  Enabled
-    Submit Preferences
-    Go To  http://localhost:11111/preferences
-    List Selection Should Be  image_proxy  Enabled
-
-Change search language
-    Page Should Contain  about
-    Page Should Contain  preferences
-    Go To  http://localhost:11111/preferences
-    List Selection Should Be  language  Default language
-    Select From List  language  Türkçe - tr-TR
-    Submit Preferences
-    Go To  http://localhost:11111/preferences
-    List Selection Should Be  language  Türkçe - tr-TR
-
-Change autocomplete
-    Page Should Contain  about
-    Page Should Contain  preferences
-    Go To  http://localhost:11111/preferences
-    List Selection Should Be  autocomplete  -
-    Select From List  autocomplete  google
-    Submit Preferences
-    Go To  http://localhost:11111/preferences
-    List Selection Should Be  autocomplete  google
-
-Change allowed/disabled engines
-    Page Should Contain  about
-    Page Should Contain  preferences
-    Go To  http://localhost:11111/preferences
-    Page Should Contain  Engine name
-    Element Should Contain  xpath=//label[@class="deny"][@for='engine_dummy_dummy_dummy']  Block
-    Element Should Contain  xpath=//label[@class="deny"][@for='engine_general_general_dummy']  Block
-    Click Element  xpath=//label[@class="deny"][@for='engine_general_general_dummy']
-    Submit Preferences
-    Page Should Contain  about
-    Page Should Contain  preferences
-    Go To  http://localhost:11111/preferences
-    Page Should Contain  Engine name
-    Element Should Contain  xpath=//label[@class="deny"][@for='engine_dummy_dummy_dummy']  Block
-    Element Should Contain  xpath=//label[@class="deny"][@for='engine_general_general_dummy']  \
-
-Block a plugin
-    Page Should Contain  about
-    Page Should Contain  preferences
-    Go To  http://localhost:11111/preferences
-    List Selection Should Be  theme  legacy
-    Select From List  theme  oscar
-    Submit Preferences
-    Go To  http://localhost:11111/preferences
-    List Selection Should Be  theme  oscar
-    Page Should Contain  Plugins
-    Click Link  Plugins
-    Checkbox Should Not Be Selected  id=plugin_HTTPS_rewrite
-    Click Element  xpath=//label[@for='plugin_HTTPS_rewrite']
-    Submit Preferences
-    Go To  http://localhost:11111/preferences
-    Page Should Contain  Plugins
-    Click Link  Plugins
-    Checkbox Should Be Selected  id=plugin_HTTPS_rewrite
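The block above is the removed Robot Framework/Selenium suite that drove the searx UI (front page, preferences, engine and plugin toggles) through a live browser on localhost:11111. For orientation only, a rough, hypothetical equivalent of the first "Front page" case using Flask's test client might look like the sketch below; it assumes searx.webapp exposes the Flask app object and is not part of this pull request.

# Hypothetical sketch, not searx code: approximate the removed "Front page"
# Robot case with Flask's built-in test client instead of a real browser.
from searx import webapp

client = webapp.app.test_client()
response = client.get('/')

# the front page should link to the about and preferences pages
assert b'about' in response.data
assert b'preferences' in response.data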

+ 2
- 2
tests/unit/engines/test_archlinux.py View File

@@ -25,7 +25,7 @@
         self.assertTrue(query in params['url'])
         self.assertTrue('wiki.archlinux.org' in params['url'])
 
-        for lang, domain in domains.iteritems():
+        for lang, domain in domains.items():
             dic['language'] = lang
             params = archlinux.request(query, dic)
             self.assertTrue(domain in params['url'])
@@ -102,5 +102,5 @@
         for exp in expected:
             res = results[i]
             i += 1
-            for key, value in exp.iteritems():
+            for key, value in exp.items():
                 self.assertEqual(res[key], value)
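The only change in this file is the switch from dict.iteritems(), which no longer exists in Python 3, to dict.items(), which works on both interpreters (on Python 2 it returns a list rather than an iterator, which is harmless for a test that only iterates once). A minimal sketch with made-up dictionary contents, not the engine's real domain table:

# Minimal sketch with made-up values; dict.iteritems() is Python 2 only,
# while dict.items() is available on both Python 2 and Python 3.
domains = {'de': 'example.de', 'tr': 'example.tr'}
for lang, domain in domains.items():
    print(lang, domain)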

+ 3
- 3
tests/unit/engines/test_bing.py View File

@@ -7,18 +7,18 @@
 class TestBingEngine(SearxTestCase):
 
     def test_request(self):
-        query = 'test_query'
+        query = u'test_query'
         dicto = defaultdict(dict)
         dicto['pageno'] = 0
         dicto['language'] = 'fr_FR'
-        params = bing.request(query, dicto)
+        params = bing.request(query.encode('utf-8'), dicto)
         self.assertTrue('url' in params)
         self.assertTrue(query in params['url'])
         self.assertTrue('language%3AFR' in params['url'])
         self.assertTrue('bing.com' in params['url'])
 
         dicto['language'] = 'all'
-        params = bing.request(query, dicto)
+        params = bing.request(query.encode('utf-8'), dicto)
         self.assertTrue('language' in params['url'])
 
     def test_response(self):
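Here the test query becomes an explicit unicode literal and is UTF-8 encoded before being handed to bing.request(), reflecting that the engine now receives the raw query as bytes while the assertions keep comparing against the unicode value. A minimal sketch of that pattern, using a stand-in function rather than the real searx engine:

# Minimal sketch; fake_request is a stand-in, not the searx bing engine.
def fake_request(query, params):
    # the engine receives the raw query as UTF-8 encoded bytes
    params['url'] = 'https://www.bing.com/search?q=' + query.decode('utf-8')
    return params

query = u'test_query'
params = fake_request(query.encode('utf-8'), {'pageno': 0})
assert query in params['url']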

+ 6
- 6
tests/unit/engines/test_bing_news.py View File

@@ -36,10 +36,10 @@
         self.assertRaises(AttributeError, bing_news.response, '')
         self.assertRaises(AttributeError, bing_news.response, '[]')
 
-        response = mock.Mock(content='<html></html>')
+        response = mock.Mock(text='<html></html>')
         self.assertEqual(bing_news.response(response), [])
 
-        response = mock.Mock(content='<html></html>')
+        response = mock.Mock(text='<html></html>')
         self.assertEqual(bing_news.response(response), [])
 
         html = """<?xml version="1.0" encoding="utf-8" ?>
@@ -74,7 +74,7 @@
         </item>
     </channel>
 </rss>"""  # noqa
-        response = mock.Mock(content=html)
+        response = mock.Mock(text=html.encode('utf-8'))
         results = bing_news.response(response)
         self.assertEqual(type(results), list)
         self.assertEqual(len(results), 2)
@@ -113,7 +113,7 @@
         </item>
     </channel>
 </rss>"""  # noqa
-        response = mock.Mock(content=html)
+        response = mock.Mock(text=html.encode('utf-8'))
         results = bing_news.response(response)
         self.assertEqual(type(results), list)
         self.assertEqual(len(results), 1)
@@ -136,11 +136,11 @@
     </channel>
 </rss>"""  # noqa
 
-        response = mock.Mock(content=html)
+        response = mock.Mock(text=html.encode('utf-8'))
         results = bing_news.response(response)
         self.assertEqual(type(results), list)
         self.assertEqual(len(results), 0)
 
         html = """<?xml version="1.0" encoding="utf-8" ?>gabarge"""
-        response = mock.Mock(content=html)
+        response = mock.Mock(text=html.encode('utf-8'))
         self.assertRaises(lxml.etree.XMLSyntaxError, bing_news.response, response)
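The mocks now populate response.text instead of response.content, matching the attribute the updated engine reads, and the XML fixtures are encoded to UTF-8 bytes before parsing, presumably because lxml refuses unicode strings that carry an XML encoding declaration. A minimal standalone sketch of that behaviour, with a hypothetical fixture rather than the engine's real parser:

# Minimal sketch; the mock exposes only the attribute the parser reads.
import mock
from lxml import etree

xml = u"""<?xml version="1.0" encoding="utf-8" ?><rss><channel></channel></rss>"""

# lxml raises ValueError for unicode strings with an encoding declaration,
# so the fixture is handed to the parser as UTF-8 bytes.
response = mock.Mock(text=xml.encode('utf-8'))
root = etree.fromstring(response.text)
assert root.tag == 'rss'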

+ 0
- 0
tests/unit/engines/test_btdigg.py View File


Some files were not shown because too many files changed in this diff