9 years ago · b3ab221b98
--- a/searx/engines/currency_convert.py
+++ b/searx/engines/currency_convert.py
 
															 url = 'https://download.finance.yahoo.com/d/quotes.csv?e=.csv&f=sl1d1t1&s={query}=X'
														
 
															 weight = 100
														
 
															-parser_re = re.compile(u'.*?(\d+(?:\.\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I)  # noqa
														
 
															+parser_re = re.compile(u'.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I)  # noqa
														
 
															 db = 1
														
--- a/searx/engines/deviantart.py
+++ b/searx/engines/deviantart.py
 
															     dom = html.fromstring(resp.text)
														
 
															-    regex = re.compile('\/200H\/')
														
 
															+    regex = re.compile(r'\/200H\/')
														
 
															     # parse results
														
 
															     for result in dom.xpath('//div[contains(@class, "tt-a tt-fh")]'):
														
--- a/searx/engines/google.py
+++ b/searx/engines/google.py
 
															     results = []
														
 
															     # try to parse the geoloc
														
 
															-    m = re.search('@([0-9\.]+),([0-9\.]+),([0-9]+)', parsed_url.path)
														
 
															+    m = re.search(r'@([0-9\.]+),([0-9\.]+),([0-9]+)', parsed_url.path)
														
 
															     if m is None:
														
 
															-        m = re.search('ll\=([0-9\.]+),([0-9\.]+)\&z\=([0-9]+)', parsed_url.query)
														
 
															+        m = re.search(r'll\=([0-9\.]+),([0-9\.]+)\&z\=([0-9]+)', parsed_url.query)
														
 
															     if m is not None:
														
 
															         # geoloc found (ignored)
														
--- a/searx/engines/startpage.py
+++ b/searx/engines/startpage.py
 
															         url = link.attrib.get('href')
														
 
															         # block google-ad url's
														
 
															-        if re.match("^http(s|)://(www\.)?google\.[a-z]+/aclk.*$", url):
														
 
															+        if re.match(r"^http(s|)://(www\.)?google\.[a-z]+/aclk.*$", url):
														
 
															             continue
														
 
															         # block startpage search url's
														
 
															-        if re.match("^http(s|)://(www\.)?startpage\.com/do/search\?.*$", url):
														
 
															+        if re.match(r"^http(s|)://(www\.)?startpage\.com/do/search\?.*$", url):
														
 
															             continue
														
 
															         # block ixquick search url's
														
 
															-        if re.match("^http(s|)://(www\.)?ixquick\.com/do/search\?.*$", url):
														
 
															+        if re.match(r"^http(s|)://(www\.)?ixquick\.com/do/search\?.*$", url):
														
 
															             continue
														
 
															         title = escape(extract_text(link))
														
 
															         published_date = None
														
 
															         # check if search result starts with something like: "2 Sep 2014 ... "
														
 
															-        if re.match("^([1-9]|[1-2][0-9]|3[0-1]) [A-Z][a-z]{2} [0-9]{4} \.\.\. ", content):
														
 
															+        if re.match(r"^([1-9]|[1-2][0-9]|3[0-1]) [A-Z][a-z]{2} [0-9]{4} \.\.\. ", content):
														
 
															             date_pos = content.find('...') + 4
														
 
															             date_string = content[0:date_pos - 5]
														
 
															             published_date = parser.parse(date_string, dayfirst=True)
														
 
															             content = content[date_pos:]
														
 
															         # check if search result starts with something like: "5 days ago ... "
														
 
															-        elif re.match("^[0-9]+ days? ago \.\.\. ", content):
														
 
															+        elif re.match(r"^[0-9]+ days? ago \.\.\. ", content):
														
 
															             date_pos = content.find('...') + 4
														
 
															             date_string = content[0:date_pos - 5]
														
--- a/searx/engines/swisscows.py
+++ b/searx/engines/swisscows.py
 
															 search_string = '?{query}&page={page}'
														
 
															 # regex
														
 
															-regex_json = re.compile('initialData: {"Request":(.|\n)*},\s*environment')
														
 
															-regex_json_remove_start = re.compile('^initialData:\s*')
														
 
															-regex_json_remove_end = re.compile(',\s*environment$')
														
 
															-regex_img_url_remove_start = re.compile('^https?://i\.swisscows\.ch/\?link=')
														
 
															+regex_json = re.compile(r'initialData: {"Request":(.|\n)*},\s*environment')
														
 
															+regex_json_remove_start = re.compile(r'^initialData:\s*')
														
 
															+regex_json_remove_end = re.compile(r',\s*environment$')
														
 
															+regex_img_url_remove_start = re.compile(r'^https?://i\.swisscows\.ch/\?link=')
														
 
															 # do search-request
														
--- a/searx/engines/tokyotoshokan.py
+++ b/searx/engines/tokyotoshokan.py
 
															         return []
														
 
															     # regular expression for parsing torrent size strings
														
 
															-    size_re = re.compile('Size:\s*([\d.]+)(TB|GB|MB|B)', re.IGNORECASE)
														
 
															+    size_re = re.compile(r'Size:\s*([\d.]+)(TB|GB|MB|B)', re.IGNORECASE)
														
 
															     # processing the results, two rows at a time
														
 
															     for i in xrange(0, len(rows), 2):
														
--- a/searx/engines/www500px.py
+++ b/searx/engines/www500px.py
 
															     results = []
														
 
															     dom = html.fromstring(resp.text)
														
 
															-    regex = re.compile('3\.jpg.*$')
														
 
															+    regex = re.compile(r'3\.jpg.*$')
														
 
															     # parse results
														
 
															     for result in dom.xpath('//div[@class="photo"]'):
														
--- a/searx/engines/yahoo_news.py
+++ b/searx/engines/yahoo_news.py
 
															 def sanitize_url(url):
														
 
															     if ".yahoo.com/" in url:
														
 
															-        return re.sub(u"\;\_ylt\=.+$", "", url)
														
 
															+        return re.sub(u"\\;\\_ylt\\=.+$", "", url)
														
 
															     else:
														
 
															         return url
														
--- a/searx/plugins/https_rewrite.py
+++ b/searx/plugins/https_rewrite.py
 
															             # convert host-rule to valid regex
														
 
															             host = ruleset.attrib.get('host')\
														
 
															-                .replace('.', '\.').replace('*', '.*')
														
 
															+                .replace('.', r'\.').replace('*', '.*')
														
 
															             # append to host list
														
 
															             hosts.append(host)
														
--- a/searx/results.py
+++ b/searx/results.py
 
															 from urlparse import urlparse, unquote
														
 
															 from searx.engines import engines
														
 
															-CONTENT_LEN_IGNORED_CHARS_REGEX = re.compile('[,;:!?\./\\\\ ()-_]', re.M | re.U)
														
 
															+CONTENT_LEN_IGNORED_CHARS_REGEX = re.compile(r'[,;:!?\./\\\\ ()-_]', re.M | re.U)
														
 
															 WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U)
														
--- a/searx/utils.py
+++ b/searx/utils.py
 
															         regex_parts = []
														
 
															         for chunk in query.split():
														
 
															             if len(chunk) == 1:
														
 
															-                regex_parts.append(u'\W+{0}\W+'.format(re.escape(chunk)))
														
 
															+                regex_parts.append(u'\\W+{0}\\W+'.format(re.escape(chunk)))
														
 
															             else:
														
 
															                 regex_parts.append(u'{0}'.format(re.escape(chunk)))
														
 
															         query_regex = u'({0})'.format('|'.join(regex_parts))
														
--- a/tests/unit/engines/test_dailymotion.py
+++ b/tests/unit/engines/test_dailymotion.py
 
															         self.assertEqual(results[0]['content'], 'Description')
														
 
															         self.assertIn('x2fit7q', results[0]['embedded'])
														
 
															-        json = """
														
 
															+        json = r"""
														
 
															         {"toto":[
														
 
															             {"id":200,"name":"Artist Name",
														
 
															             "link":"http:\/\/www.dailymotion.com\/artist\/1217","type":"artist"}
														
--- a/tests/unit/engines/test_deezer.py
+++ b/tests/unit/engines/test_deezer.py
 
															         response = mock.Mock(text='{"data": []}')
														
 
															         self.assertEqual(deezer.response(response), [])
														
 
															-        json = """
														
 
															+        json = r"""
														
 
															         {"data":[
														
 
															             {"id":100, "title":"Title of track",
														
 
															             "link":"https:\/\/www.deezer.com\/track\/1094042","duration":232,
														
 
															         self.assertEqual(results[0]['content'], 'Artist Name &bull; Album Title &bull; Title of track')
														
 
															         self.assertTrue('100' in results[0]['embedded'])
														
 
															-        json = """
														
 
															+        json = r"""
														
 
															         {"data":[
														
 
															             {"id":200,"name":"Artist Name",
														
 
															             "link":"https:\/\/www.deezer.com\/artist\/1217","type":"artist"}
														
--- a/tests/unit/engines/test_flickr.py
+++ b/tests/unit/engines/test_flickr.py
 
															         response = mock.Mock(text='{"data": []}')
														
 
															         self.assertEqual(flickr.response(response), [])
														
 
															-        json = """
														
 
															+        json = r"""
														
 
															         { "photos": { "page": 1, "pages": "41001", "perpage": 100, "total": "4100032",
														
 
															             "photo": [
														
 
															             { "id": "15751017054", "owner": "66847915@N08",
														
 
															         self.assertTrue('Owner' in results[0]['content'])
														
 
															         self.assertTrue('Description' in results[0]['content'])
														
 
															-        json = """
														
 
															+        json = r"""
														
 
															         { "photos": { "page": 1, "pages": "41001", "perpage": 100, "total": "4100032",
														
 
															             "photo": [
														
 
															             { "id": "15751017054", "owner": "66847915@N08",
														
 
															         self.assertTrue('Owner' in results[0]['content'])
														
 
															         self.assertTrue('Description' in results[0]['content'])
														
 
															-        json = """
														
 
															+        json = r"""
														
 
															         { "photos": { "page": 1, "pages": "41001", "perpage": 100, "total": "4100032",
														
 
															             "photo": [
														
 
															             { "id": "15751017054", "owner": "66847915@N08",
														
 
															         self.assertTrue('Owner' in results[0]['content'])
														
 
															         self.assertTrue('Description' in results[0]['content'])
														
 
															-        json = """
														
 
															+        json = r"""
														
 
															         { "photos": { "page": 1, "pages": "41001", "perpage": 100, "total": "4100032",
														
 
															             "photo": [
														
 
															             { "id": "15751017054", "owner": "66847915@N08",
														
 
															         self.assertEqual(type(results), list)
														
 
															         self.assertEqual(len(results), 0)
														
 
															-        json = """
														
 
															+        json = r"""
														
 
															         {"toto":[
														
 
															             {"id":200,"name":"Artist Name",
														
 
															             "link":"http:\/\/www.flickr.com\/artist\/1217","type":"artist"}
														
--- a/tests/unit/engines/test_flickr_noapi.py
+++ b/tests/unit/engines/test_flickr_noapi.py
 
															         self.assertEqual(len(results), 0)
														
 
															         # garbage test
														
 
															-        json = """
														
 
															+        json = r"""
														
 
															         {"toto":[
														
 
															             {"id":200,"name":"Artist Name",
														
 
															             "link":"http:\/\/www.flickr.com\/artist\/1217","type":"artist"}
														
--- a/tests/unit/engines/test_ina.py
+++ b/tests/unit/engines/test_ina.py
 
															         <div class=\\"search-results--list\\"><div class=\\"media\\">\\n\
														
 
															         \\t\\t\\t\\t<a class=\\"media-left  media-video  premium    xiti_click_action\\" \
														
 
															         data-xiti-params=\\"recherche_v4::resultats_conference_de_presse_du_general_de_gaulle::N\\" \
														
 
															-        href=\\"\/video\/CAF89035682\/conference-de-presse-du-general-de-gaulle-video.html\\">\\n\
														
 
															-        <img src=\\"https:\/\/www.ina.fr\/images_v2\/140x105\/CAF89035682.jpeg\\" \
														
 
															+        href=\\"\\/video\\/CAF89035682\\/conference-de-presse-du-general-de-gaulle-video.html\\">\\n\
														
 
															+        <img src=\\"https:\\/\\/www.ina.fr\\/images_v2\\/140x105\\/CAF89035682.jpeg\\" \
														
 
															         alt=\\"Conf\\u00e9rence de presse du G\\u00e9n\\u00e9ral de Gaulle \\">\\n\
														
 
															-        \\t\\t\\t\\t\\t<\/a>\\n\
														
 
															+        \\t\\t\\t\\t\\t<\\/a>\\n\
														
 
															         \\t\\t\\t\\t\\t<div class=\\"media-body\\">\\n\\t\\t\\t\\t\\t\\t<h3 class=\\"h3--title media-heading\\">\\n\
														
 
															         \\t\\t\\t\\t\\t\\t\\t<a class=\\"xiti_click_action\\" \
														
 
															         data-xiti-params=\\"recherche_v4::resultats_conference_de_presse_du_general_de_gaulle::N\\" \
														
 
															-        href=\\"\/video\/CAF89035682\/conference-de-presse-du-general-de-gaulle-video.html\\">\
														
 
															-        Conf\\u00e9rence de presse du G\\u00e9n\\u00e9ral de Gaulle <\/a>\\n\
														
 
															-        <\/h3>\\n\
														
 
															-        <div class=\\"media-body__info\\">\\n<span class=\\"broadcast\\">27\/11\/1967<\/span>\\n\
														
 
															-        <span class=\\"views\\">29321 vues<\/span>\\n\
														
 
															-        <span class=\\"duration\\">01h 33m 07s<\/span>\\n\
														
 
															-        <\/div>\\n\
														
 
															+        href=\\"\\/video\\/CAF89035682\\/conference-de-presse-du-general-de-gaulle-video.html\\">\
														
 
															+        Conf\\u00e9rence de presse du G\\u00e9n\\u00e9ral de Gaulle <\\/a>\\n\
														
 
															+        <\\/h3>\\n\
														
 
															+        <div class=\\"media-body__info\\">\\n<span class=\\"broadcast\\">27\\/11\\/1967<\\/span>\\n\
														
 
															+        <span class=\\"views\\">29321 vues<\\/span>\\n\
														
 
															+        <span class=\\"duration\\">01h 33m 07s<\\/span>\\n\
														
 
															+        <\\/div>\\n\
														
 
															         <p class=\\"media-body__summary\\">VERSION INTEGRALE DE LA CONFERENCE DE PRESSE DU GENERAL DE GAULLE . \
														
 
															-              - PA le Pr\\u00e9sident DE GAULLE : il ouvre les bras et s'assied. DP journalis...<\/p>\\n\
														
 
															-        <\/div>\\n<\/div><!-- \/.media -->\\n"
														
 
															+              - PA le Pr\\u00e9sident DE GAULLE : il ouvre les bras et s'assied. DP journalis...<\\/p>\\n\
														
 
															+        <\\/div>\\n<\\/div><!-- \\/.media -->\\n"
														
 
															         }
														
 
															         """
														
 
															         response = mock.Mock(text=json)
														
--- a/tests/unit/engines/test_mediawiki.py
+++ b/tests/unit/engines/test_mediawiki.py
 
															         self.assertEqual(type(results), list)
														
 
															         self.assertEqual(len(results), 0)
														
 
															-        json = """
														
 
															+        json = r"""
														
 
															         {"toto":[
														
 
															             {"id":200,"name":"Artist Name",
														
 
															             "link":"http:\/\/www.mediawiki.com\/artist\/1217","type":"artist"}
														
--- a/tests/unit/engines/test_mixcloud.py
+++ b/tests/unit/engines/test_mixcloud.py
 
															         self.assertEqual(results[0]['content'], 'User')
														
 
															         self.assertTrue('http://www.mixcloud.com/user/this-is-the-url/' in results[0]['embedded'])
														
 
															-        json = """
														
 
															+        json = r"""
														
 
															         {"toto":[
														
 
															             {"id":200,"name":"Artist Name",
														
 
															             "link":"http:\/\/www.mixcloud.com\/artist\/1217","type":"artist"}
														
--- a/tests/unit/engines/test_searchcode_code.py
+++ b/tests/unit/engines/test_searchcode_code.py
 
															         self.assertEqual(results[0]['repository'], 'https://repo')
														
 
															         self.assertEqual(results[0]['code_language'], 'cpp')
														
 
															-        json = """
														
 
															+        json = r"""
														
 
															         {"toto":[
														
 
															             {"id":200,"name":"Artist Name",
														
 
															             "link":"http:\/\/www.searchcode_code.com\/artist\/1217","type":"artist"}
														
--- a/tests/unit/engines/test_searchcode_doc.py
+++ b/tests/unit/engines/test_searchcode_doc.py
 
															         self.assertIn('test', results[0]['content'])
														
 
															         self.assertIn('Description', results[0]['content'])
														
 
															-        json = """
														
 
															+        json = r"""
														
 
															         {"toto":[
														
 
															             {"id":200,"name":"Artist Name",
														
 
															             "link":"http:\/\/www.searchcode_doc.com\/artist\/1217","type":"artist"}
														
--- a/tests/unit/engines/test_wolframalpha_noapi.py
+++ b/tests/unit/engines/test_wolframalpha_noapi.py
 
															         request = Request(headers={'Referer': referer_url})
														
 
															         # test failure
														
 
															-        json = '''
														
 
															+        json = r'''
														
 
															         {"queryresult" : {
														
 
															             "success" : false,
														
 
															             "error" : false,
														
 
															         self.assertEqual(wolframalpha_noapi.response(response), [])
														
 
															         # test basic case
														
 
															-        json = '''
														
 
															+        json = r'''
														
 
															         {"queryresult" : {
														
 
															             "success" : true,
														
 
															             "error" : false,
														
 
															         self.assertEqual('Wolfram|Alpha', results[1]['title'])
														
 
															         # test calc
														
 
															-        json = """
														
 
															+        json = r"""
														
 
															         {"queryresult" : {
														
 
															             "success" : true,
														
 
															             "error" : false,