Browse Source

Fix anomalous backslash in string

stepshal 8 years ago
parent
commit
b3ab221b98

+ 1
- 1
searx/engines/currency_convert.py View File

@@ -9,7 +9,7 @@ categories = []
9 9
 url = 'https://download.finance.yahoo.com/d/quotes.csv?e=.csv&f=sl1d1t1&s={query}=X'
10 10
 weight = 100
11 11
 
12
-parser_re = re.compile(u'.*?(\d+(?:\.\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I)  # noqa
12
+parser_re = re.compile(u'.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I)  # noqa
13 13
 
14 14
 db = 1
15 15
 

+ 1
- 1
searx/engines/deviantart.py View File

@@ -47,7 +47,7 @@ def response(resp):
47 47
 
48 48
     dom = html.fromstring(resp.text)
49 49
 
50
-    regex = re.compile('\/200H\/')
50
+    regex = re.compile(r'\/200H\/')
51 51
 
52 52
     # parse results
53 53
     for result in dom.xpath('//div[contains(@class, "tt-a tt-fh")]'):

+ 2
- 2
searx/engines/google.py View File

@@ -300,9 +300,9 @@ def parse_map_detail(parsed_url, result, google_hostname):
300 300
     results = []
301 301
 
302 302
     # try to parse the geoloc
303
-    m = re.search('@([0-9\.]+),([0-9\.]+),([0-9]+)', parsed_url.path)
303
+    m = re.search(r'@([0-9\.]+),([0-9\.]+),([0-9]+)', parsed_url.path)
304 304
     if m is None:
305
-        m = re.search('ll\=([0-9\.]+),([0-9\.]+)\&z\=([0-9]+)', parsed_url.query)
305
+        m = re.search(r'll\=([0-9\.]+),([0-9\.]+)\&z\=([0-9]+)', parsed_url.query)
306 306
 
307 307
     if m is not None:
308 308
         # geoloc found (ignored)

+ 5
- 5
searx/engines/startpage.py View File

@@ -68,15 +68,15 @@ def response(resp):
68 68
         url = link.attrib.get('href')
69 69
 
70 70
         # block google-ad url's
71
-        if re.match("^http(s|)://(www\.)?google\.[a-z]+/aclk.*$", url):
71
+        if re.match(r"^http(s|)://(www\.)?google\.[a-z]+/aclk.*$", url):
72 72
             continue
73 73
 
74 74
         # block startpage search url's
75
-        if re.match("^http(s|)://(www\.)?startpage\.com/do/search\?.*$", url):
75
+        if re.match(r"^http(s|)://(www\.)?startpage\.com/do/search\?.*$", url):
76 76
             continue
77 77
 
78 78
         # block ixquick search url's
79
-        if re.match("^http(s|)://(www\.)?ixquick\.com/do/search\?.*$", url):
79
+        if re.match(r"^http(s|)://(www\.)?ixquick\.com/do/search\?.*$", url):
80 80
             continue
81 81
 
82 82
         title = escape(extract_text(link))
@@ -89,7 +89,7 @@ def response(resp):
89 89
         published_date = None
90 90
 
91 91
         # check if search result starts with something like: "2 Sep 2014 ... "
92
-        if re.match("^([1-9]|[1-2][0-9]|3[0-1]) [A-Z][a-z]{2} [0-9]{4} \.\.\. ", content):
92
+        if re.match(r"^([1-9]|[1-2][0-9]|3[0-1]) [A-Z][a-z]{2} [0-9]{4} \.\.\. ", content):
93 93
             date_pos = content.find('...') + 4
94 94
             date_string = content[0:date_pos - 5]
95 95
             published_date = parser.parse(date_string, dayfirst=True)
@@ -98,7 +98,7 @@ def response(resp):
98 98
             content = content[date_pos:]
99 99
 
100 100
         # check if search result starts with something like: "5 days ago ... "
101
-        elif re.match("^[0-9]+ days? ago \.\.\. ", content):
101
+        elif re.match(r"^[0-9]+ days? ago \.\.\. ", content):
102 102
             date_pos = content.find('...') + 4
103 103
             date_string = content[0:date_pos - 5]
104 104
 

+ 4
- 4
searx/engines/swisscows.py View File

@@ -25,10 +25,10 @@ base_url = 'https://swisscows.ch/'
25 25
 search_string = '?{query}&page={page}'
26 26
 
27 27
 # regex
28
-regex_json = re.compile('initialData: {"Request":(.|\n)*},\s*environment')
29
-regex_json_remove_start = re.compile('^initialData:\s*')
30
-regex_json_remove_end = re.compile(',\s*environment$')
31
-regex_img_url_remove_start = re.compile('^https?://i\.swisscows\.ch/\?link=')
28
+regex_json = re.compile(r'initialData: {"Request":(.|\n)*},\s*environment')
29
+regex_json_remove_start = re.compile(r'^initialData:\s*')
30
+regex_json_remove_end = re.compile(r',\s*environment$')
31
+regex_img_url_remove_start = re.compile(r'^https?://i\.swisscows\.ch/\?link=')
32 32
 
33 33
 
34 34
 # do search-request

+ 1
- 1
searx/engines/tokyotoshokan.py View File

@@ -48,7 +48,7 @@ def response(resp):
48 48
         return []
49 49
 
50 50
     # regular expression for parsing torrent size strings
51
-    size_re = re.compile('Size:\s*([\d.]+)(TB|GB|MB|B)', re.IGNORECASE)
51
+    size_re = re.compile(r'Size:\s*([\d.]+)(TB|GB|MB|B)', re.IGNORECASE)
52 52
 
53 53
     # processing the results, two rows at a time
54 54
     for i in xrange(0, len(rows), 2):

+ 1
- 1
searx/engines/www500px.py View File

@@ -41,7 +41,7 @@ def response(resp):
41 41
     results = []
42 42
 
43 43
     dom = html.fromstring(resp.text)
44
-    regex = re.compile('3\.jpg.*$')
44
+    regex = re.compile(r'3\.jpg.*$')
45 45
 
46 46
     # parse results
47 47
     for result in dom.xpath('//div[@class="photo"]'):

+ 1
- 1
searx/engines/yahoo_news.py View File

@@ -55,7 +55,7 @@ def request(query, params):
55 55
 
56 56
 def sanitize_url(url):
57 57
     if ".yahoo.com/" in url:
58
-        return re.sub(u"\;\_ylt\=.+$", "", url)
58
+        return re.sub(u"\\;\\_ylt\\=.+$", "", url)
59 59
     else:
60 60
         return url
61 61
 

+ 1
- 1
searx/plugins/https_rewrite.py View File

@@ -87,7 +87,7 @@ def load_single_https_ruleset(rules_path):
87 87
 
88 88
             # convert host-rule to valid regex
89 89
             host = ruleset.attrib.get('host')\
90
-                .replace('.', '\.').replace('*', '.*')
90
+                .replace('.', r'\.').replace('*', '.*')
91 91
 
92 92
             # append to host list
93 93
             hosts.append(host)

+ 1
- 1
searx/results.py View File

@@ -5,7 +5,7 @@ from threading import RLock
5 5
 from urlparse import urlparse, unquote
6 6
 from searx.engines import engines
7 7
 
8
-CONTENT_LEN_IGNORED_CHARS_REGEX = re.compile('[,;:!?\./\\\\ ()-_]', re.M | re.U)
8
+CONTENT_LEN_IGNORED_CHARS_REGEX = re.compile(r'[,;:!?\./\\\\ ()-_]', re.M | re.U)
9 9
 WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U)
10 10
 
11 11
 

+ 1
- 1
searx/utils.py View File

@@ -63,7 +63,7 @@ def highlight_content(content, query):
63 63
         regex_parts = []
64 64
         for chunk in query.split():
65 65
             if len(chunk) == 1:
66
-                regex_parts.append(u'\W+{0}\W+'.format(re.escape(chunk)))
66
+                regex_parts.append(u'\\W+{0}\\W+'.format(re.escape(chunk)))
67 67
             else:
68 68
                 regex_parts.append(u'{0}'.format(re.escape(chunk)))
69 69
         query_regex = u'({0})'.format('|'.join(regex_parts))

+ 1
- 1
tests/unit/engines/test_dailymotion.py View File

@@ -62,7 +62,7 @@ class TestDailymotionEngine(SearxTestCase):
62 62
         self.assertEqual(results[0]['content'], 'Description')
63 63
         self.assertIn('x2fit7q', results[0]['embedded'])
64 64
 
65
-        json = """
65
+        json = r"""
66 66
         {"toto":[
67 67
             {"id":200,"name":"Artist Name",
68 68
             "link":"http:\/\/www.dailymotion.com\/artist\/1217","type":"artist"}

+ 2
- 2
tests/unit/engines/test_deezer.py View File

@@ -27,7 +27,7 @@ class TestDeezerEngine(SearxTestCase):
27 27
         response = mock.Mock(text='{"data": []}')
28 28
         self.assertEqual(deezer.response(response), [])
29 29
 
30
-        json = """
30
+        json = r"""
31 31
         {"data":[
32 32
             {"id":100, "title":"Title of track",
33 33
             "link":"https:\/\/www.deezer.com\/track\/1094042","duration":232,
@@ -45,7 +45,7 @@ class TestDeezerEngine(SearxTestCase):
45 45
         self.assertEqual(results[0]['content'], 'Artist Name • Album Title • Title of track')
46 46
         self.assertTrue('100' in results[0]['embedded'])
47 47
 
48
-        json = """
48
+        json = r"""
49 49
         {"data":[
50 50
             {"id":200,"name":"Artist Name",
51 51
             "link":"https:\/\/www.deezer.com\/artist\/1217","type":"artist"}

+ 5
- 5
tests/unit/engines/test_flickr.py View File

@@ -27,7 +27,7 @@ class TestFlickrEngine(SearxTestCase):
27 27
         response = mock.Mock(text='{"data": []}')
28 28
         self.assertEqual(flickr.response(response), [])
29 29
 
30
-        json = """
30
+        json = r"""
31 31
         { "photos": { "page": 1, "pages": "41001", "perpage": 100, "total": "4100032",
32 32
             "photo": [
33 33
             { "id": "15751017054", "owner": "66847915@N08",
@@ -55,7 +55,7 @@ class TestFlickrEngine(SearxTestCase):
55 55
         self.assertTrue('Owner' in results[0]['content'])
56 56
         self.assertTrue('Description' in results[0]['content'])
57 57
 
58
-        json = """
58
+        json = r"""
59 59
         { "photos": { "page": 1, "pages": "41001", "perpage": 100, "total": "4100032",
60 60
             "photo": [
61 61
             { "id": "15751017054", "owner": "66847915@N08",
@@ -79,7 +79,7 @@ class TestFlickrEngine(SearxTestCase):
79 79
         self.assertTrue('Owner' in results[0]['content'])
80 80
         self.assertTrue('Description' in results[0]['content'])
81 81
 
82
-        json = """
82
+        json = r"""
83 83
         { "photos": { "page": 1, "pages": "41001", "perpage": 100, "total": "4100032",
84 84
             "photo": [
85 85
             { "id": "15751017054", "owner": "66847915@N08",
@@ -103,7 +103,7 @@ class TestFlickrEngine(SearxTestCase):
103 103
         self.assertTrue('Owner' in results[0]['content'])
104 104
         self.assertTrue('Description' in results[0]['content'])
105 105
 
106
-        json = """
106
+        json = r"""
107 107
         { "photos": { "page": 1, "pages": "41001", "perpage": 100, "total": "4100032",
108 108
             "photo": [
109 109
             { "id": "15751017054", "owner": "66847915@N08",
@@ -130,7 +130,7 @@ class TestFlickrEngine(SearxTestCase):
130 130
         self.assertEqual(type(results), list)
131 131
         self.assertEqual(len(results), 0)
132 132
 
133
-        json = """
133
+        json = r"""
134 134
         {"toto":[
135 135
             {"id":200,"name":"Artist Name",
136 136
             "link":"http:\/\/www.flickr.com\/artist\/1217","type":"artist"}

+ 1
- 1
tests/unit/engines/test_flickr_noapi.py View File

@@ -316,7 +316,7 @@ class TestFlickrNoapiEngine(SearxTestCase):
316 316
         self.assertEqual(len(results), 0)
317 317
 
318 318
         # garbage test
319
-        json = """
319
+        json = r"""
320 320
         {"toto":[
321 321
             {"id":200,"name":"Artist Name",
322 322
             "link":"http:\/\/www.flickr.com\/artist\/1217","type":"artist"}

+ 12
- 12
tests/unit/engines/test_ina.py View File

@@ -33,23 +33,23 @@ class TestInaEngine(SearxTestCase):
33 33
         <div class=\\"search-results--list\\"><div class=\\"media\\">\\n\
34 34
         \\t\\t\\t\\t<a class=\\"media-left  media-video  premium    xiti_click_action\\" \
35 35
         data-xiti-params=\\"recherche_v4::resultats_conference_de_presse_du_general_de_gaulle::N\\" \
36
-        href=\\"\/video\/CAF89035682\/conference-de-presse-du-general-de-gaulle-video.html\\">\\n\
37
-        <img src=\\"https:\/\/www.ina.fr\/images_v2\/140x105\/CAF89035682.jpeg\\" \
36
+        href=\\"\\/video\\/CAF89035682\\/conference-de-presse-du-general-de-gaulle-video.html\\">\\n\
37
+        <img src=\\"https:\\/\\/www.ina.fr\\/images_v2\\/140x105\\/CAF89035682.jpeg\\" \
38 38
         alt=\\"Conf\\u00e9rence de presse du G\\u00e9n\\u00e9ral de Gaulle \\">\\n\
39
-        \\t\\t\\t\\t\\t<\/a>\\n\
39
+        \\t\\t\\t\\t\\t<\\/a>\\n\
40 40
         \\t\\t\\t\\t\\t<div class=\\"media-body\\">\\n\\t\\t\\t\\t\\t\\t<h3 class=\\"h3--title media-heading\\">\\n\
41 41
         \\t\\t\\t\\t\\t\\t\\t<a class=\\"xiti_click_action\\" \
42 42
         data-xiti-params=\\"recherche_v4::resultats_conference_de_presse_du_general_de_gaulle::N\\" \
43
-        href=\\"\/video\/CAF89035682\/conference-de-presse-du-general-de-gaulle-video.html\\">\
44
-        Conf\\u00e9rence de presse du G\\u00e9n\\u00e9ral de Gaulle <\/a>\\n\
45
-        <\/h3>\\n\
46
-        <div class=\\"media-body__info\\">\\n<span class=\\"broadcast\\">27\/11\/1967<\/span>\\n\
47
-        <span class=\\"views\\">29321 vues<\/span>\\n\
48
-        <span class=\\"duration\\">01h 33m 07s<\/span>\\n\
49
-        <\/div>\\n\
43
+        href=\\"\\/video\\/CAF89035682\\/conference-de-presse-du-general-de-gaulle-video.html\\">\
44
+        Conf\\u00e9rence de presse du G\\u00e9n\\u00e9ral de Gaulle <\\/a>\\n\
45
+        <\\/h3>\\n\
46
+        <div class=\\"media-body__info\\">\\n<span class=\\"broadcast\\">27\\/11\\/1967<\\/span>\\n\
47
+        <span class=\\"views\\">29321 vues<\\/span>\\n\
48
+        <span class=\\"duration\\">01h 33m 07s<\\/span>\\n\
49
+        <\\/div>\\n\
50 50
         <p class=\\"media-body__summary\\">VERSION INTEGRALE DE LA CONFERENCE DE PRESSE DU GENERAL DE GAULLE . \
51
-              - PA le Pr\\u00e9sident DE GAULLE : il ouvre les bras et s'assied. DP journalis...<\/p>\\n\
52
-        <\/div>\\n<\/div><!-- \/.media -->\\n"
51
+              - PA le Pr\\u00e9sident DE GAULLE : il ouvre les bras et s'assied. DP journalis...<\\/p>\\n\
52
+        <\\/div>\\n<\\/div><!-- \\/.media -->\\n"
53 53
         }
54 54
         """
55 55
         response = mock.Mock(text=json)

+ 1
- 1
tests/unit/engines/test_mediawiki.py View File

@@ -118,7 +118,7 @@ class TestMediawikiEngine(SearxTestCase):
118 118
         self.assertEqual(type(results), list)
119 119
         self.assertEqual(len(results), 0)
120 120
 
121
-        json = """
121
+        json = r"""
122 122
         {"toto":[
123 123
             {"id":200,"name":"Artist Name",
124 124
             "link":"http:\/\/www.mediawiki.com\/artist\/1217","type":"artist"}

+ 1
- 1
tests/unit/engines/test_mixcloud.py View File

@@ -55,7 +55,7 @@ class TestMixcloudEngine(SearxTestCase):
55 55
         self.assertEqual(results[0]['content'], 'User')
56 56
         self.assertTrue('http://www.mixcloud.com/user/this-is-the-url/' in results[0]['embedded'])
57 57
 
58
-        json = """
58
+        json = r"""
59 59
         {"toto":[
60 60
             {"id":200,"name":"Artist Name",
61 61
             "link":"http:\/\/www.mixcloud.com\/artist\/1217","type":"artist"}

+ 1
- 1
tests/unit/engines/test_searchcode_code.py View File

@@ -63,7 +63,7 @@ class TestSearchcodeCodeEngine(SearxTestCase):
63 63
         self.assertEqual(results[0]['repository'], 'https://repo')
64 64
         self.assertEqual(results[0]['code_language'], 'cpp')
65 65
 
66
-        json = """
66
+        json = r"""
67 67
         {"toto":[
68 68
             {"id":200,"name":"Artist Name",
69 69
             "link":"http:\/\/www.searchcode_code.com\/artist\/1217","type":"artist"}

+ 1
- 1
tests/unit/engines/test_searchcode_doc.py View File

@@ -61,7 +61,7 @@ class TestSearchcodeDocEngine(SearxTestCase):
61 61
         self.assertIn('test', results[0]['content'])
62 62
         self.assertIn('Description', results[0]['content'])
63 63
 
64
-        json = """
64
+        json = r"""
65 65
         {"toto":[
66 66
             {"id":200,"name":"Artist Name",
67 67
             "link":"http:\/\/www.searchcode_doc.com\/artist\/1217","type":"artist"}

+ 3
- 3
tests/unit/engines/test_wolframalpha_noapi.py View File

@@ -28,7 +28,7 @@ class TestWolframAlphaNoAPIEngine(SearxTestCase):
28 28
         request = Request(headers={'Referer': referer_url})
29 29
 
30 30
         # test failure
31
-        json = '''
31
+        json = r'''
32 32
         {"queryresult" : {
33 33
             "success" : false,
34 34
             "error" : false,
@@ -42,7 +42,7 @@ class TestWolframAlphaNoAPIEngine(SearxTestCase):
42 42
         self.assertEqual(wolframalpha_noapi.response(response), [])
43 43
 
44 44
         # test basic case
45
-        json = '''
45
+        json = r'''
46 46
         {"queryresult" : {
47 47
             "success" : true,
48 48
             "error" : false,
@@ -143,7 +143,7 @@ class TestWolframAlphaNoAPIEngine(SearxTestCase):
143 143
         self.assertEqual('Wolfram|Alpha', results[1]['title'])
144 144
 
145 145
         # test calc
146
-        json = """
146
+        json = r"""
147 147
         {"queryresult" : {
148 148
             "success" : true,
149 149
             "error" : false,