Browse Source

Fix anomalous backslash in string

stepshal 8 years ago
parent
commit
b3ab221b98

+ 1
- 1
searx/engines/currency_convert.py View File

9
 url = 'https://download.finance.yahoo.com/d/quotes.csv?e=.csv&f=sl1d1t1&s={query}=X'
9
 url = 'https://download.finance.yahoo.com/d/quotes.csv?e=.csv&f=sl1d1t1&s={query}=X'
10
 weight = 100
10
 weight = 100
11
 
11
 
12
-parser_re = re.compile(u'.*?(\d+(?:\.\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I)  # noqa
12
+parser_re = re.compile(u'.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I)  # noqa
13
 
13
 
14
 db = 1
14
 db = 1
15
 
15
 

+ 1
- 1
searx/engines/deviantart.py View File

47
 
47
 
48
     dom = html.fromstring(resp.text)
48
     dom = html.fromstring(resp.text)
49
 
49
 
50
-    regex = re.compile('\/200H\/')
50
+    regex = re.compile(r'\/200H\/')
51
 
51
 
52
     # parse results
52
     # parse results
53
     for result in dom.xpath('//div[contains(@class, "tt-a tt-fh")]'):
53
     for result in dom.xpath('//div[contains(@class, "tt-a tt-fh")]'):

+ 2
- 2
searx/engines/google.py View File

300
     results = []
300
     results = []
301
 
301
 
302
     # try to parse the geoloc
302
     # try to parse the geoloc
303
-    m = re.search('@([0-9\.]+),([0-9\.]+),([0-9]+)', parsed_url.path)
303
+    m = re.search(r'@([0-9\.]+),([0-9\.]+),([0-9]+)', parsed_url.path)
304
     if m is None:
304
     if m is None:
305
-        m = re.search('ll\=([0-9\.]+),([0-9\.]+)\&z\=([0-9]+)', parsed_url.query)
305
+        m = re.search(r'll\=([0-9\.]+),([0-9\.]+)\&z\=([0-9]+)', parsed_url.query)
306
 
306
 
307
     if m is not None:
307
     if m is not None:
308
         # geoloc found (ignored)
308
         # geoloc found (ignored)

+ 5
- 5
searx/engines/startpage.py View File

68
         url = link.attrib.get('href')
68
         url = link.attrib.get('href')
69
 
69
 
70
         # block google-ad url's
70
         # block google-ad url's
71
-        if re.match("^http(s|)://(www\.)?google\.[a-z]+/aclk.*$", url):
71
+        if re.match(r"^http(s|)://(www\.)?google\.[a-z]+/aclk.*$", url):
72
             continue
72
             continue
73
 
73
 
74
         # block startpage search url's
74
         # block startpage search url's
75
-        if re.match("^http(s|)://(www\.)?startpage\.com/do/search\?.*$", url):
75
+        if re.match(r"^http(s|)://(www\.)?startpage\.com/do/search\?.*$", url):
76
             continue
76
             continue
77
 
77
 
78
         # block ixquick search url's
78
         # block ixquick search url's
79
-        if re.match("^http(s|)://(www\.)?ixquick\.com/do/search\?.*$", url):
79
+        if re.match(r"^http(s|)://(www\.)?ixquick\.com/do/search\?.*$", url):
80
             continue
80
             continue
81
 
81
 
82
         title = escape(extract_text(link))
82
         title = escape(extract_text(link))
89
         published_date = None
89
         published_date = None
90
 
90
 
91
         # check if search result starts with something like: "2 Sep 2014 ... "
91
         # check if search result starts with something like: "2 Sep 2014 ... "
92
-        if re.match("^([1-9]|[1-2][0-9]|3[0-1]) [A-Z][a-z]{2} [0-9]{4} \.\.\. ", content):
92
+        if re.match(r"^([1-9]|[1-2][0-9]|3[0-1]) [A-Z][a-z]{2} [0-9]{4} \.\.\. ", content):
93
             date_pos = content.find('...') + 4
93
             date_pos = content.find('...') + 4
94
             date_string = content[0:date_pos - 5]
94
             date_string = content[0:date_pos - 5]
95
             published_date = parser.parse(date_string, dayfirst=True)
95
             published_date = parser.parse(date_string, dayfirst=True)
98
             content = content[date_pos:]
98
             content = content[date_pos:]
99
 
99
 
100
         # check if search result starts with something like: "5 days ago ... "
100
         # check if search result starts with something like: "5 days ago ... "
101
-        elif re.match("^[0-9]+ days? ago \.\.\. ", content):
101
+        elif re.match(r"^[0-9]+ days? ago \.\.\. ", content):
102
             date_pos = content.find('...') + 4
102
             date_pos = content.find('...') + 4
103
             date_string = content[0:date_pos - 5]
103
             date_string = content[0:date_pos - 5]
104
 
104
 

+ 4
- 4
searx/engines/swisscows.py View File

25
 search_string = '?{query}&page={page}'
25
 search_string = '?{query}&page={page}'
26
 
26
 
27
 # regex
27
 # regex
28
-regex_json = re.compile('initialData: {"Request":(.|\n)*},\s*environment')
29
-regex_json_remove_start = re.compile('^initialData:\s*')
30
-regex_json_remove_end = re.compile(',\s*environment$')
31
-regex_img_url_remove_start = re.compile('^https?://i\.swisscows\.ch/\?link=')
28
+regex_json = re.compile(r'initialData: {"Request":(.|\n)*},\s*environment')
29
+regex_json_remove_start = re.compile(r'^initialData:\s*')
30
+regex_json_remove_end = re.compile(r',\s*environment$')
31
+regex_img_url_remove_start = re.compile(r'^https?://i\.swisscows\.ch/\?link=')
32
 
32
 
33
 
33
 
34
 # do search-request
34
 # do search-request

+ 1
- 1
searx/engines/tokyotoshokan.py View File

48
         return []
48
         return []
49
 
49
 
50
     # regular expression for parsing torrent size strings
50
     # regular expression for parsing torrent size strings
51
-    size_re = re.compile('Size:\s*([\d.]+)(TB|GB|MB|B)', re.IGNORECASE)
51
+    size_re = re.compile(r'Size:\s*([\d.]+)(TB|GB|MB|B)', re.IGNORECASE)
52
 
52
 
53
     # processing the results, two rows at a time
53
     # processing the results, two rows at a time
54
     for i in xrange(0, len(rows), 2):
54
     for i in xrange(0, len(rows), 2):

+ 1
- 1
searx/engines/www500px.py View File

41
     results = []
41
     results = []
42
 
42
 
43
     dom = html.fromstring(resp.text)
43
     dom = html.fromstring(resp.text)
44
-    regex = re.compile('3\.jpg.*$')
44
+    regex = re.compile(r'3\.jpg.*$')
45
 
45
 
46
     # parse results
46
     # parse results
47
     for result in dom.xpath('//div[@class="photo"]'):
47
     for result in dom.xpath('//div[@class="photo"]'):

+ 1
- 1
searx/engines/yahoo_news.py View File

55
 
55
 
56
 def sanitize_url(url):
56
 def sanitize_url(url):
57
     if ".yahoo.com/" in url:
57
     if ".yahoo.com/" in url:
58
-        return re.sub(u"\;\_ylt\=.+$", "", url)
58
+        return re.sub(u"\\;\\_ylt\\=.+$", "", url)
59
     else:
59
     else:
60
         return url
60
         return url
61
 
61
 

+ 1
- 1
searx/plugins/https_rewrite.py View File

87
 
87
 
88
             # convert host-rule to valid regex
88
             # convert host-rule to valid regex
89
             host = ruleset.attrib.get('host')\
89
             host = ruleset.attrib.get('host')\
90
-                .replace('.', '\.').replace('*', '.*')
90
+                .replace('.', r'\.').replace('*', '.*')
91
 
91
 
92
             # append to host list
92
             # append to host list
93
             hosts.append(host)
93
             hosts.append(host)

+ 1
- 1
searx/results.py View File

5
 from urlparse import urlparse, unquote
5
 from urlparse import urlparse, unquote
6
 from searx.engines import engines
6
 from searx.engines import engines
7
 
7
 
8
-CONTENT_LEN_IGNORED_CHARS_REGEX = re.compile('[,;:!?\./\\\\ ()-_]', re.M | re.U)
8
+CONTENT_LEN_IGNORED_CHARS_REGEX = re.compile(r'[,;:!?\./\\\\ ()-_]', re.M | re.U)
9
 WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U)
9
 WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U)
10
 
10
 
11
 
11
 

+ 1
- 1
searx/utils.py View File

63
         regex_parts = []
63
         regex_parts = []
64
         for chunk in query.split():
64
         for chunk in query.split():
65
             if len(chunk) == 1:
65
             if len(chunk) == 1:
66
-                regex_parts.append(u'\W+{0}\W+'.format(re.escape(chunk)))
66
+                regex_parts.append(u'\\W+{0}\\W+'.format(re.escape(chunk)))
67
             else:
67
             else:
68
                 regex_parts.append(u'{0}'.format(re.escape(chunk)))
68
                 regex_parts.append(u'{0}'.format(re.escape(chunk)))
69
         query_regex = u'({0})'.format('|'.join(regex_parts))
69
         query_regex = u'({0})'.format('|'.join(regex_parts))

+ 1
- 1
tests/unit/engines/test_dailymotion.py View File

62
         self.assertEqual(results[0]['content'], 'Description')
62
         self.assertEqual(results[0]['content'], 'Description')
63
         self.assertIn('x2fit7q', results[0]['embedded'])
63
         self.assertIn('x2fit7q', results[0]['embedded'])
64
 
64
 
65
-        json = """
65
+        json = r"""
66
         {"toto":[
66
         {"toto":[
67
             {"id":200,"name":"Artist Name",
67
             {"id":200,"name":"Artist Name",
68
             "link":"http:\/\/www.dailymotion.com\/artist\/1217","type":"artist"}
68
             "link":"http:\/\/www.dailymotion.com\/artist\/1217","type":"artist"}

+ 2
- 2
tests/unit/engines/test_deezer.py View File

27
         response = mock.Mock(text='{"data": []}')
27
         response = mock.Mock(text='{"data": []}')
28
         self.assertEqual(deezer.response(response), [])
28
         self.assertEqual(deezer.response(response), [])
29
 
29
 
30
-        json = """
30
+        json = r"""
31
         {"data":[
31
         {"data":[
32
             {"id":100, "title":"Title of track",
32
             {"id":100, "title":"Title of track",
33
             "link":"https:\/\/www.deezer.com\/track\/1094042","duration":232,
33
             "link":"https:\/\/www.deezer.com\/track\/1094042","duration":232,
45
         self.assertEqual(results[0]['content'], 'Artist Name • Album Title • Title of track')
45
         self.assertEqual(results[0]['content'], 'Artist Name • Album Title • Title of track')
46
         self.assertTrue('100' in results[0]['embedded'])
46
         self.assertTrue('100' in results[0]['embedded'])
47
 
47
 
48
-        json = """
48
+        json = r"""
49
         {"data":[
49
         {"data":[
50
             {"id":200,"name":"Artist Name",
50
             {"id":200,"name":"Artist Name",
51
             "link":"https:\/\/www.deezer.com\/artist\/1217","type":"artist"}
51
             "link":"https:\/\/www.deezer.com\/artist\/1217","type":"artist"}

+ 5
- 5
tests/unit/engines/test_flickr.py View File

27
         response = mock.Mock(text='{"data": []}')
27
         response = mock.Mock(text='{"data": []}')
28
         self.assertEqual(flickr.response(response), [])
28
         self.assertEqual(flickr.response(response), [])
29
 
29
 
30
-        json = """
30
+        json = r"""
31
         { "photos": { "page": 1, "pages": "41001", "perpage": 100, "total": "4100032",
31
         { "photos": { "page": 1, "pages": "41001", "perpage": 100, "total": "4100032",
32
             "photo": [
32
             "photo": [
33
             { "id": "15751017054", "owner": "66847915@N08",
33
             { "id": "15751017054", "owner": "66847915@N08",
55
         self.assertTrue('Owner' in results[0]['content'])
55
         self.assertTrue('Owner' in results[0]['content'])
56
         self.assertTrue('Description' in results[0]['content'])
56
         self.assertTrue('Description' in results[0]['content'])
57
 
57
 
58
-        json = """
58
+        json = r"""
59
         { "photos": { "page": 1, "pages": "41001", "perpage": 100, "total": "4100032",
59
         { "photos": { "page": 1, "pages": "41001", "perpage": 100, "total": "4100032",
60
             "photo": [
60
             "photo": [
61
             { "id": "15751017054", "owner": "66847915@N08",
61
             { "id": "15751017054", "owner": "66847915@N08",
79
         self.assertTrue('Owner' in results[0]['content'])
79
         self.assertTrue('Owner' in results[0]['content'])
80
         self.assertTrue('Description' in results[0]['content'])
80
         self.assertTrue('Description' in results[0]['content'])
81
 
81
 
82
-        json = """
82
+        json = r"""
83
         { "photos": { "page": 1, "pages": "41001", "perpage": 100, "total": "4100032",
83
         { "photos": { "page": 1, "pages": "41001", "perpage": 100, "total": "4100032",
84
             "photo": [
84
             "photo": [
85
             { "id": "15751017054", "owner": "66847915@N08",
85
             { "id": "15751017054", "owner": "66847915@N08",
103
         self.assertTrue('Owner' in results[0]['content'])
103
         self.assertTrue('Owner' in results[0]['content'])
104
         self.assertTrue('Description' in results[0]['content'])
104
         self.assertTrue('Description' in results[0]['content'])
105
 
105
 
106
-        json = """
106
+        json = r"""
107
         { "photos": { "page": 1, "pages": "41001", "perpage": 100, "total": "4100032",
107
         { "photos": { "page": 1, "pages": "41001", "perpage": 100, "total": "4100032",
108
             "photo": [
108
             "photo": [
109
             { "id": "15751017054", "owner": "66847915@N08",
109
             { "id": "15751017054", "owner": "66847915@N08",
130
         self.assertEqual(type(results), list)
130
         self.assertEqual(type(results), list)
131
         self.assertEqual(len(results), 0)
131
         self.assertEqual(len(results), 0)
132
 
132
 
133
-        json = """
133
+        json = r"""
134
         {"toto":[
134
         {"toto":[
135
             {"id":200,"name":"Artist Name",
135
             {"id":200,"name":"Artist Name",
136
             "link":"http:\/\/www.flickr.com\/artist\/1217","type":"artist"}
136
             "link":"http:\/\/www.flickr.com\/artist\/1217","type":"artist"}

+ 1
- 1
tests/unit/engines/test_flickr_noapi.py View File

316
         self.assertEqual(len(results), 0)
316
         self.assertEqual(len(results), 0)
317
 
317
 
318
         # garbage test
318
         # garbage test
319
-        json = """
319
+        json = r"""
320
         {"toto":[
320
         {"toto":[
321
             {"id":200,"name":"Artist Name",
321
             {"id":200,"name":"Artist Name",
322
             "link":"http:\/\/www.flickr.com\/artist\/1217","type":"artist"}
322
             "link":"http:\/\/www.flickr.com\/artist\/1217","type":"artist"}

+ 12
- 12
tests/unit/engines/test_ina.py View File

33
         <div class=\\"search-results--list\\"><div class=\\"media\\">\\n\
33
         <div class=\\"search-results--list\\"><div class=\\"media\\">\\n\
34
         \\t\\t\\t\\t<a class=\\"media-left  media-video  premium    xiti_click_action\\" \
34
         \\t\\t\\t\\t<a class=\\"media-left  media-video  premium    xiti_click_action\\" \
35
         data-xiti-params=\\"recherche_v4::resultats_conference_de_presse_du_general_de_gaulle::N\\" \
35
         data-xiti-params=\\"recherche_v4::resultats_conference_de_presse_du_general_de_gaulle::N\\" \
36
-        href=\\"\/video\/CAF89035682\/conference-de-presse-du-general-de-gaulle-video.html\\">\\n\
37
-        <img src=\\"https:\/\/www.ina.fr\/images_v2\/140x105\/CAF89035682.jpeg\\" \
36
+        href=\\"\\/video\\/CAF89035682\\/conference-de-presse-du-general-de-gaulle-video.html\\">\\n\
37
+        <img src=\\"https:\\/\\/www.ina.fr\\/images_v2\\/140x105\\/CAF89035682.jpeg\\" \
38
         alt=\\"Conf\\u00e9rence de presse du G\\u00e9n\\u00e9ral de Gaulle \\">\\n\
38
         alt=\\"Conf\\u00e9rence de presse du G\\u00e9n\\u00e9ral de Gaulle \\">\\n\
39
-        \\t\\t\\t\\t\\t<\/a>\\n\
39
+        \\t\\t\\t\\t\\t<\\/a>\\n\
40
         \\t\\t\\t\\t\\t<div class=\\"media-body\\">\\n\\t\\t\\t\\t\\t\\t<h3 class=\\"h3--title media-heading\\">\\n\
40
         \\t\\t\\t\\t\\t<div class=\\"media-body\\">\\n\\t\\t\\t\\t\\t\\t<h3 class=\\"h3--title media-heading\\">\\n\
41
         \\t\\t\\t\\t\\t\\t\\t<a class=\\"xiti_click_action\\" \
41
         \\t\\t\\t\\t\\t\\t\\t<a class=\\"xiti_click_action\\" \
42
         data-xiti-params=\\"recherche_v4::resultats_conference_de_presse_du_general_de_gaulle::N\\" \
42
         data-xiti-params=\\"recherche_v4::resultats_conference_de_presse_du_general_de_gaulle::N\\" \
43
-        href=\\"\/video\/CAF89035682\/conference-de-presse-du-general-de-gaulle-video.html\\">\
44
-        Conf\\u00e9rence de presse du G\\u00e9n\\u00e9ral de Gaulle <\/a>\\n\
45
-        <\/h3>\\n\
46
-        <div class=\\"media-body__info\\">\\n<span class=\\"broadcast\\">27\/11\/1967<\/span>\\n\
47
-        <span class=\\"views\\">29321 vues<\/span>\\n\
48
-        <span class=\\"duration\\">01h 33m 07s<\/span>\\n\
49
-        <\/div>\\n\
43
+        href=\\"\\/video\\/CAF89035682\\/conference-de-presse-du-general-de-gaulle-video.html\\">\
44
+        Conf\\u00e9rence de presse du G\\u00e9n\\u00e9ral de Gaulle <\\/a>\\n\
45
+        <\\/h3>\\n\
46
+        <div class=\\"media-body__info\\">\\n<span class=\\"broadcast\\">27\\/11\\/1967<\\/span>\\n\
47
+        <span class=\\"views\\">29321 vues<\\/span>\\n\
48
+        <span class=\\"duration\\">01h 33m 07s<\\/span>\\n\
49
+        <\\/div>\\n\
50
         <p class=\\"media-body__summary\\">VERSION INTEGRALE DE LA CONFERENCE DE PRESSE DU GENERAL DE GAULLE . \
50
         <p class=\\"media-body__summary\\">VERSION INTEGRALE DE LA CONFERENCE DE PRESSE DU GENERAL DE GAULLE . \
51
-              - PA le Pr\\u00e9sident DE GAULLE : il ouvre les bras et s'assied. DP journalis...<\/p>\\n\
52
-        <\/div>\\n<\/div><!-- \/.media -->\\n"
51
+              - PA le Pr\\u00e9sident DE GAULLE : il ouvre les bras et s'assied. DP journalis...<\\/p>\\n\
52
+        <\\/div>\\n<\\/div><!-- \\/.media -->\\n"
53
         }
53
         }
54
         """
54
         """
55
         response = mock.Mock(text=json)
55
         response = mock.Mock(text=json)

+ 1
- 1
tests/unit/engines/test_mediawiki.py View File

118
         self.assertEqual(type(results), list)
118
         self.assertEqual(type(results), list)
119
         self.assertEqual(len(results), 0)
119
         self.assertEqual(len(results), 0)
120
 
120
 
121
-        json = """
121
+        json = r"""
122
         {"toto":[
122
         {"toto":[
123
             {"id":200,"name":"Artist Name",
123
             {"id":200,"name":"Artist Name",
124
             "link":"http:\/\/www.mediawiki.com\/artist\/1217","type":"artist"}
124
             "link":"http:\/\/www.mediawiki.com\/artist\/1217","type":"artist"}

+ 1
- 1
tests/unit/engines/test_mixcloud.py View File

55
         self.assertEqual(results[0]['content'], 'User')
55
         self.assertEqual(results[0]['content'], 'User')
56
         self.assertTrue('http://www.mixcloud.com/user/this-is-the-url/' in results[0]['embedded'])
56
         self.assertTrue('http://www.mixcloud.com/user/this-is-the-url/' in results[0]['embedded'])
57
 
57
 
58
-        json = """
58
+        json = r"""
59
         {"toto":[
59
         {"toto":[
60
             {"id":200,"name":"Artist Name",
60
             {"id":200,"name":"Artist Name",
61
             "link":"http:\/\/www.mixcloud.com\/artist\/1217","type":"artist"}
61
             "link":"http:\/\/www.mixcloud.com\/artist\/1217","type":"artist"}

+ 1
- 1
tests/unit/engines/test_searchcode_code.py View File

63
         self.assertEqual(results[0]['repository'], 'https://repo')
63
         self.assertEqual(results[0]['repository'], 'https://repo')
64
         self.assertEqual(results[0]['code_language'], 'cpp')
64
         self.assertEqual(results[0]['code_language'], 'cpp')
65
 
65
 
66
-        json = """
66
+        json = r"""
67
         {"toto":[
67
         {"toto":[
68
             {"id":200,"name":"Artist Name",
68
             {"id":200,"name":"Artist Name",
69
             "link":"http:\/\/www.searchcode_code.com\/artist\/1217","type":"artist"}
69
             "link":"http:\/\/www.searchcode_code.com\/artist\/1217","type":"artist"}

+ 1
- 1
tests/unit/engines/test_searchcode_doc.py View File

61
         self.assertIn('test', results[0]['content'])
61
         self.assertIn('test', results[0]['content'])
62
         self.assertIn('Description', results[0]['content'])
62
         self.assertIn('Description', results[0]['content'])
63
 
63
 
64
-        json = """
64
+        json = r"""
65
         {"toto":[
65
         {"toto":[
66
             {"id":200,"name":"Artist Name",
66
             {"id":200,"name":"Artist Name",
67
             "link":"http:\/\/www.searchcode_doc.com\/artist\/1217","type":"artist"}
67
             "link":"http:\/\/www.searchcode_doc.com\/artist\/1217","type":"artist"}

+ 3
- 3
tests/unit/engines/test_wolframalpha_noapi.py View File

28
         request = Request(headers={'Referer': referer_url})
28
         request = Request(headers={'Referer': referer_url})
29
 
29
 
30
         # test failure
30
         # test failure
31
-        json = '''
31
+        json = r'''
32
         {"queryresult" : {
32
         {"queryresult" : {
33
             "success" : false,
33
             "success" : false,
34
             "error" : false,
34
             "error" : false,
42
         self.assertEqual(wolframalpha_noapi.response(response), [])
42
         self.assertEqual(wolframalpha_noapi.response(response), [])
43
 
43
 
44
         # test basic case
44
         # test basic case
45
-        json = '''
45
+        json = r'''
46
         {"queryresult" : {
46
         {"queryresult" : {
47
             "success" : true,
47
             "success" : true,
48
             "error" : false,
48
             "error" : false,
143
         self.assertEqual('Wolfram|Alpha', results[1]['title'])
143
         self.assertEqual('Wolfram|Alpha', results[1]['title'])
144
 
144
 
145
         # test calc
145
         # test calc
146
-        json = """
146
+        json = r"""
147
         {"queryresult" : {
147
         {"queryresult" : {
148
             "success" : true,
148
             "success" : true,
149
             "error" : false,
149
             "error" : false,