Browse Source

Merge pull request #746 from kvch/moar-time-range-support

Support time range search in more engines
Adam Tauber 8 years ago
parent
commit
e23c8f954b

+ 7
- 0
searx/engines/bing_images.py View File

24
 categories = ['images']
24
 categories = ['images']
25
 paging = True
25
 paging = True
26
 safesearch = True
26
 safesearch = True
27
+time_range_support = True
27
 
28
 
28
 # search-url
29
 # search-url
29
 base_url = 'https://www.bing.com/'
30
 base_url = 'https://www.bing.com/'
30
 search_string = 'images/search?{query}&count=10&first={offset}'
31
 search_string = 'images/search?{query}&count=10&first={offset}'
32
+time_range_string = '&qft=+filterui:age-lt{interval}'
31
 thumb_url = "https://www.bing.com/th?id={ihk}"
33
 thumb_url = "https://www.bing.com/th?id={ihk}"
34
+time_range_dict = {'day': '1440',
35
+                   'week': '10080',
36
+                   'month': '43200'}
32
 
37
 
33
 # safesearch definitions
38
 # safesearch definitions
34
 safesearch_types = {2: 'STRICT',
39
 safesearch_types = {2: 'STRICT',
58
         '&ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE')
63
         '&ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE')
59
 
64
 
60
     params['url'] = base_url + search_path
65
     params['url'] = base_url + search_path
66
+    if params['time_range'] in time_range_dict:
67
+        params['url'] += time_range_string.format(interval=time_range_dict[params['time_range']])
61
 
68
 
62
     return params
69
     return params
63
 
70
 

+ 19
- 5
searx/engines/bing_news.py View File

22
 categories = ['news']
22
 categories = ['news']
23
 paging = True
23
 paging = True
24
 language_support = True
24
 language_support = True
25
+time_range_support = True
25
 
26
 
26
 # search-url
27
 # search-url
27
 base_url = 'https://www.bing.com/'
28
 base_url = 'https://www.bing.com/'
28
 search_string = 'news/search?{query}&first={offset}&format=RSS'
29
 search_string = 'news/search?{query}&first={offset}&format=RSS'
30
+search_string_with_time = 'news/search?{query}&first={offset}&qft=interval%3d"{interval}"&format=RSS'
31
+time_range_dict = {'day': '7',
32
+                   'week': '8',
33
+                   'month': '9'}
29
 
34
 
30
 
35
 
31
 # remove click
36
 # remove click
46
     return url_string
51
     return url_string
47
 
52
 
48
 
53
 
54
+def _get_url(query, language, offset, time_range):
55
+    if time_range in time_range_dict:
56
+        search_path = search_string_with_time.format(
57
+            query=urlencode({'q': query, 'setmkt': language}),
58
+            offset=offset,
59
+            interval=time_range_dict[time_range])
60
+    else:
61
+        search_path = search_string.format(
62
+            query=urlencode({'q': query, 'setmkt': language}),
63
+            offset=offset)
64
+    return base_url + search_path
65
+
66
+
49
 # do search-request
67
 # do search-request
50
 def request(query, params):
68
 def request(query, params):
51
     offset = (params['pageno'] - 1) * 10 + 1
69
     offset = (params['pageno'] - 1) * 10 + 1
55
     else:
73
     else:
56
         language = params['language'].replace('_', '-')
74
         language = params['language'].replace('_', '-')
57
 
75
 
58
-    search_path = search_string.format(
59
-        query=urlencode({'q': query, 'setmkt': language}),
60
-        offset=offset)
61
-
62
-    params['url'] = base_url + search_path
76
+    params['url'] = _get_url(query, language, offset, params['time_range'])
63
 
77
 
64
     return params
78
     return params
65
 
79
 

+ 14
- 3
searx/engines/flickr_noapi.py View File

14
 
14
 
15
 from urllib import urlencode
15
 from urllib import urlencode
16
 from json import loads
16
 from json import loads
17
+from time import time
17
 import re
18
 import re
18
 from searx.engines import logger
19
 from searx.engines import logger
19
 
20
 
24
 
25
 
25
 url = 'https://www.flickr.com/'
26
 url = 'https://www.flickr.com/'
26
 search_url = url + 'search?{query}&page={page}'
27
 search_url = url + 'search?{query}&page={page}'
28
+time_range_url = '&min_upload_date={start}&max_upload_date={end}'
27
 photo_url = 'https://www.flickr.com/photos/{userid}/{photoid}'
29
 photo_url = 'https://www.flickr.com/photos/{userid}/{photoid}'
28
 regex = re.compile(r"\"search-photos-lite-models\",\"photos\":(.*}),\"totalItems\":", re.DOTALL)
30
 regex = re.compile(r"\"search-photos-lite-models\",\"photos\":(.*}),\"totalItems\":", re.DOTALL)
29
 image_sizes = ('o', 'k', 'h', 'b', 'c', 'z', 'n', 'm', 't', 'q', 's')
31
 image_sizes = ('o', 'k', 'h', 'b', 'c', 'z', 'n', 'm', 't', 'q', 's')
30
 
32
 
31
 paging = True
33
 paging = True
34
+time_range_support = True
35
+time_range_dict = {'day': 60 * 60 * 24,
36
+                   'week': 60 * 60 * 24 * 7,
37
+                   'month': 60 * 60 * 24 * 7 * 4}
32
 
38
 
33
 
39
 
34
 def build_flickr_url(user_id, photo_id):
40
 def build_flickr_url(user_id, photo_id):
35
     return photo_url.format(userid=user_id, photoid=photo_id)
41
     return photo_url.format(userid=user_id, photoid=photo_id)
36
 
42
 
37
 
43
 
38
-def request(query, params):
39
-    params['url'] = search_url.format(query=urlencode({'text': query}),
40
-                                      page=params['pageno'])
44
+def _get_time_range_url(time_range):
45
+    if time_range in time_range_dict:
46
+        return time_range_url.format(start=time(), end=str(int(time()) - time_range_dict[time_range]))
47
+    return ''
48
+
41
 
49
 
50
+def request(query, params):
51
+    params['url'] = (search_url.format(query=urlencode({'text': query}), page=params['pageno'])
52
+                     + _get_time_range_url(params['time_range']))
42
     return params
53
     return params
43
 
54
 
44
 
55
 

+ 7
- 0
searx/engines/youtube_noapi.py View File

17
 categories = ['videos', 'music']
17
 categories = ['videos', 'music']
18
 paging = True
18
 paging = True
19
 language_support = False
19
 language_support = False
20
+time_range_support = True
20
 
21
 
21
 # search-url
22
 # search-url
22
 base_url = 'https://www.youtube.com/results'
23
 base_url = 'https://www.youtube.com/results'
23
 search_url = base_url + '?search_query={query}&page={page}'
24
 search_url = base_url + '?search_query={query}&page={page}'
25
+time_range_url = '&sp=EgII{time_range}%253D%253D'
26
+time_range_dict = {'day': 'Ag',
27
+                   'week': 'Aw',
28
+                   'month': 'BA'}
24
 
29
 
25
 embedded_url = '<iframe width="540" height="304" ' +\
30
 embedded_url = '<iframe width="540" height="304" ' +\
26
     'data-src="//www.youtube-nocookie.com/embed/{videoid}" ' +\
31
     'data-src="//www.youtube-nocookie.com/embed/{videoid}" ' +\
47
 def request(query, params):
52
 def request(query, params):
48
     params['url'] = search_url.format(query=quote_plus(query),
53
     params['url'] = search_url.format(query=quote_plus(query),
49
                                       page=params['pageno'])
54
                                       page=params['pageno'])
55
+    if params['time_range'] in time_range_dict:
56
+        params['url'] += time_range_url.format(time_range=time_range_dict[params['time_range']])
50
 
57
 
51
     return params
58
     return params
52
 
59
 

+ 2
- 0
searx/templates/oscar/preferences.html View File

158
 				    <th>{{ _("Engine name") }}</th>
158
 				    <th>{{ _("Engine name") }}</th>
159
 				    <th>{{ _("Shortcut") }}</th>
159
 				    <th>{{ _("Shortcut") }}</th>
160
 				    <th>{{ _("SafeSearch") }}</th>
160
 				    <th>{{ _("SafeSearch") }}</th>
161
+				    <th>{{ _("Time range") }}</th>
161
 				    <th>{{ _("Avg. time") }}</th>
162
 				    <th>{{ _("Avg. time") }}</th>
162
 				    <th>{{ _("Max time") }}</th>
163
 				    <th>{{ _("Max time") }}</th>
163
                                     {% else %}
164
                                     {% else %}
179
                                     <th>{{ search_engine.name }}</th>
180
                                     <th>{{ search_engine.name }}</th>
180
 				    <td>{{ shortcuts[search_engine.name] }}</td>
181
 				    <td>{{ shortcuts[search_engine.name] }}</td>
181
 				    <td><input type="checkbox" {{ "checked" if search_engine.safesearch==True else ""}} readonly="readonly" disabled="disabled"></td>
182
 				    <td><input type="checkbox" {{ "checked" if search_engine.safesearch==True else ""}} readonly="readonly" disabled="disabled"></td>
183
+				    <td><input type="checkbox" {{ "checked" if search_engine.time_range_support==True else ""}} readonly="readonly" disabled="disabled"></td>
182
 				    <td class="{{ 'danger' if stats[search_engine.name]['warn_time'] else '' }}">{{ 'N/A' if stats[search_engine.name].time==None else stats[search_engine.name].time }}</td>
184
 				    <td class="{{ 'danger' if stats[search_engine.name]['warn_time'] else '' }}">{{ 'N/A' if stats[search_engine.name].time==None else stats[search_engine.name].time }}</td>
183
 				    <td class="{{ 'danger' if stats[search_engine.name]['warn_timeout'] else '' }}">{{ search_engine.timeout }}</td>
185
 				    <td class="{{ 'danger' if stats[search_engine.name]['warn_timeout'] else '' }}">{{ search_engine.timeout }}</td>
184
                                     {% else %}
186
                                     {% else %}

+ 1
- 0
tests/unit/engines/test_bing_images.py View File

13
         dicto['pageno'] = 1
13
         dicto['pageno'] = 1
14
         dicto['language'] = 'fr_FR'
14
         dicto['language'] = 'fr_FR'
15
         dicto['safesearch'] = 1
15
         dicto['safesearch'] = 1
16
+        dicto['time_range'] = ''
16
         params = bing_images.request(query, dicto)
17
         params = bing_images.request(query, dicto)
17
         self.assertTrue('url' in params)
18
         self.assertTrue('url' in params)
18
         self.assertTrue(query in params['url'])
19
         self.assertTrue(query in params['url'])

+ 1
- 0
tests/unit/engines/test_bing_news.py View File

12
         dicto = defaultdict(dict)
12
         dicto = defaultdict(dict)
13
         dicto['pageno'] = 1
13
         dicto['pageno'] = 1
14
         dicto['language'] = 'fr_FR'
14
         dicto['language'] = 'fr_FR'
15
+        dicto['time_range'] = ''
15
         params = bing_news.request(query, dicto)
16
         params = bing_news.request(query, dicto)
16
         self.assertIn('url', params)
17
         self.assertIn('url', params)
17
         self.assertIn(query, params['url'])
18
         self.assertIn(query, params['url'])

+ 1
- 0
tests/unit/engines/test_flickr_noapi.py View File

15
         query = 'test_query'
15
         query = 'test_query'
16
         dicto = defaultdict(dict)
16
         dicto = defaultdict(dict)
17
         dicto['pageno'] = 1
17
         dicto['pageno'] = 1
18
+        dicto['time_range'] = ''
18
         params = flickr_noapi.request(query, dicto)
19
         params = flickr_noapi.request(query, dicto)
19
         self.assertIn('url', params)
20
         self.assertIn('url', params)
20
         self.assertIn(query, params['url'])
21
         self.assertIn(query, params['url'])

+ 1
- 0
tests/unit/engines/test_youtube_noapi.py View File

11
         query = 'test_query'
11
         query = 'test_query'
12
         dicto = defaultdict(dict)
12
         dicto = defaultdict(dict)
13
         dicto['pageno'] = 0
13
         dicto['pageno'] = 0
14
+        dicto['time_range'] = ''
14
         params = youtube_noapi.request(query, dicto)
15
         params = youtube_noapi.request(query, dicto)
15
         self.assertIn('url', params)
16
         self.assertIn('url', params)
16
         self.assertIn(query, params['url'])
17
         self.assertIn(query, params['url'])