浏览代码

add time range search with yahoo

Noemi Vanyi 8 年前
父节点
当前提交
93c0c49e9a
共有 4 个文件被更改，包括 35 次插入和 11 次删除
  1. 2
    1
      searx/engines/__init__.py
  2. 25
    8
      searx/engines/yahoo.py
  3. 7
    2
      searx/search.py
  4. 1
    0
      searx/webapp.py

+ 2
- 1
searx/engines/__init__.py 查看文件

@@ -42,7 +42,8 @@ engine_default_args = {'paging': False,
42 42
                        'shortcut': '-',
43 43
                        'disabled': False,
44 44
                        'suspend_end_time': 0,
45
-                       'continuous_errors': 0}
45
+                       'continuous_errors': 0,
46
+                       'time_range_support': False}
46 47
 
47 48
 
48 49
 def load_module(filename):

+ 25
- 8
searx/engines/yahoo.py 查看文件

@@ -20,10 +20,12 @@ from searx.engines.xpath import extract_text, extract_url
20 20
 categories = ['general']
21 21
 paging = True
22 22
 language_support = True
23
+time_range_support = True
23 24
 
24 25
 # search-url
25 26
 base_url = 'https://search.yahoo.com/'
26 27
 search_url = 'search?{query}&b={offset}&fl=1&vl=lang_{lang}'
28
+search_url_with_time = 'search?{query}&b={offset}&fl=1&vl=lang_{lang}&age={age}&btf={btf}&fr2=time'
27 29
 
28 30
 # specific xpath variables
29 31
 results_xpath = "//div[contains(concat(' ', normalize-space(@class), ' '), ' Sr ')]"
@@ -32,6 +34,9 @@ title_xpath = './/h3/a'
32 34
 content_xpath = './/div[@class="compText aAbs"]'
33 35
 suggestion_xpath = "//div[contains(concat(' ', normalize-space(@class), ' '), ' AlsoTry ')]//a"
34 36
 
37
+time_range_dict = {'day': ['1d', 'd'],
38
+                   'week': ['1w', 'w'],
39
+                   'month': ['1m', 'm']}
35 40
 
36 41
 # remove yahoo-specific tracking-url
37 42
 def parse_url(url_string):
@@ -51,18 +56,30 @@ def parse_url(url_string):
51 56
         return unquote(url_string[start:end])
52 57
 
53 58
 
59
+def _get_url(query, offset, language, time_range):
60
+    if time_range:
61
+        return base_url + search_url_with_time.format(offset=offset,
62
+                                                      query=urlencode({'p': query}),
63
+                                                      lang=language,
64
+                                                      age=time_range_dict[time_range][0],
65
+                                                      btf=time_range_dict[time_range][1])
66
+    return base_url + search_url.format(offset=offset,
67
+                                        query=urlencode({'p': query}),
68
+                                        lang=language)
69
+
70
+
71
+def _get_language(params):
72
+    if params['language'] == 'all':
73
+        return 'en'
74
+    return params['language'].split('_')[0]
75
+
76
+
54 77
 # do search-request
55 78
 def request(query, params):
56 79
     offset = (params['pageno'] - 1) * 10 + 1
80
+    language = _get_language(params)
57 81
 
58
-    if params['language'] == 'all':
59
-        language = 'en'
60
-    else:
61
-        language = params['language'].split('_')[0]
62
-
63
-    params['url'] = base_url + search_url.format(offset=offset,
64
-                                                 query=urlencode({'p': query}),
65
-                                                 lang=language)
82
+    params['url'] = _get_url(query, offset, language, params['time_range'])
66 83
 
67 84
     # TODO required?
68 85
     params['cookies']['sB'] = 'fl=1&vl=lang_{lang}&sh=1&rw=new&v=1'\

+ 7
- 2
searx/search.py 查看文件

@@ -138,6 +138,7 @@ class Search(object):
138 138
         self.paging = False
139 139
         self.pageno = 1
140 140
         self.lang = 'all'
141
+        self.time_range = None
141 142
 
142 143
         # set blocked engines
143 144
         self.disabled_engines = request.preferences.engines.get_disabled()
@@ -178,9 +179,9 @@ class Search(object):
178 179
         if len(query_obj.languages):
179 180
             self.lang = query_obj.languages[-1]
180 181
 
181
-        self.engines = query_obj.engines
182
+        self.time_range = self.request_data.get('time_range')
182 183
 
183
-        self.categories = []
184
+        self.engines = query_obj.engines
184 185
 
185 186
         # if engines are calculated from query,
186 187
         # set categories by using that information
@@ -279,6 +280,9 @@ class Search(object):
279 280
             if self.lang != 'all' and not engine.language_support:
280 281
                 continue
281 282
 
283
+            if self.time_range and not engine.time_range_support:
284
+                continue
285
+
282 286
             # set default request parameters
283 287
             request_params = default_request_params()
284 288
             request_params['headers']['User-Agent'] = user_agent
@@ -293,6 +297,7 @@ class Search(object):
293 297
 
294 298
             # 0 = None, 1 = Moderate, 2 = Strict
295 299
             request_params['safesearch'] = request.preferences.get_value('safesearch')
300
+            request_params['time_range'] = self.time_range
296 301
 
297 302
             # update request parameters dependent on
298 303
             # search-engine (contained in engines folder)

+ 1
- 0
searx/webapp.py 查看文件

@@ -459,6 +459,7 @@ def index():
459 459
         paging=search.paging,
460 460
         number_of_results=format_decimal(number_of_results),
461 461
         pageno=search.pageno,
462
+        time_range=search.time_range,
462 463
         base_url=get_base_url(),
463 464
         suggestions=search.result_container.suggestions,
464 465
         answers=search.result_container.answers,