|
@@ -20,10 +20,12 @@ from searx.engines.xpath import extract_text, extract_url
|
20
|
20
|
# Engine settings for this module.
# NOTE(review): presumably read by the engine loader to learn which
# features (paging, language, time range) this engine supports - confirm.
categories = ['general']
|
21
|
21
|
paging = True
|
22
|
22
|
language_support = True
|
|
23
|
# Added together with search_url_with_time / time_range_dict / _get_url.
+time_range_support = True
|
23
|
24
|
|
24
|
25
|
# search-url
# Both templates are appended to base_url and filled with str.format():
#   {query}  - already url-encoded 'p=<terms>' pair
#   {offset} - value of the 'b' parameter
#   {lang}   - language code placed after 'lang_'
# search_url_with_time additionally takes {age} and {btf}, whose values come
# from time_range_dict.
|
25
|
26
|
base_url = 'https://search.yahoo.com/'
|
26
|
27
|
search_url = 'search?{query}&b={offset}&fl=1&vl=lang_{lang}'
|
|
28
|
+search_url_with_time = 'search?{query}&b={offset}&fl=1&vl=lang_{lang}&age={age}&btf={btf}&fr2=time'
|
27
|
29
|
|
28
|
30
|
# specific xpath variables
|
29
|
31
|
results_xpath = "//div[contains(concat(' ', normalize-space(@class), ' '), ' Sr ')]"
|
|
@@ -32,6 +34,9 @@ title_xpath = './/h3/a'
|
32
|
34
|
content_xpath = './/div[@class="compText aAbs"]'
|
33
|
35
|
suggestion_xpath = "//div[contains(concat(' ', normalize-space(@class), ' '), ' AlsoTry ')]//a"
|
34
|
36
|
|
|
37
|
# Maps a time-range key to a two-item list: index 0 is substituted into the
# 'age' URL parameter and index 1 into the 'btf' URL parameter (see _get_url).
+time_range_dict = {'day': ['1d', 'd'],
|
|
38
|
+ 'week': ['1w', 'w'],
|
|
39
|
+ 'month': ['1m', 'm']}
|
35
|
40
|
|
36
|
41
|
# remove yahoo-specific tracking-url
|
37
|
42
|
def parse_url(url_string):
|
|
@@ -51,18 +56,30 @@ def parse_url(url_string):
|
51
|
56
|
return unquote(url_string[start:end])
|
52
|
57
|
|
53
|
58
|
|
|
59
|
def _get_url(query, offset, language, time_range):
    """Build the complete search URL for one request.

    :param query: search terms; url-encoded here as the ``p`` parameter.
    :param offset: value substituted into the ``b`` parameter.
    :param language: language code substituted after ``lang_``.
    :param time_range: a key of ``time_range_dict`` to restrict results by
        age. Any falsy or unknown value yields the unrestricted URL.
    :returns: ``base_url`` followed by the formatted search path.
    """
    encoded_query = urlencode({'p': query})

    # Use .get() instead of indexing: a truthy but unsupported range
    # (anything other than the keys of time_range_dict) used to raise
    # KeyError here. Such values now fall back to the plain search URL.
    time_params = time_range_dict.get(time_range)
    if time_params:
        age, btf = time_params
        return base_url + search_url_with_time.format(offset=offset,
                                                      query=encoded_query,
                                                      lang=language,
                                                      age=age,
                                                      btf=btf)

    return base_url + search_url.format(offset=offset,
                                        query=encoded_query,
                                        lang=language)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+def _get_language(params):
|
|
72
|
+ if params['language'] == 'all':
|
|
73
|
+ return 'en'
|
|
74
|
+ return params['language'].split('_')[0]
|
|
75
|
+
|
|
76
|
+
|
54
|
77
|
# do search-request
|
55
|
78
|
def request(query, params):
|
56
|
79
|
offset = (params['pageno'] - 1) * 10 + 1
|
|
80
|
+ language = _get_language(params)
|
57
|
81
|
|
58
|
|
- if params['language'] == 'all':
|
59
|
|
- language = 'en'
|
60
|
|
- else:
|
61
|
|
- language = params['language'].split('_')[0]
|
62
|
|
-
|
63
|
|
- params['url'] = base_url + search_url.format(offset=offset,
|
64
|
|
- query=urlencode({'p': query}),
|
65
|
|
- lang=language)
|
|
82
|
+ params['url'] = _get_url(query, offset, language, params['time_range'])
|
66
|
83
|
|
67
|
84
|
# TODO required?
|
68
|
85
|
params['cookies']['sB'] = 'fl=1&vl=lang_{lang}&sh=1&rw=new&v=1'\
|