Thomas Pointhuber, 10 years ago
commit a508d540ac

searx/engines/bing_news.py  (+8 -3)

@@ -57,12 +57,16 @@
         link = result.xpath('.//div[@class="newstitle"]/a')[0]
         url = link.attrib.get('href')
         title = ' '.join(link.xpath('.//text()'))
-        contentXPath = result.xpath('.//div[@class="sn_txt"]/div//span[@class="sn_snip"]//text()')
+        contentXPath = result.xpath('.//div[@class="sn_txt"]/div'
+                                    '//span[@class="sn_snip"]//text()')
         if contentXPath is not None:
             content = escape(' '.join(contentXPath))

         # parse publishedDate
-        publishedDateXPath = result.xpath('.//div[@class="sn_txt"]/div//span[contains(@class,"sn_ST")]//span[contains(@class,"sn_tm")]//text()')
+        publishedDateXPath = result.xpath('.//div[@class="sn_txt"]/div'
+                                          '//span[contains(@class,"sn_ST")]'
+                                          '//span[contains(@class,"sn_tm")]'
+                                          '//text()')
         if publishedDateXPath is not None:
             publishedDate = escape(' '.join(publishedDateXPath))

@@ -74,7 +78,8 @@
             timeNumbers = re.findall(r'\d+', publishedDate)
             publishedDate = datetime.now()\
                 - timedelta(hours=int(timeNumbers[0]))
-        elif re.match("^[0-9]+ hour(s|), [0-9]+ minute(s|) ago$", publishedDate):
+        elif re.match("^[0-9]+ hour(s|),"
+                      " [0-9]+ minute(s|) ago$", publishedDate):
             timeNumbers = re.findall(r'\d+', publishedDate)
             publishedDate = datetime.now()\
                 - timedelta(hours=int(timeNumbers[0]))\
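
The reflowed XPath and regex literals rely on Python's implicit concatenation of adjacent string literals, so the values handed to lxml and re are unchanged. A minimal sketch, not part of the commit (the sample date string is illustrative):

    import re

    # Adjacent string literals are joined at compile time, so the reflowed
    # XPath is byte-for-byte identical to the old single-line version.
    one_line = './/div[@class="sn_txt"]/div//span[@class="sn_snip"]//text()'
    reflowed = ('.//div[@class="sn_txt"]/div'
                '//span[@class="sn_snip"]//text()')
    assert one_line == reflowed

    # The same holds for the split relative-date pattern.
    pattern = ("^[0-9]+ hour(s|),"
               " [0-9]+ minute(s|) ago$")
    assert re.match(pattern, "3 hours, 20 minutes ago") is not None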

searx/engines/faroo.py  (+11 -4)

@@ -22,10 +22,17 @@
 
 # search-url
 url = 'http://www.faroo.com/'
-search_url = url + 'api?{query}&start={offset}&length={number_of_results}&l={language}&src={categorie}&i=false&f=json&key={api_key}'
+search_url = url + 'api?{query}'\
+                      '&start={offset}'\
+                      '&length={number_of_results}'\
+                      '&l={language}'\
+                      '&src={categorie}'\
+                      '&i=false'\
+                      '&f=json'\
+                      '&key={api_key}'  # noqa
 
 search_category = {'general': 'web',
-                'news': 'news'}
+                   'news': 'news'}
 
 
 # do search-request
@@ -80,8 +87,8 @@
     # parse results
     for result in search_res['results']:
         if result['news']:
-            # timestamp (how many milliseconds have passed between now and the beginning of 1970)
-            publishedDate = datetime.datetime.fromtimestamp(result['date']/1000.0)
+            # timestamp (milliseconds since 1970)
+            publishedDate = datetime.datetime.fromtimestamp(result['date']/1000.0)  # noqa
 
             # append news result
             results.append({'url': result['url'],
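
The backslash-continued literals concatenate back into the same one-line URL template as before; the trailing # noqa only tells flake8 to skip the length warning on that line. A small sketch under that reading, with illustrative parameter values that are not taken from the engine:

    # Sketch only: the continued string literals form one template string,
    # which the engine's request() later fills with str.format().
    url = 'http://www.faroo.com/'
    search_url = url + 'api?{query}'\
                       '&start={offset}'\
                       '&length={number_of_results}'
    example = search_url.format(query='q=test', offset=1, number_of_results=10)
    # example == 'http://www.faroo.com/api?q=test&start=1&length=10'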

searx/engines/google_images.py  (+1 -1)

@@ -9,7 +9,7 @@
 # @stable      yes (but deprecated)
 # @parse       url, title, img_src
 
-from urllib import urlencode,unquote
+from urllib import urlencode, unquote
 from json import loads
 
 # engine dependent config

searx/engines/kickass.py  (+4 -4)

@@ -1,8 +1,8 @@
 ## Kickass Torrent (Videos, Music, Files)
-# 
+#
 # @website     https://kickass.so
 # @provide-api no (nothing found)
-# 
+#
 # @using-api   no
 # @results     HTML (using search portal)
 # @stable      yes (HTML can change)
@@ -13,7 +13,6 @@
 from urllib import quote
 from lxml import html
 from operator import itemgetter
-from dateutil import parser
 
 # engine dependent config
 categories = ['videos', 'music', 'files']
@@ -33,7 +32,8 @@
     params['url'] = search_url.format(search_term=quote(query),
                                       pageno=params['pageno'])
 
-    # FIX: SSLError: hostname 'kickass.so' doesn't match either of '*.kickass.to', 'kickass.to'
+    # FIX: SSLError: hostname 'kickass.so'
+    # doesn't match either of '*.kickass.to', 'kickass.to'
     params['verify'] = False
 
     return params
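
As far as I can tell, params['verify'] is handed through to the requests library by searx's network layer, so setting it to False disables TLS certificate verification for this request and sidesteps the hostname mismatch. Roughly equivalent, as a hedged sketch with a placeholder URL:

    import requests

    # Illustrative only: verify=False skips certificate verification entirely,
    # so the '*.kickass.to' certificate served for kickass.so no longer raises
    # an SSLError. The URL below is a placeholder, not the engine's search_url.
    response = requests.get('https://kickass.so/', verify=False)

Note that this disables all certificate checks, not just the hostname match, so it trades security for availability.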

searx/engines/soundcloud.py  (+6 -1)

@@ -20,7 +20,12 @@
 
 # search-url
 url = 'https://api.soundcloud.com/'
-search_url = url + 'search?{query}&facet=model&limit=20&offset={offset}&linked_partitioning=1&client_id={client_id}'
+search_url = url + 'search?{query}'\
+                         '&facet=model'\
+                         '&limit=20'\
+                         '&offset={offset}'\
+                         '&linked_partitioning=1'\
+                         '&client_id={client_id}'   # noqa
 
 
 # do search-request

searx/engines/yahoo.py  (+5 -4)

@@ -20,7 +20,8 @@
 language_support = True
 
 # search-url
-search_url = 'https://search.yahoo.com/search?{query}&b={offset}&fl=1&vl=lang_{lang}'
+base_url = 'https://search.yahoo.com/'
+search_url = 'search?{query}&b={offset}&fl=1&vl=lang_{lang}'
 
 # specific xpath variables
 results_xpath = '//div[@class="res"]'
@@ -57,9 +58,9 @@
     else:
         language = params['language'].split('_')[0]
 
-    params['url'] = search_url.format(offset=offset,
-                                      query=urlencode({'p': query}),
-                                      lang=language)
+    params['url'] = base_url + search_url.format(offset=offset,
+                                                 query=urlencode({'p': query}),
+                                                 lang=language)
 
     # TODO required?
     params['cookies']['sB'] = 'fl=1&vl=lang_{lang}&sh=1&rw=new&v=1'\
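
With the URL split into base_url plus a relative template, the request URL is assembled by plain concatenation, so the formatted result should be identical to what the old single search_url produced. A sketch assuming Python 2's urllib (which this engine already imports) and illustrative query values:

    from urllib import urlencode  # Python 2 stdlib, as used by this engine

    base_url = 'https://search.yahoo.com/'
    search_url = 'search?{query}&b={offset}&fl=1&vl=lang_{lang}'

    # Illustrative values; the engine builds the same string inside request().
    full_url = base_url + search_url.format(query=urlencode({'p': 'test'}),
                                            offset=1,
                                            lang='en')
    # full_url == 'https://search.yahoo.com/search?p=test&b=1&fl=1&vl=lang_en'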