瀏覽代碼

add comments to google-engines

Thomas Pointhuber 10 年之前
父節點
當前提交
144f89bf78
共有 3 個文件被更改,包括 75 次插入和 15 次删除
  1. 24
    4
      searx/engines/google.py
  2. 26
    5
      searx/engines/google_images.py
  3. 25
    6
      searx/engines/google_news.py

+ 24
- 4
searx/engines/google.py 查看文件

@@ -1,37 +1,57 @@
## Google (Web)
#
# @website     https://www.google.com
# @provide-api yes (https://developers.google.com/web-search/docs/), deprecated!
#
# @using-api   yes
# @results     JSON
# @stable      yes (but deprecated)
# @parse       url, title, content

from urllib import urlencode
from json import loads

# engine dependent config
categories = ['general']
paging = True
language_support = True

# search-url
url = 'https://ajax.googleapis.com/'
search_url = url + 'ajax/services/search/web?v=2.0&start={offset}&rsz=large&safe=off&filter=off&{query}&hl={language}'  # noqa
 
24
# do search-request
def request(query, params):
    """Build the Google AJAX web-search URL and store it in params['url']."""
    # the AJAX API pages in steps of 8 results
    start = (params['pageno'] - 1) * 8

    # translate searx language codes (xx_YY) to Google's xx-YY form;
    # fall back to en-US when no specific language was requested
    lang = 'en-US' if params['language'] == 'all' else params['language'].replace('_', '-')

    params['url'] = search_url.format(offset=start,
                                      query=urlencode({'q': query}),
                                      language=lang)

    return params
24 37
 
25 38
 
39
+# get response from search-request
26 40
 def response(resp):
27 41
     results = []
42
+
28 43
     search_res = loads(resp.text)
29 44
 
45
+    # return empty array if there are no results
30 46
     if not search_res.get('responseData', {}).get('results'):
31 47
         return []
32 48
 
49
+    # parse results
33 50
     for result in search_res['responseData']['results']:
51
+        # append result
34 52
         results.append({'url': result['unescapedUrl'],
35 53
                         'title': result['titleNoFormatting'],
36 54
                         'content': result['content']})
55
+
56
+    # return results
37 57
     return results

+ 26
- 5
searx/engines/google_images.py 查看文件

@@ -1,37 +1,58 @@
## Google (Images)
#
# @website     https://www.google.com
# @provide-api yes (https://developers.google.com/web-search/docs/), deprecated!
#
# @using-api   yes
# @results     JSON
# @stable      yes (but deprecated)
# @parse       url, title, img_src

from urllib import urlencode
from json import loads

# engine dependent config
categories = ['images']
paging = True

# search-url
url = 'https://ajax.googleapis.com/'
search_url = url + 'ajax/services/search/images?v=1.0&start={offset}&rsz=large&safe=off&filter=off&{query}'  # noqa
 
23
# do search-request
def request(query, params):
    """Build the Google AJAX image-search URL and store it in params['url']."""
    # the AJAX image API pages in steps of 8 results
    start = (params['pageno'] - 1) * 8

    params['url'] = search_url.format(query=urlencode({'q': query}),
                                      offset=start)

    return params
18 31
 
19 32
 
33
# get response from search-request
def response(resp):
    """Parse the Google AJAX image-search JSON reply into image result dicts."""
    data = loads(resp.text)

    # return empty array if there are no results
    hits = data.get('responseData', {}).get('results')
    if not hits:
        return []

    results = []

    # parse results
    for hit in hits:
        page_url = hit['originalContextUrl']
        caption = hit['title']

        # entries without an image url cannot be displayed — skip them
        if not hit['url']:
            continue

        # append result
        results.append({'url': page_url,
                        'title': caption,
                        'content': '',
                        'img_src': hit['url'],
                        'template': 'images.html'})

    # return results
    return results

+ 25
- 6
searx/engines/google_news.py 查看文件

@@ -1,43 +1,62 @@
## Google (News)
#
# @website     https://www.google.com
# @provide-api yes (https://developers.google.com/web-search/docs/), deprecated!
#
# @using-api   yes
# @results     JSON
# @stable      yes (but deprecated)
# @parse       url, title, content, publishedDate

from urllib import urlencode
from json import loads
from dateutil import parser

# engine dependent config
# (comments were swapped here vs. google.py / google_images.py:
#  "# search-url" sat above the category config and vice versa — fixed
#  so all three google engines read consistently)
categories = ['news']
paging = True
language_support = True

# search-url
url = 'https://ajax.googleapis.com/'
search_url = url + 'ajax/services/search/news?v=2.0&start={offset}&rsz=large&safe=off&filter=off&{query}&hl={language}'  # noqa
15 24
 
25
# do search-request
def request(query, params):
    """Build the Google AJAX news-search URL and store it in params['url']."""
    # the AJAX news API pages in steps of 8 results
    start = (params['pageno'] - 1) * 8

    # translate searx language codes (xx_YY) to Google's xx-YY form;
    # fall back to en-US when no specific language was requested
    lang = 'en-US' if params['language'] == 'all' else params['language'].replace('_', '-')

    params['url'] = search_url.format(offset=start,
                                      query=urlencode({'q': query}),
                                      language=lang)

    return params
25 38
 
26 39
 
40
# get response from search-request
def response(resp):
    """Parse the Google AJAX news-search JSON reply into searx result dicts."""
    data = loads(resp.text)

    # return empty array if there are no results
    hits = data.get('responseData', {}).get('results')
    if not hits:
        return []

    results = []

    # parse results
    for hit in hits:
        # publishedDate arrives as an RFC-2822 style string,
        # e.g. "Mon, 10 Mar 2014 16:26:15 -0700"
        published = parser.parse(hit['publishedDate'])

        # append result
        results.append({'url': hit['unescapedUrl'],
                        'title': hit['titleNoFormatting'],
                        'publishedDate': published,
                        'content': hit['content']})

    # return results
    return results