Преглед изворни кода

add comments to google-engines

Thomas Pointhuber пре 10 година
родитељ
комит
144f89bf78
3 измењених фајлова са 75 додато и 15 уклоњено
  1. 24
    4
      searx/engines/google.py
  2. 26
    5
      searx/engines/google_images.py
  3. 25
    6
      searx/engines/google_news.py

+ 24
- 4
searx/engines/google.py Прегледај датотеку

1
-#!/usr/bin/env python
1
+## Google (Web)
2
+# 
3
+# @website     https://www.google.com
4
+# @provide-api yes (https://developers.google.com/web-search/docs/), deprecated!
5
+# 
6
+# @using-api   yes
7
+# @results     JSON
8
+# @stable      yes (but deprecated)
9
+# @parse       url, title, content
2
 
10
 
3
 from urllib import urlencode
11
 from urllib import urlencode
4
 from json import loads
12
 from json import loads
5
 
13
 
14
+# engine dependent config
6
 categories = ['general']
15
 categories = ['general']
16
+paging = True
17
+language_support = True
7
 
18
 
19
+# search-url
8
 url = 'https://ajax.googleapis.com/'
20
 url = 'https://ajax.googleapis.com/'
9
 search_url = url + 'ajax/services/search/web?v=2.0&start={offset}&rsz=large&safe=off&filter=off&{query}&hl={language}'  # noqa
21
 search_url = url + 'ajax/services/search/web?v=2.0&start={offset}&rsz=large&safe=off&filter=off&{query}&hl={language}'  # noqa
10
 
22
 
11
-paging = True
12
-language_support = True
13
-
14
 
23
 
24
+# do search-request
15
 def request(query, params):
25
 def request(query, params):
16
     offset = (params['pageno'] - 1) * 8
26
     offset = (params['pageno'] - 1) * 8
27
+
17
     language = 'en-US'
28
     language = 'en-US'
18
     if params['language'] != 'all':
29
     if params['language'] != 'all':
19
         language = params['language'].replace('_', '-')
30
         language = params['language'].replace('_', '-')
31
+
20
     params['url'] = search_url.format(offset=offset,
32
     params['url'] = search_url.format(offset=offset,
21
                                       query=urlencode({'q': query}),
33
                                       query=urlencode({'q': query}),
22
                                       language=language)
34
                                       language=language)
35
+
23
     return params
36
     return params
24
 
37
 
25
 
38
 
39
+# get response from search-request
26
 def response(resp):
40
 def response(resp):
27
     results = []
41
     results = []
42
+
28
     search_res = loads(resp.text)
43
     search_res = loads(resp.text)
29
 
44
 
45
+    # return empty array if there are no results
30
     if not search_res.get('responseData', {}).get('results'):
46
     if not search_res.get('responseData', {}).get('results'):
31
         return []
47
         return []
32
 
48
 
49
+    # parse results
33
     for result in search_res['responseData']['results']:
50
     for result in search_res['responseData']['results']:
51
+        # append result
34
         results.append({'url': result['unescapedUrl'],
52
         results.append({'url': result['unescapedUrl'],
35
                         'title': result['titleNoFormatting'],
53
                         'title': result['titleNoFormatting'],
36
                         'content': result['content']})
54
                         'content': result['content']})
55
+
56
+    # return results
37
     return results
57
     return results

+ 26
- 5
searx/engines/google_images.py Прегледај датотеку

1
-#!/usr/bin/env python
1
+## Google (Images)
2
+# 
3
+# @website     https://www.google.com
4
+# @provide-api yes (https://developers.google.com/web-search/docs/), deprecated!
5
+# 
6
+# @using-api   yes
7
+# @results     JSON
8
+# @stable      yes (but deprecated)
9
+# @parse       url, title, img_src
2
 
10
 
3
 from urllib import urlencode
11
 from urllib import urlencode
4
 from json import loads
12
 from json import loads
5
 
13
 
14
+# engine dependent config
6
 categories = ['images']
15
 categories = ['images']
16
+paging = True
7
 
17
 
18
+# search-url
8
 url = 'https://ajax.googleapis.com/'
19
 url = 'https://ajax.googleapis.com/'
9
 search_url = url + 'ajax/services/search/images?v=1.0&start={offset}&rsz=large&safe=off&filter=off&{query}'  # noqa
20
 search_url = url + 'ajax/services/search/images?v=1.0&start={offset}&rsz=large&safe=off&filter=off&{query}'  # noqa
10
 
21
 
11
-paging = True
12
 
22
 
23
+# do search-request
13
 def request(query, params):
24
 def request(query, params):
14
     offset = (params['pageno'] - 1) * 8
25
     offset = (params['pageno'] - 1) * 8
26
+
15
     params['url'] = search_url.format(query=urlencode({'q': query}),
27
     params['url'] = search_url.format(query=urlencode({'q': query}),
16
                                       offset=offset)
28
                                       offset=offset)
29
+
17
     return params
30
     return params
18
 
31
 
19
 
32
 
33
+# get response from search-request
20
 def response(resp):
34
 def response(resp):
21
     results = []
35
     results = []
36
+
22
     search_res = loads(resp.text)
37
     search_res = loads(resp.text)
23
-    if not search_res.get('responseData'):
24
-        return []
25
-    if not search_res['responseData'].get('results'):
38
+
39
+    # return empty array if there are no results
40
+    if not search_res.get('responseData', {}).get('results'):
26
         return []
41
         return []
42
+
43
+    # parse results
27
     for result in search_res['responseData']['results']:
44
     for result in search_res['responseData']['results']:
28
         href = result['originalContextUrl']
45
         href = result['originalContextUrl']
29
         title = result['title']
46
         title = result['title']
30
         if not result['url']:
47
         if not result['url']:
31
             continue
48
             continue
49
+
50
+        # append result
32
         results.append({'url': href,
51
         results.append({'url': href,
33
                         'title': title,
52
                         'title': title,
34
                         'content': '',
53
                         'content': '',
35
                         'img_src': result['url'],
54
                         'img_src': result['url'],
36
                         'template': 'images.html'})
55
                         'template': 'images.html'})
56
+
57
+    # return results
37
     return results
58
     return results

+ 25
- 6
searx/engines/google_news.py Прегледај датотеку

1
-#!/usr/bin/env python
1
+## Google (News)
2
+# 
3
+# @website     https://www.google.com
4
+# @provide-api yes (https://developers.google.com/web-search/docs/), deprecated!
5
+# 
6
+# @using-api   yes
7
+# @results     JSON
8
+# @stable      yes (but deprecated)
9
+# @parse       url, title, content, publishedDate
2
 
10
 
3
 from urllib import urlencode
11
 from urllib import urlencode
4
 from json import loads
12
 from json import loads
5
 from dateutil import parser
13
 from dateutil import parser
6
 
14
 
15
+# engine dependent config
7
 categories = ['news']
16
 categories = ['news']
17
+paging = True
18
+language_support = True
8
 
19
 
20
+# search-url
9
 url = 'https://ajax.googleapis.com/'
21
 url = 'https://ajax.googleapis.com/'
10
 search_url = url + 'ajax/services/search/news?v=2.0&start={offset}&rsz=large&safe=off&filter=off&{query}&hl={language}'  # noqa
22
 search_url = url + 'ajax/services/search/news?v=2.0&start={offset}&rsz=large&safe=off&filter=off&{query}&hl={language}'  # noqa
11
 
23
 
12
-paging = True
13
-language_support = True
14
-
15
 
24
 
25
+# do search-request
16
 def request(query, params):
26
 def request(query, params):
17
     offset = (params['pageno'] - 1) * 8
27
     offset = (params['pageno'] - 1) * 8
28
+
18
     language = 'en-US'
29
     language = 'en-US'
19
     if params['language'] != 'all':
30
     if params['language'] != 'all':
20
         language = params['language'].replace('_', '-')
31
         language = params['language'].replace('_', '-')
32
+
21
     params['url'] = search_url.format(offset=offset,
33
     params['url'] = search_url.format(offset=offset,
22
                                       query=urlencode({'q': query}),
34
                                       query=urlencode({'q': query}),
23
                                       language=language)
35
                                       language=language)
36
+
24
     return params
37
     return params
25
 
38
 
26
 
39
 
40
+# get response from search-request
27
 def response(resp):
41
 def response(resp):
28
     results = []
42
     results = []
43
+
29
     search_res = loads(resp.text)
44
     search_res = loads(resp.text)
30
 
45
 
46
+    # return empty array if there are no results
31
     if not search_res.get('responseData', {}).get('results'):
47
     if not search_res.get('responseData', {}).get('results'):
32
         return []
48
         return []
33
 
49
 
50
+    # parse results
34
     for result in search_res['responseData']['results']:
51
     for result in search_res['responseData']['results']:
35
-
36
-# Mon, 10 Mar 2014 16:26:15 -0700
52
+        # parse publishedDate
37
         publishedDate = parser.parse(result['publishedDate'])
53
         publishedDate = parser.parse(result['publishedDate'])
38
 
54
 
55
+        # append result
39
         results.append({'url': result['unescapedUrl'],
56
         results.append({'url': result['unescapedUrl'],
40
                         'title': result['titleNoFormatting'],
57
                         'title': result['titleNoFormatting'],
41
                         'publishedDate': publishedDate,
58
                         'publishedDate': publishedDate,
42
                         'content': result['content']})
59
                         'content': result['content']})
60
+
61
+    # return results
43
     return results
62
     return results