浏览代码

Rework Flickr Engine

Everything was redone to use the API. It needs an API key, but it's worth it.
Everything works: title, image, content, and URL.
The API allows lots of things. Thumbnails and dates will be easy to add once they are supported in Searx.

Fix asciimoo/searx#126
Cqoicebordel 10 年前
父节点
当前提交
cfdcbdd9b5
共有 2 个文件被更改,包括 61 次插入31 次删除
  1. 55
    26
      searx/engines/flickr.py
  2. 6
    5
      searx/settings.yml

+ 55
- 26
searx/engines/flickr.py 查看文件

1
 #!/usr/bin/env python
1
 #!/usr/bin/env python
2
 
2
 
3
+## Flickr (Images)
4
+# 
5
+# @website     https://www.flickr.com
6
+# @provide-api yes (https://secure.flickr.com/services/api/flickr.photos.search.html) 
7
+# 
8
+# @using-api   yes
9
+# @results     JSON
10
+# @stable      yes
11
+# @parse       url, title, thumbnail, img_src
12
+#More info on api-key : https://www.flickr.com/services/apps/create/
13
+
3
 from urllib import urlencode
14
 from urllib import urlencode
4
-#from json import loads
15
+from json import loads
5
 from urlparse import urljoin
16
 from urlparse import urljoin
6
 from lxml import html
17
 from lxml import html
7
 from time import time
18
 from time import time
8
 
19
 
9
 categories = ['images']
20
 categories = ['images']
10
 
21
 
11
-url = 'https://secure.flickr.com/'
12
-search_url = url+'search/?{query}&page={page}'
13
-results_xpath = '//div[@class="view display-item-tile"]/figure/div'
22
+nb_per_page = 15
23
+paging = True
24
+api_key= None
25
+
26
+
27
+url = 'https://api.flickr.com/services/rest/?method=flickr.photos.search&api_key={api_key}&{text}&sort=relevance&extras=description%2C+owner_name%2C+url_o%2C+url_z&per_page={nb_per_page}&format=json&nojsoncallback=1&page={page}'
28
+photo_url = 'https://www.flickr.com/photos/{userid}/{photoid}'
14
 
29
 
15
 paging = True
30
 paging = True
16
 
31
 
32
+def build_flickr_url(user_id, photo_id):
33
+    return photo_url.format(userid=user_id,photoid=photo_id)
34
+
17
 
35
 
18
 def request(query, params):
36
 def request(query, params):
19
-    params['url'] = search_url.format(query=urlencode({'text': query}),
20
-                                      page=params['pageno'])
21
-    time_string = str(int(time())-3)
22
-    params['cookies']['BX'] = '3oqjr6d9nmpgl&b=3&s=dh'
23
-    params['cookies']['xb'] = '421409'
24
-    params['cookies']['localization'] = 'en-us'
25
-    params['cookies']['flrbp'] = time_string +\
26
-        '-3a8cdb85a427a33efda421fbda347b2eaf765a54'
27
-    params['cookies']['flrbs'] = time_string +\
28
-        '-ed142ae8765ee62c9ec92a9513665e0ee1ba6776'
29
-    params['cookies']['flrb'] = '9'
37
+    params['url'] = url.format(text=urlencode({'text': query}),
38
+                               api_key=api_key,
39
+                               nb_per_page=nb_per_page,
40
+                               page=params['pageno'])
30
     return params
41
     return params
31
 
42
 
32
 
43
 
33
 def response(resp):
44
 def response(resp):
34
     results = []
45
     results = []
35
-    dom = html.fromstring(resp.text)
36
-    for result in dom.xpath(results_xpath):
37
-        img = result.xpath('.//img')
46
+    
47
+    search_results = loads(resp.text)
38
 
48
 
39
-        if not img:
40
-            continue
49
+    # return empty array if there are no results
50
+    if not 'photos' in search_results:
51
+        return []
52
+
53
+    if not 'photo' in search_results['photos']:
54
+        return []
41
 
55
 
42
-        img = img[0]
43
-        img_src = 'https:'+img.attrib.get('src')
56
+    photos = search_results['photos']['photo']
44
 
57
 
45
-        if not img_src:
58
+    # parse results
59
+    for photo in photos:
60
+        if 'url_o' in photo:
61
+            img_src = photo['url_o']
62
+        elif 'url_z' in photo:
63
+            img_src = photo['url_z']
64
+        else:
46
             continue
65
             continue
47
 
66
 
48
-        href = urljoin(url, result.xpath('.//a')[0].attrib.get('href'))
49
-        title = img.attrib.get('alt', '')
50
-        results.append({'url': href,
67
+        url = build_flickr_url(photo['owner'], photo['id'])
68
+
69
+        title = photo['title']
70
+        
71
+        content = '<span class="photo-author">'+ photo['ownername'] +'</span><br />'
72
+        
73
+        content = content + ' <span class="description">' + photo['description']['_content'] + '</span>'
74
+        
75
+        # append result
76
+        results.append({'url': url,
51
                         'title': title,
77
                         'title': title,
52
                         'img_src': img_src,
78
                         'img_src': img_src,
79
+                        'content': content,
53
                         'template': 'images.html'})
80
                         'template': 'images.html'})
81
+
82
+    # return results
54
     return results
83
     return results

+ 6
- 5
searx/settings.yml 查看文件

65
 #    categories : files
65
 #    categories : files
66
 #    shortcut : fc
66
 #    shortcut : fc
67
 
67
 
68
-  - name : flickr
69
-    engine : flickr
70
-    categories : images
71
-    shortcut : fl
72
-    timeout: 3.0
68
+# api-key required: https://www.flickr.com/services/apps/create/
69
+#  - name : flickr
70
+#    engine : flickr
71
+#    categories : images
72
+#    shortcut : fl
73
+#    api_key: 'apikey' # required!
73
 
74
 
74
   - name : general-file
75
   - name : general-file
75
     engine : generalfile
76
     engine : generalfile