Ver código fonte

Rework Flickr Engine

Everything was redone to use the API. It needs an API key, but it's worth it.
Everything works: title, image, content, and URL.
The API allows lots of things. Thumbnails and dates will be easy to add once support for them is implemented in Searx.

Fix asciimoo/searx#126
Cqoicebordel 10 anos atrás
pai
commit
0059d08f13
2 arquivos alterados com 61 adições e 31 exclusões
  1. 55
    26
      searx/engines/flickr.py
  2. 6
    5
      searx/settings.yml

+ 55
- 26
searx/engines/flickr.py Ver arquivo

@@ -1,54 +1,83 @@
1 1
 #!/usr/bin/env python
2 2
 
3
+## Flickr (Images)
4
+# 
5
+# @website     https://www.flickr.com
6
+# @provide-api yes (https://secure.flickr.com/services/api/flickr.photos.search.html) 
7
+# 
8
+# @using-api   yes
9
+# @results     JSON
10
+# @stable      yes
11
+# @parse       url, title, content, img_src
12
+#More info on api-key : https://www.flickr.com/services/apps/create/
13
+
3 14
 from urllib import urlencode
4
-#from json import loads
15
+from json import loads
5 16
 from urlparse import urljoin
6 17
 from lxml import html
7 18
 from time import time
8 19
 
9 20
 categories = ['images']
10 21
 
11
-url = 'https://secure.flickr.com/'
12
-search_url = url+'search/?{query}&page={page}'
13
-results_xpath = '//div[@class="view display-item-tile"]/figure/div'
22
+nb_per_page = 15
23
+paging = True
24
+api_key= None
25
+
26
+
27
+url = 'https://api.flickr.com/services/rest/?method=flickr.photos.search&api_key={api_key}&{text}&sort=relevance&extras=description%2C+owner_name%2C+url_o%2C+url_z&per_page={nb_per_page}&format=json&nojsoncallback=1&page={page}'
28
+photo_url = 'https://www.flickr.com/photos/{userid}/{photoid}'
14 29
 
15 30
 paging = True
16 31
 
32
+def build_flickr_url(user_id, photo_id):
33
+    return photo_url.format(userid=user_id,photoid=photo_id)
34
+
17 35
 
18 36
 def request(query, params):
19
-    params['url'] = search_url.format(query=urlencode({'text': query}),
20
-                                      page=params['pageno'])
21
-    time_string = str(int(time())-3)
22
-    params['cookies']['BX'] = '3oqjr6d9nmpgl&b=3&s=dh'
23
-    params['cookies']['xb'] = '421409'
24
-    params['cookies']['localization'] = 'en-us'
25
-    params['cookies']['flrbp'] = time_string +\
26
-        '-3a8cdb85a427a33efda421fbda347b2eaf765a54'
27
-    params['cookies']['flrbs'] = time_string +\
28
-        '-ed142ae8765ee62c9ec92a9513665e0ee1ba6776'
29
-    params['cookies']['flrb'] = '9'
37
+    params['url'] = url.format(text=urlencode({'text': query}),
38
+                               api_key=api_key,
39
+                               nb_per_page=nb_per_page,
40
+                               page=params['pageno'])
30 41
     return params
31 42
 
32 43
 
33 44
 def response(resp):
34 45
     results = []
35
-    dom = html.fromstring(resp.text)
36
-    for result in dom.xpath(results_xpath):
37
-        img = result.xpath('.//img')
46
+    
47
+    search_results = loads(resp.text)
38 48
 
39
-        if not img:
40
-            continue
49
+    # return empty array if there are no results
50
+    if not 'photos' in search_results:
51
+        return []
52
+
53
+    if not 'photo' in search_results['photos']:
54
+        return []
41 55
 
42
-        img = img[0]
43
-        img_src = 'https:'+img.attrib.get('src')
56
+    photos = search_results['photos']['photo']
44 57
 
45
-        if not img_src:
58
+    # parse results
59
+    for photo in photos:
60
+        if 'url_o' in photo:
61
+            img_src = photo['url_o']
62
+        elif 'url_z' in photo:
63
+            img_src = photo['url_z']
64
+        else:
46 65
             continue
47 66
 
48
-        href = urljoin(url, result.xpath('.//a')[0].attrib.get('href'))
49
-        title = img.attrib.get('alt', '')
50
-        results.append({'url': href,
67
+        url = build_flickr_url(photo['owner'], photo['id'])
68
+
69
+        title = photo['title']
70
+        
71
+        content = '<span class="photo-author">'+ photo['ownername'] +'</span><br />'
72
+        
73
+        content = content + ' <span class="description">' + photo['description']['_content'] + '</span>'
74
+        
75
+        # append result
76
+        results.append({'url': url,
51 77
                         'title': title,
52 78
                         'img_src': img_src,
79
+                        'content': content,
53 80
                         'template': 'images.html'})
81
+
82
+    # return results
54 83
     return results

+ 6
- 5
searx/settings.yml Ver arquivo

@@ -65,11 +65,12 @@ engines:
65 65
 #    categories : files
66 66
 #    shortcut : fc
67 67
 
68
-  - name : flickr
69
-    engine : flickr
70
-    categories : images
71
-    shortcut : fl
72
-    timeout: 3.0
68
+# api-key required: https://www.flickr.com/services/apps/create/
69
+#  - name : flickr
70
+#    engine : flickr
71
+#    categories : images
72
+#    shortcut : fl
73
+#    api_key: 'apikey' # required!
73 74
 
74 75
   - name : general-file
75 76
     engine : generalfile