Ver código fonte

[fix] vimeo engine change follow-up

Adam Tauber 8 anos atrás
pai
commit
f6e9c074bb
2 arquivos alterados com 19 adições e 92 exclusões
  1. 12
    20
      searx/engines/vimeo.py
  2. 7
    72
      tests/unit/engines/test_vimeo.py

+ 12
- 20
searx/engines/vimeo.py Ver arquivo

12
 # @todo        rewrite to api
12
 # @todo        rewrite to api
13
 # @todo        set content-parameter with correct data
13
 # @todo        set content-parameter with correct data
14
 
14
 
15
+from json import loads
15
 from urllib import urlencode
16
 from urllib import urlencode
16
-from lxml import html
17
-from HTMLParser import HTMLParser
18
-from searx.engines.xpath import extract_text
19
 from dateutil import parser
17
 from dateutil import parser
20
 
18
 
21
 # engine dependent config
19
 # engine dependent config
23
 paging = True
21
 paging = True
24
 
22
 
25
 # search-url
23
 # search-url
26
-base_url = 'https://vimeo.com'
24
+base_url = 'https://vimeo.com/'
27
 search_url = base_url + '/search/page:{pageno}?{query}'
25
 search_url = base_url + '/search/page:{pageno}?{query}'
28
 
26
 
29
-# specific xpath variables
30
-results_xpath = '//div[contains(@class,"results_grid")]/ul/li'
31
-url_xpath = './/a/@href'
32
-title_xpath = './/span[@class="title"]'
33
-thumbnail_xpath = './/img[@class="js-clip_thumbnail_image"]/@src'
34
-publishedDate_xpath = './/time/attribute::datetime'
35
-
36
-embedded_url = '<iframe data-src="//player.vimeo.com/video{videoid}" ' +\
27
+embedded_url = '<iframe data-src="//player.vimeo.com/video/{videoid}" ' +\
37
     'width="540" height="304" frameborder="0" ' +\
28
     'width="540" height="304" frameborder="0" ' +\
38
     'webkitallowfullscreen mozallowfullscreen allowfullscreen></iframe>'
29
     'webkitallowfullscreen mozallowfullscreen allowfullscreen></iframe>'
39
 
30
 
49
 # get response from search-request
40
 # get response from search-request
50
 def response(resp):
41
 def response(resp):
51
     results = []
42
     results = []
52
-
53
-    dom = html.fromstring(resp.text)
54
-    p = HTMLParser()
43
+    data_start_pos = resp.text.find('{"filtered"')
44
+    data_end_pos = resp.text.find(';\n', data_start_pos + 1)
45
+    data = loads(resp.text[data_start_pos:data_end_pos])
55
 
46
 
56
     # parse results
47
     # parse results
57
-    for result in dom.xpath(results_xpath):
58
-        videoid = result.xpath(url_xpath)[0]
48
+    for result in data['filtered']['data']:
49
+        result = result[result['type']]
50
+        videoid = result['uri'].split('/')[-1]
59
         url = base_url + videoid
51
         url = base_url + videoid
60
-        title = p.unescape(extract_text(result.xpath(title_xpath)))
61
-        thumbnail = extract_text(result.xpath(thumbnail_xpath)[0])
62
-        publishedDate = parser.parse(extract_text(result.xpath(publishedDate_xpath)[0]))
52
+        title = result['name']
53
+        thumbnail = result['pictures']['sizes'][-1]['link']
54
+        publishedDate = parser.parse(result['created_time'])
63
         embedded = embedded_url.format(videoid=videoid)
55
         embedded = embedded_url.format(videoid=videoid)
64
 
56
 
65
         # append result
57
         # append result

+ 7
- 72
tests/unit/engines/test_vimeo.py Ver arquivo

22
         self.assertRaises(AttributeError, vimeo.response, '')
22
         self.assertRaises(AttributeError, vimeo.response, '')
23
         self.assertRaises(AttributeError, vimeo.response, '[]')
23
         self.assertRaises(AttributeError, vimeo.response, '[]')
24
 
24
 
25
-        response = mock.Mock(text='<html></html>')
26
-        self.assertEqual(vimeo.response(response), [])
25
+        json = u"""
26
+{"filtered":{"total":274641,"page":1,"per_page":18,"paging":{"next":"?sizes=590x332&page=2","previous":null,"first":"?sizes=590x332&page=1","last":"?sizes=590x332&page=15258"},"data":[{"is_staffpick":false,"is_featured":true,"type":"clip","clip":{"uri":"\\/videos\\/106557563","name":"Hot Rod Revue: The South","link":"https:\\/\\/vimeo.com\\/106557563","duration":4069,"created_time":"2014-09-19T03:38:04+00:00","privacy":{"view":"ptv"},"pictures":{"sizes":[{"width":"590","height":"332","link":"https:\\/\\/i.vimeocdn.com\\/video\\/489717884_590x332.jpg?r=pad","link_with_play_button":"https:\\/\\/i.vimeocdn.com\\/filter\\/overlay?src0=https%3A%2F%2Fi.vimeocdn.com%2Fvideo%2F489717884_590x332.jpg&src1=http%3A%2F%2Ff.vimeocdn.com%2Fp%2Fimages%2Fcrawler_play.png"}]},"stats":{"plays":null},"metadata":{"connections":{"comments":{"total":0},"likes":{"total":5}},"interactions":[]},"user":{"name":"Cal Thorley","link":"https:\\/\\/vimeo.com\\/calthorley","pictures":{"sizes":[{"width":30,"height":30,"link":"https:\\/\\/i.vimeocdn.com\\/portrait\\/2545308_30x30?r=pad"},{"width":75,"height":75,"link":"https:\\/\\/i.vimeocdn.com\\/portrait\\/2545308_75x75?r=pad"},{"width":100,"height":100,"link":"https:\\/\\/i.vimeocdn.com\\/portrait\\/2545308_100x100?r=pad"},{"width":300,"height":300,"link":"https:\\/\\/i.vimeocdn.com\\/portrait\\/2545308_300x300?r=pad"}]}}}}]}};
27
 
27
 
28
-        html = """
29
-        <div id="browse_content" class="results_grid" data-search-id="696d5f8366914ec4ffec33cf7652de384976d4f4">
30
-            <ul class="js-browse_list clearfix browse browse_videos browse_videos_thumbnails kane"
31
-                data-stream="c2VhcmNoOjo6ZGVzYzp7InF1ZXJ5IjoidGVzdCJ9">
32
-                <li data-position="7" data-result-id="clip_79600943">
33
-                    <div class="clip_thumbnail">
34
-                        <a href="/videoid" class="js-result_url">
35
-                            <div class="thumbnail_wrapper">
36
-                                <img src="http://image.url.webp" class="js-clip_thumbnail_image">
37
-                                <div class="overlay overlay_clip_meta">
38
-                                    <div class="meta_data_footer">
39
-                                        <span class="clip_upload_date">
40
-                                            <time datetime="2013-11-17T08:49:09-05:00"
41
-                                                title="dimanche 17 novembre 2013 08:49">Il y a 1 an</time>
42
-                                        </span>
43
-                                        <span class="clip_likes">
44
-                                            <img src="https://f.vimeocdn.com/images_v6/svg/heart-icon.svg">2 215
45
-                                        </span>
46
-                                        <span class="clip_comments">
47
-                                            <img src="https://f.vimeocdn.com/images_v6/svg/comment-icon.svg">75
48
-                                        </span>
49
-                                        <span class="overlay meta_data_footer clip_duration">01:12</span>
50
-                                    </div>
51
-                                </div>
52
-                            </div>
53
-                            <span class="title">This is the title</span>
54
-                        </a>
55
-                    </div>
56
-                    <div class="clip_thumbnail_attribution">
57
-                        <a href="/fedorshmidt">
58
-                            <img src="https://i.vimeocdn.com/portrait/6628061_100x100.jpg" class="avatar">
59
-                            <span class="display_name">Fedor Shmidt</span>
60
-                        </a>
61
-                        <span class="plays">2,1M lectures</span>
62
-                    </div>
63
-                </li>
64
-            </ul>
65
-        </div>
66
-        """
67
-        response = mock.Mock(text=html)
28
+"""  # noqa
29
+        response = mock.Mock(text=json)
68
         results = vimeo.response(response)
30
         results = vimeo.response(response)
69
         self.assertEqual(type(results), list)
31
         self.assertEqual(type(results), list)
70
         self.assertEqual(len(results), 1)
32
         self.assertEqual(len(results), 1)
71
-        self.assertEqual(results[0]['title'], 'This is the title')
72
-        self.assertEqual(results[0]['url'], 'https://vimeo.com/videoid')
33
+        self.assertEqual(results[0]['title'], u'Hot Rod Revue: The South')
34
+        self.assertEqual(results[0]['url'], 'https://vimeo.com/106557563')
73
         self.assertEqual(results[0]['content'], '')
35
         self.assertEqual(results[0]['content'], '')
74
-        self.assertEqual(results[0]['thumbnail'], 'http://image.url.webp')
75
-        self.assertIn('/videoid', results[0]['embedded'])
76
-
77
-        html = """
78
-        <ol class="js-browse_list clearfix browse browse_videos browse_videos_thumbnails kane"
79
-            data-stream="c2VhcmNoOjo6ZGVzYzp7InF1ZXJ5IjoidGVzdCJ9">
80
-            <li id="clip_100785455" data-start-page="/search/page:1/sort:relevant/" data-position="1">
81
-                <a href="/videoid" title="Futurama 3d (test shot)">
82
-                    <img src="http://image.url.webp"
83
-                        srcset="http://i.vimeocdn.com/video/482375085_590x332.webp 2x" alt=""
84
-                        class="thumbnail thumbnail_lg_wide">
85
-                    <div class="data">
86
-                        <p class="title">
87
-                            This is the title
88
-                        </p>
89
-                        <p class="meta">
90
-                            <time datetime="2014-07-15T04:16:27-04:00"
91
-                                title="mardi 15 juillet 2014 04:16">Il y a 6 mois</time>
92
-                        </p>
93
-                    </div>
94
-                </a>
95
-            </li>
96
-        </ol>
97
-        """
98
-        response = mock.Mock(text=html)
99
-        results = vimeo.response(response)
100
-        self.assertEqual(type(results), list)
101
-        self.assertEqual(len(results), 0)
36
+        self.assertEqual(results[0]['thumbnail'], 'https://i.vimeocdn.com/video/489717884_590x332.jpg?r=pad')