浏览代码

Adds two engines: YouTube with or without the API

The API engine needs an API key.
The no-API engine does not provide published dates.
Cqoicebordel 10 年前
父节点
当前提交
f965c97822

+ 83
- 0
searx/engines/youtube_api.py 查看文件

@@ -0,0 +1,83 @@
1
+# Youtube (Videos)
2
+#
3
+# @website     https://www.youtube.com/
4
+# @provide-api yes (https://developers.google.com/apis-explorer/#p/youtube/v3/youtube.search.list)
5
+#
6
+# @using-api   yes
7
+# @results     JSON
8
+# @stable      yes
9
+# @parse       url, title, content, publishedDate, thumbnail, embedded
10
+
11
+from json import loads
12
+from urllib import urlencode
13
+from dateutil import parser
14
+
15
+# engine dependent config
16
+categories = ['videos', 'music']
17
+paging = False
18
+language_support = True
19
+api_key = None
20
+
21
+# search-url
22
+base_url = 'https://www.googleapis.com/youtube/v3/search'
23
+search_url = base_url + '?part=snippet&{query}&maxResults=20&key={api_key}'
24
+
25
+embedded_url = '<iframe width="540" height="304" ' +\
26
+    'data-src="//www.youtube-nocookie.com/embed/{videoid}" ' +\
27
+    'frameborder="0" allowfullscreen></iframe>'
28
+
29
+base_youtube_url = 'https://www.youtube.com/watch?v='
30
+
31
+
32
+# do search-request
33
def request(query, params):
    """Fill ``params['url']`` with the YouTube Data API search URL.

    The query is URL-encoded as the ``q`` parameter and the module-level
    ``api_key`` is inserted into ``search_url``.  Unless the requested
    language is ``'all'``, the part of ``params['language']`` before the
    first underscore is appended as ``relevanceLanguage``.

    Returns the same ``params`` mapping that was passed in.
    """
    encoded_query = urlencode({'q': query})
    params['url'] = search_url.format(query=encoded_query, api_key=api_key)

    language = params['language']
    if language == 'all':
        # no language restriction requested
        return params

    # a locale such as 'fr_FR' is reduced to its language part, 'fr'
    params['url'] += '&relevanceLanguage=' + language.split('_')[0]
    return params
42
+
43
+
44
+# get response from search-request
45
def response(resp):
    """Convert a YouTube Data API search response into searx results.

    ``resp.text`` is parsed as JSON.  When the payload has no ``items``
    key an empty list is returned.  Every item that identifies a video
    becomes one result dict rendered with the ``videos.html`` template
    and carrying ``url``, ``title``, ``content``, ``publishedDate``,
    ``embedded`` and ``thumbnail``.

    Items without ``id.videoId`` are skipped instead of raising
    ``KeyError``: the search endpoint can also return channels and
    playlists, which are identified by other id fields.
    """
    search_results = loads(resp.text)

    # return empty array if there are no results
    if 'items' not in search_results:
        return []

    results = []

    # parse results
    for result in search_results['items']:
        # only video items can be linked to and embedded
        videoid = result.get('id', {}).get('videoId')
        if not videoid:
            continue

        snippet = result['snippet']

        # append result
        results.append({'url': base_youtube_url + videoid,
                        'title': snippet['title'],
                        'content': snippet['description'],
                        'template': 'videos.html',
                        'publishedDate': parser.parse(snippet['publishedAt']),
                        'embedded': embedded_url.format(videoid=videoid),
                        'thumbnail': snippet['thumbnails']['high']['url']})

    # return results
    return results

+ 72
- 0
searx/engines/youtube_noapi.py 查看文件

@@ -0,0 +1,72 @@
1
+# Youtube (Videos)
2
+#
3
+# @website     https://www.youtube.com/
4
+# @provide-api yes (https://developers.google.com/apis-explorer/#p/youtube/v3/youtube.search.list)
5
+#
6
+# @using-api   no
7
+# @results     HTML
8
+# @stable      no
9
+# @parse       url, title, content, thumbnail, embedded
10
+
11
+from urllib import quote_plus
12
+from lxml import html
13
+from searx.engines.xpath import extract_text
14
+
15
+# engine dependent config
16
+categories = ['videos', 'music']
17
+paging = True
18
+language_support = False
19
+
20
+# search-url
21
+base_url = 'https://www.youtube.com/results'
22
+search_url = base_url + '?search_query={query}&page={page}'
23
+
24
+embedded_url = '<iframe width="540" height="304" ' +\
25
+    'data-src="//www.youtube-nocookie.com/embed/{videoid}" ' +\
26
+    'frameborder="0" allowfullscreen></iframe>'
27
+
28
+base_youtube_url = 'https://www.youtube.com/watch?v='
29
+
30
+# specific xpath variables
31
+results_xpath = "//ol/li/div[contains(@class, 'yt-lockup yt-lockup-tile yt-lockup-video vve-check')]"
32
+url_xpath = './/h3/a/@href'
33
+title_xpath = './/div[@class="yt-lockup-content"]/h3/a'
34
+content_xpath = './/div[@class="yt-lockup-content"]/div[@class="yt-lockup-description yt-ui-ellipsis yt-ui-ellipsis-2"]'
35
+
36
+
37
+# do search-request
38
def request(query, params):
    """Fill ``params['url']`` with the YouTube HTML results-page URL.

    The query is escaped with ``quote_plus`` and ``params['pageno']`` is
    used as the page number.  Returns the same ``params`` mapping.
    """
    escaped_query = quote_plus(query)
    page_number = params['pageno']

    params['url'] = search_url.format(query=escaped_query, page=page_number)
    return params
43
+
44
+
45
+# get response from search-request
46
def response(resp):
    """Parse a YouTube HTML results page into searx video results.

    ``resp.text`` is parsed with lxml and every node matched by
    ``results_xpath`` is turned into a result dict rendered with the
    ``videos.html`` template and carrying ``url``, ``title``,
    ``content``, ``embedded`` and ``thumbnail``.

    A tile with no description node gets an empty ``content``.  A tile
    with no video id or no title node is skipped.  Previously each of
    these cases raised ``IndexError`` and discarded the whole page.
    """
    results = []

    dom = html.fromstring(resp.text)

    # parse results
    for result in dom.xpath(results_xpath):
        video_ids = result.xpath('@data-context-item-id')
        title_nodes = result.xpath(title_xpath)

        # without an id or a title there is nothing usable to show
        if not video_ids or not title_nodes:
            continue

        videoid = video_ids[0]

        url = base_youtube_url + videoid
        thumbnail = 'https://i.ytimg.com/vi/' + videoid + '/hqdefault.jpg'

        title = extract_text(title_nodes[0])

        # tolerate tiles that lack a description node
        content_nodes = result.xpath(content_xpath)
        content = extract_text(content_nodes[0]) if content_nodes else ''

        embedded = embedded_url.format(videoid=videoid)

        # append result
        results.append({'url': url,
                        'title': title,
                        'content': content,
                        'template': 'videos.html',
                        'embedded': embedded,
                        'thumbnail': thumbnail})

    # return results
    return results

+ 6
- 1
searx/settings.yml 查看文件

@@ -242,8 +242,13 @@ engines:
242 242
     shortcut : yhn
243 243
 
244 244
   - name : youtube
245
-    engine : youtube
246 245
     shortcut : yt
246
+    # You can use the engine using the official stable API, but you need an API key
247
+    # See : https://console.developers.google.com/project
248
+    #    engine : youtube_api
249
+    #    api_key: 'apikey' # required!
250
+    # Or you can use the html non-stable engine, activated by default
251
+    engine : youtube_noapi
247 252
 
248 253
   - name : dailymotion
249 254
     engine : dailymotion

+ 111
- 0
searx/tests/engines/test_youtube_api.py 查看文件

@@ -0,0 +1,111 @@
1
+from collections import defaultdict
2
+import mock
3
+from searx.engines import youtube_api
4
+from searx.testing import SearxTestCase
5
+
6
+
7
+class TestYoutubeAPIEngine(SearxTestCase):
8
+
9
+    def test_request(self):
10
+        query = 'test_query'
11
+        dicto = defaultdict(dict)
12
+        dicto['pageno'] = 0
13
+        dicto['language'] = 'fr_FR'
14
+        params = youtube_api.request(query, dicto)
15
+        self.assertTrue('url' in params)
16
+        self.assertTrue(query in params['url'])
17
+        self.assertIn('googleapis.com', params['url'])
18
+        self.assertIn('youtube', params['url'])
19
+        self.assertIn('fr', params['url'])
20
+
21
+        dicto['language'] = 'all'
22
+        params = youtube_api.request(query, dicto)
23
+        self.assertFalse('fr' in params['url'])
24
+
25
+    def test_response(self):
26
+        self.assertRaises(AttributeError, youtube_api.response, None)
27
+        self.assertRaises(AttributeError, youtube_api.response, [])
28
+        self.assertRaises(AttributeError, youtube_api.response, '')
29
+        self.assertRaises(AttributeError, youtube_api.response, '[]')
30
+
31
+        response = mock.Mock(text='{}')
32
+        self.assertEqual(youtube_api.response(response), [])
33
+
34
+        response = mock.Mock(text='{"data": []}')
35
+        self.assertEqual(youtube_api.response(response), [])
36
+
37
+        json = """
38
+        {
39
+         "kind": "youtube#searchListResponse",
40
+         "etag": "xmg9xJZuZD438sF4hb-VcBBREXc/YJQDcTBCDcaBvl-sRZJoXdvy1ME",
41
+         "nextPageToken": "CAUQAA",
42
+         "pageInfo": {
43
+          "totalResults": 1000000,
44
+          "resultsPerPage": 20
45
+         },
46
+         "items": [
47
+          {
48
+           "kind": "youtube#searchResult",
49
+           "etag": "xmg9xJZuZD438sF4hb-VcBBREXc/IbLO64BMhbHIgWLwLw7MDYe7Hs4",
50
+           "id": {
51
+            "kind": "youtube#video",
52
+            "videoId": "DIVZCPfAOeM"
53
+           },
54
+           "snippet": {
55
+            "publishedAt": "2015-05-29T22:41:04.000Z",
56
+            "channelId": "UCNodmx1ERIjKqvcJLtdzH5Q",
57
+            "title": "Title",
58
+            "description": "Description",
59
+            "thumbnails": {
60
+             "default": {
61
+              "url": "https://i.ytimg.com/vi/DIVZCPfAOeM/default.jpg"
62
+             },
63
+             "medium": {
64
+              "url": "https://i.ytimg.com/vi/DIVZCPfAOeM/mqdefault.jpg"
65
+             },
66
+             "high": {
67
+              "url": "https://i.ytimg.com/vi/DIVZCPfAOeM/hqdefault.jpg"
68
+             }
69
+            },
70
+            "channelTitle": "MinecraftUniverse",
71
+            "liveBroadcastContent": "none"
72
+           }
73
+          }
74
+          ]
75
+        }
76
+        """
77
+        response = mock.Mock(text=json)
78
+        results = youtube_api.response(response)
79
+        self.assertEqual(type(results), list)
80
+        self.assertEqual(len(results), 1)
81
+        self.assertEqual(results[0]['title'], 'Title')
82
+        self.assertEqual(results[0]['url'], 'https://www.youtube.com/watch?v=DIVZCPfAOeM')
83
+        self.assertEqual(results[0]['content'], 'Description')
84
+        self.assertEqual(results[0]['thumbnail'], 'https://i.ytimg.com/vi/DIVZCPfAOeM/hqdefault.jpg')
85
+        self.assertTrue('DIVZCPfAOeM' in results[0]['embedded'])
86
+
87
+        json = """
88
+        {
89
+         "kind": "youtube#searchListResponse",
90
+         "etag": "xmg9xJZuZD438sF4hb-VcBBREXc/YJQDcTBCDcaBvl-sRZJoXdvy1ME",
91
+         "nextPageToken": "CAUQAA",
92
+         "pageInfo": {
93
+          "totalResults": 1000000,
94
+          "resultsPerPage": 20
95
+         }
96
+        }
97
+        """
98
+        response = mock.Mock(text=json)
99
+        results = youtube_api.response(response)
100
+        self.assertEqual(type(results), list)
101
+        self.assertEqual(len(results), 0)
102
+
103
+        json = """
104
+        {"toto":{"entry":[]
105
+        }
106
+        }
107
+        """
108
+        response = mock.Mock(text=json)
109
+        results = youtube_api.response(response)
110
+        self.assertEqual(type(results), list)
111
+        self.assertEqual(len(results), 0)

+ 103
- 0
searx/tests/engines/test_youtube_noapi.py 查看文件

@@ -0,0 +1,103 @@
1
+# -*- coding: utf-8 -*-
2
+from collections import defaultdict
3
+import mock
4
+from searx.engines import youtube_noapi
5
+from searx.testing import SearxTestCase
6
+
7
+
8
+class TestYoutubeNoAPIEngine(SearxTestCase):
9
+
10
+    def test_request(self):
11
+        query = 'test_query'
12
+        dicto = defaultdict(dict)
13
+        dicto['pageno'] = 0
14
+        params = youtube_noapi.request(query, dicto)
15
+        self.assertIn('url', params)
16
+        self.assertIn(query, params['url'])
17
+        self.assertIn('youtube.com', params['url'])
18
+
19
+    def test_response(self):
20
+        self.assertRaises(AttributeError, youtube_noapi.response, None)
21
+        self.assertRaises(AttributeError, youtube_noapi.response, [])
22
+        self.assertRaises(AttributeError, youtube_noapi.response, '')
23
+        self.assertRaises(AttributeError, youtube_noapi.response, '[]')
24
+
25
+        response = mock.Mock(text='<html></html>')
26
+        self.assertEqual(youtube_noapi.response(response), [])
27
+
28
+        html = """
29
+        <ol id="item-section-063864" class="item-section">
30
+            <li>
31
+                <div class="yt-lockup yt-lockup-tile yt-lockup-video vve-check clearfix yt-uix-tile"
32
+                data-context-item-id="DIVZCPfAOeM"
33
+                data-visibility-tracking="CBgQ3DAYACITCPGXnYau6sUCFZEIHAod-VQASCj0JECx_-GK5uqMpcIB">
34
+                <div class="yt-lockup-dismissable"><div class="yt-lockup-thumbnail contains-addto">
35
+                <a aria-hidden="true" href="/watch?v=DIVZCPfAOeM" class=" yt-uix-sessionlink pf-link"
36
+                data-sessionlink="itct=CBgQ3DAYACITCPGXnYau6sUCFZEIHAod-VQASCj0JFIEdGVzdA">
37
+                <div class="yt-thumb video-thumb"><img src="//i.ytimg.com/vi/DIVZCPfAOeM/mqdefault.jpg"
38
+                width="196" height="110"/></div><span class="video-time" aria-hidden="true">11:35</span></a>
39
+                <span class="thumb-menu dark-overflow-action-menu video-actions">
40
+                </span>
41
+                </div>
42
+                <div class="yt-lockup-content">
43
+                <h3 class="yt-lockup-title">
44
+                <a href="/watch?v=DIVZCPfAOeM"
45
+                class="yt-uix-tile-link yt-ui-ellipsis yt-ui-ellipsis-2 yt-uix-sessionlink spf-link"
46
+                data-sessionlink="itct=CBgQ3DAYACITCPGXnYau6sUCFZEIHAod-VQASCj0JFIEdGVzdA"
47
+                title="Top Speed Test Kawasaki Ninja H2 (Thailand) By. MEHAY SUPERBIKE"
48
+                aria-describedby="description-id-259079" rel="spf-prefetch" dir="ltr">
49
+                Title
50
+                </a>
51
+                <span class="accessible-description" id="description-id-259079"> - Durée : 11:35.</span>
52
+                </h3>
53
+                <div class="yt-lockup-byline">de
54
+                <a href="/user/mheejapan" class=" yt-uix-sessionlink spf-link g-hovercard"
55
+                data-sessionlink="itct=CBgQ3DAYACITCPGXnYau6sUCFZEIHAod-VQASCj0JA" data-ytid="UCzEesu54Hjs0uRKmpy66qeA"
56
+                data-name="">MEHAY SUPERBIKE</a></div><div class="yt-lockup-meta">
57
+                <ul class="yt-lockup-meta-info">
58
+                    <li>il y a 20 heures</li>
59
+                    <li>8 424 vues</li>
60
+                </ul>
61
+                </div>
62
+                <div class="yt-lockup-description yt-ui-ellipsis yt-ui-ellipsis-2" dir="ltr">
63
+                Description
64
+                </div>
65
+                <div class="yt-lockup-badges">
66
+                <ul class="yt-badge-list ">
67
+                    <li class="yt-badge-item" >
68
+                        <span class="yt-badge">Nouveauté</span>
69
+                    </li>
70
+                    <li class="yt-badge-item" ><span class="yt-badge " >HD</span></li>
71
+                </ul>
72
+                </div>
73
+                <div class="yt-lockup-action-menu yt-uix-menu-container">
74
+                <div class="yt-uix-menu yt-uix-videoactionmenu hide-until-delayloaded"
75
+                data-video-id="DIVZCPfAOeM" data-menu-content-id="yt-uix-videoactionmenu-menu">
76
+                </div>
77
+                </div>
78
+                </div>
79
+                </div>
80
+                </div>
81
+            </li>
82
+        </ol>
83
+        """
84
+        response = mock.Mock(text=html)
85
+        results = youtube_noapi.response(response)
86
+        self.assertEqual(type(results), list)
87
+        self.assertEqual(len(results), 1)
88
+        self.assertEqual(results[0]['title'], 'Title')
89
+        self.assertEqual(results[0]['url'], 'https://www.youtube.com/watch?v=DIVZCPfAOeM')
90
+        self.assertEqual(results[0]['content'], 'Description')
91
+        self.assertEqual(results[0]['thumbnail'], 'https://i.ytimg.com/vi/DIVZCPfAOeM/hqdefault.jpg')
92
+        self.assertTrue('DIVZCPfAOeM' in results[0]['embedded'])
93
+
94
+        html = """
95
+        <ol id="item-section-063864" class="item-section">
96
+            <li>
97
+            </li>
98
+        </ol>
99
+        """
100
+        response = mock.Mock(text=html)
101
+        results = youtube_noapi.response(response)
102
+        self.assertEqual(type(results), list)
103
+        self.assertEqual(len(results), 0)

+ 2
- 0
searx/tests/test_engines.py 查看文件

@@ -39,4 +39,6 @@ from searx.tests.engines.test_www500px import *  # noqa
39 39
 from searx.tests.engines.test_yacy import *  # noqa
40 40
 from searx.tests.engines.test_yahoo import *  # noqa
41 41
 from searx.tests.engines.test_youtube import *  # noqa
42
+from searx.tests.engines.test_youtube_api import *  # noqa
43
+from searx.tests.engines.test_youtube_noapi import *  # noqa
42 44
 from searx.tests.engines.test_yahoo_news import *  # noqa