浏览代码

PirateBay unit test + reactivation in Settings

Cqoicebordel 10 年前
父节点
当前提交
5a16077455
共有 4 个文件被更改,包括 149 次插入和 7 次删除
  1. 8
    4
      searx/engines/piratebay.py
  2. 3
    3
      searx/settings.yml
  3. 137
    0
      searx/tests/engines/test_piratebay.py
  4. 1
    0
      searx/tests/test_engines.py

+ 8
- 4
searx/engines/piratebay.py 查看文件

13
 from urllib import quote
13
 from urllib import quote
14
 from lxml import html
14
 from lxml import html
15
 from operator import itemgetter
15
 from operator import itemgetter
16
+from searx.engines.xpath import extract_text
16
 
17
 
17
 # engine dependent config
18
 # engine dependent config
18
 categories = ['videos', 'music', 'files']
19
 categories = ['videos', 'music', 'files']
29
 
30
 
30
 # specific xpath variables
31
 # specific xpath variables
31
 magnet_xpath = './/a[@title="Download this torrent using magnet"]'
32
 magnet_xpath = './/a[@title="Download this torrent using magnet"]'
32
-content_xpath = './/font[@class="detDesc"]//text()'
33
+torrent_xpath = './/a[@title="Download this torrent"]'
34
+content_xpath = './/font[@class="detDesc"]'
33
 
35
 
34
 
36
 
35
 # do search-request
37
 # do search-request
59
     for result in search_res[1:]:
61
     for result in search_res[1:]:
60
         link = result.xpath('.//div[@class="detName"]//a')[0]
62
         link = result.xpath('.//div[@class="detName"]//a')[0]
61
         href = urljoin(url, link.attrib.get('href'))
63
         href = urljoin(url, link.attrib.get('href'))
62
-        title = ' '.join(link.xpath('.//text()'))
63
-        content = escape(' '.join(result.xpath(content_xpath)))
64
+        title = extract_text(link)
65
+        content = escape(extract_text(result.xpath(content_xpath)))
64
         seed, leech = result.xpath('.//td[@align="right"]/text()')[:2]
66
         seed, leech = result.xpath('.//td[@align="right"]/text()')[:2]
65
 
67
 
66
         # convert seed to int if possible
68
         # convert seed to int if possible
76
             leech = 0
78
             leech = 0
77
 
79
 
78
         magnetlink = result.xpath(magnet_xpath)[0]
80
         magnetlink = result.xpath(magnet_xpath)[0]
81
+        torrentfile = result.xpath(torrent_xpath)[0]
79
 
82
 
80
         # append result
83
         # append result
81
         results.append({'url': href,
84
         results.append({'url': href,
83
                         'content': content,
86
                         'content': content,
84
                         'seed': seed,
87
                         'seed': seed,
85
                         'leech': leech,
88
                         'leech': leech,
86
-                        'magnetlink': magnetlink.attrib['href'],
89
+                        'magnetlink': magnetlink.attrib.get('href'),
90
+                        'torrentfile': torrentfile.attrib.get('href'),
87
                         'template': 'torrent.html'})
91
                         'template': 'torrent.html'})
88
 
92
 
89
     # return results sorted by seeder
93
     # return results sorted by seeder

+ 3
- 3
searx/settings.yml 查看文件

152
     engine : photon
152
     engine : photon
153
     shortcut : ph
153
     shortcut : ph
154
 
154
 
155
-#  - name : piratebay
156
-#    engine : piratebay
157
-#    shortcut : tpb
155
+  - name : piratebay
156
+    engine : piratebay
157
+    shortcut : tpb
158
 
158
 
159
   - name : kickass
159
   - name : kickass
160
     engine : kickass
160
     engine : kickass

+ 137
- 0
searx/tests/engines/test_piratebay.py 查看文件

1
+# -*- coding: utf-8 -*-
2
+from collections import defaultdict
3
+import mock
4
+from searx.engines import piratebay
5
+from searx.testing import SearxTestCase
6
+
7
+
8
+class TestPiratebayEngine(SearxTestCase):
9
+
10
+    def test_request(self):
11
+        query = 'test_query'
12
+        dicto = defaultdict(dict)
13
+        dicto['pageno'] = 1
14
+        dicto['category'] = 'Toto'
15
+        params = piratebay.request(query, dicto)
16
+        self.assertIn('url', params)
17
+        self.assertIn(query, params['url'])
18
+        self.assertIn('piratebay.cr', params['url'])
19
+        self.assertIn('0', params['url'])
20
+
21
+        dicto['category'] = 'music'
22
+        params = piratebay.request(query, dicto)
23
+        self.assertIn('100', params['url'])
24
+
25
+    def test_response(self):
26
+        self.assertRaises(AttributeError, piratebay.response, None)
27
+        self.assertRaises(AttributeError, piratebay.response, [])
28
+        self.assertRaises(AttributeError, piratebay.response, '')
29
+        self.assertRaises(AttributeError, piratebay.response, '[]')
30
+
31
+        response = mock.Mock(text='<html></html>')
32
+        self.assertEqual(piratebay.response(response), [])
33
+
34
+        html = """
35
+        <table id="searchResult">
36
+            <tr>
37
+            </tr>
38
+            <tr>
39
+                <td class="vertTh">
40
+                    <center>
41
+                        <a href="#" title="More from this category">Anime</a><br/>
42
+                        (<a href="#" title="More from this category">Anime</a>)
43
+                    </center>
44
+                </td>
45
+                <td>
46
+                    <div class="detName">
47
+                        <a href="/this.is.the.link" class="detLink" title="Title">
48
+                            This is the title
49
+                        </a>
50
+                    </div>
51
+                    <a href="magnet:?xt=urn:btih:MAGNETLINK" title="Download this torrent using magnet">
52
+                        <img src="/static/img/icon-magnet.gif" alt="Magnet link"/>
53
+                    </a>
54
+                    <a href="http://torcache.net/torrent/TORRENTFILE.torrent" title="Download this torrent">
55
+                        <img src="/static/img/dl.gif" class="dl" alt="Download"/>
56
+                    </a>
57
+                    <a href="/user/HorribleSubs">
58
+                        <img src="/static/img/vip.gif" alt="VIP" title="VIP" style="width:11px;" border='0'/>
59
+                    </a>
60
+                    <img src="/static/img/11x11p.png"/>
61
+                    <font class="detDesc">
62
+                        This is the content <span>and should be</span> OK
63
+                    </font>
64
+                </td>
65
+                <td align="right">13</td>
66
+                <td align="right">334</td>
67
+            </tr>
68
+        </table>
69
+        """
70
+        response = mock.Mock(text=html)
71
+        results = piratebay.response(response)
72
+        self.assertEqual(type(results), list)
73
+        self.assertEqual(len(results), 1)
74
+        self.assertEqual(results[0]['title'], 'This is the title')
75
+        self.assertEqual(results[0]['url'], 'https://thepiratebay.cr/this.is.the.link')
76
+        self.assertEqual(results[0]['content'], 'This is the content and should be OK')
77
+        self.assertEqual(results[0]['seed'], 13)
78
+        self.assertEqual(results[0]['leech'], 334)
79
+        self.assertEqual(results[0]['magnetlink'], 'magnet:?xt=urn:btih:MAGNETLINK')
80
+        self.assertEqual(results[0]['torrentfile'], 'http://torcache.net/torrent/TORRENTFILE.torrent')
81
+
82
+        html = """
83
+        <table id="searchResult">
84
+            <tr>
85
+            </tr>
86
+            <tr>
87
+                <td class="vertTh">
88
+                    <center>
89
+                        <a href="#" title="More from this category">Anime</a><br/>
90
+                        (<a href="#" title="More from this category">Anime</a>)
91
+                    </center>
92
+                </td>
93
+                <td>
94
+                    <div class="detName">
95
+                        <a href="/this.is.the.link" class="detLink" title="Title">
96
+                            This is the title
97
+                        </a>
98
+                    </div>
99
+                    <a href="magnet:?xt=urn:btih:MAGNETLINK" title="Download this torrent using magnet">
100
+                        <img src="/static/img/icon-magnet.gif" alt="Magnet link"/>
101
+                    </a>
102
+                    <a href="http://torcache.net/torrent/TORRENTFILE.torrent" title="Download this torrent">
103
+                        <img src="/static/img/dl.gif" class="dl" alt="Download"/>
104
+                    </a>
105
+                    <a href="/user/HorribleSubs">
106
+                        <img src="/static/img/vip.gif" alt="VIP" title="VIP" style="width:11px;" border='0'/>
107
+                    </a>
108
+                    <img src="/static/img/11x11p.png"/>
109
+                    <font class="detDesc">
110
+                        This is the content <span>and should be</span> OK
111
+                    </font>
112
+                </td>
113
+                <td align="right">s</td>
114
+                <td align="right">d</td>
115
+            </tr>
116
+        </table>
117
+        """
118
+        response = mock.Mock(text=html)
119
+        results = piratebay.response(response)
120
+        self.assertEqual(type(results), list)
121
+        self.assertEqual(len(results), 1)
122
+        self.assertEqual(results[0]['title'], 'This is the title')
123
+        self.assertEqual(results[0]['url'], 'https://thepiratebay.cr/this.is.the.link')
124
+        self.assertEqual(results[0]['content'], 'This is the content and should be OK')
125
+        self.assertEqual(results[0]['seed'], 0)
126
+        self.assertEqual(results[0]['leech'], 0)
127
+        self.assertEqual(results[0]['magnetlink'], 'magnet:?xt=urn:btih:MAGNETLINK')
128
+        self.assertEqual(results[0]['torrentfile'], 'http://torcache.net/torrent/TORRENTFILE.torrent')
129
+
130
+        html = """
131
+        <table id="searchResult">
132
+        </table>
133
+        """
134
+        response = mock.Mock(text=html)
135
+        results = piratebay.response(response)
136
+        self.assertEqual(type(results), list)
137
+        self.assertEqual(len(results), 0)

+ 1
- 0
searx/tests/test_engines.py 查看文件

14
 from searx.tests.engines.test_google_news import *  # noqa
14
 from searx.tests.engines.test_google_news import *  # noqa
15
 from searx.tests.engines.test_kickass import *  # noqa
15
 from searx.tests.engines.test_kickass import *  # noqa
16
 from searx.tests.engines.test_mixcloud import *  # noqa
16
 from searx.tests.engines.test_mixcloud import *  # noqa
17
+from searx.tests.engines.test_piratebay import *  # noqa
17
 from searx.tests.engines.test_searchcode_code import *  # noqa
18
 from searx.tests.engines.test_searchcode_code import *  # noqa
18
 from searx.tests.engines.test_searchcode_doc import *  # noqa
19
 from searx.tests.engines.test_searchcode_doc import *  # noqa
19
 from searx.tests.engines.test_soundcloud import *  # noqa
20
 from searx.tests.engines.test_soundcloud import *  # noqa