Browse Source

Add PirateBay engine unit test and re-enable the engine in settings.yml

Cqoicebordel 10 years ago
parent
commit
5a16077455

+ 8
- 4
searx/engines/piratebay.py View File

13
 from urllib import quote
13
 from urllib import quote
14
 from lxml import html
14
 from lxml import html
15
 from operator import itemgetter
15
 from operator import itemgetter
16
+from searx.engines.xpath import extract_text
16
 
17
 
17
 # engine dependent config
18
 # engine dependent config
18
 categories = ['videos', 'music', 'files']
19
 categories = ['videos', 'music', 'files']
29
 
30
 
30
 # specific xpath variables
31
 # specific xpath variables
31
 magnet_xpath = './/a[@title="Download this torrent using magnet"]'
32
 magnet_xpath = './/a[@title="Download this torrent using magnet"]'
32
-content_xpath = './/font[@class="detDesc"]//text()'
33
+torrent_xpath = './/a[@title="Download this torrent"]'
34
+content_xpath = './/font[@class="detDesc"]'
33
 
35
 
34
 
36
 
35
 # do search-request
37
 # do search-request
59
     for result in search_res[1:]:
61
     for result in search_res[1:]:
60
         link = result.xpath('.//div[@class="detName"]//a')[0]
62
         link = result.xpath('.//div[@class="detName"]//a')[0]
61
         href = urljoin(url, link.attrib.get('href'))
63
         href = urljoin(url, link.attrib.get('href'))
62
-        title = ' '.join(link.xpath('.//text()'))
63
-        content = escape(' '.join(result.xpath(content_xpath)))
64
+        title = extract_text(link)
65
+        content = escape(extract_text(result.xpath(content_xpath)))
64
         seed, leech = result.xpath('.//td[@align="right"]/text()')[:2]
66
         seed, leech = result.xpath('.//td[@align="right"]/text()')[:2]
65
 
67
 
66
         # convert seed to int if possible
68
         # convert seed to int if possible
76
             leech = 0
78
             leech = 0
77
 
79
 
78
         magnetlink = result.xpath(magnet_xpath)[0]
80
         magnetlink = result.xpath(magnet_xpath)[0]
81
+        torrentfile = result.xpath(torrent_xpath)[0]
79
 
82
 
80
         # append result
83
         # append result
81
         results.append({'url': href,
84
         results.append({'url': href,
83
                         'content': content,
86
                         'content': content,
84
                         'seed': seed,
87
                         'seed': seed,
85
                         'leech': leech,
88
                         'leech': leech,
86
-                        'magnetlink': magnetlink.attrib['href'],
89
+                        'magnetlink': magnetlink.attrib.get('href'),
90
+                        'torrentfile': torrentfile.attrib.get('href'),
87
                         'template': 'torrent.html'})
91
                         'template': 'torrent.html'})
88
 
92
 
89
     # return results sorted by seeder
93
     # return results sorted by seeder

+ 3
- 3
searx/settings.yml View File

152
     engine : photon
152
     engine : photon
153
     shortcut : ph
153
     shortcut : ph
154
 
154
 
155
-#  - name : piratebay
156
-#    engine : piratebay
157
-#    shortcut : tpb
155
+  - name : piratebay
156
+    engine : piratebay
157
+    shortcut : tpb
158
 
158
 
159
   - name : kickass
159
   - name : kickass
160
     engine : kickass
160
     engine : kickass

+ 137
- 0
searx/tests/engines/test_piratebay.py View File

1
+# -*- coding: utf-8 -*-
2
+from collections import defaultdict
3
+import mock
4
+from searx.engines import piratebay
5
+from searx.testing import SearxTestCase
6
+
7
+
8
+class TestPiratebayEngine(SearxTestCase):
9
+
10
+    def test_request(self):
11
+        query = 'test_query'
12
+        dicto = defaultdict(dict)
13
+        dicto['pageno'] = 1
14
+        dicto['category'] = 'Toto'
15
+        params = piratebay.request(query, dicto)
16
+        self.assertIn('url', params)
17
+        self.assertIn(query, params['url'])
18
+        self.assertIn('piratebay.cr', params['url'])
19
+        self.assertIn('0', params['url'])
20
+
21
+        dicto['category'] = 'music'
22
+        params = piratebay.request(query, dicto)
23
+        self.assertIn('100', params['url'])
24
+
25
+    def test_response(self):
26
+        self.assertRaises(AttributeError, piratebay.response, None)
27
+        self.assertRaises(AttributeError, piratebay.response, [])
28
+        self.assertRaises(AttributeError, piratebay.response, '')
29
+        self.assertRaises(AttributeError, piratebay.response, '[]')
30
+
31
+        response = mock.Mock(text='<html></html>')
32
+        self.assertEqual(piratebay.response(response), [])
33
+
34
+        html = """
35
+        <table id="searchResult">
36
+            <tr>
37
+            </tr>
38
+            <tr>
39
+                <td class="vertTh">
40
+                    <center>
41
+                        <a href="#" title="More from this category">Anime</a><br/>
42
+                        (<a href="#" title="More from this category">Anime</a>)
43
+                    </center>
44
+                </td>
45
+                <td>
46
+                    <div class="detName">
47
+                        <a href="/this.is.the.link" class="detLink" title="Title">
48
+                            This is the title
49
+                        </a>
50
+                    </div>
51
+                    <a href="magnet:?xt=urn:btih:MAGNETLINK" title="Download this torrent using magnet">
52
+                        <img src="/static/img/icon-magnet.gif" alt="Magnet link"/>
53
+                    </a>
54
+                    <a href="http://torcache.net/torrent/TORRENTFILE.torrent" title="Download this torrent">
55
+                        <img src="/static/img/dl.gif" class="dl" alt="Download"/>
56
+                    </a>
57
+                    <a href="/user/HorribleSubs">
58
+                        <img src="/static/img/vip.gif" alt="VIP" title="VIP" style="width:11px;" border='0'/>
59
+                    </a>
60
+                    <img src="/static/img/11x11p.png"/>
61
+                    <font class="detDesc">
62
+                        This is the content <span>and should be</span> OK
63
+                    </font>
64
+                </td>
65
+                <td align="right">13</td>
66
+                <td align="right">334</td>
67
+            </tr>
68
+        </table>
69
+        """
70
+        response = mock.Mock(text=html)
71
+        results = piratebay.response(response)
72
+        self.assertEqual(type(results), list)
73
+        self.assertEqual(len(results), 1)
74
+        self.assertEqual(results[0]['title'], 'This is the title')
75
+        self.assertEqual(results[0]['url'], 'https://thepiratebay.cr/this.is.the.link')
76
+        self.assertEqual(results[0]['content'], 'This is the content and should be OK')
77
+        self.assertEqual(results[0]['seed'], 13)
78
+        self.assertEqual(results[0]['leech'], 334)
79
+        self.assertEqual(results[0]['magnetlink'], 'magnet:?xt=urn:btih:MAGNETLINK')
80
+        self.assertEqual(results[0]['torrentfile'], 'http://torcache.net/torrent/TORRENTFILE.torrent')
81
+
82
+        html = """
83
+        <table id="searchResult">
84
+            <tr>
85
+            </tr>
86
+            <tr>
87
+                <td class="vertTh">
88
+                    <center>
89
+                        <a href="#" title="More from this category">Anime</a><br/>
90
+                        (<a href="#" title="More from this category">Anime</a>)
91
+                    </center>
92
+                </td>
93
+                <td>
94
+                    <div class="detName">
95
+                        <a href="/this.is.the.link" class="detLink" title="Title">
96
+                            This is the title
97
+                        </a>
98
+                    </div>
99
+                    <a href="magnet:?xt=urn:btih:MAGNETLINK" title="Download this torrent using magnet">
100
+                        <img src="/static/img/icon-magnet.gif" alt="Magnet link"/>
101
+                    </a>
102
+                    <a href="http://torcache.net/torrent/TORRENTFILE.torrent" title="Download this torrent">
103
+                        <img src="/static/img/dl.gif" class="dl" alt="Download"/>
104
+                    </a>
105
+                    <a href="/user/HorribleSubs">
106
+                        <img src="/static/img/vip.gif" alt="VIP" title="VIP" style="width:11px;" border='0'/>
107
+                    </a>
108
+                    <img src="/static/img/11x11p.png"/>
109
+                    <font class="detDesc">
110
+                        This is the content <span>and should be</span> OK
111
+                    </font>
112
+                </td>
113
+                <td align="right">s</td>
114
+                <td align="right">d</td>
115
+            </tr>
116
+        </table>
117
+        """
118
+        response = mock.Mock(text=html)
119
+        results = piratebay.response(response)
120
+        self.assertEqual(type(results), list)
121
+        self.assertEqual(len(results), 1)
122
+        self.assertEqual(results[0]['title'], 'This is the title')
123
+        self.assertEqual(results[0]['url'], 'https://thepiratebay.cr/this.is.the.link')
124
+        self.assertEqual(results[0]['content'], 'This is the content and should be OK')
125
+        self.assertEqual(results[0]['seed'], 0)
126
+        self.assertEqual(results[0]['leech'], 0)
127
+        self.assertEqual(results[0]['magnetlink'], 'magnet:?xt=urn:btih:MAGNETLINK')
128
+        self.assertEqual(results[0]['torrentfile'], 'http://torcache.net/torrent/TORRENTFILE.torrent')
129
+
130
+        html = """
131
+        <table id="searchResult">
132
+        </table>
133
+        """
134
+        response = mock.Mock(text=html)
135
+        results = piratebay.response(response)
136
+        self.assertEqual(type(results), list)
137
+        self.assertEqual(len(results), 0)

+ 1
- 0
searx/tests/test_engines.py View File

14
 from searx.tests.engines.test_google_news import *  # noqa
14
 from searx.tests.engines.test_google_news import *  # noqa
15
 from searx.tests.engines.test_kickass import *  # noqa
15
 from searx.tests.engines.test_kickass import *  # noqa
16
 from searx.tests.engines.test_mixcloud import *  # noqa
16
 from searx.tests.engines.test_mixcloud import *  # noqa
17
+from searx.tests.engines.test_piratebay import *  # noqa
17
 from searx.tests.engines.test_searchcode_code import *  # noqa
18
 from searx.tests.engines.test_searchcode_code import *  # noqa
18
 from searx.tests.engines.test_searchcode_doc import *  # noqa
19
 from searx.tests.engines.test_searchcode_doc import *  # noqa
19
 from searx.tests.engines.test_soundcloud import *  # noqa
20
 from searx.tests.engines.test_soundcloud import *  # noqa