Browse Source

Add PirateBay engine unit test and re-enable the engine in settings

Cqoicebordel 10 years ago
parent
commit
5a16077455

+ 8
- 4
searx/engines/piratebay.py View File

@@ -13,6 +13,7 @@ from cgi import escape
13 13
 from urllib import quote
14 14
 from lxml import html
15 15
 from operator import itemgetter
16
+from searx.engines.xpath import extract_text
16 17
 
17 18
 # engine dependent config
18 19
 categories = ['videos', 'music', 'files']
@@ -29,7 +30,8 @@ search_types = {'files': '0',
29 30
 
30 31
 # specific xpath variables
31 32
 magnet_xpath = './/a[@title="Download this torrent using magnet"]'
32
-content_xpath = './/font[@class="detDesc"]//text()'
33
+torrent_xpath = './/a[@title="Download this torrent"]'
34
+content_xpath = './/font[@class="detDesc"]'
33 35
 
34 36
 
35 37
 # do search-request
@@ -59,8 +61,8 @@ def response(resp):
59 61
     for result in search_res[1:]:
60 62
         link = result.xpath('.//div[@class="detName"]//a')[0]
61 63
         href = urljoin(url, link.attrib.get('href'))
62
-        title = ' '.join(link.xpath('.//text()'))
63
-        content = escape(' '.join(result.xpath(content_xpath)))
64
+        title = extract_text(link)
65
+        content = escape(extract_text(result.xpath(content_xpath)))
64 66
         seed, leech = result.xpath('.//td[@align="right"]/text()')[:2]
65 67
 
66 68
         # convert seed to int if possible
@@ -76,6 +78,7 @@ def response(resp):
76 78
             leech = 0
77 79
 
78 80
         magnetlink = result.xpath(magnet_xpath)[0]
81
+        torrentfile = result.xpath(torrent_xpath)[0]
79 82
 
80 83
         # append result
81 84
         results.append({'url': href,
@@ -83,7 +86,8 @@ def response(resp):
83 86
                         'content': content,
84 87
                         'seed': seed,
85 88
                         'leech': leech,
86
-                        'magnetlink': magnetlink.attrib['href'],
89
+                        'magnetlink': magnetlink.attrib.get('href'),
90
+                        'torrentfile': torrentfile.attrib.get('href'),
87 91
                         'template': 'torrent.html'})
88 92
 
89 93
     # return results sorted by seeder

+ 3
- 3
searx/settings.yml View File

@@ -152,9 +152,9 @@ engines:
152 152
     engine : photon
153 153
     shortcut : ph
154 154
 
155
-#  - name : piratebay
156
-#    engine : piratebay
157
-#    shortcut : tpb
155
+  - name : piratebay
156
+    engine : piratebay
157
+    shortcut : tpb
158 158
 
159 159
   - name : kickass
160 160
     engine : kickass

+ 137
- 0
searx/tests/engines/test_piratebay.py View File

@@ -0,0 +1,137 @@
1
+# -*- coding: utf-8 -*-
2
+from collections import defaultdict
3
+import mock
4
+from searx.engines import piratebay
5
+from searx.testing import SearxTestCase
6
+
7
+
8
+class TestPiratebayEngine(SearxTestCase):
9
+
10
+    def test_request(self):
11
+        query = 'test_query'
12
+        dicto = defaultdict(dict)
13
+        dicto['pageno'] = 1
14
+        dicto['category'] = 'Toto'
15
+        params = piratebay.request(query, dicto)
16
+        self.assertIn('url', params)
17
+        self.assertIn(query, params['url'])
18
+        self.assertIn('piratebay.cr', params['url'])
19
+        self.assertIn('0', params['url'])
20
+
21
+        dicto['category'] = 'music'
22
+        params = piratebay.request(query, dicto)
23
+        self.assertIn('100', params['url'])
24
+
25
+    def test_response(self):
26
+        self.assertRaises(AttributeError, piratebay.response, None)
27
+        self.assertRaises(AttributeError, piratebay.response, [])
28
+        self.assertRaises(AttributeError, piratebay.response, '')
29
+        self.assertRaises(AttributeError, piratebay.response, '[]')
30
+
31
+        response = mock.Mock(text='<html></html>')
32
+        self.assertEqual(piratebay.response(response), [])
33
+
34
+        html = """
35
+        <table id="searchResult">
36
+            <tr>
37
+            </tr>
38
+            <tr>
39
+                <td class="vertTh">
40
+                    <center>
41
+                        <a href="#" title="More from this category">Anime</a><br/>
42
+                        (<a href="#" title="More from this category">Anime</a>)
43
+                    </center>
44
+                </td>
45
+                <td>
46
+                    <div class="detName">
47
+                        <a href="/this.is.the.link" class="detLink" title="Title">
48
+                            This is the title
49
+                        </a>
50
+                    </div>
51
+                    <a href="magnet:?xt=urn:btih:MAGNETLINK" title="Download this torrent using magnet">
52
+                        <img src="/static/img/icon-magnet.gif" alt="Magnet link"/>
53
+                    </a>
54
+                    <a href="http://torcache.net/torrent/TORRENTFILE.torrent" title="Download this torrent">
55
+                        <img src="/static/img/dl.gif" class="dl" alt="Download"/>
56
+                    </a>
57
+                    <a href="/user/HorribleSubs">
58
+                        <img src="/static/img/vip.gif" alt="VIP" title="VIP" style="width:11px;" border='0'/>
59
+                    </a>
60
+                    <img src="/static/img/11x11p.png"/>
61
+                    <font class="detDesc">
62
+                        This is the content <span>and should be</span> OK
63
+                    </font>
64
+                </td>
65
+                <td align="right">13</td>
66
+                <td align="right">334</td>
67
+            </tr>
68
+        </table>
69
+        """
70
+        response = mock.Mock(text=html)
71
+        results = piratebay.response(response)
72
+        self.assertEqual(type(results), list)
73
+        self.assertEqual(len(results), 1)
74
+        self.assertEqual(results[0]['title'], 'This is the title')
75
+        self.assertEqual(results[0]['url'], 'https://thepiratebay.cr/this.is.the.link')
76
+        self.assertEqual(results[0]['content'], 'This is the content and should be OK')
77
+        self.assertEqual(results[0]['seed'], 13)
78
+        self.assertEqual(results[0]['leech'], 334)
79
+        self.assertEqual(results[0]['magnetlink'], 'magnet:?xt=urn:btih:MAGNETLINK')
80
+        self.assertEqual(results[0]['torrentfile'], 'http://torcache.net/torrent/TORRENTFILE.torrent')
81
+
82
+        html = """
83
+        <table id="searchResult">
84
+            <tr>
85
+            </tr>
86
+            <tr>
87
+                <td class="vertTh">
88
+                    <center>
89
+                        <a href="#" title="More from this category">Anime</a><br/>
90
+                        (<a href="#" title="More from this category">Anime</a>)
91
+                    </center>
92
+                </td>
93
+                <td>
94
+                    <div class="detName">
95
+                        <a href="/this.is.the.link" class="detLink" title="Title">
96
+                            This is the title
97
+                        </a>
98
+                    </div>
99
+                    <a href="magnet:?xt=urn:btih:MAGNETLINK" title="Download this torrent using magnet">
100
+                        <img src="/static/img/icon-magnet.gif" alt="Magnet link"/>
101
+                    </a>
102
+                    <a href="http://torcache.net/torrent/TORRENTFILE.torrent" title="Download this torrent">
103
+                        <img src="/static/img/dl.gif" class="dl" alt="Download"/>
104
+                    </a>
105
+                    <a href="/user/HorribleSubs">
106
+                        <img src="/static/img/vip.gif" alt="VIP" title="VIP" style="width:11px;" border='0'/>
107
+                    </a>
108
+                    <img src="/static/img/11x11p.png"/>
109
+                    <font class="detDesc">
110
+                        This is the content <span>and should be</span> OK
111
+                    </font>
112
+                </td>
113
+                <td align="right">s</td>
114
+                <td align="right">d</td>
115
+            </tr>
116
+        </table>
117
+        """
118
+        response = mock.Mock(text=html)
119
+        results = piratebay.response(response)
120
+        self.assertEqual(type(results), list)
121
+        self.assertEqual(len(results), 1)
122
+        self.assertEqual(results[0]['title'], 'This is the title')
123
+        self.assertEqual(results[0]['url'], 'https://thepiratebay.cr/this.is.the.link')
124
+        self.assertEqual(results[0]['content'], 'This is the content and should be OK')
125
+        self.assertEqual(results[0]['seed'], 0)
126
+        self.assertEqual(results[0]['leech'], 0)
127
+        self.assertEqual(results[0]['magnetlink'], 'magnet:?xt=urn:btih:MAGNETLINK')
128
+        self.assertEqual(results[0]['torrentfile'], 'http://torcache.net/torrent/TORRENTFILE.torrent')
129
+
130
+        html = """
131
+        <table id="searchResult">
132
+        </table>
133
+        """
134
+        response = mock.Mock(text=html)
135
+        results = piratebay.response(response)
136
+        self.assertEqual(type(results), list)
137
+        self.assertEqual(len(results), 0)

+ 1
- 0
searx/tests/test_engines.py View File

@@ -14,6 +14,7 @@ from searx.tests.engines.test_google_images import *  # noqa
14 14
 from searx.tests.engines.test_google_news import *  # noqa
15 15
 from searx.tests.engines.test_kickass import *  # noqa
16 16
 from searx.tests.engines.test_mixcloud import *  # noqa
17
+from searx.tests.engines.test_piratebay import *  # noqa
17 18
 from searx.tests.engines.test_searchcode_code import *  # noqa
18 19
 from searx.tests.engines.test_searchcode_doc import *  # noqa
19 20
 from searx.tests.engines.test_soundcloud import *  # noqa