Bladeren bron

Merge pull request #1283 from rinpatch/acgsou-engine

[Feature] Acgsou as a searchable engine
Adam Tauber 6 jaren geleden
bovenliggende
commit
1a1f9852f1
Geen account gekoppeld aan de committers e-mail
3 gewijzigde bestanden met toevoegingen van 158 en 0 verwijderingen
  1. 75
    0
      searx/engines/acgsou.py
  2. 6
    0
      searx/settings.yml
  3. 77
    0
      tests/unit/engines/test_acgsou.py

+ 75
- 0
searx/engines/acgsou.py Bestand weergeven

@@ -0,0 +1,75 @@
1
+"""
2
+ Acgsou (Japanese Animation/Music/Comics Bittorrent tracker)
3
+
4
+ @website      https://www.acgsou.com/
5
+ @provide-api  no
6
+ @using-api    no
7
+ @results      HTML
8
+ @stable       no (HTML can change)
9
+ @parse        url, title, content, filesize, magnetlink
10
+"""
11
+
12
+from lxml import html
13
+from searx.engines.xpath import extract_text
14
+from searx.url_utils import urlencode
15
+from searx.utils import get_torrent_size, int_or_zero
16
+
17
# engine dependent config
# categories this engine is listed under
categories = ['files', 'images', 'videos', 'music']
# request() below puts params['pageno'] into the search URL
paging = True

# search-url
base_url = 'https://www.acgsou.com/'
# '{query}' receives the url-encoded 'keyword=...' pair, '{offset}' the page number
search_url = base_url + 'search.php?{query}&page={offset}'
# xpath queries
# rows of the table whose class contains "list_style table_fixed",
# skipping any row that has a <th> child (the header row)
xpath_results = '//table[contains(@class, "list_style table_fixed")]//tr[not(th)]'
# first link of the second cell (relative to a result row)
xpath_category = './/td[2]/a[1]'
# last link of the third cell; its text is the title and its href the detail page
xpath_title = './/td[3]/a[last()]'
# every link of the third cell (not referenced by request()/response() in this file)
xpath_torrent_links = './/td[3]/a'
# text node(s) of the fourth cell, e.g. "1MB"
xpath_filesize = './/td[4]/text()'
30
+
31
+
32
def request(query, params):
    """Build the Acgsou search URL and store it in ``params['url']``.

    :param query: raw search terms; url-encoded here as the ``keyword`` field.
    :param params: request parameter mapping; ``params['pageno']`` is read and
        ``params['url']`` is written.
    :returns: the same ``params`` mapping, with ``url`` filled in.
    """
    encoded_keyword = urlencode({'keyword': query})
    page_number = params['pageno']
    params['url'] = search_url.format(query=encoded_keyword, offset=page_number)
    return params
36
+
37
+
38
def response(resp):
    """Parse an Acgsou result page into a list of torrent results.

    Every table row matched by ``xpath_results`` that carries a title link
    with an ``href`` becomes one result rendered with ``torrent.html``.
    Rows without such a link are skipped instead of aborting the whole page.

    :param resp: HTTP response object; only ``resp.text`` (the page HTML)
        is read.
    :returns: list of dicts with the keys ``url``, ``title``, ``content``,
        ``filesize``, ``magnetlink`` and ``template``.
    """
    magnet_template = "magnet:?xt=urn:btih:{}&tr=http://tracker.acgsou.com:2710/announce"

    results = []
    dom = html.fromstring(resp.text)

    for result in dom.xpath(xpath_results):
        title_links = result.xpath(xpath_title)
        if not title_links:
            # no title link in this row: nothing to link to, skip it
            continue
        page_a = title_links[0]
        page_href = page_a.attrib.get('href')
        if not page_href:
            continue

        title = extract_text(page_a)
        href = base_url + page_href

        # The slice drops the first and the last five characters of the
        # relative link, e.g. "show-<id>.html" -> "<id>".
        # NOTE(review): presumably <id> is the torrent info hash - confirm.
        magnet_link = magnet_template.format(page_href[5:-5])

        # Computed per row with an explicit default, so a row without a
        # category link neither raises NameError nor inherits the category
        # of the previous row.
        category_links = result.xpath(xpath_category)
        category = extract_text(category_links[0]) if category_links else ''

        # The size cell looks like "1MB": number followed by a two letter unit.
        filesize = 0
        size_texts = result.xpath(xpath_filesize)
        if size_texts:
            size_info = size_texts[0]
            try:
                filesize = get_torrent_size(size_info[:-2], size_info[-2:])
            except (TypeError, ValueError):
                # best effort: keep the default when the size is unparsable
                filesize = 0

        # Download/seed/leech counts are deliberately not reported: the
        # original author found the site generates them randomly on every
        # request.
        content = 'Category: "{category}".'.format(category=category)

        results.append({'url': href,
                        'title': title,
                        'content': content,
                        'filesize': filesize,
                        'magnetlink': magnet_link,
                        'template': 'torrent.html'})
    return results

+ 6
- 0
searx/settings.yml Bestand weergeven

@@ -433,6 +433,12 @@ engines:
433 433
     engine : nyaa
434 434
     shortcut : nt
435 435
     disabled : True
436
+  
437
+  - name : acgsou
438
+    engine : acgsou
439
+    shortcut : acg
440
+    disabled : True
441
+    timeout: 5.0
436 442
 
437 443
   - name : openairedatasets
438 444
     engine : json_engine

+ 77
- 0
tests/unit/engines/test_acgsou.py Bestand weergeven

@@ -0,0 +1,77 @@
1
+from collections import defaultdict
2
+import mock
3
+from searx.engines import acgsou
4
+from searx.testing import SearxTestCase
5
+
6
+
7
class TestAcgsouEngine(SearxTestCase):
    """Unit tests for the ``request`` and ``response`` hooks of the acgsou engine."""

    def test_request(self):
        """request() must produce an acgsou.com URL holding query and page number."""
        query = 'test_query'
        dic = defaultdict(dict)
        dic['pageno'] = 1
        params = acgsou.request(query, dic)
        self.assertIn('url', params)
        self.assertIn(query, params['url'])
        self.assertIn('acgsou.com', params['url'])
        self.assertIn('page=1', params['url'])

    def test_response(self):
        """response() must return [] for an empty page and one dict per result row."""
        # a page without a result table yields no results
        resp = mock.Mock(text='<html></html>')
        self.assertEqual(acgsou.response(resp), [])

        # one header row (ignored by the engine) and one data row
        html = """
        <html>
<table id="listTable" class="list_style table_fixed">
  <thead class="tcat">
      <tr>
        <th axis="string" class="l1 tableHeaderOver">test</th>
        <th axis="string" class="l2 tableHeaderOver">test</th>
        <th axis="string" class="l3 tableHeaderOver">test</th>
        <th axis="size" class="l4 tableHeaderOver">test</th>
        <th axis="number" class="l5 tableHeaderOver">test</th>
        <th axis="number" class="l6 tableHeaderOver">test</th>
        <th axis="number" class="l7 tableHeaderOver">test</th>
        <th axis="string" class="l8 tableHeaderOver">test</th>
      </tr>
  </thead>
  <tbody class="tbody" id="data_list">
 <tr class="alt1 ">
        <td nowrap="nowrap">date</td>
        <td><a href="category.html">testcategory</a></td>
        <td style="text-align:left;">
            <a href="show-torrentid.html" target="_blank">torrentname</a>
        </td>
        <td>1MB</td>
        <td nowrap="nowrap">
            <span class="bts_1">
            29
            </span>
        </td>
        <td nowrap="nowrap">
            <span class="btl_1">
            211
        </span>
        </td>
        <td nowrap="nowrap">
        <span class="btc_">
            168
        </span>
        </td>
        <td><a href="random.html">user</a></td>
      </tr>
      </tbody>
</table>
</html>
        """

        resp = mock.Mock(text=html)
        results = acgsou.response(resp)

        self.assertIsInstance(results, list)
        self.assertEqual(len(results), 1)

        r = results[0]
        self.assertEqual(r['url'], 'https://www.acgsou.com/show-torrentid.html')
        self.assertEqual(r['content'], 'Category: "testcategory".')
        self.assertEqual(r['title'], 'torrentname')
        self.assertEqual(r['filesize'], 1048576)
        # the id between "show-" and ".html" ends up in the magnet link
        self.assertEqual(
            r['magnetlink'],
            'magnet:?xt=urn:btih:torrentid&tr=http://tracker.acgsou.com:2710/announce')
        self.assertEqual(r['template'], 'torrent.html')