浏览代码

[fix] deviantart engine xpaths

Noemi Vanyi 8 年前
父节点
当前提交
ba590de7f1
共有 2 个文件被更改,包括 12 次插入和 44 次删除
  1. 4
    5
      searx/engines/deviantart.py
  2. 8
    39
      tests/unit/engines/test_deviantart.py

+ 4
- 5
searx/engines/deviantart.py 查看文件

@@ -50,11 +50,10 @@ def response(resp):
50 50
     regex = re.compile(r'\/200H\/')
51 51
 
52 52
     # parse results
53
-    for result in dom.xpath('//div[contains(@class, "tt-a tt-fh")]'):
54
-        link = result.xpath('.//a[contains(@class, "thumb")]')[0]
55
-        url = urljoin(base_url, link.attrib.get('href'))
56
-        title_links = result.xpath('.//span[@class="details"]//a[contains(@class, "t")]')
57
-        title = extract_text(title_links[0])
53
+    for result in dom.xpath('.//span[@class="thumb wide"]'):
54
+        link = result.xpath('.//a[@class="torpedo-thumb-link"]')[0]
55
+        url = link.attrib.get('href')
56
+        title = extract_text(result.xpath('.//span[@class="title"]'))
58 57
         thumbnail_src = link.xpath('.//img')[0].attrib.get('src')
59 58
         img_src = regex.sub('/', thumbnail_src)
60 59
 

+ 8
- 39
tests/unit/engines/test_deviantart.py 查看文件

@@ -28,44 +28,13 @@ class TestDeviantartEngine(SearxTestCase):
28 28
         self.assertEqual(deviantart.response(response), [])
29 29
 
30 30
         html = """
31
-        <div class="tt-a tt-fh tt-boxed" collect_rid="1:149167425"
32
-            usericon="http://a.deviantart.net/avatars/t/e/test-0.gif" userid="233301"
33
-            username="test-0" symbol="~" category="digitalart/animation">
34
-            <span class="tt-w" style="width: auto; max-width: 277px;">
35
-                <span class="tt-fh-tc" style="width: 202px;">
36
-                    <span class="tt-bb" style="width: 202px;">
37
-                    </span>
38
-                    <span class="shadow">
39
-                        <a class="thumb" href="http://url.of.result/2nd.part.of.url"
40
-                            title="Behoimi BE Animation Test by test-0, Jan 4,
41
-                            2010 in Digital Art &gt; Animation"> <i></i>
42
-                            <img width="200" height="200" alt="Test"
43
-                                src="http://url.of.thumbnail" data-src="http://th08.deviantart.net/test.jpg">
44
-                        </a>
45
-                    </span>
46
-                    <!-- ^TTT -->
47
-                </span>
48
-                <span class="details">
49
-                    <a href="http://test-0.deviantart.com/art/Test" class="t"
50
-                        title="Behoimi BE Animation Test by test-0, Jan 4, 2010">
51
-                        <span class="tt-fh-oe">Title of image</span> </a>
52
-                    <small>
53
-                    <span class="category">
54
-                        <span class="age">
55
-                            5 years ago
56
-                        </span>
57
-                        in <a title="Behoimi BE Animation Test by test-0, Jan 4, 2010"
58
-                            href="http://www.deviantart.com/browse/all/digitalart/animation/">Animation</a>
59
-                    </span>
60
-                    <div class="commentcount">
61
-                        <a href="http://test-0.deviantart.com/art/Test#comments">
62
-                        <span class="iconcommentsstats"></span>9 Comments</a>
63
-                    </div>
64
-                    <a class="mlt-link" href="http://www.deviantart.com/morelikethis/149167425">
65
-                    <span class="mlt-icon"></span> <span class="mlt-text">More Like This</span> </a>
66
-                </span>
67
-                </small> <!-- TTT$ -->
68
-            </span>
31
+        <div id="page-1-results" class="page-results results-page-thumb torpedo-container">
32
+        <span class="thumb wide" href="http://amai911.deviantart.com/art/Horse-195212845"
33
+        data-super-full-width="900" data-super-full-height="600">
34
+            <a class="torpedo-thumb-link" href="https://url.of.image">
35
+                <img data-sigil="torpedo-img" src="https://url.of.thumbnail" />
36
+            </a>
37
+        <span class="info"><span class="title-wrap"><span class="title">Title of image</span></span>
69 38
         </div>
70 39
         """
71 40
         response = mock.Mock(text=html)
@@ -73,7 +42,7 @@ class TestDeviantartEngine(SearxTestCase):
73 42
         self.assertEqual(type(results), list)
74 43
         self.assertEqual(len(results), 1)
75 44
         self.assertEqual(results[0]['title'], 'Title of image')
76
-        self.assertEqual(results[0]['url'], 'http://url.of.result/2nd.part.of.url')
45
+        self.assertEqual(results[0]['url'], 'https://url.of.image')
77 46
         self.assertNotIn('content', results[0])
78 47
         self.assertEqual(results[0]['thumbnail_src'], 'https://url.of.thumbnail')
79 48