浏览代码

500px unit test

Cqoicebordel 10 年前
父节点
当前提交
8cf2ee5721
共有 3 个文件被更改，包括 90 次插入和 5 次删除
  1. 6
    5
      searx/engines/www500px.py
  2. 83
    0
      searx/tests/engines/test_www500px.py
  3. 1
    0
      searx/tests/test_engines.py

+ 6
- 5
searx/engines/www500px.py 查看文件

@@ -15,6 +15,7 @@ from urllib import urlencode
15 15
 from urlparse import urljoin
16 16
 from lxml import html
17 17
 import re
18
+from searx.engines.xpath import extract_text
18 19
 
19 20
 # engine dependent config
20 21
 categories = ['images']
@@ -22,7 +23,7 @@ paging = True
22 23
 
23 24
 # search-url
24 25
 base_url = 'https://500px.com'
25
-search_url = base_url+'/search?search?page={pageno}&type=photos&{query}'
26
+search_url = base_url + '/search?search?page={pageno}&type=photos&{query}'
26 27
 
27 28
 
28 29
 # do search-request
@@ -44,11 +45,11 @@ def response(resp):
44 45
     for result in dom.xpath('//div[@class="photo"]'):
45 46
         link = result.xpath('.//a')[0]
46 47
         url = urljoin(base_url, link.attrib.get('href'))
47
-        title = result.xpath('.//div[@class="title"]//text()')[0]
48
-        thumbnail_src = link.xpath('.//img')[0].attrib['src']
48
+        title = extract_text(result.xpath('.//div[@class="title"]'))
49
+        thumbnail_src = link.xpath('.//img')[0].attrib.get('src')
49 50
         # To have a bigger thumbnail, uncomment the next line
50
-        #thumbnail_src = regex.sub('4.jpg', thumbnail_src)
51
-        content = result.xpath('.//div[@class="info"]//text()')[0]
51
+        # thumbnail_src = regex.sub('4.jpg', thumbnail_src)
52
+        content = extract_text(result.xpath('.//div[@class="info"]'))
52 53
         img_src = regex.sub('2048.jpg', thumbnail_src)
53 54
 
54 55
         # append result

+ 83
- 0
searx/tests/engines/test_www500px.py 查看文件

@@ -0,0 +1,83 @@
1
+# -*- coding: utf-8 -*-
2
+from collections import defaultdict
3
+import mock
4
+from searx.engines import www500px
5
+from searx.testing import SearxTestCase
6
+
7
+
8
+class TestWww500pxImagesEngine(SearxTestCase):
9
+
10
+    def test_request(self):
11
+        query = 'test_query'
12
+        dicto = defaultdict(dict)
13
+        dicto['pageno'] = 1
14
+        params = www500px.request(query, dicto)
15
+        self.assertTrue('url' in params)
16
+        self.assertTrue(query in params['url'])
17
+        self.assertTrue('500px.com' in params['url'])
18
+
19
+    def test_response(self):
20
+        self.assertRaises(AttributeError, www500px.response, None)
21
+        self.assertRaises(AttributeError, www500px.response, [])
22
+        self.assertRaises(AttributeError, www500px.response, '')
23
+        self.assertRaises(AttributeError, www500px.response, '[]')
24
+
25
+        response = mock.Mock(text='<html></html>')
26
+        self.assertEqual(www500px.response(response), [])
27
+
28
+        html = """
29
+        <div class="photo">
30
+            <a href="/this.should.be.the.url" data-ga-category="Photo Thumbnail" data-ga-action="Title">
31
+                <img src="https://image.url/3.jpg?v=0" />
32
+            </a>
33
+            <div class="details">
34
+                <div class="inside">
35
+                    <div class="title">
36
+                        <a href="/photo/64312705/branch-out-by-oliver-turpin?feature=">
37
+                            This is the title
38
+                        </a>
39
+                    </div>
40
+                    <div class="info">
41
+                        <a href="/ChronicleUK" data-ga-action="Image" data-ga-category="Photo Thumbnail">
42
+                            This is the content
43
+                        </a>
44
+                    </div>
45
+                    <div class="rating">44.8</div>
46
+                </div>
47
+            </div>
48
+        </div>
49
+        """
50
+        response = mock.Mock(text=html)
51
+        results = www500px.response(response)
52
+        self.assertEqual(type(results), list)
53
+        self.assertEqual(len(results), 1)
54
+        self.assertEqual(results[0]['title'], 'This is the title')
55
+        self.assertEqual(results[0]['url'], 'https://500px.com/this.should.be.the.url')
56
+        self.assertEqual(results[0]['content'], 'This is the content')
57
+        self.assertEqual(results[0]['thumbnail_src'], 'https://image.url/3.jpg?v=0')
58
+        self.assertEqual(results[0]['img_src'], 'https://image.url/2048.jpg')
59
+
60
+        html = """
61
+        <a href="/this.should.be.the.url" data-ga-category="Photo Thumbnail" data-ga-action="Title">
62
+            <img src="https://image.url/3.jpg?v=0" />
63
+        </a>
64
+        <div class="details">
65
+            <div class="inside">
66
+                <div class="title">
67
+                    <a href="/photo/64312705/branch-out-by-oliver-turpin?feature=">
68
+                        This is the title
69
+                    </a>
70
+                </div>
71
+                <div class="info">
72
+                    <a href="/ChronicleUK" data-ga-action="Image" data-ga-category="Photo Thumbnail">
73
+                        Oliver Turpin
74
+                    </a>
75
+                </div>
76
+                <div class="rating">44.8</div>
77
+            </div>
78
+        </div>
79
+        """
80
+        response = mock.Mock(text=html)
81
+        results = www500px.response(response)
82
+        self.assertEqual(type(results), list)
83
+        self.assertEqual(len(results), 0)

+ 1
- 0
searx/tests/test_engines.py 查看文件

@@ -18,4 +18,5 @@ from searx.tests.engines.test_searchcode_doc import *  # noqa
18 18
 from searx.tests.engines.test_soundcloud import *  # noqa
19 19
 from searx.tests.engines.test_stackoverflow import *  # noqa
20 20
 from searx.tests.engines.test_vimeo import *  # noqa
21
+from searx.tests.engines.test_www500px import *  # noqa
21 22
 from searx.tests.engines.test_youtube import *  # noqa