소스 검색

[enh] implement image support for swisscows engine

Thomas Pointhuber 9 년 전
부모
커밋
2d81079384
2개의 변경된 파일, 55개의 추가 그리고 13개의 삭제
  1. 30
    8
      searx/engines/swisscows.py
  2. 25
    5
      searx/tests/engines/test_swisscows.py

+ 30
- 8
searx/engines/swisscows.py 파일 보기

@@ -1,5 +1,5 @@
1 1
 """
2
- Swisscows (Web)
2
+ Swisscows (Web, Images)
3 3
 
4 4
  @website     https://swisscows.ch
5 5
  @provide-api no
@@ -15,7 +15,7 @@ from urllib import urlencode, unquote
15 15
 import re
16 16
 
17 17
 # engine dependent config
18
-categories = ['general']
18
+categories = ['general', 'images']
19 19
 paging = True
20 20
 language_support = True
21 21
 
@@ -24,7 +24,7 @@ base_url = 'https://swisscows.ch/'
24 24
 search_string = '?{query}&page={page}'
25 25
 
26 26
 # regex
27
-regex_json = re.compile('initialData: {"Request":(.|\n)*}\]},\s*environment')
27
+regex_json = re.compile('initialData: {"Request":(.|\n)*},\s*environment')
28 28
 regex_json_remove_start = re.compile('^initialData:\s*')
29 29
 regex_json_remove_end = re.compile(',\s*environment$')
30 30
 regex_img_url_remove_start = re.compile('^https?://i\.swisscows\.ch/\?link=')
@@ -45,6 +45,10 @@ def request(query, params):
45 45
                          'region': region}),
46 46
         page=params['pageno'])
47 47
 
48
+    # image search query is something like 'image?{query}&page={page}'
49
+    if params['category'] == 'images':
50
+        search_path = 'image' + search_path
51
+
48 52
     params['url'] = base_url + search_path
49 53
 
50 54
     return params
@@ -63,12 +67,30 @@ def response(resp):
63 67
     json_raw = regex_json_remove_end.sub('', regex_json_remove_start.sub('', json_regex.group()))
64 68
     json = loads(json_raw)
65 69
 
66
-    # parse normal results
70
+    # parse results
67 71
     for result in json['Results'].get('items', []):
68
-        # append result
69
-        results.append({'url': result['Url'].replace(u'\uE000', '').replace(u'\uE001', ''),
70
-                        'title': result['Title'].replace(u'\uE000', '').replace(u'\uE001', ''),
71
-                        'content': result['Description'].replace(u'\uE000', '').replace(u'\uE001', '')})
72
+        result_title = result['Title'].replace(u'\uE000', '').replace(u'\uE001', '')
73
+
74
+        # parse image results
75
+        if result.get('ContentType', '').startswith('image'):
76
+            img_url = unquote(regex_img_url_remove_start.sub('', result['Url']))
77
+
78
+            # append result
79
+            results.append({'url': result['SourceUrl'],
80
+                            'title': result['Title'],
81
+                            'content': '',
82
+                            'img_src': img_url,
83
+                            'template': 'images.html'})
84
+
85
+        # parse general results
86
+        else:
87
+            result_url = result['Url'].replace(u'\uE000', '').replace(u'\uE001', '')
88
+            result_content = result['Description'].replace(u'\uE000', '').replace(u'\uE001', '')
89
+
90
+            # append result
91
+            results.append({'url': result_url,
92
+                            'title': result_title,
93
+                            'content': result_content})
72 94
 
73 95
     # parse images
74 96
     for result in json.get('Images', []):

+ 25
- 5
searx/tests/engines/test_swisscows.py 파일 보기

@@ -51,7 +51,23 @@ class TestSwisscowsEngine(SearxTestCase):
51 51
                             "Description":"\uE000This should\uE001 be the content.",
52 52
                             "Url":"http://this.should.be.the.link/",
53 53
                             "DisplayUrl":"www.\uE000this.should.be.the\uE001.link",
54
-                            "Id":"782ef287-e439-451c-b380-6ebc14ba033d"}
54
+                            "Id":"782ef287-e439-451c-b380-6ebc14ba033d"},
55
+                            {"Title":"Datei:This should1.svg",
56
+                            "Url":"https://i.swisscows.ch/?link=http%3a%2f%2fts2.mm.This/should1.png",
57
+                            "SourceUrl":"http://de.wikipedia.org/wiki/Datei:This should1.svg",
58
+                            "DisplayUrl":"de.wikipedia.org/wiki/Datei:This should1.svg",
59
+                            "Width":950,
60
+                            "Height":534,
61
+                            "FileSize":92100,
62
+                            "ContentType":"image/jpeg",
63
+                            "Thumbnail":{
64
+                                "Url":"https://i.swisscows.ch/?link=http%3a%2f%2fts2.mm.This/should1.png",
65
+                                "ContentType":"image/jpeg",
66
+                                "Width":300,
67
+                                "Height":168,
68
+                                "FileSize":9134},
69
+                                "Id":"6a97a542-8f65-425f-b7f6-1178c3aba7be"
70
+                            }
55 71
                         ],"TotalCount":55300,
56 72
                         "Query":"This should "
57 73
                     },
@@ -94,11 +110,15 @@ class TestSwisscowsEngine(SearxTestCase):
94 110
         response = mock.Mock(content=html)
95 111
         results = swisscows.response(response)
96 112
         self.assertEqual(type(results), list)
97
-        self.assertEqual(len(results), 2)
113
+        self.assertEqual(len(results), 3)
98 114
         self.assertEqual(results[0]['title'], 'This should be the title')
99 115
         self.assertEqual(results[0]['url'], 'http://this.should.be.the.link/')
100 116
         self.assertEqual(results[0]['content'], 'This should be the content.')
101
-        self.assertEqual(results[1]['title'], 'Datei:This should.svg')
102
-        self.assertEqual(results[1]['url'], 'http://de.wikipedia.org/wiki/Datei:This should.svg')
103
-        self.assertEqual(results[1]['img_src'], 'http://ts2.mm.This/should.png')
117
+        self.assertEqual(results[1]['title'], 'Datei:This should1.svg')
118
+        self.assertEqual(results[1]['url'], 'http://de.wikipedia.org/wiki/Datei:This should1.svg')
119
+        self.assertEqual(results[1]['img_src'], 'http://ts2.mm.This/should1.png')
104 120
         self.assertEqual(results[1]['template'], 'images.html')
121
+        self.assertEqual(results[2]['title'], 'Datei:This should.svg')
122
+        self.assertEqual(results[2]['url'], 'http://de.wikipedia.org/wiki/Datei:This should.svg')
123
+        self.assertEqual(results[2]['img_src'], 'http://ts2.mm.This/should.png')
124
+        self.assertEqual(results[2]['template'], 'images.html')