Selaa lähdekoodia

[enh] implement image support for swisscows engine

Thomas Pointhuber 10 vuotta sitten
vanhempi
commit
2d81079384
2 muutettua tiedostoa jossa 55 lisäystä ja 13 poistoa
  1. 30
    8
      searx/engines/swisscows.py
  2. 25
    5
      searx/tests/engines/test_swisscows.py

+ 30
- 8
searx/engines/swisscows.py Näytä tiedosto

1
 """
1
 """
2
- Swisscows (Web)
2
+ Swisscows (Web, Images)
3
 
3
 
4
  @website     https://swisscows.ch
4
  @website     https://swisscows.ch
5
  @provide-api no
5
  @provide-api no
15
 import re
15
 import re
16
 
16
 
17
 # engine dependent config
17
 # engine dependent config
18
-categories = ['general']
18
+categories = ['general', 'images']
19
 paging = True
19
 paging = True
20
 language_support = True
20
 language_support = True
21
 
21
 
24
 search_string = '?{query}&page={page}'
24
 search_string = '?{query}&page={page}'
25
 
25
 
26
 # regex
26
 # regex
27
-regex_json = re.compile('initialData: {"Request":(.|\n)*}\]},\s*environment')
27
+regex_json = re.compile('initialData: {"Request":(.|\n)*},\s*environment')
28
 regex_json_remove_start = re.compile('^initialData:\s*')
28
 regex_json_remove_start = re.compile('^initialData:\s*')
29
 regex_json_remove_end = re.compile(',\s*environment$')
29
 regex_json_remove_end = re.compile(',\s*environment$')
30
 regex_img_url_remove_start = re.compile('^https?://i\.swisscows\.ch/\?link=')
30
 regex_img_url_remove_start = re.compile('^https?://i\.swisscows\.ch/\?link=')
45
                          'region': region}),
45
                          'region': region}),
46
         page=params['pageno'])
46
         page=params['pageno'])
47
 
47
 
48
+    # image search query is something like 'image?{query}&page={page}'
49
+    if params['category'] == 'images':
50
+        search_path = 'image' + search_path
51
+
48
     params['url'] = base_url + search_path
52
     params['url'] = base_url + search_path
49
 
53
 
50
     return params
54
     return params
63
     json_raw = regex_json_remove_end.sub('', regex_json_remove_start.sub('', json_regex.group()))
67
     json_raw = regex_json_remove_end.sub('', regex_json_remove_start.sub('', json_regex.group()))
64
     json = loads(json_raw)
68
     json = loads(json_raw)
65
 
69
 
66
-    # parse normal results
70
+    # parse results
67
     for result in json['Results'].get('items', []):
71
     for result in json['Results'].get('items', []):
68
-        # append result
69
-        results.append({'url': result['Url'].replace(u'\uE000', '').replace(u'\uE001', ''),
70
-                        'title': result['Title'].replace(u'\uE000', '').replace(u'\uE001', ''),
71
-                        'content': result['Description'].replace(u'\uE000', '').replace(u'\uE001', '')})
72
+        result_title = result['Title'].replace(u'\uE000', '').replace(u'\uE001', '')
73
+
74
+        # parse image results
75
+        if result.get('ContentType', '').startswith('image'):
76
+            img_url = unquote(regex_img_url_remove_start.sub('', result['Url']))
77
+
78
+            # append result
79
+            results.append({'url': result['SourceUrl'],
80
+                            'title': result['Title'],
81
+                            'content': '',
82
+                            'img_src': img_url,
83
+                            'template': 'images.html'})
84
+
85
+        # parse general results
86
+        else:
87
+            result_url = result['Url'].replace(u'\uE000', '').replace(u'\uE001', '')
88
+            result_content = result['Description'].replace(u'\uE000', '').replace(u'\uE001', '')
89
+
90
+            # append result
91
+            results.append({'url': result_url,
92
+                            'title': result_title,
93
+                            'content': result_content})
72
 
94
 
73
     # parse images
95
     # parse images
74
     for result in json.get('Images', []):
96
     for result in json.get('Images', []):

+ 25
- 5
searx/tests/engines/test_swisscows.py Näytä tiedosto

51
                             "Description":"\uE000This should\uE001 be the content.",
51
                             "Description":"\uE000This should\uE001 be the content.",
52
                             "Url":"http://this.should.be.the.link/",
52
                             "Url":"http://this.should.be.the.link/",
53
                             "DisplayUrl":"www.\uE000this.should.be.the\uE001.link",
53
                             "DisplayUrl":"www.\uE000this.should.be.the\uE001.link",
54
-                            "Id":"782ef287-e439-451c-b380-6ebc14ba033d"}
54
+                            "Id":"782ef287-e439-451c-b380-6ebc14ba033d"},
55
+                            {"Title":"Datei:This should1.svg",
56
+                            "Url":"https://i.swisscows.ch/?link=http%3a%2f%2fts2.mm.This/should1.png",
57
+                            "SourceUrl":"http://de.wikipedia.org/wiki/Datei:This should1.svg",
58
+                            "DisplayUrl":"de.wikipedia.org/wiki/Datei:This should1.svg",
59
+                            "Width":950,
60
+                            "Height":534,
61
+                            "FileSize":92100,
62
+                            "ContentType":"image/jpeg",
63
+                            "Thumbnail":{
64
+                                "Url":"https://i.swisscows.ch/?link=http%3a%2f%2fts2.mm.This/should1.png",
65
+                                "ContentType":"image/jpeg",
66
+                                "Width":300,
67
+                                "Height":168,
68
+                                "FileSize":9134},
69
+                                "Id":"6a97a542-8f65-425f-b7f6-1178c3aba7be"
70
+                            }
55
                         ],"TotalCount":55300,
71
                         ],"TotalCount":55300,
56
                         "Query":"This should "
72
                         "Query":"This should "
57
                     },
73
                     },
94
         response = mock.Mock(content=html)
110
         response = mock.Mock(content=html)
95
         results = swisscows.response(response)
111
         results = swisscows.response(response)
96
         self.assertEqual(type(results), list)
112
         self.assertEqual(type(results), list)
97
-        self.assertEqual(len(results), 2)
113
+        self.assertEqual(len(results), 3)
98
         self.assertEqual(results[0]['title'], 'This should be the title')
114
         self.assertEqual(results[0]['title'], 'This should be the title')
99
         self.assertEqual(results[0]['url'], 'http://this.should.be.the.link/')
115
         self.assertEqual(results[0]['url'], 'http://this.should.be.the.link/')
100
         self.assertEqual(results[0]['content'], 'This should be the content.')
116
         self.assertEqual(results[0]['content'], 'This should be the content.')
101
-        self.assertEqual(results[1]['title'], 'Datei:This should.svg')
102
-        self.assertEqual(results[1]['url'], 'http://de.wikipedia.org/wiki/Datei:This should.svg')
103
-        self.assertEqual(results[1]['img_src'], 'http://ts2.mm.This/should.png')
117
+        self.assertEqual(results[1]['title'], 'Datei:This should1.svg')
118
+        self.assertEqual(results[1]['url'], 'http://de.wikipedia.org/wiki/Datei:This should1.svg')
119
+        self.assertEqual(results[1]['img_src'], 'http://ts2.mm.This/should1.png')
104
         self.assertEqual(results[1]['template'], 'images.html')
120
         self.assertEqual(results[1]['template'], 'images.html')
121
+        self.assertEqual(results[2]['title'], 'Datei:This should.svg')
122
+        self.assertEqual(results[2]['url'], 'http://de.wikipedia.org/wiki/Datei:This should.svg')
123
+        self.assertEqual(results[2]['img_src'], 'http://ts2.mm.This/should.png')
124
+        self.assertEqual(results[2]['template'], 'images.html')