Procházet zdrojové kódy

Remove content field from ArchWiki results; reformat code in archlinux.py

The content field in Arch Wiki search results is of no real use: more often
than not it contains no usable information and includes too many markup
tags, which make the text unreadable. It is safe to remove it.
Kirill Isakov před 9 roky
rodič
revize
8b7dc2acb9
2 změnil soubory, kde provedl 13 přidání a 21 odebrání
  1. 7
    10
      searx/engines/archlinux.py
  2. 6
    11
      tests/unit/engines/test_archlinux.py

+ 7
- 10
searx/engines/archlinux.py Zobrazit soubor

3
 """
3
 """
4
  Arch Linux Wiki
4
  Arch Linux Wiki
5
 
5
 
6
- @website	https://wiki.archlinux.org
7
- @provide-api	no (Mediawiki provides API, but Arch Wiki blocks access to it
8
- @using-api	no
9
- @results	HTML
10
- @stable	no (HTML can change)
11
- @parse		url, title, content
6
+ @website      https://wiki.archlinux.org
7
+ @provide-api  no (Mediawiki provides API, but Arch Wiki blocks access to it
8
+ @using-api    no
9
+ @results      HTML
10
+ @stable       no (HTML can change)
11
+ @parse        url, title
12
 """
12
 """
13
 
13
 
14
 from urlparse import urljoin
14
 from urlparse import urljoin
26
 # xpath queries
26
 # xpath queries
27
 xpath_results = '//ul[@class="mw-search-results"]/li'
27
 xpath_results = '//ul[@class="mw-search-results"]/li'
28
 xpath_link = './/div[@class="mw-search-result-heading"]/a'
28
 xpath_link = './/div[@class="mw-search-result-heading"]/a'
29
-xpath_content = './/div[@class="searchresult"]'
30
 
29
 
31
 
30
 
32
 # cut 'en' from 'en_US', 'de' from 'de_CH', and so on
31
 # cut 'en' from 'en_US', 'de' from 'de_CH', and so on
135
         link = result.xpath(xpath_link)[0]
134
         link = result.xpath(xpath_link)[0]
136
         href = urljoin(base_url, link.attrib.get('href'))
135
         href = urljoin(base_url, link.attrib.get('href'))
137
         title = escape(extract_text(link))
136
         title = escape(extract_text(link))
138
-        content = escape(extract_text(result.xpath(xpath_content)))
139
 
137
 
140
         results.append({'url': href,
138
         results.append({'url': href,
141
-                        'title': title,
142
-                        'content': content})
139
+                        'title': title})
143
 
140
 
144
     return results
141
     return results

+ 6
- 11
tests/unit/engines/test_archlinux.py Zobrazit soubor

18
     def test_request(self):
18
     def test_request(self):
19
         query = 'test_query'
19
         query = 'test_query'
20
         dic = defaultdict(dict)
20
         dic = defaultdict(dict)
21
-        dic['pageno'] = 0
21
+        dic['pageno'] = 1
22
         dic['language'] = 'en_US'
22
         dic['language'] = 'en_US'
23
         params = archlinux.request(query, dic)
23
         params = archlinux.request(query, dic)
24
         self.assertTrue('url' in params)
24
         self.assertTrue('url' in params)
31
             self.assertTrue(domain in params['url'])
31
             self.assertTrue(domain in params['url'])
32
 
32
 
33
     def test_response(self):
33
     def test_response(self):
34
-        response = mock.Mock(text='<html></html>')
35
-        response.search_params = {
36
-            'language': 'en_US'
37
-        }
34
+        response = mock.Mock(text='<html></html>',
35
+                             search_params={'language': 'en_US'})
38
         self.assertEqual(archlinux.response(response), [])
36
         self.assertEqual(archlinux.response(response), [])
39
 
37
 
40
         html = """
38
         html = """
79
         expected = [
77
         expected = [
80
             {
78
             {
81
                 'title': 'ATI',
79
                 'title': 'ATI',
82
-                'url': 'https://wiki.archlinux.org/index.php/ATI',
83
-                'content': 'Lorem ipsum dolor sit amet'
80
+                'url': 'https://wiki.archlinux.org/index.php/ATI'
84
             },
81
             },
85
             {
82
             {
86
                 'title': 'Frequently asked questions',
83
                 'title': 'Frequently asked questions',
87
-                'url': 'https://wiki.archlinux.org/index.php/Frequently_asked_questions',
88
-                'content': 'CPUs with AMDs instruction set "AMD64"'
84
+                'url': 'https://wiki.archlinux.org/index.php/Frequently_asked_questions'
89
             },
85
             },
90
             {
86
             {
91
                 'title': 'CPU frequency scaling',
87
                 'title': 'CPU frequency scaling',
92
-                'url': 'https://wiki.archlinux.org/index.php/CPU_frequency_scaling',
93
-                'content': 'ondemand for AMD and older Intel CPU'
88
+                'url': 'https://wiki.archlinux.org/index.php/CPU_frequency_scaling'
94
             }
89
             }
95
         ]
90
         ]
96
 
91