瀏覽代碼

Remove content field from ArchWiki results; reformat code in archlinux.py

The content field in Arch Wiki search results is of no real use: more often
than not it contains no usable information and includes so many markup
tags that the text becomes unreadable. It is safe to remove it.
Kirill Isakov 9 年之前
父節點
當前提交
8b7dc2acb9
共有 2 個文件被更改,包括 13 次插入和 21 次删除
  1. 7
    10
      searx/engines/archlinux.py
  2. 6
    11
      tests/unit/engines/test_archlinux.py

+ 7
- 10
searx/engines/archlinux.py 查看文件

@@ -3,12 +3,12 @@
3 3
 """
4 4
  Arch Linux Wiki
5 5
 
6
- @website	https://wiki.archlinux.org
7
- @provide-api	no (Mediawiki provides API, but Arch Wiki blocks access to it
8
- @using-api	no
9
- @results	HTML
10
- @stable	no (HTML can change)
11
- @parse		url, title, content
6
+ @website      https://wiki.archlinux.org
7
+ @provide-api  no (Mediawiki provides API, but Arch Wiki blocks access to it
8
+ @using-api    no
9
+ @results      HTML
10
+ @stable       no (HTML can change)
11
+ @parse        url, title
12 12
 """
13 13
 
14 14
 from urlparse import urljoin
@@ -26,7 +26,6 @@ base_url = 'https://wiki.archlinux.org'
26 26
 # xpath queries
27 27
 xpath_results = '//ul[@class="mw-search-results"]/li'
28 28
 xpath_link = './/div[@class="mw-search-result-heading"]/a'
29
-xpath_content = './/div[@class="searchresult"]'
30 29
 
31 30
 
32 31
 # cut 'en' from 'en_US', 'de' from 'de_CH', and so on
@@ -135,10 +134,8 @@ def response(resp):
135 134
         link = result.xpath(xpath_link)[0]
136 135
         href = urljoin(base_url, link.attrib.get('href'))
137 136
         title = escape(extract_text(link))
138
-        content = escape(extract_text(result.xpath(xpath_content)))
139 137
 
140 138
         results.append({'url': href,
141
-                        'title': title,
142
-                        'content': content})
139
+                        'title': title})
143 140
 
144 141
     return results

+ 6
- 11
tests/unit/engines/test_archlinux.py 查看文件

@@ -18,7 +18,7 @@ class TestArchLinuxEngine(SearxTestCase):
18 18
     def test_request(self):
19 19
         query = 'test_query'
20 20
         dic = defaultdict(dict)
21
-        dic['pageno'] = 0
21
+        dic['pageno'] = 1
22 22
         dic['language'] = 'en_US'
23 23
         params = archlinux.request(query, dic)
24 24
         self.assertTrue('url' in params)
@@ -31,10 +31,8 @@ class TestArchLinuxEngine(SearxTestCase):
31 31
             self.assertTrue(domain in params['url'])
32 32
 
33 33
     def test_response(self):
34
-        response = mock.Mock(text='<html></html>')
35
-        response.search_params = {
36
-            'language': 'en_US'
37
-        }
34
+        response = mock.Mock(text='<html></html>',
35
+                             search_params={'language': 'en_US'})
38 36
         self.assertEqual(archlinux.response(response), [])
39 37
 
40 38
         html = """
@@ -79,18 +77,15 @@ class TestArchLinuxEngine(SearxTestCase):
79 77
         expected = [
80 78
             {
81 79
                 'title': 'ATI',
82
-                'url': 'https://wiki.archlinux.org/index.php/ATI',
83
-                'content': 'Lorem ipsum dolor sit amet'
80
+                'url': 'https://wiki.archlinux.org/index.php/ATI'
84 81
             },
85 82
             {
86 83
                 'title': 'Frequently asked questions',
87
-                'url': 'https://wiki.archlinux.org/index.php/Frequently_asked_questions',
88
-                'content': 'CPUs with AMDs instruction set "AMD64"'
84
+                'url': 'https://wiki.archlinux.org/index.php/Frequently_asked_questions'
89 85
             },
90 86
             {
91 87
                 'title': 'CPU frequency scaling',
92
-                'url': 'https://wiki.archlinux.org/index.php/CPU_frequency_scaling',
93
-                'content': 'ondemand for AMD and older Intel CPU'
88
+                'url': 'https://wiki.archlinux.org/index.php/CPU_frequency_scaling'
94 89
             }
95 90
         ]
96 91