
Merge pull request #1260 from MarcAbonce/engine-fixes

[fix] Engine fixes
Adam Tauber 6 years ago
parent commit e5def5b019

searx/engines/google_news.py (+2 -2)

@@ -68,8 +68,8 @@
     for result in dom.xpath('//div[@class="g"]|//div[@class="g _cy"]'):
         try:
             r = {
-                'url': result.xpath('.//a[@class="l _PMs"]')[0].attrib.get("href"),
-                'title': ''.join(result.xpath('.//a[@class="l _PMs"]//text()')),
+                'url': result.xpath('.//a[@class="l lLrAF"]')[0].attrib.get("href"),
+                'title': ''.join(result.xpath('.//a[@class="l lLrAF"]//text()')),
                 'content': ''.join(result.xpath('.//div[@class="st"]//text()')),
             }
         except:
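Google renamed the obfuscated class on result links from "l _PMs" to "l lLrAF", so only the two class selectors change; the surrounding parse loop is untouched. A minimal sketch of how the updated XPath behaves, run against markup adapted from the unit test further down in this diff (not live Google output):

from lxml import html

# Markup adapted from tests/unit/engines/test_google_news.py
snippet = '''
<div class="g">
    <h3 class="r _gJs">
        <a class="l lLrAF" href="https://example.com/">Example title</a>
    </h3>
    <div class="st">Example content</div>
</div>
'''

dom = html.fromstring(snippet)
for result in dom.xpath('//div[@class="g"]|//div[@class="g _cy"]'):
    # @class comparisons in XPath are exact string matches, which is why the
    # rename made the old selector return nothing (the bare except then
    # swallowed the resulting IndexError) rather than raising visibly
    link = result.xpath('.//a[@class="l lLrAF"]')[0]
    print(link.attrib.get('href'))                                   # https://example.com/
    print(''.join(result.xpath('.//a[@class="l lLrAF"]//text()')))   # Example title
    print(''.join(result.xpath('.//div[@class="st"]//text()')))      # Example content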

searx/engines/wikidata.py (+6 -7)

@@ -27,7 +27,7 @@
 # urls
 wikidata_host = 'https://www.wikidata.org'
 url_search = wikidata_host \
-    + '/wiki/Special:ItemDisambiguation?{query}'
+    + '/w/index.php?{query}'
 
 wikidata_api = wikidata_host + '/w/api.php'
 url_detail = wikidata_api\
@@ -40,7 +40,7 @@
 url_image = 'https://commons.wikimedia.org/wiki/Special:FilePath/{filename}?width=500&height=400'
 
 # xpaths
-wikidata_ids_xpath = '//div/ul[@class="wikibase-disambiguation"]/li/a/@title'
+wikidata_ids_xpath = '//ul[@class="mw-search-results"]/li//a/@href'
 title_xpath = '//*[contains(@class,"wikibase-title-label")]'
 description_xpath = '//div[contains(@class,"wikibase-entitytermsview-heading-description")]'
 property_xpath = '//div[@id="{propertyid}"]'
@@ -57,22 +57,21 @@
 
 
 def request(query, params):
-    language = match_language(params['language'], supported_languages).split('-')[0]
-
     params['url'] = url_search.format(
-        query=urlencode({'label': query, 'language': language}))
+        query=urlencode({'search': query}))
     return params
 
 
 def response(resp):
     results = []
     html = fromstring(resp.text)
-    wikidata_ids = html.xpath(wikidata_ids_xpath)
+    search_results = html.xpath(wikidata_ids_xpath)
 
     language = match_language(resp.search_params['language'], supported_languages).split('-')[0]
 
     # TODO: make requests asynchronous to avoid timeout when result_count > 1
-    for wikidata_id in wikidata_ids[:result_count]:
+    for search_result in search_results[:result_count]:
+        wikidata_id = search_result.split('/')[-1]
         url = url_detail.format(query=urlencode({'page': wikidata_id, 'uselang': language}))
         htmlresponse = get(url)
         jsonresponse = loads(htmlresponse.text)
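The Special:ItemDisambiguation page this engine scraped no longer exists on Wikidata, so the search now goes through the plain MediaWiki search endpoint (/w/index.php?search=...) and entity IDs are recovered from the result links' href values instead of @title attributes. A rough sketch of the new flow, with a hand-written stub standing in for the mw-search-results markup (the stub's exact structure is an assumption, not captured output):

from urllib.parse import urlencode   # the engine itself imports this via searx's url_utils shim
from lxml.html import fromstring

url_search = 'https://www.wikidata.org' + '/w/index.php?{query}'
wikidata_ids_xpath = '//ul[@class="mw-search-results"]/li//a/@href'

# request(): the query is now a single MediaWiki 'search' parameter,
# so the per-request language lookup is no longer needed
print(url_search.format(query=urlencode({'search': 'Douglas Adams'})))
# https://www.wikidata.org/w/index.php?search=Douglas+Adams

# response(): hrefs look like /wiki/Q42, so the entity ID is the last path segment
stub = '''
<ul class="mw-search-results">
    <li><a href="/wiki/Q42">Douglas Adams</a></li>
    <li><a href="/wiki/Q350">Cambridge</a></li>
</ul>
'''
for href in fromstring(stub).xpath(wikidata_ids_xpath):
    print(href.split('/')[-1])   # Q42, then Q350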

searx/engines/xpath.py (+1 -1)

@@ -53,7 +53,7 @@
     if url.startswith('//'):
         # add http or https to this kind of url //example.com/
         parsed_search_url = urlparse(search_url)
-        url = u'{0}:{1}'.format(parsed_search_url.scheme, url)
+        url = u'{0}:{1}'.format(parsed_search_url.scheme or 'http', url)
     elif url.startswith('/'):
         # fix relative url to the search engine
         url = urljoin(search_url, url)
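The scheme fallback matters when an engine's search_url is itself scheme-less: urlparse('//host/path').scheme is an empty string, so the old code produced a malformed '://example.com/'. A quick illustration with made-up URLs:

from urllib.parse import urlparse

def fix_protocol_relative(url, search_url):
    # mirrors the patched branch for result urls starting with '//'
    parsed_search_url = urlparse(search_url)
    return u'{0}:{1}'.format(parsed_search_url.scheme or 'http', url)

print(fix_protocol_relative('//example.com/', 'https://searx.example/?q={query}'))
# https://example.com/
print(fix_protocol_relative('//example.com/', '//searx.example/?q={query}'))
# http://example.com/  (was '://example.com/' before the fix)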

searx/settings.yml (+1 -0)

@@ -174,6 +174,7 @@
   - name : wikidata
     engine : wikidata
     shortcut : wd
+    timeout : 3.0
     weight : 2
 
   - name : duckduckgo

tests/unit/engines/test_google_news.py (+2 -2)

@@ -42,7 +42,7 @@
                 <div class="ts _JGs _JHs _tJs _KGs _jHs">
                     <div class="_hJs">
                         <h3 class="r _gJs">
-                            <a class="l _PMs" href="https://example.com/" onmousedown="return rwt(this,'','','','11','AFQjCNEyehpzD5cJK1KUfXBx9RmsbqqG9g','','0ahUKEwjB58OR54HWAhWnKJoKHSQhAMY4ChCpAggiKAAwAA','','',event)">Example title</a>
+                            <a class="l lLrAF" href="https://example.com/" onmousedown="return rwt(this,'','','','11','AFQjCNEyehpzD5cJK1KUfXBx9RmsbqqG9g','','0ahUKEwjB58OR54HWAhWnKJoKHSQhAMY4ChCpAggiKAAwAA','','',event)">Example title</a>
                         </h3>
                         <div class="slp">
                             <span class="_OHs _PHs">
@@ -63,7 +63,7 @@
                     </a>
                     <div class="_hJs">
                         <h3 class="r _gJs">
-                            <a class="l _PMs" href="https://example2.com/" onmousedown="return rwt(this,'','','','12','AFQjCNHObfH7sYmLWI1SC-YhWXKZFRzRjw','','0ahUKEwjB58OR54HWAhWnKJoKHSQhAMY4ChCpAgglKAAwAQ','','',event)">Example title 2</a>
+                            <a class="l lLrAF" href="https://example2.com/" onmousedown="return rwt(this,'','','','12','AFQjCNHObfH7sYmLWI1SC-YhWXKZFRzRjw','','0ahUKEwjB58OR54HWAhWnKJoKHSQhAMY4ChCpAgglKAAwAQ','','',event)">Example title 2</a>
                         </h3>
                         <div class="slp">
                             <span class="_OHs _PHs">

tests/unit/engines/test_wikidata.py (+1 -5)

@@ -9,20 +9,15 @@
 class TestWikidataEngine(SearxTestCase):
 
     def test_request(self):
-        wikidata.supported_languages = ['en', 'es']
         query = 'test_query'
         dicto = defaultdict(dict)
-        dicto['language'] = 'en-US'
         params = wikidata.request(query, dicto)
         self.assertIn('url', params)
         self.assertIn(query, params['url'])
         self.assertIn('wikidata.org', params['url'])
-        self.assertIn('en', params['url'])
 
-        dicto['language'] = 'es-ES'
         params = wikidata.request(query, dicto)
         self.assertIn(query, params['url'])
-        self.assertIn('es', params['url'])
 
     # successful cases are not tested here to avoid sending additional requests
     def test_response(self):
@@ -31,6 +26,7 @@
         self.assertRaises(AttributeError, wikidata.response, '')
         self.assertRaises(AttributeError, wikidata.response, '[]')
 
+        wikidata.supported_languages = ['en', 'es']
         response = mock.Mock(text='<html></html>', search_params={"language": "en"})
         self.assertEqual(wikidata.response(response), [])
 