
[fix] make search requests on wikidata more accurate

Marc Abonce Seguin, 6 years ago
parent commit b12857a70d
2 changed files with 7 additions and 7 deletions:
  1. searx/engines/wikidata.py  +6 -7
  2. searx/settings.yml  +1 -0

searx/engines/wikidata.py  +6 -7

@@ -27,7 +27,7 @@ result_count = 1
 # urls
 wikidata_host = 'https://www.wikidata.org'
 url_search = wikidata_host \
-    + '/wiki/Special:ItemDisambiguation?{query}'
+    + '/w/index.php?{query}'
 
 wikidata_api = wikidata_host + '/w/api.php'
 url_detail = wikidata_api\
@@ -40,7 +40,7 @@ url_map = 'https://www.openstreetmap.org/'\
 url_image = 'https://commons.wikimedia.org/wiki/Special:FilePath/{filename}?width=500&height=400'
 
 # xpaths
-wikidata_ids_xpath = '//div/ul[@class="wikibase-disambiguation"]/li/a/@title'
+wikidata_ids_xpath = '//ul[@class="mw-search-results"]/li//a/@href'
 title_xpath = '//*[contains(@class,"wikibase-title-label")]'
 description_xpath = '//div[contains(@class,"wikibase-entitytermsview-heading-description")]'
 property_xpath = '//div[@id="{propertyid}"]'
@@ -57,22 +57,21 @@ calendar_name_xpath = './/sup[contains(@class,"wb-calendar-name")]'
 
 
 def request(query, params):
-    language = match_language(params['language'], supported_languages).split('-')[0]
-
     params['url'] = url_search.format(
-        query=urlencode({'label': query, 'language': language}))
+        query=urlencode({'search': query}))
     return params
 
 
 def response(resp):
     results = []
     html = fromstring(resp.text)
-    wikidata_ids = html.xpath(wikidata_ids_xpath)
+    search_results = html.xpath(wikidata_ids_xpath)
 
     language = match_language(resp.search_params['language'], supported_languages).split('-')[0]
 
     # TODO: make requests asynchronous to avoid timeout when result_count > 1
-    for wikidata_id in wikidata_ids[:result_count]:
+    for search_result in search_results[:result_count]:
+        wikidata_id = search_result.split('/')[-1]
         url = url_detail.format(query=urlencode({'page': wikidata_id, 'uselang': language}))
         htmlresponse = get(url)
         jsonresponse = loads(htmlresponse.text)
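For illustration, the change amounts to querying Wikidata's full-text search page (/w/index.php?search=...) instead of Special:ItemDisambiguation, and recovering each item ID (e.g. Q42) from the href of its result link. The following standalone sketch reproduces that flow outside of searx; the search_wikidata_ids helper and the direct use of the requests/lxml packages are illustrative assumptions, not part of the engine above, and the mw-search-results markup is only what this revision's XPath expects.

# Standalone sketch (assumption: requests and lxml are installed); the real
# engine goes through searx's request()/response() hooks shown in the diff.
from urllib.parse import urlencode

import requests
from lxml.html import fromstring

WIKIDATA_HOST = 'https://www.wikidata.org'
URL_SEARCH = WIKIDATA_HOST + '/w/index.php?{query}'
IDS_XPATH = '//ul[@class="mw-search-results"]/li//a/@href'


def search_wikidata_ids(query, count=1):
    # Full-text search replaces the old Special:ItemDisambiguation request.
    url = URL_SEARCH.format(query=urlencode({'search': query}))
    page = fromstring(requests.get(url, timeout=3.0).text)
    hrefs = page.xpath(IDS_XPATH)
    # Each href looks like '/wiki/Q42'; the item ID is the last path segment.
    return [href.split('/')[-1] for href in hrefs[:count]]


print(search_wikidata_ids('Douglas Adams'))  # e.g. ['Q42']

Note that the language is no longer sent with the search request; it is still applied later, when the detail request for each item is made with uselang.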

searx/settings.yml  +1 -0

@@ -174,6 +174,7 @@ engines:
   - name : wikidata
     engine : wikidata
     shortcut : wd
+    timeout : 3.0
     weight : 2
 
   - name : duckduckgo
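The new setting gives the wikidata engine a request timeout of 3.0 seconds, which matters because the engine issues a follow-up detail request for every search result (see the TODO about asynchronous requests in the diff above). As a hedged sketch, assuming PyYAML and a checkout of the repository, the value can be read back from the file like this; searx's actual settings loading differs in detail:

# Sketch only: read the per-engine options added above with PyYAML.
import yaml

with open('searx/settings.yml') as settings_file:
    settings = yaml.safe_load(settings_file)

# 'engines' is a list of mappings, one per engine entry.
wikidata = next(e for e in settings['engines'] if e['name'] == 'wikidata')
print(wikidata.get('timeout'))  # 3.0, the budget added in this commit
print(wikidata.get('weight'))   # 2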