Parcourir la source

[fix] google engine - ignore new useless result type

Adam Tauber il y a 9 ans
Parent
révision
5d49c15f79
1 fichier modifié avec 6 ajouts et 2 suppressions
  1. 6
    2
      searx/engines/google.py

+ 6
- 2
searx/engines/google.py Voir le fichier

12
 from cgi import escape
12
 from cgi import escape
13
 from urllib import urlencode
13
 from urllib import urlencode
14
 from urlparse import urlparse, parse_qsl
14
 from urlparse import urlparse, parse_qsl
15
-from lxml import html
15
+from lxml import html, etree
16
 from searx.poolrequests import get
16
 from searx.poolrequests import get
17
 from searx.engines.xpath import extract_text, extract_url
17
 from searx.engines.xpath import extract_text, extract_url
18
+from searx.search import logger
19
+
20
+logger = logger.getChild('google engine')
18
 
21
 
19
 
22
 
20
 # engine dependent config
23
 # engine dependent config
225
 
228
 
226
     # parse results
229
     # parse results
227
     for result in dom.xpath(results_xpath):
230
     for result in dom.xpath(results_xpath):
228
-        title = extract_text(result.xpath(title_xpath)[0])
229
         try:
231
         try:
232
+            title = extract_text(result.xpath(title_xpath)[0])
230
             url = parse_url(extract_url(result.xpath(url_xpath), google_url), google_hostname)
233
             url = parse_url(extract_url(result.xpath(url_xpath), google_url), google_hostname)
231
             parsed_url = urlparse(url, google_hostname)
234
             parsed_url = urlparse(url, google_hostname)
232
 
235
 
269
                                 'content': content
272
                                 'content': content
270
                                 })
273
                                 })
271
         except:
274
         except:
275
+            logger.debug('result parse error in:\n%s', etree.tostring(result, pretty_print=True))
272
             continue
276
             continue
273
 
277
 
274
     # parse suggestion
278
     # parse suggestion