|
@@ -12,9 +12,12 @@ import re
|
12
|
12
|
from cgi import escape
|
13
|
13
|
from urllib import urlencode
|
14
|
14
|
from urlparse import urlparse, parse_qsl
|
15
|
|
-from lxml import html
|
|
15
|
+from lxml import html, etree
|
16
|
16
|
from searx.poolrequests import get
|
17
|
17
|
from searx.engines.xpath import extract_text, extract_url
|
|
18
|
+from searx.search import logger
|
|
19
|
+
|
|
20
|
+logger = logger.getChild('google engine')
|
18
|
21
|
|
19
|
22
|
|
20
|
23
|
# engine dependent config
|
|
@@ -225,8 +228,8 @@ def response(resp):
|
225
|
228
|
|
226
|
229
|
# parse results
|
227
|
230
|
for result in dom.xpath(results_xpath):
|
228
|
|
- title = extract_text(result.xpath(title_xpath)[0])
|
229
|
231
|
try:
|
|
232
|
+ title = extract_text(result.xpath(title_xpath)[0])
|
230
|
233
|
url = parse_url(extract_url(result.xpath(url_xpath), google_url), google_hostname)
|
231
|
234
|
parsed_url = urlparse(url, google_hostname)
|
232
|
235
|
|
|
@@ -269,6 +272,7 @@ def response(resp):
|
269
|
272
|
'content': content
|
270
|
273
|
})
|
271
|
274
|
except:
|
|
275
|
+ logger.debug('result parse error in:\n%s', etree.tostring(result, pretty_print=True))
|
272
|
276
|
continue
|
273
|
277
|
|
274
|
278
|
# parse suggestion
|