|
@@ -124,9 +124,6 @@ image_img_src_xpath = './img/@src'
|
124
|
124
|
# FIXME : no translation
|
125
|
125
|
property_address = "Address"
|
126
|
126
|
property_phone = "Phone number"
|
127
|
|
-property_location = "Location"
|
128
|
|
-property_website = "Web site"
|
129
|
|
-property_gplus_website = "Google plus"
|
130
|
127
|
|
131
|
128
|
# cookies
|
132
|
129
|
pref_cookie = ''
|
|
@@ -166,19 +163,6 @@ def parse_url(url_string, google_hostname):
|
166
|
163
|
return url_string
|
167
|
164
|
|
168
|
165
|
|
169
|
|
-# URL : get label
|
170
|
|
-def url_get_label(url_string):
|
171
|
|
- # sanity check
|
172
|
|
- if url_string is None:
|
173
|
|
- return url_string
|
174
|
|
-
|
175
|
|
- # normal case
|
176
|
|
- parsed_url = urlparse(url_string)
|
177
|
|
- if parsed_url.netloc == 'plus.google.com':
|
178
|
|
- return property_gplus_website
|
179
|
|
- return property_website
|
180
|
|
-
|
181
|
|
-
|
182
|
166
|
# returns extract_text on the first result selected by the xpath or None
|
183
|
167
|
def extract_text_from_dom(result, xpath):
|
184
|
168
|
r = result.xpath(xpath)
|
|
@@ -281,9 +265,9 @@ def response(resp):
|
281
|
265
|
# append result
|
282
|
266
|
results.append({'url': url,
|
283
|
267
|
'title': title,
|
284
|
|
- 'content': content})
|
285
|
|
- except Exception, e:
|
286
|
|
- print e
|
|
268
|
+ 'content': content
|
|
269
|
+ })
|
|
270
|
+ except:
|
287
|
271
|
continue
|
288
|
272
|
|
289
|
273
|
# parse suggestion
|