소스 검색

Merge pull request #93 from dalf/master

yahoo, bing_new and dailymotion fixes
Adam Tauber 10년 전
부모
커밋
1e99cf2a0e
3개의 변경된 파일, 29개의 추가 그리고 9개의 삭제
  1. 18
    5
      searx/engines/bing_news.py
  2. 6
    1
      searx/engines/dailymotion.py
  3. 5
    3
      searx/engines/yahoo.py

+ 18
- 5
searx/engines/bing_news.py 파일 보기

56
         link = result.xpath('.//div[@class="newstitle"]/a')[0]
56
         link = result.xpath('.//div[@class="newstitle"]/a')[0]
57
         url = link.attrib.get('href')
57
         url = link.attrib.get('href')
58
         title = ' '.join(link.xpath('.//text()'))
58
         title = ' '.join(link.xpath('.//text()'))
59
-        content = escape(' '.join(result.xpath('.//div[@class="sn_txt"]/div//span[@class="sn_snip"]//text()')))
60
-        
59
+        contentXPath = result.xpath('.//div[@class="sn_txt"]/div//span[@class="sn_snip"]//text()')
60
+        if contentXPath != None:
61
+            content = escape(' '.join(contentXPath))
62
+            
61
         # parse publishedDate
63
         # parse publishedDate
62
-        publishedDate = escape(' '.join(result.xpath('.//div[@class="sn_txt"]/div//span[@class="sn_ST"]//span[@class="sn_tm"]//text()')))
64
+        publishedDateXPath = result.xpath('.//div[@class="sn_txt"]/div//span[contains(@class,"sn_ST")]//span[contains(@class,"sn_tm")]//text()')
65
+        if publishedDateXPath != None:
66
+            publishedDate = escape(' '.join(publishedDateXPath))
63
 
67
 
64
         if re.match("^[0-9]+ minute(s|) ago$", publishedDate):
68
         if re.match("^[0-9]+ minute(s|) ago$", publishedDate):
65
             timeNumbers = re.findall(r'\d+', publishedDate)
69
             timeNumbers = re.findall(r'\d+', publishedDate)
74
             publishedDate = datetime.now()\
78
             publishedDate = datetime.now()\
75
                 - timedelta(hours=int(timeNumbers[0]))\
79
                 - timedelta(hours=int(timeNumbers[0]))\
76
                 - timedelta(minutes=int(timeNumbers[1]))
80
                 - timedelta(minutes=int(timeNumbers[1]))
81
+        elif re.match("^[0-9]+ day(s|) ago$", publishedDate):
82
+            timeNumbers = re.findall(r'\d+', publishedDate)
83
+            publishedDate = datetime.now()\
84
+                - timedelta(days=int(timeNumbers[0]))
77
         else:
85
         else:
78
-            publishedDate = parser.parse(publishedDate)  
79
-
86
+            try:
87
+                # FIXME use params['language'] to parse either mm/dd or dd/mm
88
+                publishedDate = parser.parse(publishedDate, dayfirst=False)
89
+            except TypeError:
90
+                # FIXME
91
+                publishedDate = datetime.now()
92
+                
80
         # append result
93
         # append result
81
         results.append({'url': url, 
94
         results.append({'url': url, 
82
                         'title': title, 
95
                         'title': title, 

+ 6
- 1
searx/engines/dailymotion.py 파일 보기

16
 
16
 
17
 # engine dependent config
17
 # engine dependent config
18
 categories = ['videos']
18
 categories = ['videos']
19
-locale = 'en_US'
20
 paging = True
19
 paging = True
20
+language_support = True
21
 
21
 
22
 # search-url
22
 # search-url
23
 # see http://www.dailymotion.com/doc/api/obj-video.html
23
 # see http://www.dailymotion.com/doc/api/obj-video.html
26
 
26
 
27
 # do search-request
27
 # do search-request
28
 def request(query, params):
28
 def request(query, params):
29
+    if params['language'] == 'all':
30
+        locale = 'en-US'
31
+    else:
32
+        locale = params['language']
33
+
29
     params['url'] = search_url.format(
34
     params['url'] = search_url.format(
30
         query=urlencode({'search': query, 'localization': locale}),
35
         query=urlencode({'search': query, 'localization': locale}),
31
         pageno=params['pageno'])
36
         pageno=params['pageno'])

+ 5
- 3
searx/engines/yahoo.py 파일 보기

40
         if endpos > -1:
40
         if endpos > -1:
41
             endpositions.append(endpos)
41
             endpositions.append(endpos)
42
 
42
 
43
-    end = min(endpositions)
44
-
45
-    return unquote(url_string[start:end])
43
+    if start==0 or len(endpositions) == 0:
44
+        return url_string        
45
+    else:
46
+        end = min(endpositions)
47
+        return unquote(url_string[start:end])
46
 
48
 
47
 
49
 
48
 # do search-request
50
 # do search-request