Browse Source

[fix] dailymotion engine: no more HTML tags in the description

Dalf 11 years ago
parent
commit
49c85fce51
2 changed files with 10 additions and 3 deletions
  1. 1
    0
      engines.cfg_sample
  2. 9
    3
      searx/engines/dailymotion.py

+ 1
- 0
engines.cfg_sample View File

82
 
82
 
83
 [dailymotion]
83
 [dailymotion]
84
 engine = dailymotion
84
 engine = dailymotion
85
+locale = en_US
85
 categories = videos
86
 categories = videos
86
 
87
 

+ 9
- 3
searx/engines/dailymotion.py View File

1
 from urllib import urlencode
1
 from urllib import urlencode
2
+from lxml import html
2
 from json import loads
3
 from json import loads
3
 from cgi import escape
4
 from cgi import escape
4
 
5
 
5
 categories = ['videos']
6
 categories = ['videos']
6
-localization = 'en'
7
+locale = 'en_US'
7
 
8
 
8
 # see http://www.dailymotion.com/doc/api/obj-video.html
9
 # see http://www.dailymotion.com/doc/api/obj-video.html
9
 search_url = 'https://api.dailymotion.com/videos?fields=title,description,duration,url,thumbnail_360_url&sort=relevance&limit=25&page=1&{query}'
10
 search_url = 'https://api.dailymotion.com/videos?fields=title,description,duration,url,thumbnail_360_url&sort=relevance&limit=25&page=1&{query}'
10
 
11
 
11
 def request(query, params):
12
 def request(query, params):
12
     global search_url
13
     global search_url
13
-    params['url'] = search_url.format(query=urlencode({'search': query, 'localization': localization }))
14
+    params['url'] = search_url.format(query=urlencode({'search': query, 'localization': locale }))
14
     return params
15
     return params
15
 
16
 
16
 
17
 
27
         else:
28
         else:
28
             content = ''
29
             content = ''
29
         if res['description']:
30
         if res['description']:
30
-            content += escape(res['description'][:500])
31
+            description = text_content_from_html(res['description'])
32
+            content += description[:500]
31
         results.append({'url': url, 'title': title, 'content': content})
33
         results.append({'url': url, 'title': title, 'content': content})
32
     return results
34
     return results
35
+
36
+def text_content_from_html(html_string):
37
+    desc_html = html.fragment_fromstring(html_string, create_parent=True)
38
+    return desc_html.text_content()