Browse Source

[fix] dailymotion engine: no more HTML tags in the description

Dalf 11 years ago
parent
commit
49c85fce51
2 changed files with 10 additions and 3 deletions
  1. 1
    0
      engines.cfg_sample
  2. 9
    3
      searx/engines/dailymotion.py

+ 1
- 0
engines.cfg_sample View File

@@ -82,5 +82,6 @@ categories = videos
82 82
 
83 83
 [dailymotion]
84 84
 engine = dailymotion
85
+locale = en_US
85 86
 categories = videos
86 87
 

+ 9
- 3
searx/engines/dailymotion.py View File

@@ -1,16 +1,17 @@
1 1
 from urllib import urlencode
2
+from lxml import html
2 3
 from json import loads
3 4
 from cgi import escape
4 5
 
5 6
 categories = ['videos']
6
-localization = 'en'
7
+locale = 'en_US'
7 8
 
8 9
 # see http://www.dailymotion.com/doc/api/obj-video.html
9 10
 search_url = 'https://api.dailymotion.com/videos?fields=title,description,duration,url,thumbnail_360_url&sort=relevance&limit=25&page=1&{query}'
10 11
 
11 12
 def request(query, params):
12 13
     global search_url
13
-    params['url'] = search_url.format(query=urlencode({'search': query, 'localization': localization }))
14
+    params['url'] = search_url.format(query=urlencode({'search': query, 'localization': locale }))
14 15
     return params
15 16
 
16 17
 
@@ -27,6 +28,11 @@ def response(resp):
27 28
         else:
28 29
             content = ''
29 30
         if res['description']:
30
-            content += escape(res['description'][:500])
31
+            description = text_content_from_html(res['description'])
32
+            content += description[:500]
31 33
         results.append({'url': url, 'title': title, 'content': content})
32 34
     return results
35
+
36
+def text_content_from_html(html_string):
37
+    desc_html = html.fragment_fromstring(html_string, create_parent=True)
38
+    return desc_html.text_content()