浏览代码

[enh] csv output support

asciimoo 11 年前
父节点
当前提交
a192438e9a
共有 2 个文件被更改，包括 46 次插入和 0 次删除
  1. 33
    0
      searx/utils.py
  2. 13
    0
      searx/webapp.py

+ 33
- 0
searx/utils.py 查看文件

1
 from HTMLParser import HTMLParser
1
 from HTMLParser import HTMLParser
2
 import htmlentitydefs
2
 import htmlentitydefs
3
+import csv
4
+import codecs
5
+import cStringIO
3
 
6
 
4
 class HTMLTextExtractor(HTMLParser):
7
 class HTMLTextExtractor(HTMLParser):
5
     def __init__(self):
8
     def __init__(self):
24
     s = HTMLTextExtractor()
27
     s = HTMLTextExtractor()
25
     s.feed(html)
28
     s.feed(html)
26
     return s.get_text()
29
     return s.get_text()
30
+
31
+
32
+class UnicodeWriter:
33
+    """
34
+    A CSV writer which will write rows to CSV file "f",
35
+    which is encoded in the given encoding.
36
+    """
37
+
38
+    def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
39
+        # Redirect output to a queue
40
+        self.queue = cStringIO.StringIO()
41
+        self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
42
+        self.stream = f
43
+        self.encoder = codecs.getincrementalencoder(encoding)()
44
+
45
+    def writerow(self, row):
46
+        self.writer.writerow([(s.encode("utf-8").strip() if type(s) == str or type(s) == unicode else str(s)) for s in row])
47
+        # Fetch UTF-8 output from the queue ...
48
+        data = self.queue.getvalue()
49
+        data = data.decode("utf-8")
50
+        # ... and reencode it into the target encoding
51
+        data = self.encoder.encode(data)
52
+        # write to the target stream
53
+        self.stream.write(data)
54
+        # empty queue
55
+        self.queue.truncate(0)
56
+
57
+    def writerows(self, rows):
58
+        for row in rows:
59
+            self.writerow(row)

+ 13
- 0
searx/webapp.py 查看文件

26
 from searx.engines import search, categories, engines, get_engines_stats
26
 from searx.engines import search, categories, engines, get_engines_stats
27
 from searx import settings
27
 from searx import settings
28
 import json
28
 import json
29
+import cStringIO
30
+from searx.utils import UnicodeWriter
29
 
31
 
30
 
32
 
31
 app = Flask(__name__)
33
 app = Flask(__name__)
104
              result['pretty_url'] = result['url']
106
              result['pretty_url'] = result['url']
105
     if request_data.get('format') == 'json':
107
     if request_data.get('format') == 'json':
106
         return Response(json.dumps({'query': query, 'results': results}), mimetype='application/json')
108
         return Response(json.dumps({'query': query, 'results': results}), mimetype='application/json')
109
+    elif request_data.get('format') == 'csv':
110
+        csv = UnicodeWriter(cStringIO.StringIO())
111
+        if len(results):
112
+            keys = results[0].keys()
113
+            csv.writerow(keys)
114
+            for row in results:
115
+                csv.writerow([row[key] for key in keys])
116
+        csv.stream.seek(0)
117
+        response = Response(csv.stream.read(), mimetype='application/csv', )
118
+        response.headers.add('Content-Disposition', 'attachment;Filename=searx_-_{0}.csv'.format(query))
119
+        return response
107
     template = render('results.html'
120
     template = render('results.html'
108
                         ,results=results
121
                         ,results=results
109
                         ,q=request_data['q']
122
                         ,q=request_data['q']