I have a list of dictionaries containing unicode strings.
csv.DictWriter can write a list of dictionaries into a CSV file.
I want the CSV file to be encoded in UTF8.
The csv module cannot handle converting unicode strings into UTF8.
The csv module documentation has an example for converting everything to UTF8:
:
def utf_8_encoder(unicode_csv_data):
for line in unicode_csv_data:
yield line.encode('utf-8')
It also has a class UnicodeWriter:.
But... how do I make DictWriter work with these? Wouldn't they have to inject themselves in the middle of it, to catch the disassembled dictionaries and encode them before it writes them to the file? I don't get it.
解决方案
If using Python 2.7 or later, use a dict comprehension to remap the dictionary to utf-8 before passing to DictWriter:
# coding: utf-8
import csv
D = {'name':u'马克','pinyin':u'mǎkè'}
f = open('out.csv','wb')
f.write(u'\ufeff'.encode('utf8')) # BOM (optional...Excel needs it to open UTF-8 file properly)
w = csv.DictWriter(f,sorted(D.keys()))
w.writeheader()
w.writerow({k:v.encode('utf8') for k,v in D.items()})
f.close()
You can use this idea to update UnicodeWriter to DictUnicodeWriter:
# coding: utf-8
import csv
import cStringIO
import codecs
class DictUnicodeWriter(object):
def __init__(self, f, fieldnames, dialect=csv.excel, encoding="utf-8", **kwds):
# Redirect output to a queue
self.queue = cStringIO.StringIO()
self.writer = csv.DictWriter(self.queue, fieldnames, dialect=dialect, **kwds)
self.stream = f
self.encoder = codecs.getincrementalencoder(encoding)()
def writerow(self, D):
self.writer.writerow({k:v.encode("utf-8") for k,v in D.items()})
# Fetch UTF-8 output from the queue ...
data = self.queue.getvalue()
data = data.decode("utf-8")
# ... and reencode it into the target encoding
data = self.encoder.encode(data)
# write to the target stream
self.stream.write(data)
# empty queue
self.queue.truncate(0)
def writerows(self, rows):
for D in rows:
self.writerow(D)
def writeheader(self):
self.writer.writeheader()
D1 = {'name':u'马克','pinyin':u'Mǎkè'}
D2 = {'name':u'美国','pinyin':u'Měiguó'}
f = open('out.csv','wb')
f.write(u'\ufeff'.encode('utf8')) # BOM (optional...Excel needs it to open UTF-8 file properly)
w = DictUnicodeWriter(f,sorted(D.keys()))
w.writeheader()
w.writerows([D1,D2])
f.close()