谢谢你的回复,很管用!
由于源文件是混合格式,我添加了一个按顺序尝试的源格式列表(sourceFormats),并在UnicodeDecodeError上尝试下一种格式:from __future__ import with_statement
import os
import sys
import codecs
from chardet.universaldetector import UniversalDetector
targetFormat = 'utf-8'
outputDir = 'converted'
detector = UniversalDetector()
def get_encoding_type(current_file):
detector.reset()
for line in file(current_file):
detector.feed(line)
if detector.done: break
detector.close()
return detector.result['encoding']
def convertFileBestGuess(filename):
sourceFormats = ['ascii', 'iso-8859-1']
for format in sourceFormats:
try:
with codecs.open(fileName, 'rU', format) as sourceFile:
writeConversion(sourceFile)