import sys
import os
import codecs
""" Usage: ConvertCp.py SrcDir DstDir e.g. if your source folder is "D:/Test/NonUnicode", destination folder is "D:/Test/utf8", just run command as follow : python ConvertCp.py "D:/Test/NonUnicode" "D:/Test/utf8" """
# Candidate source encodings. ConvertFileEncoding tries them in this order and
# keeps the first one that decodes the whole file without a UnicodeDecodeError.
# Each code page is listed once: retrying an encoding that already failed can
# only fail again.
# NOTE(review): single-byte code pages accept almost any byte sequence, so
# entries late in this list may rarely (if ever) be reached -- confirm that
# the order matches the intended priority.
codePageList = (
    "utf-8",
    "cp1251",
    "cp855",
    "cp1252",
    "cp1250",
    "cp1254",
    "cp936",
    "cp950",
    "cp932",
    "cp949",
    "cp874",
    "cp1253",
)
# File extensions (lower-case, including the leading dot) that are eligible
# for conversion; ConvertFileEncoding lower-cases a file's extension and
# ignores the file when the result is not listed here.
fileExtFilter = (
    ".cpp", ".c", ".cxx",
    ".h", ".hpp", ".hxx",
    ".cc", ".inl",
)
def FileIsBomUtf8Encoding(filePath):
    """
    Judge whether the file starts with the UTF-8 byte order mark (BOM).

    @param filePath path of the file to inspect.
    @return True if the file begins with the UTF-8 BOM bytes; False otherwise,
            including when the file cannot be opened or read (a message is
            printed in that case).
    """
    try:
        # Binary mode so the raw bytes are inspected; only a BOM-sized prefix
        # is needed, so the rest of the file is never read.
        with open(filePath, "rb") as f:
            header = f.read(len(codecs.BOM_UTF8))
    except IOError:
        print ("open file %s failed." % (os.path.basename(filePath)))
        return False
    # codecs.BOM_UTF8 is a byte string, so this is a bytes-to-bytes comparison.
    # Comparing against a text literal is always unequal on Python 3.
    return header == codecs.BOM_UTF8
#----------------------------------------------------------------------
def ConvertFileEncoding(sourceFilePath, targetFilePath, targetEncoding = "utf_8"):
    """
    Convert a text file from a legacy code page into 'targetEncoding' (utf_8).

    The source encoding is guessed by trying every entry of codePageList in
    order and keeping the first one that decodes the whole file. When the
    target encoding name starts with "utf" and ends with "8", a BOM is written
    at the start of the target file.

    @param sourceFilePath source file path.
    @param targetFilePath target file path; missing parent directories are created.
    @param targetEncoding target file encoding.
    @return True if the target file was written; False if the file was skipped
            (extension not in fileExtFilter, or already BOM utf_8) or if
            reading, decoding or writing failed.
    """
    #
    # filter file ext.
    #
    filePathExt = os.path.splitext(sourceFilePath)[1]
    if filePathExt.lower() not in fileExtFilter:
        return False

    #
    # If the file is Bom utf_8 just skip.
    #
    if FileIsBomUtf8Encoding(sourceFilePath):
        return False

    #
    # Get the source content: the first code page that decodes the complete
    # file wins. The 'with' block closes the source file on every path.
    #
    content = None
    for cp in codePageList:
        try:
            with codecs.open(sourceFilePath, mode = "r", encoding = cp) as sourceFile:
                content = sourceFile.read()
            break
        except UnicodeDecodeError:
            # Wrong guess, try the next code page.
            content = None
        except IOError:
            print ("open file %s failed." % (os.path.basename(sourceFilePath)))
            return False

    if content is None:
        print ("File \"%s\" is not valid encoding." % (sourceFilePath))
        return False

    #
    # ensure the target directory exist. dirname() is empty for a bare file
    # name, and os.makedirs('') would raise, so only create real directories.
    #
    targetPathDir = os.path.dirname(targetFilePath)
    if targetPathDir and not os.path.exists(targetPathDir):
        os.makedirs(targetPathDir)

    #
    # convert the file content.
    #
    encodingName = targetEncoding.lower()
    writeBom = encodingName.startswith("utf") and encodingName.endswith("8")
    try:
        try:
            with codecs.open(targetFilePath, mode = "w", encoding = targetEncoding) as targetFile:
                if writeBom:
                    # Decoding the BOM bytes gives the BOM character on both
                    # Python 2 and Python 3 (the 'unicode' builtin is Python 2 only).
                    targetFile.write(codecs.BOM_UTF8.decode("utf_8"))
                # U+00EF U+00BB U+00BF is what the UTF-8 BOM bytes look like
                # after being decoded with a single-byte code page such as
                # cp1252; drop them so the BOM is not duplicated as text.
                if content[0:3] == u'\xef\xbb\xbf':
                    content = content[3:]
                targetFile.write(content)
        except UnicodeError:
            #
            # skip the failure file. UnicodeError covers both decode and
            # encode failures; the half-written target is removed.
            #
            print ("convert file: \"%s\" failure" % (sourceFilePath) )
            os.remove(targetFilePath)
            return False
    except IOError:
        print ("open file %s failed." % (targetFilePath))
        return False

    return True
if __name__=='__main__':
    # Script entry point: walk the source tree and write a converted copy of
    # every eligible file to the same relative location under the target tree.
    if len(sys.argv) <= 2:
        # Source and target directories were not both given on the command
        # line: fall back to the built-in default locations.
        sSourceDir = r"D:\\trunk"
        sTargetDir = r"D:\\\trunk_new"
    else:
        sSourceDir = sys.argv[1]
        sTargetDir = sys.argv[2]

    for root, dirs, files in os.walk(sSourceDir):
        for fileName in files:
            sourcePath = os.path.join(root, fileName)
            # Rebuild the path relative to the source root under the target
            # root. A plain str.replace() would also rewrite any later part
            # of the path that happens to contain the source directory text.
            relativePath = os.path.relpath(sourcePath, sSourceDir)
            targetPath = os.path.join(sTargetDir, relativePath)
            ConvertFileEncoding(sourcePath, targetPath)