Python使用codecs模块进行文件操作——读写中英文字符

本文介绍了一个Python脚本,用于将源文件夹(如D:/Test/NonUnicode)中的非Unicode文本文件(如.cpp、.c、.h等C/C++源文件)转换为UTF-8编码,适用于跨平台文件格式统一。脚本首先按扩展名筛选文件,并跳过已带有UTF-8 BOM头的文件,然后依次尝试多种编码读取,把第一个能成功解码的结果以带BOM的UTF-8写入目标文件夹(如D:/Test/utf8)中的对应路径。
摘要由CSDN通过智能技术生成

import sys

import os

import codecs

""" Usage: ConvertCp.py SrcDir DstDir e.g. if your source folder is "D:/Test/NonUnicode", destination folder is "D:/Test/utf8", just run command as follow : python ConvertCp.py "D:/Test/NonUnicode" "D:/Test/utf8" """

# Candidate source encodings, tried in this order when reading a file; the
# first one that decodes the whole file without error is used. "utf-8" comes
# first so that valid UTF-8 input is never misread as a legacy code page.
# Each encoding is listed once: retrying a code page that already failed
# cannot succeed.
# NOTE(review): single-byte code pages such as cp1251 presumably accept
# almost any byte sequence, so the later entries may rarely be reached --
# confirm the intended priority for your data.
codePageList = (
    "utf-8",
    "cp1251",
    "cp855",
    "cp1252",
    "cp1250",
    "cp1254",
    "cp936",
    "cp950",
    "cp932",
    "cp949",
    "cp874",
    "cp1253",
)

# Lower-case file extensions (including the leading dot) that are eligible
# for conversion; files with any other extension are left untouched.
fileExtFilter = (
    ".cpp", ".c", ".cxx",
    ".h", ".hpp", ".hxx",
    ".cc", ".inl",
)

def FileIsBomUtf8Encoding(filePath):
    """Report whether the file starts with the UTF-8 byte order mark.

    @param filePath path of the file to inspect.
    @return True if the file's first three bytes are EF BB BF; False
            otherwise, including when the file cannot be opened or read
            (in which case a message is printed).
    """
    try:
        with open(filePath, "rb") as f:
            # Only the leading bytes matter, so do not load the whole file.
            header = f.read(len(codecs.BOM_UTF8))
    except IOError:
        print ("open file %s failed." % (os.path.basename(filePath)))
        return False

    # The file was read in binary mode, so compare bytes with bytes.
    # codecs.BOM_UTF8 is a byte string on both Python 2 and Python 3,
    # whereas a plain '\xef\xbb\xbf' literal is text on Python 3 and would
    # never compare equal.
    return header == codecs.BOM_UTF8

#----------------------------------------------------------------------

def ConvertFileEncoding(sourceFilePath, targetFilePath, targetEncoding = "utf_8"):
    """Re-encode one text file into 'targetEncoding' and write it to targetFilePath.

    Files whose extension is not listed in fileExtFilter, and files that
    already start with a UTF-8 BOM, are skipped. The source encoding is
    guessed by trying each entry of codePageList in order and keeping the
    first one that decodes the whole file without error. When the target
    encoding name starts with "utf" and ends with "8", a BOM is written at
    the start of the output.

    @param sourceFilePath path of the file to read.
    @param targetFilePath path of the file to write; a missing parent
                          directory is created.
    @param targetEncoding codec name used for the output file.
    @return True if the target file was written; False if the file was
            skipped or any step failed (a message is printed on failure).
    """
    # Filter by file extension (case-insensitive).
    filePathExt = os.path.splitext(sourceFilePath)[1]
    if filePathExt.lower() not in fileExtFilter:
        return False

    # A file that already carries a UTF-8 BOM needs no conversion.
    if FileIsBomUtf8Encoding(sourceFilePath):
        return False

    # Read the source content with the first code page that decodes it.
    content = None
    for cp in codePageList:
        try:
            # The 'with' block closes the file on success and on failure,
            # so no handle is left open when a later step bails out.
            with codecs.open(sourceFilePath, mode = "r", encoding = cp) as sourceFile:
                content = sourceFile.read()
            break
        except UnicodeDecodeError:
            # Wrong guess: try the next code page.
            content = None
            continue
        except IOError:
            print ("open file %s failed." % (os.path.basename(sourceFilePath)))
            return False

    if content is None:
        print ("File \"%s\" is not valid encoding." % (sourceFilePath))
        return False

    # Ensure the target directory exists. dirname() is empty for a bare
    # file name, and os.makedirs("") would raise.
    targetPathDir = os.path.dirname(targetFilePath)
    if targetPathDir and not os.path.exists(targetPathDir):
        os.makedirs(targetPathDir)

    # Decoding the BOM bytes yields the single character U+FEFF on both
    # Python 2 and Python 3 (the 'unicode' builtin exists only on Python 2).
    bom = codecs.BOM_UTF8.decode("utf_8")
    encodingName = targetEncoding.lower()
    writeBom = encodingName.startswith("utf") and encodingName.endswith("8")

    # If the decoded text itself begins with a BOM, drop it so the output
    # never ends up with two of them.
    if content.startswith(bom):
        content = content[len(bom):]

    # Write the converted content.
    try:
        with codecs.open(targetFilePath, mode = "w", encoding = targetEncoding) as targetFile:
            if writeBom:
                targetFile.write(bom)
            targetFile.write(content)
    except UnicodeError:
        # Characters that the target encoding cannot represent raise
        # UnicodeEncodeError; UnicodeError covers that and the decode
        # variant. The file is already closed here, so the partial output
        # can be removed.
        print ("convert file: \"%s\" failure" % (sourceFilePath) )
        if os.path.exists(targetFilePath):
            os.remove(targetFilePath)
        return False
    except IOError:
        print ("open file %s failed." % (targetFilePath))
        return False

    return True

def GetTargetPath(sourcePath, sourceDir, targetDir):
    """Return the path under targetDir that mirrors sourcePath's place under sourceDir.

    @param sourcePath path of a file located inside sourceDir.
    @param sourceDir  root of the source tree.
    @param targetDir  root of the destination tree.
    @return targetDir joined with sourcePath's path relative to sourceDir.
    """
    # A relative path is used instead of str.replace(): replace() rewrites
    # every occurrence of the source directory text, so a nested folder
    # with the same name would be renamed as well.
    return os.path.join(targetDir, os.path.relpath(sourcePath, sourceDir))


if __name__=='__main__':
    if len(sys.argv) <= 2:
        # No source/destination given on the command line: fall back to
        # the built-in default folders.
        sSourceDir = r"D:\trunk"
        sTargetDir = r"D:\trunk_new"
    else:
        sSourceDir = sys.argv[1]
        sTargetDir = sys.argv[2]

    # Mirror the source tree into the target tree, converting each file
    # that ConvertFileEncoding accepts.
    for root, _dirs, files in os.walk(sSourceDir):
        for fileName in files:
            sourcePath = os.path.join(root, fileName)
            targetPath = GetTargetPath(sourcePath, sSourceDir, sTargetDir)
            ConvertFileEncoding(sourcePath, targetPath)

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值