用python写cs_python使用codecs模块进行文件操作-读写中英文字符

最新推荐文章于 2021-10-17 12:38:33 发布

胡桓铭

最新推荐文章于 2021-10-17 12:38:33 发布

阅读量254

点赞数

文章标签：用python写cs

本文链接：https://blog.csdn.net/weixin_34698042/article/details/112991215

版权

本文介绍了一个Python脚本，用于将源文件夹（如D:/Test/NonUnicode）中非Unicode编码的C/C++源文件（扩展名为.cpp、.c、.h等）转换为带BOM头的UTF-8编码，适用于跨平台文件格式统一。脚本首先检查文件是否已包含UTF-8 BOM头，已包含的文件直接跳过；否则依次尝试用多种编码读取，读取成功后将内容写入目标文件夹（如D:/Test/utf8）中的对应路径。

摘要由CSDN通过智能技术生成

import sys

import os

import codecs

""" Usage: ConvertCp.py SrcDir DstDir e.g. if your source folder is "D:/Test/NonUnicode", destination folder is "D:/Test/utf8", just run command as follow : python ConvertCp.py "D:/Test/NonUnicode" "D:/Test/utf8" """

# Candidate source encodings. ConvertFileEncoding tries them in this order and
# keeps the first one that decodes the whole file without a UnicodeDecodeError,
# so the order is significant. Each encoding is listed once: retrying the same
# code page can only fail in the same way.
codePageList = (
    "utf-8",
    "cp1251",
    "cp855",
    "cp1252",
    "cp1250",
    "cp1254",
    "cp936",
    "cp950",
    "cp932",
    "cp949",
    "cp874",
    "cp1253",
)

# Extensions (lower-case, leading dot included) of the files that are eligible
# for conversion; anything else is skipped by ConvertFileEncoding.
fileExtFilter = (".cpp", ".c", ".cxx", ".h", ".hpp", ".hxx", ".cc", ".inl")

def FileIsBomUtf8Encoding(filePath):
    """Return True when the file at *filePath* starts with the UTF-8 BOM.

    Only the first few bytes of the file are read.

    @param filePath path of the file to inspect.
    @return True if the file begins with the bytes EF BB BF, otherwise False.
            False is also returned (after printing a message) when the file
            cannot be opened or read.
    """
    try:
        with open(filePath, "rb") as f:
            head = f.read(len(codecs.BOM_UTF8))
    except IOError:
        print ("open file %s failed." % (os.path.basename(filePath)))
        return False

    # The file is opened in binary mode, so the prefix must be compared with a
    # bytes value; comparing it with a text literal never matches on Python 3.
    return head == codecs.BOM_UTF8

#----------------------------------------------------------------------

def ConvertFileEncoding(sourceFilePath, targetFilePath, targetEncoding = "utf_8"):
    """Convert one text file from a legacy encoding into 'targetEncoding'.

    The source is decoded with the first entry of codePageList that succeeds
    and is then written to targetFilePath, creating the target directory when
    needed. When the target encoding is a UTF-8 spelling (starts with "utf" and
    ends with "8"), the output starts with exactly one BOM.

    @param sourceFilePath source file path.
    @param targetFilePath target file path.
    @param targetEncoding target file encoding.
    @return True if the target file was written; False if the file was skipped
            (extension not in fileExtFilter, or already UTF-8 with BOM) or if
            reading, decoding or writing failed.
    """
    # filter file ext.
    filePathExt = os.path.splitext(sourceFilePath)[1]
    if filePathExt.lower() not in fileExtFilter:
        return False

    # If the file is Bom utf_8 just skip.
    if FileIsBomUtf8Encoding(sourceFilePath):
        return False

    # Get the source content: the first code page that decodes the whole file
    # wins. The `with` block closes the reader on every path.
    content = None
    for cp in codePageList:
        try:
            with codecs.open(sourceFilePath, mode = "r", encoding = cp) as sourceFile:
                content = sourceFile.read()
            break
        except UnicodeDecodeError:
            content = None
            continue
        except IOError:
            print ("open file %s failed." % (os.path.basename(sourceFilePath)))
            return False

    if content is None:
        print ("File \"%s\" is not valid encoding." % (sourceFilePath))
        return False

    # ensure the target directory exist. A bare file name has no directory
    # part, and os.makedirs("") would raise.
    targetPathDir = os.path.dirname(targetFilePath)
    if targetPathDir and not os.path.exists(targetPathDir):
        os.makedirs(targetPathDir)

    # Once decoded, the BOM is the single character U+FEFF.
    bom = codecs.BOM_UTF8.decode("utf_8")
    loweredEncoding = targetEncoding.lower()
    writeBom = loweredEncoding.startswith("utf") and loweredEncoding.endswith("8")

    # convert the file content.
    try:
        with codecs.open(targetFilePath, mode = "w", encoding = targetEncoding) as targetFile:
            if writeBom:
                targetFile.write(bom)
                # Avoid a doubled BOM when the decoded text already has one.
                if content.startswith(bom):
                    content = content[len(bom):]
            targetFile.write(content)
    except UnicodeError:
        # skip the failure file. Writing raises UnicodeEncodeError when the
        # text cannot be represented in targetEncoding; the target file is
        # already closed here, so the partial output can be removed.
        print ("convert file: \"%s\" failure" % (sourceFilePath) )
        os.remove(targetFilePath)
        return False
    except IOError:
        print ("open file %s failed." % (targetFilePath))
        return False

    return True

if __name__=='__main__':
    # Script entry point: mirror the source tree into the target tree, passing
    # every file to ConvertFileEncoding (which decides whether to convert it).
    if len(sys.argv) <= 2:
        # Fewer than two arguments: fall back to the built-in default folders.
        # NOTE(review): these are raw strings, so the doubled/tripled
        # backslashes are kept literally -- confirm that is intended.
        sSourceDir = r"D:\\trunk"
        sTargetDir = r"D:\\\trunk_new"
    else:
        sSourceDir = sys.argv[1]
        sTargetDir = sys.argv[2]

    for root, dirs, files in os.walk(sSourceDir):
        # Build the target directory from the path relative to the source
        # root. Substituting the source string inside the full path would
        # also rewrite later occurrences of that string and would drop the
        # separator when sSourceDir ends with one.
        relativeDir = os.path.relpath(root, sSourceDir)
        if relativeDir == os.curdir:
            targetRoot = sTargetDir
        else:
            targetRoot = os.path.join(sTargetDir, relativeDir)

        for fileName in files:
            sourcePath = os.path.join(root, fileName)
            targetPath = os.path.join(targetRoot, fileName)
            ConvertFileEncoding(sourcePath, targetPath)

胡桓铭

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
复制链接

分享到 QQ

分享到新浪微博

扫一扫