python 对文件编码格式转换小工具

最新推荐文章于 2024-08-03 21:18:04 发布

小慧慧_

最新推荐文章于 2024-08-03 21:18:04 发布

阅读量1.1k

点赞数

分类专栏： python 文章标签： python 编码

本文链接：https://blog.csdn.net/u013946356/article/details/79193590

版权

python 专栏收录该内容

8 篇文章 0 订阅

订阅专栏

做项目时我一般用 Source Insight 阅读源码。有些文件的中文注释采用 UTF-8 编码，但 Source Insight 不支持 UTF-8；网上虽然有一些插件可以解决这个问题，但都不太好用。于是决定自己写一个文件编码转换工具，支持对单个文件或整个目录批量进行编码格式转换。

先封装一个负责格式转换的类，保存为 deal_code.py：

#!/usr/bin/env python
#coding=utf-8
import os,sys
import chardet
class conver:
    """Convert text files in place between UTF-8 and GBK.

    ``path`` may name a single file or a directory; a directory is walked
    recursively and every file found is queued in ``filelist``.
    ``deal_flag`` selects the direction used by ``deal_conver``:
    ``'1'`` converts UTF-8 to GBK, ``'2'`` converts GBK to UTF-8.
    """

    # Lower-cased chardet label -> Python codec used to decode the file.
    # 'utf-8-sig' strips a leading byte-order mark while decoding.
    _UTF8_SOURCES = {'utf-8': 'utf-8', 'utf-8-sig': 'utf-8-sig'}
    # chardet does not report 'gbk'; GBK text is normally labelled
    # 'GB2312' (or 'GB18030' by newer releases).  GBK is a superset of
    # GB2312, so GB2312-labelled files are decoded as GBK.
    _GBK_SOURCES = {'gbk': 'gbk', 'gb2312': 'gbk', 'gb18030': 'gb18030'}

    def __init__(self, path, deal_flag):
        self.filelist = []
        self.deal_flag = deal_flag
        # Recorded unconditionally so getFile() never hits a missing
        # attribute when the instance was built from a single file.
        self.path = path
        if os.path.isfile(path):
            self.filelist.append(path)
        elif os.path.isdir(path):
            self.getFile()

    def getFile(self):
        """Append every file found under ``self.path`` to ``self.filelist``."""
        for root, dirs, files in os.walk(self.path):
            for file in files:
                self.filelist.append(os.path.join(root, file))

    def deal_conver(self):
        """Run the conversion selected by ``deal_flag`` on every queued file."""
        if self.deal_flag == '1':
            handler = self.u2g
        elif self.deal_flag == '2':
            handler = self.g2u
        else:
            print('flag err' + str(self.deal_flag))
            return
        for file in self.filelist:
            handler(file)

    def u2g(self, file):
        """Rewrite ``file`` as GBK if chardet detects it as UTF-8."""
        self._convert(file, self._UTF8_SOURCES, 'gbk')

    def g2u(self, file):
        """Rewrite ``file`` as UTF-8 if chardet detects a GBK-family encoding."""
        self._convert(file, self._GBK_SOURCES, 'utf-8')

    @staticmethod
    def _transcode(raw, source_codec, target_codec):
        """Return the bytes ``raw`` re-encoded from one codec to another."""
        return raw.decode(source_codec).encode(target_codec)

    def _convert(self, file, sources, target_codec):
        """Detect the encoding of ``file`` and transcode it in place.

        ``sources`` maps accepted (lower-cased) chardet labels to the codec
        used for decoding.  Files whose detected encoding is not in
        ``sources`` are left untouched.
        """
        # Binary mode: chardet.detect() needs bytes, and the bytes must
        # round-trip without newline translation.
        with open(file, 'rb') as handle:
            raw = handle.read()
        # chardet.detect returns a dict; 'encoding' may be None when the
        # detector cannot decide (e.g. an empty file).
        coding = chardet.detect(raw).get('encoding')
        print(coding)
        source_codec = sources.get(coding.lower()) if coding else None
        if source_codec is None:
            return
        try:
            new_content = self._transcode(raw, source_codec, target_codec)
        except UnicodeError as err:
            # A misdetected file, or a character the target codec cannot
            # represent, must not abort the rest of the batch; the file is
            # left unmodified because nothing has been written yet.
            print('skip %s: %s' % (file, err))
            return
        with open(file, 'wb') as handle:
            handle.write(new_content)

调用的主函数 convercode_demo.py：

#!/usr/bin/env python
#coding=utf-8
import sys
from sys import path
path.append('../class')
import deal_code as ConverCode

if __name__ == "__main__":
    # Exactly two arguments are expected after the script name: the
    # direction flag ('1' = UTF-8 -> GBK, '2' = GBK -> UTF-8) and the file
    # or directory to convert.
    if len(sys.argv) != 3:
        print("python %s 1/2(1.u2g 2.g2u) file/dir" % sys.argv[0])
        # sys.exit with a non-zero status signals the usage error to the
        # calling shell and does not depend on the site-provided exit().
        sys.exit(1)
    flag = sys.argv[1]
    # Named 'target' so the 'path' list imported from sys is not rebound.
    target = sys.argv[2]
    print(flag, target)
    myConver = ConverCode.conver(target, flag)
    myConver.deal_conver()