python文件批量转换编码_Python批量转换子文件夹下的文件编码

最新推荐文章于 2021-02-10 15:16:25 发布

weixin_39767386

最新推荐文章于 2021-02-10 15:16:25 发布

阅读量84

点赞数

文章标签： python文件批量转换编码

版权声明：本文为博主原创文章，遵循 CC 4.0 BY-SA 版权协议，转载请附上原文出处链接和本声明。

本文链接：https://blog.csdn.net/weixin_39767386/article/details/111455820

版权

# coding=utf-8

import os

import chardet

import codecs

# 批量转换文件夹中的index.shtml为utf-8编码

def run(root="D:\\2"):
    """Convert every GB2312 ``index.shtml`` one level below *root* to UTF-8.

    The work is done in four steps, each delegated to a helper in this file:
    list the sub-folders, keep those holding an ``index.shtml``, keep the
    files detected as GB2312, then rewrite those files as UTF-8.

    Args:
        root: Parent directory whose immediate sub-folders are scanned.
            Defaults to the directory that used to be hard-coded here.
    """
    # Step 1: collect the immediate sub-folders of the root directory.
    all_child_dir = get_all_child_dir(root)

    # Step 2: keep only the sub-folders that actually contain index.shtml.
    index_shtml = get_all_index_shtml(all_child_dir)

    # Step 3: detect each file's encoding and keep the GB2312 ones.
    gb2312_list = get_all_gb2312(index_shtml)

    # Step 4: rewrite the GB2312 files as UTF-8, in place.
    convert_to_utf8(gb2312_list)

def get_all_child_dir(path):
    """Return the immediate sub-directories of *path*.

    Args:
        path: Directory to scan. Only its direct children are inspected;
            the scan is not recursive.

    Returns:
        A list of joined paths (``os.path.join(path, name)``), sorted by
        name, one per child that is a directory. The list is empty when
        *path* does not exist or is not a directory.
    """
    dir_list = []

    # isdir (not exists): a regular file passed as *path* must yield an
    # empty result rather than make os.listdir raise NotADirectoryError.
    if os.path.isdir(path):
        print("该母路径存在")
        # Sorted so the printed list and the processing order are stable.
        for name in sorted(os.listdir(path)):
            candidate = os.path.join(path, name)
            if os.path.isdir(candidate):
                dir_list.append(candidate)

    print("所有列表如下")
    print(dir_list)
    return dir_list

def get_all_index_shtml(all_child_dir):
    """Return the ``index.shtml`` paths that exist inside the given folders.

    Args:
        all_child_dir: Iterable of directory paths to probe.

    Returns:
        A list with ``<dir>/index.shtml`` for every directory in
        *all_child_dir* that contains such a regular file, in input order.
    """
    index_shtml = []

    for child_dir in all_child_dir:
        # os.path.join instead of a hand-written "\index.shtml": that literal
        # relied on an invalid "\i" escape and only worked on Windows.
        candidate = os.path.join(child_dir, "index.shtml")
        # isfile: a directory that happens to be named index.shtml cannot be
        # read or converted later, so it is not queued.
        if os.path.isfile(candidate):
            index_shtml.append(candidate)

    print("index.shtml列表如下")
    print(index_shtml)
    return index_shtml

def get_all_gb2312(index_shtml):
    """Return the files whose content ``chardet`` detects as GB2312.

    Args:
        index_shtml: Iterable of file paths to inspect.

    Returns:
        A list of the paths from *index_shtml*, in input order, for which
        ``chardet.detect`` reports the encoding ``GB2312``.
    """
    gb2312_list = []

    for file_path in index_shtml:
        # Read the raw bytes and close the handle right away; the original
        # left every file open.
        with open(file_path, "rb") as f:
            data = f.read()

        # Detect once and reuse the result for both the log line and the test.
        encoding = chardet.detect(data)["encoding"]
        print(encoding)

        # The reported encoding may be None; guard before normalising case.
        if encoding is not None and encoding.upper() == "GB2312":
            gb2312_list.append(file_path)

    print("GB2312列表如下")
    print(gb2312_list)
    return gb2312_list

def convert_to_utf8(gb2312_list):
    """Rewrite each listed file in place, from GB2312-family text to UTF-8.

    A file that cannot be opened or decoded is reported and skipped; the
    remaining files are still processed. The number of files converted is
    printed at the end.

    Args:
        gb2312_list: Iterable of paths to files encoded in GB2312.
    """
    to_coding_type = "utf-8"
    # GB18030 is a superset of GB2312/GBK and is available on every platform.
    # The previous "ansi" is a Windows-only alias for the locale code page.
    from_coding_type = "gb18030"
    converted_count = 0

    for file_path in gb2312_list:
        try:
            # newline="" keeps the file's line endings exactly as they are.
            # The whole content is decoded before the file is reopened for
            # writing, so a decode failure leaves the file untouched.
            with open(file_path, "r", encoding=from_coding_type, newline="") as src:
                new_content = src.read()
            with open(file_path, "w", encoding=to_coding_type, newline="") as dst:
                dst.write(new_content)
        except IOError as err:
            # The original format string had no placeholder, so the error
            # itself was never printed.
            print("IO ERROR: {}".format(err))
        except UnicodeError as err:
            # One undecodable file must not abort the rest of the batch.
            print("DECODE ERROR: {}: {}".format(file_path, err))
        else:
            converted_count += 1

    print("本次转换%d个文件" % converted_count)

# Run the batch conversion only when this file is executed as a script.
if __name__ == '__main__':
    run()

weixin_39767386

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
python文件批量转换编码_Python批量转换子文件夹下的文件编码

# coding=utf-8import osimport chardetimport codecs# 批量转换文件夹中的index.shtml为utf-8编码def run():# 第一步，读取所有的子文件夹，形成地址列表all_child_dir = get_all_child_dir("D:\\2")# 第二步,判断路径下的index.shtml是否存在，如果存在加入新的index.shtm...
复制链接

扫一扫

评论

被折叠的条评论为什么被折叠?

到【灌水乐园】发言

查看更多评论

添加红包

成就一亿技术人!

hope_wisdom

发出的红包

实付元

使用余额支付

点击重新获取

扫码支付

钱包余额 0

抵扣说明：

1.余额是钱包充值的虚拟货币，按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载，可以购买VIP、付费专栏及课程。