处理ansi编码转utf8
#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
# @Author : kinggoo
# @Software: 鳗鱼是条狗
# @blog :https://kinggoo.com/pyansitoutf8.htm
import os
import sys
import time
import codecs
import chardet
"""
主要是GB2312转utf-8格式,其他格式可以通过chardet来获取类型转换。
kinggoo.com
"""
# chardet labels that belong to the GB family. GB18030 extends GBK, which in
# turn extends GB2312, so the single "gb18030" codec decodes all of them.
GB_FAMILY_ENCODINGS = frozenset(("GB2312", "GBK", "GB18030"))

# Upper bound on how many leading bytes are handed to chardet; detection on
# the first line alone misclassifies files that start with an ASCII line.
DETECT_SAMPLE_BYTES = 64 * 1024


def is_gb_encoding(encoding_name):
    """Return True when a chardet encoding label names a GB-family codec.

    ``encoding_name`` is the ``'encoding'`` value of a chardet result; it is
    ``None`` when chardet could not guess, which is reported as False.
    The comparison is case-insensitive.
    """
    if encoding_name is None:
        return False
    return encoding_name.upper() in GB_FAMILY_ENCODINGS


def gb_bytes_to_utf8(raw):
    """Re-encode GB2312/GBK/GB18030 bytes as UTF-8 bytes.

    Works purely on bytes, so line endings are passed through untouched.

    Raises:
        UnicodeDecodeError: if ``raw`` is not valid GB18030 data.
    """
    # "gb18030" is a portable codec name; the previously used "ansi" alias
    # only exists on Windows and follows the system code page there.
    return raw.decode("gb18030").encode("utf-8")


if __name__ == "__main__":
    file_path = "./txt"
    file_path_new = "./txtnew"
    # The output directory may not exist yet; writing into a missing
    # directory would fail on the first converted file.
    os.makedirs(file_path_new, exist_ok=True)
    for file in os.listdir(file_path):
        file_name = file_path + '/' + file
        target_name = file_path_new + '/' + file
        # Sub-directories cannot be opened as files; skip them.
        if not os.path.isfile(file_name):
            continue
        # Never overwrite a file that has already been converted.
        if os.path.isfile(target_name):
            print("已存在文件 %s" % target_name)
            continue
        # Binary mode: chardet needs the raw, undecoded bytes. The file is
        # read once and the same bytes are reused for the conversion.
        with open(file_name, 'rb') as source:
            raw = source.read()
        chardet_1 = chardet.detect(raw[:DETECT_SAMPLE_BYTES])
        print("编码为:%s" % (chardet_1,))
        if not is_gb_encoding(chardet_1['encoding']):
            print("文件:%s 格式:%s" % (file_name, chardet_1['encoding']))
            continue
        try:
            converted = gb_bytes_to_utf8(raw)
        except UnicodeDecodeError as err:
            # A wrong guess by chardet must not abort the whole batch, and
            # no partial output file is left behind because nothing has
            # been written yet.
            print("文件:%s 解码失败:%s" % (file_name, err))
            continue
        with open(target_name, 'wb') as target:
            target.write(converted)
- THE END -