Python3批量转换文本文件编码

79 篇文章 0 订阅

Python3批量转换文本文件编码

 

#-*- coding: utf-8 -*-

try:
    from chardet.universaldetector import UniversalDetector
    IsAuto = True
except ImportError:
    # chardet is optional: without it explore() falls back to Convert_Manu.
    IsAuto = False
import os
import os.path
import glob

# Size of the slices handed to the chardet detector.
_DETECT_CHUNK = 4096


def _read_bytes(filename):
    """Return the raw bytes stored in *filename*."""
    with open(filename, 'rb') as f:
        return f.read()


def _detect_encoding(raw):
    """Return chardet's guess for the encoding of *raw*, or None if it has none.

    The data is fed from its very first byte, in slices, until the detector
    reports that it is done. Sampling only a short prefix can miss non-ASCII
    text that starts later in the file, and the wrong guess would then make
    the 'ignore' decode drop those bytes.
    """
    detector = UniversalDetector()
    detector.reset()
    for start in range(0, len(raw), _DETECT_CHUNK):
        detector.feed(raw[start:start + _DETECT_CHUNK])
        if detector.done:
            break
    detector.close()
    return detector.result['encoding']


def _rewrite(filename, raw, in_enc, out_enc):
    """Decode *raw* from in_enc and overwrite *filename* with it in out_enc.

    Undecodable bytes are dropped ('ignore'). The text is encoded BEFORE the
    file is opened for writing, so an unknown or too-narrow out_enc raises
    while the original file is still intact. Writing in binary mode keeps
    existing line endings as they are (text mode would turn '\\r\\n' into
    '\\r\\r\\n' on Windows).
    """
    text = raw.decode(in_enc, 'ignore')
    encoded = text.encode(out_enc)
    with open(filename, 'wb') as f:
        f.write(encoded)


def Convert_Auto(filename, out_enc="utf-8"):
    '''Re-encode a text file in place, auto-detecting its current encoding.

    Needs the chardet library.

    Input Parameter:
        filename: full path and file name, e.g. C:/dir1/file.txt
        out_enc: new encoding. Default is 'utf-8'
    Output Parameter:
        None. The outcome is printed. I/O errors are reported, not raised.
    Raises:
        ImportError if chardet is not installed.
    '''
    if not IsAuto:
        raise ImportError("Convert_Auto requires the chardet library")
    # Bound up front so the error message below can always be formatted.
    in_enc = None
    try:
        raw = _read_bytes(filename)
        in_enc = _detect_encoding(raw)
        if in_enc is None:
            # No guess (e.g. empty or binary file): do not touch the file.
            print("Error: {} encoding could not be detected, "
                  "file left unchanged!".format(filename))
            return
        _rewrite(filename, raw, in_enc, out_enc)
        print("Success: {} converted from {} to {}!".format(
            filename, in_enc, out_enc))
    except IOError:
        print("Error: {} FAIL to converted from {} to {}!".format(
            filename, in_enc, out_enc))


def Convert_Manu(filename, in_enc='gbk', out_enc="utf-8"):
    '''Re-encode a text file in place from a caller-specified encoding.

    Input Parameter:
        filename: full path and file name, e.g. C:/dir1/file.txt
        in_enc: current encoding. Default is 'gbk'
        out_enc: new encoding. Default is 'utf-8'
    Output Parameter:
        None. The outcome is printed. I/O errors are reported, not raised.
    '''
    try:
        print("convert " + filename)
        raw = _read_bytes(filename)
        _rewrite(filename, raw, in_enc, out_enc)
        print("Success: {} converted from {} to {}!".format(
            filename, in_enc, out_enc))
    except IOError:
        print("Error: {} FAIL to converted from {} to {}!".format(
            filename, in_enc, out_enc))


def explore(dir, IsLoopSubDIR=True):
    '''Convert every .txt file under a folder to utf-8.

    Uses Convert_Auto when chardet is available, otherwise assumes the
    files are gbk and uses Convert_Manu.

    Input:
        dir : folder to process
        IsLoopSubDIR: True  -- include files in sub folders
                      False -- only include files directly in dir
    Output:
        NONE
    '''
    if IsLoopSubDIR:
        flist = getSubFileList(dir, '.txt')
    else:
        flist = getCurrFileList(dir, '.txt')
    for fname in flist:
        if IsAuto:
            Convert_Auto(fname, 'utf-8')
        else:
            Convert_Manu(fname, 'gbk', 'utf-8')


def getSubFileList(dir, suffix=''):
    '''Get all files with the given suffix under a folder, sub folders included.

    Input:
        dir : folder to walk
        suffix : defaults to blank, which selects all files.
    Output:
        File list (each entry is dir joined with the relative path)
    '''
    # Walk the folder that was asked for, not the process working directory.
    return [os.path.join(root, name)
            for root, dirs, files in os.walk(dir)
            for name in files
            if name.endswith(suffix)]


def getCurrFileList(dir, suffix=''):
    '''Get the entries with the given suffix directly inside a folder.

    Matching is done by glob, so hidden (dot) entries are skipped and
    sub folder names that match are included.

    Input:
        dir : folder to list
        suffix : defaults to blank, which selects all entries.
    Output:
        File list (each entry is dir joined with the entry name)
    '''
    # Escape dir so glob metacharacters in the folder name stay literal.
    pattern = os.path.join(glob.escape(dir), '*' + suffix)
    return glob.glob(pattern)


def main():
    """Convert all .txt files under the current working directory."""
    explore(os.getcwd(), True)


if __name__ == "__main__":
    main()
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值