最近需要一个txt文件的批量转码功能,在网上找到一段原本用于批量处理java源文件的py程序,下面的版本按.txt后缀筛选文件:
# -*- coding: utf-8 -*-
"""Batch-convert the .txt files under the current directory to UTF-8.

Every file is copied to ``<name>.bak`` before it is rewritten; the backups
can afterwards be restored or deleted with ``process_bak_files``.
"""

import codecs
import os
import shutil
import re
import chardet


def convert_encoding(filename, target_encoding):
    """Rewrite ``filename`` in place using ``target_encoding``.

    The source encoding is guessed with ``chardet``.  A backup copy named
    ``filename + '.bak'`` is written just before the file is overwritten.

    Args:
        filename: path of the file to convert.
        target_encoding: codec name to write the file with, e.g. 'utf-8'.

    Raises:
        ValueError: if chardet cannot identify the source encoding.
        UnicodeError: if the content cannot be decoded or re-encoded.
        IOError/OSError: if the file cannot be read, backed up or written.
    """
    # chardet works on raw bytes, and binary mode keeps them exactly as
    # stored (text mode may translate line endings on some platforms).
    with open(filename, 'rb') as source:
        raw = source.read()

    source_encoding = chardet.detect(raw)['encoding']
    print('%s %s' % (source_encoding, filename))
    if source_encoding is None:
        # chardet reports None when it has no guess (e.g. an empty file).
        raise ValueError('cannot detect the encoding of %s' % filename)
    text = raw.decode(source_encoding)

    # Back up only once the content is known to be decodable, so that files
    # which are skipped do not leave stray .bak copies behind.
    shutil.copyfile(filename, filename + '.bak')

    with codecs.open(filename, 'w', encoding=target_encoding) as target:
        target.write(text)


def main():
    """Convert every ``.txt`` file under the current directory to UTF-8.

    Files that fail to convert are reported together with the error and
    skipped, so one bad file does not abort the whole run.
    """
    for root, _dirs, files in os.walk(os.getcwd()):
        for f in files:
            if not f.lower().endswith('.txt'):
                continue
            filename = os.path.join(root, f)
            try:
                convert_encoding(filename, 'utf-8')
            except Exception as e:
                # Best effort: report the failure and carry on.
                print('%s: %r' % (filename, e))


def process_bak_files(action='restore'):
    """Restore or delete the ``.txt.bak`` backups under the current directory.

    Args:
        action: 'restore' moves each backup back over its ``.txt`` file;
            'clear' deletes the backup.

    Raises:
        ValueError: if ``action`` is neither 'restore' nor 'clear'.
    """
    if action not in ('restore', 'clear'):
        raise ValueError('unknown action: %r' % (action,))

    for root, _dirs, files in os.walk(os.getcwd()):
        for f in files:
            if not f.lower().endswith('.txt.bak'):
                continue
            source = os.path.join(root, f)
            # Strip only the trailing ".bak"; the capture group keeps the
            # original spelling of the ".txt" part (e.g. "A.TXT.bak" ->
            # "A.TXT"), so the restore hits the right file on
            # case-sensitive file systems.
            original_name = re.sub(r'(\.txt)\.bak$', r'\1', f,
                                   flags=re.IGNORECASE)
            target = os.path.join(root, original_name)
            try:
                if action == 'restore':
                    shutil.move(source, target)
                else:
                    os.remove(source)
            except Exception as e:
                # Best effort: report the failure and carry on.
                print('%s: %r' % (source, e))


if __name__ == '__main__':
    # To delete the backups after checking the converted files, call
    # process_bak_files(action='clear') here instead of main().
    main()
其中import了python的chardet模块,此模块需要单独安装,到http://pypi.python.org/pypi/chardet#downloads下载chardet-2.1.1.tar.gz,
解压后其中有setup.py文件,用于模块的安装,执行命令:python setup.py install ,报错:ImportError: No module named setuptools ,
需要先安装setuptools:从网上下载ez_setup.py,在cmd中执行 python ez_setup.py 。之后重新执行 python setup.py install ,chardet安装成功!
另:Ubuntu下安装setuptools的命令(开头的 # 表示以root身份执行): # apt-get install python-setuptools