import csv
import os
import shutil

from chardet.universaldetector import UniversalDetector


def get_encode_info(file):
    """Detect the character encoding of *file* using chardet.

    The file is opened in binary mode and fed to a ``UniversalDetector``
    line by line until the detector reports it is done or the file ends.

    Args:
        file: Path of the file to inspect.

    Returns:
        The ``'encoding'`` entry of the detector result. NOTE(review): per
        the chardet documentation this can be ``None`` when no encoding could
        be determined (e.g. an empty file) -- callers should be prepared.
    """
    detector = UniversalDetector()
    with open(file, 'rb') as f:
        # Iterate the file lazily instead of materialising every line with
        # readlines(); the detector usually finishes long before EOF.
        for line in f:
            detector.feed(line)
            if detector.done:
                break
    # close() finalises the detection so that ``result`` is populated.
    detector.close()
    return detector.result['encoding']


def read_file(file):
    """Return the complete content of *file* as bytes."""
    with open(file, 'rb') as f:
        return f.read()


def write_file(content, file):
    """Overwrite *file* with the bytes in *content*."""
    with open(file, 'wb') as f:
        f.write(content)


def convert_encode2utf8(file, original_encode, des_encode):
    """Re-encode *file* in place from *original_encode* to *des_encode*.

    Args:
        file: Path of the file to rewrite.
        original_encode: Name of the encoding the file is currently in.
        des_encode: Name of the encoding to write the file back in.

    Bytes that cannot be decoded with *original_encode* are silently dropped
    (``errors='ignore'``), so the conversion is best-effort and may lose data.
    """
    file_content = read_file(file)
    file_decode = file_content.decode(original_encode, 'ignore')
    file_encode = file_decode.encode(des_encode)
    write_file(file_encode, file)


## Move *.txt to a folder
def move2txtfolder(path, txt_file_list):
    """Move the given files into a ``txt`` subfolder of *path*.

    Args:
        path: Folder in which the ``txt`` subfolder is created if missing.
        txt_file_list: Paths of the files to move. Each file keeps its base
            name inside the ``txt`` subfolder.
    """
    # Build the destination with os.path.join rather than a hard-coded
    # backslash so the subfolder is created correctly on every platform.
    txt_folder_path = os.path.join(path, 'txt')
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(txt_folder_path, exist_ok=True)
    for file in txt_file_list:
        des_path = os.path.join(txt_folder_path, os.path.basename(file))
        shutil.move(file, des_path)


## Find all *.txt files under the given path
def findtxt(path, txt_file_list):
    """Recursively collect the ``*.txt`` files found under *path*.

    Args:
        path: Directory to scan.
        txt_file_list: List that is extended in place with the full path of
            every regular file whose name ends with ``.txt``.
    """
    for filename in os.listdir(path):
        de_path = os.path.join(path, filename)
        if os.path.isfile(de_path):
            if de_path.endswith('.txt'):  # Specify to find the txt file.
                txt_file_list.append(de_path)
        elif os.path.isdir(de_path):
            # Only descend into real directories; entries that are neither a
            # file nor a directory (e.g. dangling symlinks) are skipped
            # instead of making os.listdir raise.
            findtxt(de_path, txt_file_list)


def txt2csv(txt_file):
    """Convert a ``;``-separated text file into a CSV file next to it.

    The text file is first rewritten in place as utf-8 if it is detected to
    be in another encoding. Each line is then split on ``;`` and written as
    one row to ``<same name>.csv`` using the ``excel`` dialect.

    Args:
        txt_file: Path of the text file to convert.
    """
    # First make sure the file is utf-8 encoded.
    encode_info = get_encode_info(txt_file)
    if encode_info is None:
        # Detection failed (e.g. an empty file). Passing None on would make
        # bytes.decode raise TypeError, so sanitise the file as utf-8
        # instead, dropping any undecodable bytes.
        convert_encode2utf8(txt_file, 'utf-8', 'utf-8')
    elif encode_info.lower() != 'utf-8':
        convert_encode2utf8(txt_file, encode_info, 'utf-8')

    csv_file = os.path.splitext(txt_file)[0] + '.csv'
    # newline='' lets the csv module control line endings itself.
    with open(csv_file, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile, dialect='excel')
        with open(txt_file, 'r', encoding='utf-8') as txtfile:
            # Stream the file line by line rather than loading it whole.
            for line in txtfile:
                writer.writerow(line.rstrip('\n').split(';'))


if __name__ == '__main__':
    folder_path = r'C:\Details'
    # If the folder also contains subfolders, use the findtxt function:
    # txt_file_list = []
    # findtxt(folder_path, txt_file_list)
    # When the folder has no subfolders, build the list of txt files
    # directly with a comprehension.
    candidates = (
        os.path.join(folder_path, file) for file in os.listdir(folder_path)
    )
    # Require a regular file (as findtxt does) so that a directory whose name
    # happens to end in ".txt" is not handed to txt2csv.
    txt_file_list = [
        candidate for candidate in candidates
        if candidate.endswith('.txt') and os.path.isfile(candidate)
    ]
    for txt_file in txt_file_list:
        txt2csv(txt_file)
    move2txtfolder(folder_path, txt_file_list)