"""Convert Traditional Chinese text files to Simplified Chinese.

Recursively lists the files under ``input_path``, skips translation files
(file names in which ``译文`` is preceded by at least one character and
followed by a dot), converts every remaining file with OpenCC's ``t2s``
configuration and writes the result under ``output_path``, keeping only the
file's immediate parent directory name and its own file name.
"""
import os
import re

input_path = 'D:\\......\\xx'
output_path = 'D:\\.....\\yy'

# Results of the most recent get_all_file() / get_new_file() call.  They are
# replaced in place on every call, so repeated calls never pile up duplicates.
all_files = []
all_new_files = []

# Matches a file name in which "译文" (translation) is preceded by at least
# one character and immediately followed by a dot.
_TRANSLATION_NAME = re.compile(r'(.+?译文)\.')


def _collect_files(root):
    """Return every non-directory path under *root*.

    Directories are descended into as they are encountered, so the result
    follows ``os.listdir`` order depth-first.
    """
    found = []
    for entry in os.listdir(root):
        path = os.path.join(root, entry)
        if os.path.isdir(path):
            found.extend(_collect_files(path))
        else:
            found.append(path)
    return found


def get_all_file(ipath):
    """List all files below *ipath*, recursing into sub-directories.

    Args:
        ipath: Directory to scan.

    Returns:
        The module-level ``all_files`` list, refreshed with the paths found
        by this call.

    Raises:
        OSError: If *ipath* or one of its sub-directories cannot be listed.
    """
    all_files[:] = _collect_files(ipath)
    return all_files


def get_new_file(ipath=None):
    """List the files that need converting, i.e. everything but translations.

    Args:
        ipath: Directory to scan; defaults to the module-level ``input_path``.

    Returns:
        The module-level ``all_new_files`` list, refreshed with every file
        below *ipath* whose base name does not match the translation pattern.
    """
    if ipath is None:
        ipath = input_path
    all_new_files[:] = [
        path for path in get_all_file(ipath)
        if not _TRANSLATION_NAME.search(os.path.basename(path))
    ]
    return all_new_files


def get_update_files(ipath=None, opath=None):
    """Convert the selected files and write them below the output directory.

    Each source file is read as UTF-8 line by line.  Lines are stripped of
    surrounding whitespace, empty lines are dropped, and every remaining line
    is converted with ``OpenCC('t2s')`` and written followed by a newline.
    The target path is ``<opath>/<parent directory name>/<file name>``, so
    files that share both names overwrite each other.

    Args:
        ipath: Directory to read from; defaults to ``input_path``.
        opath: Directory to write to; defaults to ``output_path``.

    Raises:
        ImportError: If the ``opencc`` package is not installed.
        OSError: If a directory or file cannot be read or written.
        UnicodeDecodeError: If a source file is not valid UTF-8.
    """
    # Imported here so the directory helpers above stay usable without opencc.
    from opencc import OpenCC

    if opath is None:
        opath = output_path
    cc = OpenCC('t2s')

    for src in get_new_file(ipath):
        # Keep only the last directory level and the file name.
        parent_dir, file_name = os.path.split(src)
        target_fpath = os.path.join(opath, os.path.basename(parent_dir))
        target_filepath = os.path.join(target_fpath, file_name)

        if not os.path.exists(target_fpath):
            os.makedirs(target_fpath)
            print(target_fpath)

        # Context managers close both files even if decoding or conversion
        # raises part-way through.
        with open(src, "r", encoding='UTF-8') as fo, \
                open(target_filepath, "w", encoding='UTF-8') as target_file:
            for line in fo:
                line = line.strip()
                if line:
                    # One converted paragraph per output line.
                    target_file.write(cc.convert(line) + '\n')


if __name__ == '__main__':
    get_update_files()
按要求处理文件夹下所有文件夹和文件(中文繁简转换),并输出到指定文件夹下,保持原有两级目录结构
最新推荐文章于 2024-09-17 21:57:02 发布