递归处理文件夹下的所有子文件夹和文件（跳过文件名含“译文”的文件，对其余文件做中文繁体转简体），并输出到指定文件夹下，保留原有的最后两级目录结构（上级文件夹名/文件名）

最新推荐文章于 2024-09-17 21:57:02 发布

W_Ria

最新推荐文章于 2024-09-17 21:57:02 发布

阅读量320

点赞数 1

分类专栏：自然语言处理 # python 文章标签： python 文件处理

本文链接：https://blog.csdn.net/wr1997/article/details/96998488

版权

本文同时被 2 个专栏收录：自然语言处理

21 篇文章 1 订阅

订阅专栏

python

12 篇文章 0 订阅

订阅专栏

import os
import re
from opencc import *

# Root folder that is scanned for source files.
# NOTE(review): the dots look like an elided placeholder -- set a real path.
input_path = 'D:\\......\\xx'
# Root folder that receives the converted files (same placeholder caveat).
output_path = 'D:\\.....\\yy'

# Module-level result lists; both stay empty until the helper functions run.
all_files, all_new_files = [], []

# Collect every file found under a directory tree
def get_all_file(ipath):
    """Return the paths of all files below ``ipath``, searched recursively.

    Entries are visited in ``os.listdir`` order; a sub-directory is descended
    into at the point where it is listed, so its files appear in the result
    before the entries that follow it.

    Args:
        ipath: Directory to scan.

    Returns:
        A new list of file paths, each built with ``os.path.join`` from
        ``ipath``. Directories themselves are not included. The same list is
        also bound to the module-level name ``all_files``.

    Raises:
        OSError: Propagated from ``os.listdir`` when a directory cannot be
            read.
    """
    global all_files

    def _collect(directory, found):
        # Depth-first walk that appends into one shared result list.
        for name in os.listdir(directory):
            filepath = os.path.join(directory, name)
            if os.path.isdir(filepath):
                _collect(filepath, found)
            else:
                found.append(filepath)

    # Start from an empty list on every call so repeated scans do not pile up
    # duplicate entries from earlier calls.
    found = []
    _collect(ipath, found)
    # Rebind the module-level name so code that reads ``all_files`` after a
    # scan still sees the latest result.
    all_files = found
    return found

# Select the files that still need to be processed
def get_new_file(files=None):
    """Return the paths whose file name is not marked as a translation.

    A file is skipped when its base name contains the text "译文" immediately
    followed by a dot and preceded by at least one other character (for
    example ``论语译文.txt``). Only the base name is inspected, never the
    directory part of the path.

    Args:
        files: Optional iterable of paths to filter. When omitted, the tree
            below the module-level ``input_path`` is scanned with
            ``get_all_file``.

    Returns:
        A new list with the kept paths in their input order. The same list is
        also bound to the module-level name ``all_new_files``.
    """
    global all_new_files

    if files is None:
        files = get_all_file(input_path)

    # Compiled once, outside the per-file loop.
    translated = re.compile(r'(.+?译文)\.')
    kept = [
        path for path in files
        if not translated.search(os.path.basename(path))
    ]
    # A fresh list per call keeps repeated calls from accumulating duplicates;
    # the module-level name is rebound so it still mirrors the latest result.
    all_new_files = kept
    return kept

# Convert the selected files and write them below the output directory
def get_update_files(files=None, out_dir=None, convert=None):
    """Convert text files line by line and write them to an output tree.

    Each source file is written to ``<out_dir>/<parent folder>/<file name>``,
    i.e. only the last two levels of the source path are kept. Every line is
    stripped of leading and trailing whitespace, empty lines are dropped, and
    each remaining line is converted and written followed by a newline.
    Files are read and written as UTF-8. The path of every output folder
    that is newly created is printed.

    Args:
        files: Optional iterable of source file paths. Defaults to the result
            of ``get_new_file()``.
        out_dir: Optional output root. Defaults to the module-level
            ``output_path``.
        convert: Optional callable mapping one line of text to its converted
            form. Defaults to ``OpenCC('t2s').convert``.

    Returns:
        None.

    Raises:
        OSError: When a source file cannot be read or a target cannot be
            created or written.
        UnicodeDecodeError: When a source file is not valid UTF-8.
    """
    if convert is None:
        convert = OpenCC('t2s').convert
    if files is None:
        files = get_new_file()
    if out_dir is None:
        out_dir = output_path

    for src in files:
        # Take the parent folder name via os.path instead of splitting on
        # os.path.sep: that split mis-parses paths containing "/" on Windows
        # and wraps around to the file name when the path has no folder part.
        parent_name = os.path.basename(os.path.dirname(src))
        target_dir = os.path.join(out_dir, parent_name)
        if not os.path.exists(target_dir):
            os.makedirs(target_dir, exist_ok=True)
            print(target_dir)
        target_path = os.path.join(target_dir, os.path.basename(src))

        # Context managers close both handles even if reading or converting
        # raises part-way through a file.
        with open(src, "r", encoding='UTF-8') as reader, \
                open(target_path, "w", encoding='UTF-8') as writer:
            for line in reader:
                line = line.strip()
                if line:
                    # One converted paragraph per output line.
                    writer.write(convert(line) + '\n')

#
# Script entry point: run the whole conversion only when this file is
# executed directly, not when it is imported.
if __name__ == "__main__":
    get_update_files()