【Python】文件处理——批量实现docx转txt，以及将批量txt汇总成一个txt文件的思路

最新推荐文章于 2024-07-12 14:26:47 发布

其他垃圾！

最新推荐文章于 2024-07-12 14:26:47 发布

阅读量2.1k

点赞数 1

分类专栏： Python学习

本文链接：https://blog.csdn.net/tan_qin/article/details/94554373

版权

Python学习专栏收录该内容

9 篇文章 2 订阅

订阅专栏

在这里插入图片描述

#coding=utf-8
import docx2txt
import os

def get_all_path(open_file_path):
    """Recursively collect the paths of all files below a directory.

    Args:
        open_file_path: Directory to scan.

    Returns:
        A list of file paths, each formed by joining ``open_file_path`` with
        the names found beneath it. Entries are visited in sorted name order;
        the files of a subdirectory appear at that subdirectory's position.

    Raises:
        OSError: Propagated from ``os.listdir`` when the directory cannot be
            listed.
    """
    path_list = []
    # os.listdir returns names in arbitrary order; sorting keeps the result
    # (and anything concatenated from it) reproducible between runs.
    for entry_name in sorted(os.listdir(open_file_path)):
        com_path = os.path.join(open_file_path, entry_name)
        if os.path.isfile(com_path):
            path_list.append(com_path)
        elif os.path.isdir(com_path):
            # Descend and splice the nested files into the flat result.
            path_list.extend(get_all_path(com_path))
    return path_list


def docx_txt(path_list, txt_save_path):
    """Convert every ``.docx`` file in ``path_list`` to a UTF-8 text file.

    Paths whose extension is not ``.docx`` (compared case-insensitively) are
    ignored. Each converted document is written to ``txt_save_path`` under
    the document's base name with the extension replaced by ``.txt``, and a
    confirmation line is printed.

    Note that only the base name is used for the output file, so two
    documents with the same name in different folders write to the same
    output file and the later one wins.

    Args:
        path_list: Iterable of file paths to examine.
        txt_save_path: Existing directory that receives the ``.txt`` files.

    Returns:
        None.
    """
    for path in path_list:
        stem, file_type = os.path.splitext(os.path.basename(path))
        if file_type.lower() != '.docx':
            continue
        # Build the name from the stem instead of slicing a fixed number of
        # characters, so it does not depend on the extension's length.
        txt_save_name = os.path.join(txt_save_path, stem + '.txt')
        text = docx2txt.process(path)
        # The context manager closes the file even if the write fails.
        with open(txt_save_name, 'w', encoding='utf-8') as file:
            file.write(text)
        print(txt_save_name + "——文件保存成功")


# Make sure the given output directory exists, creating it when it does not.
def judge_path(File_Path):
    """Return ``File_Path`` after ensuring that it exists.

    When nothing exists at ``File_Path`` the directory (including any missing
    parents) is created with ``os.makedirs``; otherwise nothing is touched.

    Args:
        File_Path: Path of the directory that should exist.

    Returns:
        ``File_Path`` unchanged.
    """
    already_present = os.path.exists(File_Path)
    if already_present:
        return File_Path
    os.makedirs(File_Path)
    return File_Path


def docx_to_txt(open_file_path, txt_save_path):
    """Convert the ``.docx`` files under ``open_file_path`` into text files.

    The source tree is scanned with ``get_all_path`` first, then the output
    directory is ensured with ``judge_path``, and finally the collected paths
    are handed to ``docx_txt`` for conversion.

    Args:
        open_file_path: Directory that is searched recursively for documents.
        txt_save_path: Directory that receives the generated text files; it
            is created when missing.
    """
    candidate_paths = get_all_path(open_file_path)
    docx_txt(candidate_paths, judge_path(txt_save_path))


def txt_together(open_file_path, all_txt_path):
    """Append the contents of every file under a directory to ``all_txt.txt``.

    All files returned by ``get_all_path(open_file_path)`` are read as UTF-8,
    in the order that function returns them, and appended to
    ``all_txt.txt`` inside ``all_txt_path``. The output file is opened in
    append mode, so repeated runs keep adding to it.

    Every file is read regardless of its extension; a file that is not valid
    UTF-8 text raises ``UnicodeDecodeError``.

    Args:
        open_file_path: Directory that is searched recursively for inputs.
        all_txt_path: Directory that holds ``all_txt.txt``; it is created
            when missing.

    Returns:
        None.
    """
    path_list = get_all_path(open_file_path)
    all_txt_path_exists = judge_path(all_txt_path)
    merged_path = os.path.join(all_txt_path_exists, 'all_txt.txt')
    merged_key = os.path.normcase(os.path.abspath(merged_path))
    # Write with the same encoding the inputs are read with; relying on the
    # platform default can fail or corrupt text where it is not UTF-8.
    with open(merged_path, 'a', encoding='utf-8') as all_f:
        for path in path_list:
            # If the output directory lies inside the scanned tree, a previous
            # run's all_txt.txt is in the list; never append it to itself.
            if os.path.normcase(os.path.abspath(path)) == merged_key:
                continue
            with open(path, 'r', encoding='utf-8') as single_f:
                all_f.write(single_f.read())