python win11 编程 实现:读取指定文件夹下所有word文档,然后依次把文档里面的文本返回【zhilu.space】

from pathlib import Path
from docx import Document
import logging
from concurrent.futures import ThreadPoolExecutor

# Configure logging: every record shows a timestamp, its level and the message.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

def read_docx(file_path):
    """Return the text of a Word document, one non-blank paragraph per line.

    Args:
        file_path: Location of the document; passed unchanged to ``Document``.

    Returns:
        The text of every paragraph in ``doc.paragraphs`` whose text is not
        empty or whitespace-only, joined with newline characters.
    """
    doc = Document(file_path)
    return '\n'.join(para.text for para in doc.paragraphs if para.text.strip())

def process_file(file_path):
    """Read one Word document and log its text.

    Args:
        file_path: Path object of the document. Its ``.name`` is used in the
            log messages and the object itself is handed to ``read_docx``.

    Returns:
        None. The text is written to the log at INFO level; a failure to read
        the document is logged at ERROR level instead of being raised.
    """
    try:
        content = read_docx(file_path)
    except Exception as e:
        # Per-file boundary: one unreadable document must not stop the batch.
        logging.error("An error occurred while reading %s: %s", file_path.name, e)
        return

    if content:
        # Header and body go out as ONE record. This function runs on several
        # pool threads at once, and two separate log calls could be split apart
        # by records from other files.
        logging.info("Content from %s:\n%s", file_path.name, content)

def read_docx_files_from_folder(folder_path):
    """Process every ``*.docx`` file directly inside a folder using a thread pool.

    Args:
        folder_path: Folder to scan (string or ``Path``). Sub-folders are not
            searched.

    Returns:
        None. Each file is handed to ``process_file``.
    """
    folder_path = Path(folder_path)
    if not folder_path.is_dir():
        # Make a wrong path visible instead of silently doing nothing.
        logging.warning("Folder not found or not a directory: %s", folder_path)
        return

    # Names starting with "~$" are the owner/lock files Word keeps next to a
    # document that is currently open; they are not readable documents.
    # Sorting makes the submission order deterministic.
    docx_files = sorted(
        path for path in folder_path.glob('*.docx')
        if path.is_file() and not path.name.startswith('~$')
    )
    if not docx_files:
        logging.info("No .docx files found in %s", folder_path)
        return

    # Process the files in parallel; leaving the ``with`` block waits for all
    # submitted work to finish.
    with ThreadPoolExecutor() as executor:
        executor.map(process_file, docx_files)

# Folder whose Word documents should be read.
folder_path = 'path_to_your_folder'  # replace with the path to your folder
read_docx_files_from_folder(folder_path)

from pathlib import Path
from docx import Document
import logging
from concurrent.futures import ThreadPoolExecutor

# Configure logging: every record shows a timestamp, its level and the message.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

def read_docx(file_path):
    """Return the text of a Word document, one non-blank paragraph per line.

    Args:
        file_path: Location of the document; passed unchanged to ``Document``.

    Returns:
        The text of every paragraph in the document's ``paragraphs`` whose
        text is not empty or whitespace-only, joined with newline characters.
    """
    document = Document(file_path)
    kept_lines = []
    for paragraph in document.paragraphs:
        text = paragraph.text
        # Drop paragraphs that are empty or contain only whitespace.
        if text.strip():
            kept_lines.append(text)
    return '\n'.join(kept_lines)

def process_file(file_path):
    try:
        content = read_docx(file_path)
        if content:
            logging.info(f"Content from {file_path.name}:")
            logging.info(content)
    except Exception as e:
        logging.error(f"An error occurred while reading {file_path.name}: {e}")

def read_docx_files_from_folder(folder_path):
    folder_path = Path(folder_path)
    docx_files = list(folder_path.glob('*.docx'))

    # 使用线程池来并行处理文件
    with ThreadPoolExecutor() as executor:
        executor.map(process_file, docx_files)

# Folder whose Word documents should be read.
folder_path = 'path_to_your_folder'  # replace with the path to your folder
read_docx_files_from_folder(folder_path)

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值