from pathlib import Path
from docx import Document
import logging
from concurrent.futures import ThreadPoolExecutor
# Configure root logging: INFO and above, formatted as
# "<timestamp> - <level> - <message>".
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
def read_docx(file_path):
    """Return the text of a .docx file, one non-blank paragraph per line.

    Args:
        file_path: Location of the document, passed unchanged to
            ``Document``.

    Returns:
        The text of every paragraph in ``doc.paragraphs`` whose text is not
        empty or whitespace-only, joined with newlines. An empty string when
        no paragraph qualifies.

    Any exception raised while opening the document propagates to the caller.
    """
    doc = Document(file_path)
    return '\n'.join(para.text for para in doc.paragraphs if para.text.strip())
def process_file(file_path):
    """Log the text content of one .docx file.

    Failures are logged instead of raised, so one unreadable file does not
    stop the remaining files from being processed. Nothing is logged when the
    extracted content is empty.

    Args:
        file_path: ``pathlib.Path`` of the document; ``file_path.name`` is
            used in the log messages.
    """
    try:
        content = read_docx(file_path)
    except Exception as e:
        logging.error(f"An error occurred while reading {file_path.name}: {e}")
    else:
        if content:
            # Header and body go out as ONE record: this function runs on
            # pool threads, and two separate records could be split apart by
            # another file's output.
            logging.info(f"Content from {file_path.name}:\n{content}")
def read_docx_files_from_folder(folder_path):
    """Log the content of every .docx file directly inside a folder.

    Each file is handed to ``process_file`` on a thread pool; the call
    returns once all of them have been processed.

    Args:
        folder_path: Folder to scan (``str`` or ``pathlib.Path``). Only its
            direct children are matched; subfolders are not searched.
    """
    folder_path = Path(folder_path)
    if not folder_path.is_dir():
        # Globbing a missing folder yields nothing, which would otherwise
        # make a wrong path look like an empty folder.
        logging.warning(f"Folder not found or not a directory: {folder_path}")
        return
    # Word keeps a "~$name.docx" owner/lock file next to an open document.
    # It matches *.docx but is not a real document, so skip it.
    docx_files = [
        path for path in folder_path.glob('*.docx')
        if not path.name.startswith('~$')
    ]
    if not docx_files:
        return
    # Process files in parallel; leaving the `with` block waits for all tasks.
    with ThreadPoolExecutor() as executor:
        executor.map(process_file, docx_files)
# Folder to read .docx files from.
folder_path = 'path_to_your_folder'  # replace with the path to your folder
read_docx_files_from_folder(folder_path)
from pathlib import Path
from docx import Document
import logging
from concurrent.futures import ThreadPoolExecutor
# Configure the root logger: INFO and above, formatted as
# "<timestamp> - <level> - <message>".
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)
def read_docx(file_path):
    """Read a .docx file and return its non-blank paragraphs as one string.

    Args:
        file_path: Location of the document, passed unchanged to
            ``Document``.

    Returns:
        The paragraph texts that contain something other than whitespace,
        separated by newlines; an empty string if there are none.
    """
    document = Document(file_path)
    kept = []
    for paragraph in document.paragraphs:
        text = paragraph.text
        if text.strip():
            kept.append(text)
    return '\n'.join(kept)
def process_file(file_path):
    """Log the text content of one .docx file.

    Failures are logged instead of raised, so one unreadable file does not
    stop the remaining files from being processed. Nothing is logged when the
    extracted content is empty.

    Args:
        file_path: ``pathlib.Path`` of the document; ``file_path.name`` is
            used in the log messages.
    """
    try:
        content = read_docx(file_path)
    except Exception as e:
        logging.error(f"An error occurred while reading {file_path.name}: {e}")
    else:
        if content:
            # Header and body go out as ONE record: this function runs on
            # pool threads, and two separate records could be split apart by
            # another file's output.
            logging.info(f"Content from {file_path.name}:\n{content}")
def read_docx_files_from_folder(folder_path):
    """Log the content of every .docx file directly inside a folder.

    Each file is handed to ``process_file`` on a thread pool; the call
    returns once all of them have been processed.

    Args:
        folder_path: Folder to scan (``str`` or ``pathlib.Path``). Only its
            direct children are matched; subfolders are not searched.
    """
    folder_path = Path(folder_path)
    if not folder_path.is_dir():
        # Globbing a missing folder yields nothing, which would otherwise
        # make a wrong path look like an empty folder.
        logging.warning(f"Folder not found or not a directory: {folder_path}")
        return
    # Word keeps a "~$name.docx" owner/lock file next to an open document.
    # It matches *.docx but is not a real document, so skip it.
    docx_files = [
        path for path in folder_path.glob('*.docx')
        if not path.name.startswith('~$')
    ]
    if not docx_files:
        return
    # Process files in parallel; leaving the `with` block waits for all tasks.
    with ThreadPoolExecutor() as executor:
        executor.map(process_file, docx_files)
# Folder to read .docx files from.
# NOTE(review): the call below runs at import time; consider guarding it with
# `if __name__ == "__main__":` if this module is ever imported.
folder_path = 'path_to_your_folder' # replace with the path to your folder
read_docx_files_from_folder(folder_path)