import os
from docx import Document # python-docx
# Root folder that holds the province/city tree of Word documents.
root_folder = r'E:\file'


def convert_docx_tree(root):
    """Convert the .docx files under ``root/<province>/<city>/`` to UTF-8 .txt files.

    Directories directly under ``root`` whose names end in "省", "市" or "区"
    are scanned as province-level folders; inside them, directories whose
    names end in "市" are scanned as city folders.  Every ``.docx`` file found
    in a city folder is opened with python-docx and the text of each entry in
    ``doc.paragraphs`` is written, followed by a newline, to a ``.txt`` file
    with the same base name in the same folder (an existing file is
    overwritten).

    NOTE(review): only ``doc.paragraphs`` is exported; text held in tables is
    presumably not part of that list -- confirm against python-docx if table
    content matters.

    Args:
        root: Path of the top-level folder to scan.

    Returns:
        The number of Word documents that were converted.
    """
    converted = 0
    for province_filename in os.listdir(root):
        # str.endswith accepts a tuple, so one call covers all three suffixes.
        if not province_filename.endswith(("省", "市", "区")):
            continue
        province_folder = os.path.join(root, province_filename)
        # A regular file can carry a matching name; listing it would raise.
        if not os.path.isdir(province_folder):
            continue
        print(province_folder)

        for city_filename in os.listdir(province_folder):
            if not city_filename.endswith("市"):
                continue
            city_folder = os.path.join(province_folder, city_filename)
            if not os.path.isdir(city_folder):
                continue
            print(city_folder)

            for docx_name in os.listdir(city_folder):
                print(docx_name)
                # Compare case-insensitively so "REPORT.DOCX" is converted too.
                if not docx_name.lower().endswith('.docx'):
                    continue
                # "~$name.docx" is the owner/lock file Word keeps next to an
                # open document; it is not a real document and cannot be parsed.
                if docx_name.startswith('~$'):
                    continue

                docx_path = os.path.join(city_folder, docx_name)
                print(docx_path)
                txt_filename = os.path.splitext(docx_name)[0] + '.txt'
                txt_path = os.path.join(city_folder, txt_filename)

                # Load the document first so a failed load never leaves an
                # empty .txt file behind.
                doc = Document(docx_path)
                with open(txt_path, 'w', encoding='utf-8') as txt_file:
                    txt_file.writelines(
                        paragraph.text + '\n' for paragraph in doc.paragraphs
                    )
                converted += 1
    return converted


# Make sure the configured folder exists (and really is a folder) before scanning.
if not os.path.isdir(root_folder):
    print("指定的文件夹不存在。")
else:
    convert_docx_tree(root_folder)
    print("Word文档已成功转换为UTF-8编码的TXT文件。")
# Python: batch-convert the .docx files in a folder tree to .txt
# (Page residue: "latest recommended article published 2024-06-08 06:54:49".)