import os
import docx
def word_to_txt(word_file, txt_file):
    """Write the paragraph text of a Word document to a UTF-8 text file.

    Each paragraph whose text is non-empty becomes one output line, with
    every ASCII space character removed. Paragraphs whose text is the empty
    string are skipped; a paragraph made up solely of spaces is not empty,
    so it still yields a blank line.

    Args:
        word_file: Path of the document handed to ``docx.Document``.
        txt_file: Path of the text file to create; an existing file is
            overwritten.
    """
    document = docx.Document(word_file)
    with open(txt_file, 'w', encoding='utf-8') as out:
        for paragraph in document.paragraphs:
            text = paragraph.text
            if not text:
                continue
            compact = text.replace(' ', '')
            out.write(compact + '\n')
def _strip_spaces_in_place(txt_path):
    """Rewrite ``txt_path`` with each line trimmed and ASCII spaces removed."""
    with open(txt_path, 'r+', encoding='utf-8') as f:
        lines = f.readlines()
        # Rewind and overwrite from the start; truncate() then discards
        # whatever remains of the old, longer content.
        f.seek(0)
        f.writelines(line.strip().replace(' ', '') + '\n' for line in lines)
        f.truncate()


def convert_folder(folder_path, output_path):
    """Convert every ``.docx`` file directly inside a folder to a ``.txt`` file.

    Only regular files located directly in ``folder_path`` are considered;
    sub-folders are not traversed. Each document ``name.docx`` is written to
    ``output_path/name.txt`` through ``word_to_txt``, and the result is then
    normalised so that every line is stripped of surrounding whitespace and
    contains no ASCII spaces.

    Args:
        folder_path: Directory holding the Word documents.
        output_path: Directory receiving the text files. It is created,
            together with any missing parent directories, when absent.

    Raises:
        OSError: If ``folder_path`` cannot be listed or ``output_path``
            cannot be created.
    """
    extension = '.docx'

    # Unlike os.mkdir, this creates missing parents and does not fail when
    # the directory already exists (no check-then-create race).
    os.makedirs(output_path, exist_ok=True)

    for file_name in os.listdir(folder_path):
        # Compare the suffix case-insensitively so 'REPORT.DOCX' is converted.
        if file_name[-len(extension):].lower() != extension:
            continue
        # Names starting with '~$' are the owner/lock files Word keeps next
        # to an open document; they are not real documents and cannot be
        # parsed as .docx packages.
        if file_name.startswith('~$'):
            continue
        file_path = os.path.join(folder_path, file_name)
        if not os.path.isfile(file_path):
            continue

        # Swap only the trailing extension; str.replace would also rewrite
        # any '.docx' that happens to appear earlier in the name.
        stem = file_name[:-len(extension)]
        txt_file = os.path.join(output_path, stem + '.txt')

        word_to_txt(file_path, txt_file)
        _strip_spaces_in_place(txt_file)
if __name__ == '__main__':
    # Folder that contains the Word files to convert.
    folder_path = r'C:\Users\86155\Desktop\支持\大财务'
    # Folder that receives the converted txt files.
    output_path = r'C:\Users\86155\Desktop\支持\大财务2'
    convert_folder(folder_path=folder_path, output_path=output_path)