import os
import textract
def doc_to_txt(doc_file, txt_file):
    """Extract the text of one document and save it as a UTF-8 text file.

    The extracted text is lightly normalised before it is written: each
    doubled newline is collapsed to a single newline (one pass, so longer
    runs of blank lines are shortened rather than eliminated) and every
    ASCII space character is removed.

    Args:
        doc_file: Path of the document handed to ``textract.process``.
        txt_file: Path of the text file to create or overwrite.
    """
    raw = textract.process(doc_file, encoding='utf-8')
    # textract hands back a byte string encoded with the requested encoding;
    # it has to be decoded before str.replace() or a text-mode write can be
    # used on it. The isinstance guard keeps the function working should a
    # str ever be returned instead.
    text = raw.decode('utf-8') if isinstance(raw, bytes) else raw
    cleaned = text.replace('\n\n', '\n').replace(' ', '')
    # The output file is opened only after extraction and clean-up have
    # succeeded, so a failure above does not leave an empty file behind.
    with open(txt_file, 'w', encoding='utf-8') as f:
        f.write(cleaned)
def convert_folder(folder_path, output_path):
    """Convert every ``.doc`` file directly inside a folder to a text file.

    Only regular files whose name ends in ``.doc`` (compared
    case-insensitively) are converted; sub-directories are not descended
    into. Each result is written to ``output_path`` under the same base
    name with the trailing ``.doc`` replaced by ``.txt``.

    Args:
        folder_path: Directory whose entries are scanned.
        output_path: Directory that receives the ``.txt`` files. It is
            created, together with any missing parent directories, if it
            does not exist yet.
    """
    doc_suffix = '.doc'
    # makedirs copes with missing parent directories and, with exist_ok,
    # needs no separate existence check beforehand.
    os.makedirs(output_path, exist_ok=True)
    for file_name in os.listdir(folder_path):
        file_path = os.path.join(folder_path, file_name)
        if not os.path.isfile(file_path):
            continue
        if not file_name.lower().endswith(doc_suffix):
            continue
        # Swap only the trailing extension: str.replace() would also rewrite
        # any '.doc' that happens to appear earlier in the file name.
        txt_name = file_name[:-len(doc_suffix)] + '.txt'
        doc_to_txt(file_path, os.path.join(output_path, txt_name))
if __name__ == '__main__':
    # Imported here because it is needed only when the file runs as a script.
    import argparse

    # Both folders may be given on the command line; when omitted, the
    # original hard-coded locations are used, so running the script without
    # arguments behaves as it did before.
    parser = argparse.ArgumentParser(
        description='Convert the .doc files in a folder to .txt files.')
    parser.add_argument(
        'folder_path', nargs='?',
        default=r'C:\Users\86155\Desktop\支持\大财务',
        help='folder containing the .doc files')
    parser.add_argument(
        'output_path', nargs='?',
        default=r'C:\Users\86155\Desktop\支持\大财务2',
        help='folder that receives the .txt files')
    args = parser.parse_args()
    folder_path = args.folder_path
    output_path = args.output_path
    convert_folder(folder_path, output_path)