直接上代码:
利用 python-docx(docx 模块)读取 .docx 格式的文档
利用 win32com 模块将 .doc 文档先转换为 .docx 再处理
脚本调用: python your.py your.doc/your.docx 被替换字符A/被替换字符B/被替换字符C 替换字符a/替换字符b/替换字符c
from win32com import client as wc
import os
import docx
import sys
def help():
    """Print the command-line usage text for this script to stdout.

    The text shows the expected argument order (document path, the
    '/'-separated strings to search for, the '/'-separated replacements)
    for both the ``python`` and the packaged ``.exe`` invocation.

    NOTE: the name shadows the ``help`` builtin; it is kept unchanged
    because the script entry point calls it by this name.
    """
    print("Format: python py_file path+filename.doc/.docx A/B/C a/b/c\n"
          "eg: python preplace_string.py E:/....../a.docx 你好/好的/谢谢 你好啊/嗯嗯/不客气\n"
          "eg: python preplace_string.py E:/....../a.doc 你好/好的/谢谢 你好啊/嗯嗯/不客气\n"
          "EXE:\n"
          "eg: preplace_string.exe E:/....../a.doc 你好/好的/谢谢 你好啊/嗯嗯/不客气\n"
          "eg: preplace_string.exe E:/....../a.docx 你好/好的/谢谢 你好啊/嗯嗯/不客气\n"
          "替换字符串与被替换字符串一一对应,不同字符串用/分割\n")
# Word's WdSaveFormat value that selects the .docx (Open XML) format.
WD_FORMAT_XML_DOCUMENT = 12


def convert_doc_to_docx(doc_path, docx_path):
    """Save the .doc file at *doc_path* as a .docx file at *docx_path*.

    The conversion is delegated to a Word instance obtained through
    ``win32com`` (``wc`` is the module-level ``win32com.client`` import).

    Raises:
        Exception: whatever the COM layer raises when Word cannot be
            started or the document cannot be opened or saved.
    """
    word = wc.Dispatch("Word.Application")
    try:
        doc = word.Documents.Open(doc_path)
        try:
            doc.SaveAs(docx_path, WD_FORMAT_XML_DOCUMENT)
        finally:
            doc.Close()
    finally:
        # Quit even when Open/SaveAs fails; otherwise the Word instance
        # started by Dispatch is never told to shut down.
        word.Quit()


def replace_in_paragraphs(paragraphs, pairs):
    """Apply every (old, new) replacement to each paragraph's text.

    Args:
        paragraphs: iterable of objects with a readable and writable
            ``text`` attribute (python-docx paragraphs in this script).
        pairs: iterable of ``(old, new)`` string tuples, applied in order
            to each paragraph, so a later pair also sees text produced by
            an earlier one.

    Returns:
        The number of paragraphs whose text was actually modified.
    """
    pairs = list(pairs)  # allow one-shot iterables such as zip()
    changed = 0
    for paragraph in paragraphs:
        original = paragraph.text
        updated = original
        for old, new in pairs:
            updated = updated.replace(old, new)
        # Assign only on a real change: writing ``paragraph.text`` replaces
        # the paragraph content, so untouched paragraphs are left alone.
        if updated != original:
            paragraph.text = updated
            changed += 1
    return changed


def main(argv):
    """Run the replace tool and return a process exit code.

    Args:
        argv: ``[program, document_path, old_strings, new_strings]`` where
            the last two are '/'-separated lists of equal length.

    Returns:
        0 on success, 1 on any usage, input or conversion error.

    Only ``document.paragraphs`` is visited; text stored elsewhere in the
    document is not touched. The result is written next to the input as
    ``<name>_temp.docx``.
    """
    if len(argv) != 4:
        help()
        return 1

    # Validate the cheap arguments first so a typo is reported before Word
    # is launched for a .doc conversion.
    olds = argv[2].split('/')  # e.g. "你好/好的/谢谢"
    news = argv[3].split('/')  # e.g. "你好啊/嗯嗯/不客气"
    if len(olds) != len(news):
        print('The number of replacement strings does not match the number of replaced strings,Please Check!')
        print('Old string:', olds)
        print('New string:', news)
        return 1
    if '' in olds:
        # str.replace('', x) would insert x between every character.
        print('Empty strings cannot be used as a string to replace,Please Check!')
        print('Old string:', olds)
        return 1

    path = argv[1]
    if not os.path.exists(path):
        print(path, 'does not exist!!!!!\n')
        return 1
    path = os.path.abspath(path)
    basename, ext = os.path.splitext(path)
    suffix = ext.lower()  # accept .DOC / .DOCX as well

    if suffix == '.doc':
        docx_path = basename + '.docx'
        try:
            convert_doc_to_docx(path, docx_path)
        except Exception as err:
            print("can not read "+path+"!\nPlease check out that whether the format of the file is doc/docx Or Does this file exist!!!")
            print(err)
            return 1
    elif suffix == '.docx':
        docx_path = path
    else:
        print("can not read " + path + "!\nPlease check out that whether the format of the file is doc/docx!!!")
        return 1

    document = docx.Document(docx_path)
    replace_in_paragraphs(document.paragraphs, zip(olds, news))
    output_path = basename + '_temp.docx'
    document.save(output_path)
    print('succes!\n'
          'new file is shown in ' + output_path)
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))