最近在做一个从 docx 文件中提取图片的实验,可是运行时报了一个错误:TypeError: write() argument must be str, not bytes
环境为 Python 3,我的代码如下:
import os
import docx2txt
class fileUtil():
    '''
    File helpers: remove document files from a folder (so that only the
    other files, e.g. images, remain) and extract the images embedded in
    a .docx file.
    '''

    # Extensions treated as documents by deleteFiles(). The comparison is
    # exact (case-sensitive), so e.g. "A.PDF" is left alone.
    _DOCUMENT_EXTENSIONS = (".pdf", ".docx", ".doc")

    def deleteFiles(self, dirs, baseDir=None):
        '''
        Delete every PDF / Word document named in ``dirs``.

        :param dirs: iterable of file names (not full paths), e.g. the
            result of ``os.listdir``.
        :param baseDir: folder that contains the files. When omitted, the
            module-level ``path`` variable is used, which is only defined
            when this file is run as a script.
        :raises NameError: if ``baseDir`` is omitted and no module-level
            ``path`` exists.
        :raises OSError: if a matching file cannot be removed.
        '''
        if baseDir is None:
            # Backward compatibility with callers that rely on the global
            # ``path`` set in the ``__main__`` section.
            baseDir = path
        for file in dirs:
            # splitext keeps an extension-less file literally named "pdf"
            # or "doc" from being mistaken for a document.
            extension = os.path.splitext(file)[1]
            if extension in fileUtil._DOCUMENT_EXTENSIONS:
                os.remove(os.path.join(baseDir, file))
                print(file)

    def extractImageFromDoc(self, docPath, docOutputPath):
        '''
        Extract the text and the embedded images of a .docx file.

        :param docPath: path of the .docx file to read.
        :param docOutputPath: folder passed to ``docx2txt.process`` as the
            image output directory; it is created when missing.
        :return: the text returned by ``docx2txt.process`` (also printed).
        '''
        if docOutputPath:
            # Make sure the image folder exists before docx2txt is asked
            # to write into it.
            os.makedirs(docOutputPath, exist_ok=True)
        text = docx2txt.process(docPath, docOutputPath)
        print(text)
        return text
if __name__ == '__main__':
    # Folder whose document files deleteFiles() would clean up. The
    # backslash is escaped explicitly: a bare "\简" is an invalid escape
    # sequence and triggers a warning on Python 3. The string value itself
    # is unchanged.
    path = 'C:/Users/eric/Desktop\\简谱图片'
    dirs = os.listdir(path)
    fileOperate = fileUtil()
    # Optional clean-up step, disabled for this experiment:
    # fileOperate.deleteFiles(dirs)
    docPath = "file/demo.docx"
    docOutputPath = "result"
    # Prints the document text; images are written into docOutputPath.
    fileOperate.extractImageFromDoc(docPath, docOutputPath)
解决方法:需要修改 docx2txt 源码 docx2txt.py 中第 103 行的
with open(dst_fname, "w") as dst_f:
dst_f.write(zipf.read(fname))
改为
with open(dst_fname, "wb") as dst_f:
dst_f.write(zipf.read(fname))
就可以正常运行了。原因是 zipf.read() 返回的是 bytes,而 Python 3 中以文本模式 "w" 打开的文件只接受 str,图片属于二进制数据,必须以二进制模式 "wb" 写入。