方法:利用 pywin32(win32com)调用本机已安装的 Microsoft Office(Word / PowerPoint)来实现。
步骤:
1.安装 pywin32:在 cmd 中使用 python -m pip install pypiwin32 进行安装(pypiwin32 是 pywin32 的旧包名,也可以直接安装 pywin32)
2.复制粘贴代码,并修改文件路径,运行
3.可以在这个.py文件的文件夹里找到temp.txt,得到转换结果
代码如下:
#coding:utf-8
import win32com
import win32con
import win32gui
import codecs
from win32com.client import Dispatch
import pythoncom
class MSOffice2txt():
def __init__(self, fileType=['doc', 'ppt']):
    """Start the Office applications needed for the requested file types.

    Args:
        fileType: list of type tags. 'doc' starts Word and 'ppt' starts
            PowerPoint; any other entry is ignored. The default list is
            only read here, never mutated.

    Raises:
        TypeError: if fileType is not a list.
    """
    # Validate before touching COM so a bad argument has no side effects.
    if not isinstance(fileType, list):
        # __init__ must return None; returning the message string made
        # Python raise an unrelated-looking TypeError. Raise it explicitly
        # so the caller sees the intended message.
        raise TypeError('Error, please check the fileType, it must be list[]')

    self.docCom = None
    self.pptCom = None
    pythoncom.CoInitialize()
    for ft in fileType:
        if ft == 'doc':
            self.docCom = self.docApplicationOpen()
        elif ft == 'ppt':
            self.pptCom = self.pptApplicationOpen()
def close(self):
    """Quit the Word and PowerPoint applications held by this object.

    Both handles are passed to their close helpers (which skip None), and
    both attributes are reset to None afterwards so calling close() again
    does not try to quit an application that is already gone.

    An exception raised while quitting Word still propagates, but only
    after PowerPoint has been given its chance to quit as well.
    """
    try:
        self.docApplicationClose(self.docCom)
    finally:
        self.docCom = None
        try:
            self.pptApplicationClose(self.pptCom)
        finally:
            self.pptCom = None
def docApplicationOpen(self):
    """Launch Word through COM and return the application object.

    The application is marked visible with alerts turned off; a top-level
    window titled 'Microsoft Word' is then looked up and hidden.

    Returns:
        The COM object obtained for 'Word.Application'.
    """
    word_app = win32com.client.Dispatch('Word.Application')
    word_app.Visible = 1
    word_app.DisplayAlerts = 0
    # Visible is set to 1 above; the window itself is hidden via its handle.
    # NOTE(review): the lookup depends on the exact title 'Microsoft Word'
    # -- confirm it matches on the installed Office version.
    win32gui.ShowWindow(
        win32gui.FindWindow(None, 'Microsoft Word'),
        win32con.SW_HIDE,
    )
    return word_app
def docApplicationClose(self, docCom):
    """Quit the given Word application; do nothing when docCom is None."""
    if docCom is None:
        return
    docCom.Quit()
def doc2Txt(self, docCom, docFile, txtFile):
    """Save a Word document as plain text using a running Word instance.

    Args:
        docCom: Word application COM object to open the document with.
        docFile: path of the document to convert; it is opened read-only.
        txtFile: path the text version is saved to.

    Any exception raised while opening or saving propagates to the caller.
    """
    wd_format_text = 2  # WdSaveFormat.wdFormatText: plain-text output
    doc = docCom.Documents.Open(FileName=docFile, ReadOnly=1)
    try:
        doc.SaveAs(txtFile, wd_format_text)
    finally:
        # Always close, so a failed save does not leave the document open
        # in the Word instance.
        doc.Close()
def pptApplicationOpen(self):
    """Launch PowerPoint through COM and return the application object.

    The application is marked visible with alerts turned off; a top-level
    window titled 'Microsoft PowerPoint' is then looked up and hidden.

    Returns:
        The COM object obtained for 'PowerPoint.Application'.
    """
    powerpoint = win32com.client.Dispatch('PowerPoint.Application')
    # Same two properties, applied in the same order as plain assignments.
    for prop, value in (('Visible', 1), ('DisplayAlerts', 0)):
        setattr(powerpoint, prop, value)
    # NOTE(review): the lookup depends on the exact window title
    # 'Microsoft PowerPoint' -- confirm it matches on the installed version.
    window_handle = win32gui.FindWindow(None, 'Microsoft PowerPoint')
    win32gui.ShowWindow(window_handle, win32con.SW_HIDE)
    return powerpoint
def pptApplicationClose(self, pptCom):
    """Quit the given PowerPoint application; a None handle is ignored."""
    if pptCom is None:
        return
    pptCom.Quit()
def ppt2txt(self, pptCom, pptFile, txtFile):
ppt = pptCom.Presentations.Open(pptFile,ReadOnly=1, Untitled=0, WithWindow=0)
f = codecs.open