下载 poppler-0.45，并使用其中的 pdftohtml.exe 命令将 PDF 转换为 XML：
def pdftoXml(filename,
             pdf_dir='F:/testFiles/pdfFiles/',
             result_dir='F:/testFiles/resuleFiles/',
             pdftohtml_exe='E:/poppler-0.45_x86/poppler-0.45/bin/pdftohtml.exe'):
    """Convert one PDF file to XML by running poppler's ``pdftohtml`` tool.

    The input is ``<pdf_dir>/<filename>.pdf`` and the requested output is
    ``<result_dir>/<filename>.xml``. The tool is invoked as
    ``pdftohtml <input> -i -xml <output>`` (``-i``: ignore images,
    ``-xml``: emit XML instead of HTML).

    Args:
        filename: Base name of the PDF, without directory and without the
            ``.pdf`` extension.
        pdf_dir: Directory that contains the input PDF.
        result_dir: Directory that receives the XML output.
            NOTE(review): the default spells ``resuleFiles``; this looks like
            a typo for ``resultFiles`` but is kept unchanged because the
            directory may really exist under that name -- confirm.
        pdftohtml_exe: Path to the ``pdftohtml`` executable.

    Returns:
        The exit status of the ``pdftohtml`` process (0 normally means
        success).

    Raises:
        OSError: If the executable cannot be started (for example, it does
            not exist at ``pdftohtml_exe``).
    """
    # Imported locally so the function does not depend on the imports that
    # happen to be present at the top of the file.
    import os
    import subprocess

    pdf_path = os.path.join(pdf_dir, filename + '.pdf')
    result_path = os.path.join(result_dir, filename + '.xml')

    # Pass the arguments as a list (no shell) so that file names containing
    # spaces or shell metacharacters reach pdftohtml as single, literal
    # arguments instead of being split or interpreted.
    command = [pdftohtml_exe, pdf_path, '-i', '-xml', result_path]
    return subprocess.call(command)