# Ready-made code: it can be copied and used as-is.
from io import StringIO
from io import open
from pdfminer.converter import TextConverter
from pdfminer.layout import LAParams
from pdfminer.pdfinterp import PDFResourceManager, process_pdf
def read_pdf(pdf):
    """Extract the text of a PDF and return it as a list of lines.

    Args:
        pdf: The PDF file object to read; it is handed directly to
            pdfminer's ``process_pdf``. The script entry point in this
            file opens it in binary mode ("rb").

    Returns:
        list: The extracted text split on newline characters. The list
        always has at least one element (a single empty string when no
        text was extracted).
    """
    # The resource manager is shared between the converter and the
    # interpreter call below.
    rsrcmgr = PDFResourceManager()
    laparams = LAParams()

    # The converter writes the extracted text into this in-memory buffer.
    # The ``with`` block guarantees the buffer is released even if
    # pdfminer raises part-way through the document.
    with StringIO() as retstr:
        device = TextConverter(rsrcmgr, retstr, laparams=laparams)
        try:
            process_pdf(rsrcmgr, device, pdf)
        finally:
            # Close the device on both success and failure, and before the
            # buffer is read (same order as the original code).
            device.close()
        content = retstr.getvalue()

    # getvalue() already returns text, so no str() conversion is needed.
    return content.split("\n")
if __name__ == '__main__':
    # Script entry point: extract the text of one PDF and save it to a
    # text file. Both path literals below are placeholders to be replaced
    # with the real input PDF path and output .txt path.
    with open('需要打开的pdf路径.pdf', "rb") as my_pdf:
        contentlist = read_pdf(my_pdf)

    # An explicit UTF-8 encoding keeps the write independent of the
    # platform's locale codec, which may be unable to encode characters
    # extracted from the PDF.
    with open('想要存储的txt路径.txt', 'w', encoding='utf-8') as text:
        # read_pdf() split the text on "\n"; join with "\n" again so the
        # saved file keeps the original line breaks instead of running
        # every line together.
        content = '\n'.join(contentlist)
        text.write(content)