python下载网页中的pdf文件_Python读取网页上的pdf文件，输出字符串

最新推荐文章于 2024-02-27 19:14:46 发布

weixin_39861955

最新推荐文章于 2024-02-27 19:14:46 发布

阅读量249

点赞数

文章标签： python下载网页中的pdf文件

读取一个本地pdf文件，输出字符串

# -*- coding: UTF-8 -*-
"""Read a local PDF file and print its text content as a string."""
try:
    from urllib.request import urlopen  # Python 3 location
except ImportError:
    from urllib import urlopen  # Python 2 location (the original import)

from io import StringIO
from io import open

from pdfminer.converter import TextConverter
from pdfminer.layout import LAParams
from pdfminer.pdfinterp import PDFResourceManager, process_pdf


def readPDF(pdfFile):
    """Extract the text of a PDF and return it as a single string.

    Args:
        pdfFile: A file-like object holding the PDF data, opened in
            binary mode.

    Returns:
        The text that the pdfminer ``TextConverter`` wrote into an
        in-memory buffer while ``process_pdf`` consumed ``pdfFile``.

    The caller keeps ownership of ``pdfFile``; it is not closed here.
    """
    rsrcmgr = PDFResourceManager()
    retstr = StringIO()
    try:
        device = TextConverter(rsrcmgr, retstr, laparams=LAParams())
        try:
            process_pdf(rsrcmgr, device, pdfFile)
        finally:
            # Close the converter before reading the buffer, and also
            # when parsing fails, so it is never left open.
            device.close()
        return retstr.getvalue()
    finally:
        retstr.close()


# PDFs are binary data, so the file must be opened with "rb"; the
# original mode string "rd" is not a valid mode for open().
with open(u"/home/mypdf.pdf", "rb") as pdfFile:  # local file
    print(pdfFile)
    outputString = readPDF(pdfFile)

print(outputString)