def getpdfContent(url):
    """Download the PDF at *url* and return the text of all its pages.

    The document is fetched with a single HTTP GET, held in memory, and
    parsed with PyPDF2.  Within each page, newlines in the extracted text
    are replaced by single spaces; the per-page strings are then
    concatenated in page order with no separator between pages.

    Args:
        url: Address of the PDF document to fetch.

    Returns:
        The extracted text of every page as one string; an empty string
        when the document has no pages.
    """
    import io

    import PyPDF2
    import urllib3

    # Suppresses urllib3's warnings for the whole process, not just this call.
    urllib3.disable_warnings()

    with urllib3.PoolManager() as http:
        response = http.request('GET', url)
        pdf_bytes = response.data

    # NOTE(review): PdfFileReader / getNumPages / getPage / extractText are the
    # legacy PyPDF2 names; newer releases appear to replace them with
    # PdfReader / pages / extract_text -- confirm against the installed version.
    with io.BytesIO(pdf_bytes) as buffer:
        reader = PyPDF2.PdfFileReader(buffer)
        # Index by the loop variable so every page is read, not just page 0.
        page_texts = [
            ' '.join(reader.getPage(index).extractText().split('\n'))
            for index in range(reader.getNumPages())
        ]

    return ''.join(page_texts)
Python 从 URL 获取 PDF 内容:
最新推荐文章于 2024-05-13 09:48:07 发布