# 使用 pdfplumber 处理 PDF 文件
# 1. 提取 PDF 指定页码的文本
import pdfplumber
# Print the text of the first page of the PDF.
# The context manager closes the underlying file as soon as the block exits.
with pdfplumber.open("C:\\Users\\Lucas\\Desktop\\py\\gsk\\grades.pdf") as pdf:
    # pdf.pages is zero-indexed, so index 0 selects the first page.
    page01 = pdf.pages[0]
    text = page01.extract_text()
    print(text)
# 2. 提取 PDF 中的表格
import pdfplumber
import pandas as pd
# Load the first table found on the first page into a pandas DataFrame.
# Opening the PDF in a `with` block guarantees the file handle is closed,
# which a bare pdfplumber.open() call without close() does not.
with pdfplumber.open("C:\\Users\\Lucas\\Desktop\\py\\gsk\\grades.pdf") as pdf:
    pages = pdf.pages
    page = pages[0]
    # extract_tables() returns a list with one entry per table on the page;
    # each entry is a list of rows.
    tables = page.extract_tables()

# Indexing raises IndexError when no table was detected on the page.
table = tables[0]
# Every extracted row becomes a DataFrame row with default integer column
# labels; the first row is not promoted to a header.
data09 = pd.DataFrame(table)
# 3. 使用 openpyxl 将提取结果输出为 Excel 文件
import pdfplumber
from openpyxl import Workbook
# Copy a heading row and the first page's table from the PDF into a new
# Excel workbook.
with pdfplumber.open("C:\\Users\\Lucas\\Desktop\\py\\gsk\\grades.pdf") as pdf:
    page01 = pdf.pages[0]
    # The first 163 characters of the page text are split on single spaces
    # and later written as the first worksheet row.
    # NOTE(review): 163 looks tuned to this particular PDF's heading — confirm.
    str1 = page01.extract_text()[:163].split(' ')
    table = page01.extract_table()

# Everything needed has been read, so the workbook is built after the PDF
# has been closed.
workbook = Workbook()
sheet = workbook.active
sheet.append(str1)
# extract_table() gives None when no table is detected on the page; fall
# back to an empty sequence so only the heading row is written in that case.
for row in table or []:
    sheet.append(row)
workbook.save(filename="C:\\Users\\Lucas\\Desktop\\py\\gsk\\grades3.xlsx")