PDF 中每页表格的数据被分成左右两半(左 4 列、右 4 列),需要先把表格提取出来,再将右半部分的数据续接到左半部分的下方。
#-*-coding:GBK -*-
import pdfplumber
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Extract side-by-side tables from a range of PDF pages, stack the right half
# of every table underneath its left half, and save the result as one sheet.

# Indices into pdf.pages, which is zero-based: range(13, 17) selects the
# 14th through 17th pages of the document.
pages = range(13, 17)

# Each table is read as two halves of this many columns (columns 0:4 and 4:8).
HALF_WIDTH = 4

# One stacked frame per extracted table; concatenated once at the end rather
# than growing a DataFrame inside the loop.
page_frames = []

# Open the PDF file.
with pdfplumber.open(r'C:\Users\chenb\Desktop\XXX.pdf') as pdf:
    for p in pages:
        page = pdf.pages[p]
        for table in page.extract_tables():
            if not table:
                # Nothing to use as a header row; skip instead of failing on table[0].
                continue

            # The first extracted row supplies the column labels.
            tb = pd.DataFrame(table[1:], columns=table[0])

            # Discard the first row below the header (presumably a units or
            # sub-header row - confirm against the PDF). Positional slicing,
            # unlike drop(index=0), does not raise on a header-only table.
            tb = tb.iloc[1:]

            left = tb.iloc[:, 0:HALF_WIDTH]
            right = tb.iloc[:, HALF_WIDTH:2 * HALF_WIDTH].copy()

            # pd.concat aligns on column labels. Give the right half the left
            # half's labels so its rows land below the left half's rows instead
            # of in separate, NaN-padded columns when the headers differ.
            right.columns = left.columns[:right.shape[1]]

            page_frames.append(pd.concat([left, right], ignore_index=True))

if not page_frames:
    raise ValueError(
        'No tables were extracted from PDF page indices '
        f'{pages.start}..{pages.stop - 1}; nothing to save.'
    )

result_all = pd.concat(page_frames, ignore_index=True)

# Save as Excel.
result_all.to_excel(r'C:\Users\chenb\Desktop\XXX.xlsx', index=False)
# Read the saved sheet back and plot columns 1-3 (x) against column 0 (y),
# with the x axis ticks on top and the y axis inverted.
result_read = pd.read_excel(r'C:\Users\chenb\Desktop\XXX.xlsx', sheet_name='Sheet1', header=0)
print(result_read.dtypes)

# Cells extracted from a PDF are text, so after the Excel round trip the
# columns may still be strings; matplotlib would then draw them on categorical
# axes. Convert the plotted columns to numbers; cells that cannot be parsed
# become NaN and show up as gaps in the curves.
curves = result_read.iloc[:, 0:4].apply(pd.to_numeric, errors='coerce')

# Draw the figure.
_, ax = plt.subplots(figsize=(10, 12))
y_values = curves.iloc[:, 0]
for col in range(1, 4):
    ax.plot(curves.iloc[:, col], y_values)

ax.xaxis.set_ticks_position('top')  # put the x axis ticks at the top
ax.invert_yaxis()  # flip the y axis so values increase downwards
plt.show()