import os
import re

import pdfplumber  # third-party; if pip fails to install it, pinning a specific version resolves the problem
file_dir = r"C:\Users\xxx\Desktop\xxx"  # parent directory of the PDF file

# Open the PDF and, for each page, print the first value that follows a
# "日期:" (date) label. Pages are processed independently of one another.
with pdfplumber.open(os.path.join(file_dir, "要提取数据的.pdf")) as pdf:
    for page in pdf.pages:
        # Depending on the pdfplumber version, extract_text() may give None
        # or an empty string for a page without extractable text (e.g. a
        # scanned image); fall back to '' so such a page is simply skipped.
        content = page.extract_text() or ''
        # The pattern requires a newline before the label, so a label on the
        # very first line of the page text is intentionally not matched
        # (same behavior as the original pattern).
        matches = re.findall(r'\n日期:(.+)', content)
        # Skip pages without a date line instead of raising IndexError.
        if matches:
            print(matches[0])
01-08
3580
![](https://csdnimg.cn/release/blogv2/dist/pc/img/readCountWhite.png)
11-29
1692
![](https://csdnimg.cn/release/blogv2/dist/pc/img/readCountWhite.png)
12-29
3455
![](https://csdnimg.cn/release/blogv2/dist/pc/img/readCountWhite.png)
10-10
2万+
![](https://csdnimg.cn/release/blogv2/dist/pc/img/readCountWhite.png)
07-20
5027
![](https://csdnimg.cn/release/blogv2/dist/pc/img/readCountWhite.png)