pandas基本操作及pdf转excel的方法

最新推荐文章于 2022-11-08 17:42:41 发布

也无渢雨也无晴

最新推荐文章于 2022-11-08 17:42:41 发布

阅读量432

点赞数

分类专栏： Python 文章标签： python pandas

本文链接：https://blog.csdn.net/weixin_45477572/article/details/121079805

版权

Python 专栏收录该内容

15 篇文章 0 订阅

订阅专栏

基本操作

import pandas as pd

# --- Building DataFrames -----------------------------------------------------

# A list of dicts: each dict becomes one row, the keys become the columns.
dict_list = [{"name":"Jack","age":22,"score":100}, {"name":"xuan","age":21,"score":99}, {"name":"Rose","age":18,"score":60}]
df1 = pd.DataFrame(dict_list)
print(df1)
#    name  age  score
# 0  Jack   22    100
# 1  xuan   21     99
# 2  Rose   18     60

# A dict of scalars needs an explicit index; the scalars are repeated for
# every index label.  (Named `record`, not `dict`, so the builtin is not
# shadowed.)
record = {"name":"Jack","age":22,"score":100}
df2 = pd.DataFrame(record, index=[3,1,6])
print(df2)
#    name  age  score
# 3  Jack   22    100
# 1  Jack   22    100
# 6  Jack   22    100

# orient="index" turns the dict keys into row labels instead of columns.
df2 = pd.DataFrame.from_dict(record, orient="index",columns=['test'])
print(df2)
#        test
# name   Jack
# age      22
# score   100

# A flat list becomes a single column.  (Named `names`, not `list`, so the
# builtin is not shadowed.)
names = ['xuan', 'Jack', 'Rose', 'Luxi']
df3 = pd.DataFrame(names,columns=['name'],index=[1,6,7,8])
print(df3)
#    name
# 1  xuan
# 6  Jack
# 7  Rose
# 8  Luxi

# A list of lists: every inner list is a row, including the first one, so
# the would-be header ['name', 'age'] ends up as data row 0.
list_list = [['name','age'],['xuanRui1',22],['xuanRui2',33],['xuanRui3',44]]
df4 = pd.DataFrame(list_list,columns=['姓名', '年龄'])
print(df4)
# With the columns= argument supplying the column names:
#          姓名   年龄
# 0      name  age
# 1  xuanRui1   22
# 2  xuanRui2   33
# 3  xuanRui3   44

# --- Adding a column ---------------------------------------------------------

df4["性别"] = ["无",'m','w','m']
print(df4)
#          姓名   年龄 性别
# 0      name  age  无
# 1  xuanRui1   22  m
# 2  xuanRui2   33  w
# 3  xuanRui3   44  m

# --- Selecting with iloc (integer positions) ---------------------------------

print(df4.iloc[:,[0,2]])  # iloc: slice by integer position; labels are not accepted
#          姓名 性别
# 0      name  无
# 1  xuanRui1  m
# 2  xuanRui2  w
# 3  xuanRui3  m
print(df4.iloc[[0,2],:])
#          姓名   年龄 性别
# 0      name  age  无
# 2  xuanRui2   33  w

# --- Selecting with loc (labels) ---------------------------------------------

print(df4.loc[:,["姓名"]])  # loc: slice by label (index labels / column names)
#          姓名
# 0      name
# 1  xuanRui1
# 2  xuanRui2
# 3  xuanRui3
print(df4.loc[[2,3],:])
#          姓名  年龄 性别
# 2  xuanRui2  33  w
# 3  xuanRui3  44  m

# Replacing the index changes the labels that loc understands.
df4.index = ["一",'er',"san","si"]
print(df4)
#            姓名   年龄 性别
# 一        name  age  无
# er   xuanRui1   22  m
# san  xuanRui2   33  w
# si   xuanRui3   44  m

print(df4.loc[["er","si"],["姓名"]])
#          姓名
# er  xuanRui1
# si  xuanRui3

PDF格式转为Excel格式

import pandas as pd
import camelot.io as camelot

def pdfToExcel(pdf_path, max_pages=19, max_tables_per_page=5):
    """Extract tables from a PDF with camelot and save them to an .xlsx file.

    Pages are read one at a time with camelot's ``stream`` flavor.  Every
    table whose shape is not ``(1, 1)`` is kept and written to its own
    worksheet in a workbook placed next to the PDF (same path, ``.xlsx``
    extension).

    Args:
        pdf_path: Path of the PDF file to read.
        max_pages: Highest page number to try (pages ``1..max_pages``).
        max_tables_per_page: Maximum number of tables taken from one page.

    Returns:
        The DataFrame of the last table that was kept, or ``None`` when no
        table was found or when extraction/writing failed (in which case
        "PDF TO EXCEL ERROR ..." is printed).
    """
    import os

    # Replace only a trailing ".pdf" extension.  Splitting on the first
    # ".pdf" substring would truncate paths such as "a.pdf.bak/report.pdf".
    root, ext = os.path.splitext(pdf_path)
    if ext.lower() != ".pdf":
        root = pdf_path
    excel_path = root + ".xlsx"

    extracted = []  # (sheet name, DataFrame) pairs, in reading order
    try:
        for page_num in range(1, max_pages + 1):
            print(page_num)
            try:
                tables = camelot.read_pdf(pdf_path,flavor='stream',pages=str(page_num))
            except IndexError:
                # NOTE(review): treated as "page past the end of the document",
                # as the original code did -- confirm against the camelot
                # version in use.
                break
            for table_num in range(max_tables_per_page):
                try:
                    table = tables[table_num]
                except IndexError:
                    # Fewer tables on this page than the per-page limit.
                    break
                if table.shape != (1,1):
                    # Page and table number keep sheet names unique, so two
                    # tables on one page do not overwrite each other.
                    extracted.append((f"数据表{page_num}_{table_num + 1}", table.df))

        if not extracted:
            # Nothing to write; do not create an empty workbook (some Excel
            # engines refuse to save a workbook without sheets).
            return None

        with pd.ExcelWriter(excel_path) as writer:
            for sheet_name, table_df in extracted:
                table_df.to_excel(writer,sheet_name=sheet_name)
    except Exception as exc:
        # Best effort: report the failure and let the caller carry on.
        print("PDF TO EXCEL ERROR ...", exc)
        return None
    return extracted[-1][1]

也无渢雨也无晴

关注

0
点赞
踩
1

收藏

觉得还不错? 一键收藏
0
评论
pandas基本操作及pdf转excel的方法

基本操作import pandas as pddict_list = [{"name":"Jack","age":22,"score":100}, {"name":"xuan","age":21,"score":99}, {"name":"Rose","age":18,"score":60}]df1 = pd.DataFrame(dict_list)print(df1)# name age score# 0 Jack 22 100# 1 xuan 21 9
复制链接

扫一扫

专栏目录