# 基本操作 (basic operations)
import pandas as pd
# Build a DataFrame from a list of dicts: each dict becomes one row and the
# dict keys become the column names.
dict_list = [
    {"name": "Jack", "age": 22, "score": 100},
    {"name": "xuan", "age": 21, "score": 99},
    {"name": "Rose", "age": 18, "score": 60},
]
df1 = pd.DataFrame(data=dict_list)
print(df1)
# Output:
#    name  age  score
# 0  Jack   22    100
# 1  xuan   21     99
# 2  Rose   18     60
# A dict of scalar values describes a single record. It is bound to `record`
# rather than `dict` so the builtin `dict` type stays usable in the rest of
# the file.
record = {"name": "Jack", "age": 22, "score": 100}

# With scalar values an explicit index is supplied; the record is repeated
# once per index label.
df2 = pd.DataFrame(record, index=[3, 1, 6])
print(df2)
# Output:
#    name  age  score
# 3  Jack   22    100
# 1  Jack   22    100
# 6  Jack   22    100

# orient="index" turns each dict key into a row label; `columns` names the
# single value column.
df2 = pd.DataFrame.from_dict(record, orient="index", columns=['test'])
print(df2)
# Output:
#        test
# name   Jack
# age      22
# score   100
# A flat list becomes a single column; `columns` names it and `index` sets
# the row labels. The data is bound to `names` rather than `list` so the
# builtin `list` type stays usable in the rest of the file.
names = ['xuan', 'Jack', 'Rose', 'Luxi']
df3 = pd.DataFrame(names, columns=['name'], index=[1, 6, 7, 8])
print(df3)
# Output:
#    name
# 1  xuan
# 6  Jack
# 7  Rose
# 8  Luxi
# Rows given as a list of lists; the `columns` argument supplies the header.
# The first inner list is treated as ordinary data, so it shows up as row 0.
list_list = [
    ['name', 'age'],
    ['xuanRui1', 22],
    ['xuanRui2', 33],
    ['xuanRui3', 44],
]
df4 = pd.DataFrame(data=list_list, columns=['姓名', '年龄'])
print(df4)
# Output:
#          姓名   年龄
# 0      name  age
# 1  xuanRui1   22
# 2  xuanRui2   33
# 3  xuanRui3   44

# Assigning a list to a new column label appends that column.
df4["性别"] = ["无", 'm', 'w', 'm']
print(df4)
# Output:
#          姓名   年龄  性别
# 0      name  age   无
# 1  xuanRui1   22   m
# 2  xuanRui2   33   w
# 3  xuanRui3   44   m

# iloc selects by integer position only; column names are not accepted.
# Here: every row, columns at positions 0 and 2.
first_and_third_columns = df4.iloc[:, [0, 2]]
print(first_and_third_columns)
# Output:
#          姓名  性别
# 0      name   无
# 1  xuanRui1   m
# 2  xuanRui2   w
# 3  xuanRui3   m

# Rows at positions 0 and 2, every column.
first_and_third_rows = df4.iloc[[0, 2], :]
print(first_and_third_rows)
# Output:
#          姓名   年龄  性别
# 0      name  age   无
# 2  xuanRui2   33   w

# loc selects by label, so column names can be used here.
name_column = df4.loc[:, ["姓名"]]
print(name_column)
# Output:
#          姓名
# 0      name
# 1  xuanRui1
# 2  xuanRui2
# 3  xuanRui3

# Rows labelled 2 and 3, every column.
rows_two_and_three = df4.loc[[2, 3], :]
print(rows_two_and_three)
# Output:
#          姓名   年龄  性别
# 2  xuanRui2   33   w
# 3  xuanRui3   44   m

# Replace the default integer index with custom row labels.
df4.index = ["一", 'er', "san", "si"]
print(df4)
# Output:
#            姓名   年龄  性别
# 一        name  age   无
# er   xuanRui1   22   m
# san  xuanRui2   33   w
# si   xuanRui3   44   m

# With the new labels, loc addresses rows by those labels.
labelled_names = df4.loc[["er", "si"], ["姓名"]]
print(labelled_names)
# Output:
#           姓名
# er  xuanRui1
# si  xuanRui3
# PDF格式转为Excel格式 (convert PDF tables to Excel)
import pandas as pd
import camelot.io as camelot
def pdfToExcel(pdf_path, max_pages=19, max_tables_per_page=5):
    """Extract tables from a PDF with camelot and save them to an .xlsx file.

    Pages 1..max_pages are read one at a time with camelot's 'stream'
    flavor. At most max_tables_per_page tables are taken from each page, and
    tables whose shape is (1, 1) are skipped. Every kept table is written to
    its own sheet of a workbook stored at the PDF's path with the extension
    replaced by ".xlsx". No workbook is created when nothing was kept.

    Args:
        pdf_path: Path of the PDF file to read.
        max_pages: Highest page number to try. The default of 19 matches the
            page range that used to be hard-coded.
        max_tables_per_page: Maximum number of tables taken from one page.

    Returns:
        The DataFrame of the last kept table, or None when no table was kept
        or when any error occurred (the error is printed, not raised).
    """
    import os  # function-scope import keeps this block self-contained

    # splitext only strips the final extension, so a ".pdf" appearing earlier
    # in the path (e.g. in a directory name) no longer truncates the result.
    excel_path = os.path.splitext(pdf_path)[0] + ".xlsx"

    kept = []  # (sheet_name, DataFrame) pairs in document order
    try:
        for page_num in range(1, max_pages + 1):
            print(page_num)
            try:
                tables = camelot.read_pdf(pdf_path, flavor='stream', pages=str(page_num))
            except IndexError:
                # Treated as "no such page": stop scanning.
                # NOTE(review): presumably camelot raises IndexError for a page
                # number past the end of the document - confirm for the
                # installed camelot version.
                break
            for table_num in range(max_tables_per_page):
                try:
                    table = tables[table_num]
                except IndexError:
                    # Fewer tables on this page than the per-page limit.
                    break
                if table.shape != (1, 1):
                    # One sheet per table so that several tables on the same
                    # page do not overwrite each other.
                    kept.append((f"数据表{page_num}_{table_num + 1}", table.df))

        # Open the writer only when there is something to write: saving a
        # workbook without any sheet fails with the openpyxl engine.
        if kept:
            with pd.ExcelWriter(excel_path) as writer:
                for sheet_name, sheet_df in kept:
                    sheet_df.to_excel(writer, sheet_name=sheet_name)
    except Exception as exc:
        # Best-effort contract: report the failure and return None.
        print("PDF TO EXCEL ERROR ...", exc)
        return None

    return kept[-1][1] if kept else None