1. 初始化
import pandas as pd
data = pd.DataFrame({"name":["lx","cl","zgq"],"age":[18,20,21],"id":[2122,2133,2144]})
data
2. 基本属性
data.columns
data.index
data.dtypes
data.shape
data[data.columns[1:]]
data.iloc[1:3,:]
data.index = ["bana","mango","apple","peach"]
data.loc["mango":"peach",:]
data = data.replace([np.inf, np.nan], 0) #把所有空值替换成0
data['age'] = data['age'].astype(int) #该列数据类型强转
data[(data['age']>= 20) & (data['id'] >2140)] #数据筛选,记住有括号
3. groupby 、apply、merge用法
data
data.groupby(["name"]).size().reset_index(name="size") #根据name列分组,计算出大小,可以替换成size()、max()、min()
data.groupby(["name"]).apply(lambda x:list(x.id)).reset_index(name = "id_list") #计算同一name分组的所有id
data1.merge(data2,on=["name"],how = "left") #对data1、data2按照name列进行左连接
4、新增行、列数据
data.insert(1,"x",[x for x in range(6)])
data["y"] = [x*x for x in range(6)]
data
standart_row = pd.DataFrame([x for x in range(5)]).T
standart_row.columns = data.columns
data.append(standart_row,ignore_index=True)
5、读、写xls
- 读xls文件,第一列作为索引,第一列作为列名
- 写xls文件
- 同时写入多个sheet的xls文件
pd.read_excel(filename,index_col=0,header = 0,encoding="utf8")
pd.to_excel(filename)
xlsWriter = pd.ExcelWriter(filename)
for i in range(5):
data.to_excel(excel_writer = xlsWriter,sheet_name = i)
xlsWriter.close()
data.to_pickle(filename)
data.read_pickle(filename)