# 导入依赖,读取数据 (Import dependencies and read the data)
import pandas as pd
# Load the workbook: row 0 of the sheet supplies the column labels and the
# first column becomes the row index.
df = pd.read_excel(
    io=r"D:\数据1\望仔.xlsx",
    index_col=0,
    header=0,
)
# Bare expression: displays the frame in an interactive/notebook session and
# has no effect when run as a plain script.
df
# 数据预处理 (Data preprocessing)
# Read the workbook, taking the column labels from the second row
# (header is zero-based, so header=1 is row two).
df = pd.read_excel(
    io=r'C:/Users/hp/Desktop/数据.xlsx',
    header=1,
)
# Bare expression: displays the frame in an interactive/notebook session.
df
import pandas as pd
# Sum every remaining column for each (商品代号, 所属季度) pair; as_index=False
# keeps the two grouping keys as ordinary columns instead of the index.
df_55 = df.groupby(by=["商品代号", "所属季度"], as_index=False).sum()
# Reshape to wide form: one row per 商品代号 and, because no `values` is given,
# every remaining column is spread across the 所属季度 values.
da = df_55.pivot(index="商品代号", columns="所属季度")
# Bare expression: displays the pivoted frame in an interactive session.
da
import pandas as pd
# Load the platform data source workbook using pandas' default options.
df = pd.read_excel(io=r'C:/Users/hp/Desktop/泰迪/380平台-数据源.xlsx')
# Bare expression: displays the column labels in an interactive session.
df.columns
# 保存处理的表 (Save the processed table)
# Write the frame to an Excel workbook at the given path.
# NOTE(review): df3 is not assigned anywhere in the visible part of this
# file — confirm it is defined before this line runs.
df3.to_excel(excel_writer=r'D:\表.xlsx')
# Element-wise missing-value mask; as a bare expression it is only displayed
# in an interactive/notebook session.
df.isnull()
# Drop the columns (axis=1) whose values are all missing, returning a new
# frame and leaving df untouched.
# `thresh` is deliberately not passed: pandas 1.5 and later raise TypeError
# when both `how` and `thresh` are supplied, even when thresh is None.
df_new = df.dropna(axis=1, how='all')
# Bare expression: displays the cleaned frame in an interactive session.
df_new
# Total 销售额(万元) for each 销售点类型, then turn the group key back into
# an ordinary column.
# The aggregation is named by the string 'sum' rather than the builtin `sum`:
# pandas 2.1+ emits a FutureWarning for the builtin because it will stop
# being mapped to the groupby sum in a future version.
df_3 = (
    df.groupby(by='销售点类型')
    .agg({'销售额(万元)': 'sum'})
    .reset_index()
)
# Bare expression: displays the summary in an interactive session.
df_3
# 读取csv、txt (Read csv / txt files)
# Parse the tab-separated match log. header=None makes pandas treat the first
# line as data, so the 23 column labels are supplied explicitly via `names`.
df = pd.read_csv(
    r'F:\王者荣耀比赛数据.txt',
    sep="\t",
    header=None,
    index_col=None,
    names=[
        "比赛编号",
        "玩家编号",
        "英雄名字-类别",
        "玩家性别",
        "玩家年龄",
        "玩家地区",
        "对局结果",
        "对局开始时间",
        "对局结束时间",
        "对局时长",
        "玩家评分",
        "击败数",
        "阵亡数",
        "助攻数",
        "金牌/银牌",
        "MVP",
        "禁用1",
        "禁用2",
        "禁用3",
        "禁用4",
        "禁用5",
        "禁用6",
        "平台",
    ],
)
# Bare expression: displays the frame in an interactive/notebook session.
df
# 排序 (Sorting)
# Sort the frame by its "count" column, largest first, modifying it in place.
# NOTE(review): df_top is not assigned anywhere in the visible part of this
# file — confirm it is defined before this line runs.
df_top.sort_values(by="count", ascending=False, inplace=True)
# Average 击败数 for each 英雄名字-类别 value. The result column is labelled
# "mean" and the group key is restored as an ordinary column.
df_all = (
    df.groupby('英雄名字-类别')['击败数']
    .mean()
    .rename('mean')
    .reset_index()
)
# Bare expression: displays the per-hero averages in an interactive session.
df_all
# Keep only the rows whose 英雄名字-类别 value ends with '刺客'.
# The vectorised string test replaces a per-row lambda compared against True;
# na=False makes missing or non-string entries count as non-matches instead
# of raising.
dfff = df_all[df_all['英雄名字-类别'].str.endswith('刺客', na=False)]
# Number of matching rows (displayed only in an interactive session).
dfff.shape[0]
# Bare expression: displays the filtered frame in an interactive session.
dfff