import pandas as pd
# Sample records used by every example below.
data = [
    {"id": 1, "name": "jack", "age": 18, "score": 90, "aa": 10},
    {"id": 6, "name": "jack", "age": 18, "score": 85, "aa": 10},
    {"id": 2, "name": "ammy", "age": 15, "score": 80, "aa": 20},
    {"id": 3, "name": "jack", "age": 17, "score": 50, "aa": 20},
    {"id": 4, "name": "ammy", "age": 21, "score": 67, "aa": 10},
    {"id": 5, "name": "ammy", "age": 15, "score": 95, "aa": 30},
]
data_pd = pd.DataFrame(data)

# Set an index: set_index('column', drop=False); drop=False keeps the source
# column in the frame. `drop` is passed by keyword because newer pandas
# releases no longer accept it positionally.
# Several columns can be used at once: set_index(keys=['id', 'name']).
data_pd = data_pd.set_index("id", drop=False)

# Reset the index. "id" is still present as a column (drop=False above), so
# the index must be discarded rather than re-inserted; a plain reset_index()
# would raise "cannot insert id, already exists".
data_pd = data_pd.reset_index(drop=True)
data_pd = data_pd.set_index(keys=["id", "name"])

# Query: index level names can be referenced like column names.
query_pd = data_pd.query("id > 3")

# Group and take the mean.
new_pd = data_pd.groupby(['name', 'age'])['score'].mean()
aa = new_pd.unstack()
print(aa.loc['ammy'].at[15])

new_pd = data_pd.groupby(['name', 'age', 'aa'])['score'].mean()
aa = new_pd.unstack()
# With several grouping keys left in the index, pass a tuple to loc.
print(aa.loc[('ammy', 21)].at[10])

# Sort, then take the last row of each group.
data_pd = data_pd.sort_values(by='score')
# "id" is an index level at this point; move it back to a column so that it
# survives the groupby and can be read from the result.
new_pd = data_pd.reset_index(level='id').groupby(['name', 'age']).last()

# Two ways of reading a value from the grouped result.
print(new_pd.loc[('ammy', 15)].at['id'])
new_pd = new_pd.query("name == 'ammy' and age == 15")
print(new_pd.iloc[0].at['id'])
# Check whether a field taken from a DataFrame is NaN.
# NOTE(review): `win_score` is not defined in the visible code; presumably it
# is a value (or column) read from a frame beforehand. Confirm before running.
pd.isnull(win_score)
# Check whether a DataFrame is empty.
if data_pd.empty:
    # print() call form and an indented body: the Python 2 statement form
    # with no indentation does not parse on Python 3.
    print('dataframe 空')
# Read a value through the index.
data_pd = pd.DataFrame(data)
aa = data_pd.set_index('id')
# A single .loc[row_label, column_label] lookup replaces the chained
# aa.loc[1]['name'], which first builds the whole row as a Series.
print(aa.loc[1, 'name'])
# pandas 转 dict
# >>> df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
# >>> df
#    col1  col2
# 0     1     3
# 1     2     4
# >>> [df.to_dict(orient='index')]
# [{0: {'col1': 1, 'col2': 3}, 1: {'col1': 2, 'col2': 4}}]
# >>> df.to_dict(orient='records')
# [{'col1': 1, 'col2': 3}, {'col1': 2, 'col2': 4}]
# Filter rows by values taken from another record.
# The values are bound to names and referenced with "@" inside the query
# instead of being %-formatted into the expression: interpolated string
# values would appear unquoted and be parsed as column names.
# NOTE(review): `player` is not defined in the visible code; it is assumed to
# be a mapping with 'playerid' and 'heroid' keys, as the subscripts imply.
player_id = player['playerid']
hero_id = player['heroid']
data_pd.query("playerid == @player_id and heroid == @hero_id")
# Sort first, then group and keep the first 5 rows of every group, i.e. the
# 5 highest 'total_score' rows per 'heroid'.
player_hero_group = (
    player_hero_pd
    .sort_values(by='total_score', ascending=False)
    .groupby(by='heroid')
    .head(n=5)
)
# Per-group counting. The calls are bound to data_pd (which has 'name' and
# 'age' columns); a bare groupby(...) has no receiver and raises NameError.
data_pd.groupby('name')['age'].count()  # number of values in each group
data_pd.groupby('name')['age'].nunique()  # number of distinct values in each group
# Python pandas 笔记
# 最新推荐文章于 2024-05-19 23:31:32 发布