pandas分组
1.分组统计
import numpy as np
import pandas as pd
# Build a 300-row demo table of random data: a sex code, a class id and five
# exam scores. np.random.randint excludes the upper bound, so sex is 0 or 1,
# class is 1-8 and every score is 0-150.
df = pd.DataFrame(data={
    'sex': np.random.randint(0, 2, size=300),
    'class': np.random.randint(1, 9, size=300),
    'Python': np.random.randint(0, 151, size=300),
    'Keras': np.random.randint(0, 151, size=300),
    'Tensorflow': np.random.randint(0, 151, size=300),
    'Java': np.random.randint(0, 151, size=300),
    'C++': np.random.randint(0, 151, size=300),
})
# Replace the numeric sex codes with readable labels. The label for code 1 is
# the regular CJK character U+5973; the previous literal was the look-alike
# Kangxi radical U+2F25, which never compares equal to normally typed text.
df['sex'] = df['sex'].map({0: '男', 1: '女'})
# Group the rows by sex, keeping only the Python and Java score columns.
g = df.groupby(by='sex')[['Python', 'Java']]
# Iterating a GroupBy yields (group key, sub-DataFrame) pairs. The loop body
# must be indented; the flattened original raised IndentationError.
for name, data in g:
    print('组名:', name)
    print('数据:', data)
# Group by two keys at once; the double brackets keep Python as a one-column
# DataFrame selection rather than a Series.
df.groupby(['class', 'sex'])[['Python']]
# A single column (Series) can be grouped by the values of another column.
df['Python'].groupby(by=df['class'])
# A Series can equally be grouped by several key Series.
df['Keras'].groupby(by=[df['class'], df['sex']])
# Group the COLUMNS by their dtype. `groupby(..., axis=1)` is deprecated since
# pandas 2.1 and removed in 3.0; the documented replacement is to group the
# transposed frame, whose index holds the column labels.
df.T.groupby(df.dtypes)
# Mapping from column label to the name of the group it belongs to.
m = {'sex': 'category', 'class': 'category', 'Python': 'IT', 'Keras': 'IT', 'Tensorflow': 'IT', 'Java': 'IT', 'C++': 'IT'}
for name, transposed in df.T.groupby(m):
    # Select the group's columns from the original frame so each column keeps
    # its dtype (transposing a mixed-dtype frame yields object values).
    data = df.loc[:, transposed.index]
    print('组名', name)
    print('数据', data)
2.分组聚合
2.1 分组直接调用函数进行聚合
# Mean of each remaining column per sex, rounded to one decimal place.
df.groupby('sex').mean().round(1)
# Highest Python and Keras score within each (class, sex) group.
df.groupby(['class', 'sex'])[['Python', 'Keras']].max()
# Number of rows in each (class, sex) group.
df.groupby(['class', 'sex']).size()
# Descriptive statistics of every remaining column per (class, sex) group.
df.groupby(['class', 'sex']).describe()
2.2 分组后调用apply,transform封装单一函数计算
# Per-column mean of Python and Keras within each (class, sex) group.
# apply() forwards extra keyword arguments to the function, so axis=0 is
# passed explicitly: a bare np.mean(frame) calls frame.mean(axis=None), which
# on pandas >= 2.0 collapses both columns into one scalar per group.
df.groupby(by=['class', 'sex'])[['Python', 'Keras']].apply(np.mean, axis=0).round(1)
def normalization(x):
    """Min-max scale *x* to the range [0, 1].

    Args:
        x: A pandas Series or DataFrame of numeric values. A DataFrame is
            scaled column by column.

    Returns:
        An object of the same shape as *x* holding
        ``(x - min) / (max - min)``. Where the range is zero (constant or
        single-row data) the result is 0 rather than the NaN of 0/0.
    """
    low = x.min()
    span = x.max() - low
    # Swap a zero range for 1 so the division is defined; the numerator is
    # already 0 in that case, so the scaled value is 0.
    span = np.where(span == 0, 1, span)
    return (x - low) / span
# Scale the Python and Tensorflow scores with normalization() inside each
# (class, sex) group. transform() returns a result aligned row-for-row with
# the input; values are rounded to three decimals.
(
    df.groupby(['class', 'sex'])[['Python', 'Tensorflow']]
    .transform(normalization)
    .round(3)
)
2.3 agg 多种统计汇总操作
# Several statistics per column in one pass. String names are used instead of
# np.max / np.min / pd.Series.count: passing NumPy callables to agg() is
# deprecated since pandas 2.1, and the column label derived from a callable's
# __name__ can differ between NumPy versions ('amax' vs 'max').
df.groupby(by=['class', 'sex'])[['Tensorflow', 'Keras']].agg(['max', 'min', 'count'])
# Different statistics per column, each given as a (label, function) pair.
# String function names replace the NumPy callables, whose use in agg() is
# deprecated since pandas 2.1. The labels now use regular CJK characters (the
# originals held look-alike Kangxi radicals and a stray space), so the result
# columns can be selected with normally typed text.
df.groupby(by=['class', 'sex'])[['Python', 'Keras']].agg({
    'Python': [('最大值', 'max'), ('最小值', 'min')],
    'Keras': [('计数', 'count'), ('中位数', 'median')],
})