In [36]:
# 创建dataframe
d = {
'Name':['Alisa','Bobby','Cathrine','Alisa','Bobby','Cathrine',
'Alisa','Bobby','Cathrine','Alisa','Bobby','Cathrine'],
'Semester':['Semester 1','Semester 1','Semester 1','Semester 1','Semester 1','Semester 1',
'Semester 2','Semester 2','Semester 2','Semester 2','Semester 2','Semester 2'],
'Subject':['Mathematics','Mathematics','Mathematics','Science','Science','Science',
'Mathematics','Mathematics','Mathematics','Science','Science','Science'],
'Score':[62,47,55,74,31,77,85,63,42,67,89,81]}
df = pd.DataFrame(d)
df
Out[36]:
Name | Semester | Subject | Score | |
---|---|---|---|---|
0 | Alisa | Semester 1 | Mathematics | 62 |
1 | Bobby | Semester 1 | Mathematics | 47 |
2 | Cathrine | Semester 1 | Mathematics | 55 |
3 | Alisa | Semester 1 | Science | 74 |
4 | Bobby | Semester 1 | Science | 31 |
5 | Cathrine | Semester 1 | Science | 77 |
6 | Alisa | Semester 2 | Mathematics | 85 |
7 | Bobby | Semester 2 | Mathematics | 63 |
8 | Cathrine | Semester 2 | Mathematics | 42 |
9 | Alisa | Semester 2 | Science | 67 |
10 | Bobby | Semester 2 | Science | 89 |
11 | Cathrine | Semester 2 | Science | 81 |
层级分组
In [37]:
df.groupby(by=['Semester','Subject'])['Score'].mean().to_frame()
Out[37]:
Score | ||
---|---|---|
Semester | Subject | |
Semester 1 | Mathematics | 54.666667 |
Science | 60.666667 | |
Semester 2 | Mathematics | 63.333333 |
Science | 79.000000 |
pivot_table
In [38]:
df.pivot_table(values='Score',index='Semester',columns='Subject') # 数据透视表
Out[38]:
Subject | Mathematics | Science |
---|---|---|
Semester | ||
Semester 1 | 54.666667 | 60.666667 |
Semester 2 | 63.333333 | 79.000000 |
margin参数
In [39]:
df.pivot_table(values='Score',index='Semester',columns='Subject',margins=True) # 求和
Out[39]:
Subject | Mathematics | Science | All |
---|---|---|---|
Semester | |||
Semester 1 | 54.666667 | 60.666667 | 57.666667 |
Semester 2 | 63.333333 | 79.000000 | 71.166667 |
All | 59.000000 | 69.833333 | 64.416667 |
多个聚合操作
In [40]:
df.pivot_table(values='Score',index='Semester',columns='Subject',aggfunc=['mean','max','min'])
Out[40]:
mean | max | min | ||||
---|---|---|---|---|---|---|
Subject | Mathematics | Science | Mathematics | Science | Mathematics | Science |
Semester | ||||||
Semester 1 | 54.666667 | 60.666667 | 62 | 77 | 47 | 31 |
Semester 2 | 63.333333 | 79.000000 | 85 | 89 | 42 | 67 |
多个聚合操作
In [42]:
df.pivot_table(values='Score',index='Semester',columns='Subject',aggfunc=['mean','max','min'])
Out[42]:
mean | max | min | ||||
---|---|---|---|---|---|---|
Subject | Mathematics | Science | Mathematics | Science | Mathematics | Science |
Semester | ||||||
Semester 1 | 54.666667 | 60.666667 | 62 | 77 | 47 | 31 |
Semester 2 | 63.333333 | 79.000000 | 85 | 89 | 42 | 67 |
分析车辆数据集
In [44]:
# 加载数据
cars_df = pd.read_csv(r'C:\Users\ML Learning\Projects\第四章-数据分析预习内容\第四章-数据分析预习内容\第二节-数据分析工具pandas高阶\3_lesson_06\lesson_06\examples\datasets\cars.csv')
cars_df.head()
Out[44]:
YEAR | Make | Model | Size | (kW) | Unnamed: 5 | TYPE | CITY (kWh/100 km) | HWY (kWh/100 km) | COMB (kWh/100 km) | CITY (Le/100 km) | HWY (Le/100 km) | COMB (Le/100 km) | (g/km) | RATING | (km) | TIME (h) | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2012 | MITSUBISHI | i-MiEV | SUBCOMPACT | 49 | A1 | B | 16.9 | 21.4 | 18.7 | 1.9 | 2.4 | 2.1 | 0 | NaN | 100 | 7 |
1 | 2012 | NISSAN | LEAF | MID-SIZE | 80 | A1 | B | 19.3 | 23.0 | 21.1 | 2.2 | 2.6 | 2.4 | 0 | NaN | 117 | 7 |
2 | 2013 | FORD | FOCUS ELECTRIC | COMPACT | 107 | A1 | B | 19.0 | 21.1 | 20.0 | 2.1 | 2.4 | 2.2 | 0 | NaN | 122 | 4 |
3 | 2013 | MITSUBISHI | i-MiEV | SUBCOMPACT | 49 | A1 | B | 16.9 | 21.4 | 18.7 | 1.9 | 2.4 | 2.1 | 0 | NaN | 100 | 7 |
4 | 2013 | NISSAN | LEAF | MID-SIZE | 80 | A1 | B | 19.3 | 23.0 | 21.1 | 2.2 | 2.6 | 2.4 | 0 | NaN | 117 | 7 |
In [46]:
# 比较不同年份的不同厂商的车,在电池方面的不同
cars_df.pivot_table(values='(kW)',index='YEAR',columns='Make')
Out[46]:
Make | BMW | CHEVROLET | FORD | KIA | MITSUBISHI | NISSAN | SMART | TESLA |
---|---|---|---|---|---|---|---|---|
YEAR | ||||||||
2012 | NaN | NaN | NaN | NaN | 49.0 | 80.0 | NaN | NaN |
2013 | NaN | NaN | 107.0 | NaN | 49.0 | 80.0 | 35.0 | 280.000000 |
2014 | NaN | 104.0 | 107.0 | NaN | 49.0 | 80.0 | 35.0 | 268.333333 |
2015 | 125.0 | 104.0 | 107.0 | 81.0 | 49.0 | 80.0 | 35.0 | 320.666667 |
2016 | 125.0 | 104.0 | 107.0 | 81.0 | 49.0 | 80.0 | 35.0 | 409.700000 |
In [ ]: