5.1长宽表的变形
import pandas as pd
pd.DataFrame({'Gender':['F','F','M','M']
,'Height':[163,160,175,180]})
| Gender | Height |
---|
0 | F | 163 |
---|
1 | F | 160 |
---|
2 | M | 175 |
---|
3 | M | 180 |
---|
pd.DataFrame({'Height:F':[163,160],
'Height:M':[175,180]})
| Height:F | Height:M |
---|
0 | 163 | 175 |
---|
1 | 160 | 180 |
---|
5.1.1 pivot
df = pd.DataFrame({'Class':[1,1,2,2],
'Name':['zhangsan','zhangsan','lisi','lisi']
,'Subject':['Chinese','Math','Chinese','Math']
,'Grade':[80,75,90,85]})
df
| Class | Name | Subject | Grade |
---|
0 | 1 | zhangsan | Chinese | 80 |
---|
1 | 1 | zhangsan | Math | 75 |
---|
2 | 2 | lisi | Chinese | 90 |
---|
3 | 2 | lisi | Math | 85 |
---|
df.pivot(index = 'Name',columns = 'Subject',values = 'Grade')
Subject | Chinese | Math |
---|
Name | | |
---|
lisi | 90 | 85 |
---|
zhangsan | 80 | 75 |
---|
5.1.2 pivot_table
df = pd.DataFrame({'Name':['San Zhang', 'San Zhang',
'San Zhang', 'San Zhang',
'Si Li', 'Si Li', 'Si Li', 'Si Li'],
'Subject':['Chinese', 'Chinese', 'Math', 'Math',
'Chinese', 'Chinese', 'Math', 'Math'],
'Grade':[80, 90, 100, 90, 70, 80, 85, 95]})
df
| Name | Subject | Grade |
---|
0 | San Zhang | Chinese | 80 |
---|
1 | San Zhang | Chinese | 90 |
---|
2 | San Zhang | Math | 100 |
---|
3 | San Zhang | Math | 90 |
---|
4 | Si Li | Chinese | 70 |
---|
5 | Si Li | Chinese | 80 |
---|
6 | Si Li | Math | 85 |
---|
7 | Si Li | Math | 95 |
---|
df.pivot_table(index = 'Name',
columns = 'Subject',
values = 'Grade',
aggfunc = 'mean')
Subject | Chinese | Math |
---|
Name | | |
---|
San Zhang | 85 | 95 |
---|
Si Li | 75 | 90 |
---|
df.pivot_table(index = 'Name',
columns = 'Subject',
values = 'Grade',
aggfunc = lambda x:x.mean())
Subject | Chinese | Math |
---|
Name | | |
---|
San Zhang | 85 | 95 |
---|
Si Li | 75 | 90 |
---|
df.pivot_table(index = 'Name',
columns = 'Subject',
values = 'Grade',
aggfunc='mean',
margins=True)
Subject | Chinese | Math | All |
---|
Name | | | |
---|
San Zhang | 85 | 95.0 | 90.00 |
---|
Si Li | 75 | 90.0 | 82.50 |
---|
All | 80 | 92.5 | 86.25 |
---|
5.1.3 melt
df = pd.DataFrame({'Class':[1,2],
'Name':['zhangsan','lisi'],
'Chinese':[80,90]
,'Math':[80,75]})
df
| Class | Name | Chinese | Math |
---|
0 | 1 | zhangsan | 80 | 80 |
---|
1 | 2 | lisi | 90 | 75 |
---|
df_melted = df.melt(id_vars = ['Class','Name'],
value_vars = ['Chinese','Math'],
var_name = 'Subject',
value_name = 'Grade')
df_melted
| Class | Name | Subject | Grade |
---|
0 | 1 | zhangsan | Chinese | 80 |
---|
1 | 2 | lisi | Chinese | 90 |
---|
2 | 1 | zhangsan | Math | 80 |
---|
3 | 2 | lisi | Math | 75 |
---|
5.1.4 wide_to_long
df = pd.DataFrame({'Class':[1,2],'Name':['San Zhang', 'Si Li'],
'Chinese_Mid':[80, 75], 'Math_Mid':[90, 85],
'Chinese_Final':[80, 75], 'Math_Final':[90, 85]})
df
| Class | Name | Chinese_Mid | Math_Mid | Chinese_Final | Math_Final |
---|
0 | 1 | San Zhang | 80 | 90 | 80 | 90 |
---|
1 | 2 | Si Li | 75 | 85 | 75 | 85 |
---|
pd.wide_to_long(df,
stubnames=['Chinese', 'Math'],
i = ['Class', 'Name'],
j='Examination',
sep='_',
suffix='.+')
| | | Chinese | Math |
---|
Class | Name | Examination | | |
---|
1 | San Zhang | Mid | 80 | 90 |
---|
Final | 80 | 90 |
---|
2 | Si Li | Mid | 75 | 85 |
---|
Final | 75 | 85 |
---|