Pandas填充缺失值两种方法:bfill/ffill对比
import pandas as pd
import numpy as np
"""
在dataframe中建立缺失值的两种方法:
np.nan和None
"""
# Build a small demo frame. Missing values are written as np.nan and, in the
# 'Gender' column, also as None.
_columns = {
    'ID': range(5),
    'Height': range(5, 10),
    'Gender': ['male', np.nan, None, 'famle', np.nan],
    'col3': [1.3, np.nan, 3.6, np.nan, 5.8],
}
df_ = pd.DataFrame(_columns)
df_ = df_.set_index('ID')  # 'ID' becomes the row index
df_
| ID | Height | Gender | col3 |
|---|---|---|---|
| 0 | 5 | male | 1.3 |
| 1 | 6 | NaN | NaN |
| 2 | 7 | None | 3.6 |
| 3 | 8 | famle | NaN |
| 4 | 9 | NaN | 5.8 |
'''
bfill(backward fill):用缺失值后面的下一个有效值来填充
ffill(forward fill):用缺失值前面的上一个有效值来填充
axis=0(默认):在每一列内沿行索引方向上下填充;axis=1:在每一行内沿列方向左右填充
'''
# Backward-fill across the columns of each row (axis=1): a missing cell takes
# the next valid value to its right in the same row.
# DataFrame.bfill replaces fillna(method='bfill'), whose `method` keyword is
# deprecated in recent pandas releases.
df1 = df_.bfill(axis=1)
df1
| ID | Height | Gender | col3 |
|---|---|---|---|
| 0 | 5 | male | 1.3 |
| 1 | 6 | NaN | NaN |
| 2 | 7 | 3.6 | 3.6 |
| 3 | 8 | famle | NaN |
| 4 | 9 | 5.8 | 5.8 |
# Backward-fill down each column (axis=0): a missing cell takes the next valid
# value below it; trailing gaps with nothing below them stay missing.
# DataFrame.bfill replaces fillna(method='bfill'), whose `method` keyword is
# deprecated in recent pandas releases.
df2 = df_.bfill(axis=0)
df2
| ID | Height | Gender | col3 |
|---|---|---|---|
| 0 | 5 | male | 1.3 |
| 1 | 6 | famle | 3.6 |
| 2 | 7 | famle | 3.6 |
| 3 | 8 | famle | 5.8 |
| 4 | 9 | NaN | 5.8 |
# Forward-fill down each column (axis=0): a missing cell takes the last valid
# value above it.
# DataFrame.ffill replaces fillna(method='ffill'), whose `method` keyword is
# deprecated in recent pandas releases.
df3 = df_.ffill(axis=0)
df3
| ID | Height | Gender | col3 |
|---|---|---|---|
| 0 | 5 | male | 1.3 |
| 1 | 6 | male | 1.3 |
| 2 | 7 | male | 3.6 |
| 3 | 8 | famle | 3.6 |
| 4 | 9 | famle | 5.8 |
# Re-introduce gaps in 'col3' by turning 5.8 and 1.3 back into NaN.
# Assign the result back to the column instead of calling
# replace(..., inplace=True) on df3['col3']: that chained in-place call acts on
# an intermediate object and does not update df3 when pandas Copy-on-Write is
# active.
df3['col3'] = df3['col3'].replace([5.8, 1.3], np.nan)
df3
| ID | Height | Gender | col3 |
|---|---|---|---|
| 0 | 5 | male | NaN |
| 1 | 6 | male | NaN |
| 2 | 7 | male | 3.6 |
| 3 | 8 | famle | 3.6 |
| 4 | 9 | famle | NaN |
# Forward-fill across the columns of each row (axis=1): a missing cell takes
# the last valid value to its left in the same row.
# DataFrame.ffill replaces fillna(method='ffill'), whose `method` keyword is
# deprecated in recent pandas releases.
df4 = df3.ffill(axis=1)
df4
| ID | Height | Gender | col3 |
|---|---|---|---|
| 0 | 5 | male | male |
| 1 | 6 | male | male |
| 2 | 7 | male | 3.6 |
| 3 | 8 | famle | 3.6 |
| 4 | 9 | famle | famle |