pandas缺失值处理

http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.fillna.html

import pandas as pd
import numpy as np
df=pd.DataFrame(np.random.rand(5,6))
df
          0         1         2         3         4         5
0  0.399916  0.546635  0.729255  0.992735  0.747917  0.157603
1  0.200017  0.710527  0.361684  0.094026  0.895336  0.848446
2  0.552851  0.456602  0.410653  0.835223  0.769456  0.803724
3  0.392067  0.397841  0.393220  0.745361  0.360859  0.383625
4  0.820769  0.893594  0.312887  0.378115  0.584261  0.214013
# Make a few areas have NaN values
df.iloc[1:3,1] = np.nan
df.iloc[3:,3] = np.nan
df.iloc[2,5]=np.nan
df
          0         1         2         3         4         5
0  0.399916  0.546635  0.729255  0.992735  0.747917  0.157603
1  0.200017       NaN  0.361684  0.094026  0.895336  0.848446
2  0.552851       NaN  0.410653  0.835223  0.769456       NaN
3  0.392067  0.397841  0.393220       NaN  0.360859  0.383625
4  0.820769  0.893594  0.312887       NaN  0.584261  0.214013
df.isnull()
       0      1      2      3      4      5
0  False  False  False  False  False  False
1  False   True  False  False  False  False
2  False   True  False  False  False   True
3  False  False  False   True  False  False
4  False  False  False   True  False  False
#显示存在缺失值的行,以便清楚地确定缺失值的位置(注意:含有多个缺失值的行会重复显示,如下面输出中的第2行)
df[df.isnull().values==True]
.dataframe thead tr:only-child th { text-align: right; } .dataframe thead th { text-align: left; } .dataframe tbody tr th { vertical-align: top; }
          0         1         2         3         4         5
1  0.200017       NaN  0.361684  0.094026  0.895336  0.848446
2  0.552851       NaN  0.410653  0.835223  0.769456       NaN
2  0.552851       NaN  0.410653  0.835223  0.769456       NaN
3  0.392067  0.397841  0.393220       NaN  0.360859  0.383625
4  0.820769  0.893594  0.312887       NaN  0.584261  0.214013
#填充缺失数据
df.fillna(0)
.dataframe thead tr:only-child th { text-align: right; } .dataframe thead th { text-align: left; } .dataframe tbody tr th { vertical-align: top; }
          0         1         2         3         4         5
0  0.399916  0.546635  0.729255  0.992735  0.747917  0.157603
1  0.200017  0.000000  0.361684  0.094026  0.895336  0.848446
2  0.552851  0.000000  0.410653  0.835223  0.769456  0.000000
3  0.392067  0.397841  0.393220  0.000000  0.360859  0.383625
4  0.820769  0.893594  0.312887  0.000000  0.584261  0.214013
#用前一个值填充缺失值:axis=0 沿列方向(用同一列上一行的值)填充,axis=1 沿行方向(用同一行左侧一列的值)填充;method : {'backfill', 'bfill', 'pad', 'ffill', None},默认为 None
df.fillna(axis=0,method='ffill')
          0         1         2         3         4         5
0  0.399916  0.546635  0.729255  0.992735  0.747917  0.157603
1  0.200017  0.546635  0.361684  0.094026  0.895336  0.848446
2  0.552851  0.546635  0.410653  0.835223  0.769456  0.848446
3  0.392067  0.397841  0.393220  0.835223  0.360859  0.383625
4  0.820769  0.893594  0.312887  0.835223  0.584261  0.214013
df.columns=['A','B','C','D','E','F']
df
          A         B         C         D         E         F
0  0.399916  0.546635  0.729255  0.992735  0.747917  0.157603
1  0.200017       NaN  0.361684  0.094026  0.895336  0.848446
2  0.552851       NaN  0.410653  0.835223  0.769456       NaN
3  0.392067  0.397841  0.393220       NaN  0.360859  0.383625
4  0.820769  0.893594  0.312887       NaN  0.584261  0.214013
# Replace all NaN elements in columns 'A', 'B', 'C', 'D', 'E', and 'F' with 0, 1, 2, 3, 4, and 5 respectively.
values = {'A': 0, 'B': 1, 'C': 2, 'D': 3,'E':4,'F':5}
df.fillna(value=values)
          A         B         C         D         E         F
0  0.399916  0.546635  0.729255  0.992735  0.747917  0.157603
1  0.200017  1.000000  0.361684  0.094026  0.895336  0.848446
2  0.552851  1.000000  0.410653  0.835223  0.769456  5.000000
3  0.392067  0.397841  0.393220  3.000000  0.360859  0.383625
4  0.820769  0.893594  0.312887  3.000000  0.584261  0.214013
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值