import numpy as np
import pandas as pd

try:
    from sklearn.preprocessing import Imputer
except ImportError:
    # The legacy Imputer class was removed in scikit-learn 0.22
    # (sklearn.impute.SimpleImputer replaces it); keep the name defined so
    # the rest of the script can still be imported on current releases.
    Imputer = None
# Sample data: a 6x4 frame of random floats with four named columns.
df = pd.DataFrame(
    data=np.random.rand(6, 4),
    columns=["col1", "col2", "col3", "col4"],
)
# Knock out two cells so there is something to impute:
# row 1 of "col2" and row 4 of "col4".
df.iloc[1, 1] = np.nan
df.iloc[4, 3] = np.nan
print(df)

# Element-wise mask: True wherever a value is missing.
nan_all = df.isnull()
# Per-column summaries derived from that mask.
nan_col1 = nan_all.any()  # True for columns that contain at least one NaN
nan_col2 = nan_all.all()  # True for columns that consist only of NaN

# Note: dropna() works along axis 0 by default, i.e. it discards every ROW
# that contains a missing value (not the columns).
df2 = df.dropna(axis=0, how="any")
print(df2)
# Handle missing values with scikit-learn: replace each NaN with the mean of
# its column.
try:
    # Available since scikit-learn 0.20; the legacy preprocessing.Imputer
    # used originally was removed in 0.22.
    from sklearn.impute import SimpleImputer
except ImportError:
    # Older scikit-learn: fall back to the legacy class imported at the top
    # of the file, with its original arguments.
    nan_model = Imputer(missing_values="NaN", strategy="mean", axis=0)
else:
    # SimpleImputer has no `axis` argument; it always imputes column-wise,
    # which is what the legacy axis=0 setting meant.
    nan_model = SimpleImputer(missing_values=np.nan, strategy="mean")
# Learn the fill values from df and apply them in one step.
nan_result = nan_model.fit_transform(df)
print(nan_result)
# Handle missing values with pandas: back-fill, i.e. each NaN takes the next
# valid value further down the same column.
# DataFrame.bfill() is the direct equivalent of fillna(method="backfill");
# the `method` argument of fillna is deprecated in recent pandas releases.
nan_result_col1 = df.bfill()
print(nan_result_col1)
---------------------------
缺失值通常用均值、中位数或众数来填充。
Imputer 的 strategy 参数可取以下值：
mean：均值
median：中位数
most_frequent：众数
详情见源码。