import numpy as np
import pandas as pd
# Toy data set: col1 is complete, col2/col3 are numeric columns with gaps,
# and col4/col5 are text columns with gaps. np.nan marks each missing entry.
dic = dict(
    col1=[1, 2, 3, 4, 5],
    col2=[1, np.nan, 3, np.nan, 6],
    col3=[1, 2, np.nan, np.nan, 5],
    col4=["aaa", "bbb", np.nan, "ccc", "bbb"],
    col5=["bbb", "ccc", "ccc", np.nan, np.nan],
)

# Convert the column-name -> values mapping into a DataFrame and show it.
df = pd.DataFrame(data=dic)
print(df)
from sklearn.impute import SimpleImputer # 模型
# SimpleImputer parameters relevant to this script:
#   missing_values=np.nan  -- the marker that is treated as "missing"
#   strategy               -- fill method: "mean", "median", "most_frequent"
#                             (mode) or "constant"
#   fill_value             -- the replacement value for strategy="constant"
# Choose whichever strategy suits the column being filled.
s = SimpleImputer(strategy="mean")  # numeric columns: fill gaps with the column mean
for col_name in ['col2', 'col3']:
    # The imputer works on 2-D input, so the column is reshaped to (n, 1);
    # the (n, 1) result is flattened back to 1-D before being stored.
    # Example: col2 starts as [1, nan, 3, nan, 6].
    df[col_name] = s.fit_transform(df[col_name].values.reshape(-1, 1)).ravel()
print(df)
# Text columns: replace every missing entry with the fixed placeholder "eee".
s2 = SimpleImputer(strategy="constant", fill_value="eee")
for col_name in ["col4", "col5"]:
    # The imputer works on 2-D input, so the column is reshaped to (n, 1);
    # the (n, 1) result is flattened back to 1-D before being stored.
    df[col_name] = s2.fit_transform(df[col_name].values.reshape(-1, 1)).ravel()
print(df)
# Missing-value handling with SimpleImputer
# (Source page footer: latest recommended article published 2024-04-16 21:32:09)