float() argument must be a string or a number, not ‘map‘

本文介绍了如何使用Python的sklearn库中的SimpleImputer函数处理数据集中缺失值(NaN)。首先通过loadDataSet函数读取数据,然后将map函数应用到list中以解决类型错误,最后演示了如何用平均值填充缺失值。
摘要由CSDN通过智能技术生成
from numpy import *
import os
import pandas as pd

def loadDataSet(fileName, delim='\t'):
    fr = open(fileName)
    stringArr = [line.strip().split(delim) for line in fr.readlines()]
    datArr = [map(float, line) for line in stringArr]     #################
    return mat(datArr)

def replaceNanWithMean(): 
    datMat = loadDataSet('C:/Users/Omega/OneDrive/桌面/实验三+四/4/全部数据集/secom.data', ' ')
    numFeat = shape(datMat)[1]
    print(type(datMat))
    datMat=imputer(datMat)
#     for i in range(numFeat):
#         meanVal = mean(datMat[nonzero(~isnan(datMat[:, i].A))[0], i])  # values that are not NaN (a number)
#         datMat[nonzero(isnan(datMat[:, i].A))[0], i] = meanVal  # set NaN values to mean
    return datMat

from sklearn.impute import SimpleImputer  # 上面遗漏了一块
 
def imputer(a):
    im=SimpleImputer(missing_values=nan,strategy="mean")
    data=im.fit_transform(a)
    return data

运行上面这段代码之后,会出现如下错误:

<class 'numpy.matrix'>
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-77-b762e1b84152> in <module>
----> 1 dataMat = replaceNanWithMean()############################

<ipython-input-75-d4c5db6a0068> in replaceNanWithMean()
      3     numFeat = shape(datMat)[1]
      4     print(type(datMat))
----> 5     datMat=imputer(datMat)
      6 #     for i in range(numFeat):
      7 #         meanVal = mean(datMat[nonzero(~isnan(datMat[:, i].A))[0], i])  # values that are not NaN (a number)

<ipython-input-72-37b11b13bcab> in imputer(a)
      3 def imputer(a):
      4     im=SimpleImputer(missing_values=nan,strategy="mean")
----> 5     data=im.fit_transform(a)
      6     return data
      7 #     data=im.fit_transform([[90,2,10,np.nan],

D:\anacoda\lib\site-packages\sklearn\base.py in fit_transform(self, X, y, **fit_params)
    697         if y is None:
    698             # fit method of arity 1 (unsupervised transformation)
--> 699             return self.fit(X, **fit_params).transform(X)
    700         else:
    701             # fit method of arity 2 (supervised transformation)

D:\anacoda\lib\site-packages\sklearn\impute\_base.py in fit(self, X, y)
    286         self : SimpleImputer
    287         """
--> 288         X = self._validate_input(X, in_fit=True)
    289 
    290         # default fill_value is 0 for numerical input and "missing_value"

D:\anacoda\lib\site-packages\sklearn\impute\_base.py in _validate_input(self, X, in_fit)
    250 
    251         try:
--> 252             X = self._validate_data(X, reset=in_fit,
    253                                     accept_sparse='csc', dtype=dtype,
    254                                     force_all_finite=force_all_finite,

D:\anacoda\lib\site-packages\sklearn\base.py in _validate_data(self, X, y, reset, validate_separately, **check_params)
    419             out = X
    420         elif isinstance(y, str) and y == 'no_validation':
--> 421             X = check_array(X, **check_params)
    422             out = X
    423         else:

D:\anacoda\lib\site-packages\sklearn\utils\validation.py in inner_f(*args, **kwargs)
     61             extra_args = len(args) - len(all_args)
     62             if extra_args <= 0:
---> 63                 return f(*args, **kwargs)
     64 
     65             # extra_args > 0

D:\anacoda\lib\site-packages\sklearn\utils\validation.py in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator)
    614                     array = array.astype(dtype, casting="unsafe", copy=False)
    615                 else:
--> 616                     array = np.asarray(array, order=order, dtype=dtype)
    617             except ComplexWarning as complex_warning:
    618                 raise ValueError("Complex data not supported\n"

D:\anacoda\lib\site-packages\numpy\core\_asarray.py in asarray(a, dtype, order, like)
    100         return _asarray_with_like(a, dtype=dtype, order=order, like=like)
    101 
--> 102     return array(a, dtype, copy=False, order=order)
    103 
    104 

TypeError: float() argument must be a string or a number, not 'map'

最后经过查看后,了解到map函数要套上list,修改后正常运行:

datArr = [list(map(float, line)) for line in stringArr]

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值