交叉验证错误处理

from sklearn.datasets import load_wine  
import pandas as pd  
  
# 加载葡萄酒数据集  
data = load_wine()  
  
names = ['label','a1','a2','a3','a4','a5','a6','a7','a8','a9','a10','a11','a12']
# 将特征数据转换为DataFrame,并指定列名称  
dataset= pd.DataFrame(data.data, columns=names)  
  
# 目标变量(葡萄酒类型)已经是一个NumPy数组,你可以直接使用它  
# 但如果你想要一个DataFrame,也可以这样做:  
y = pd.Series(data.target, name='target')  
  
# 现在,X是一个DataFrame,包含了葡萄酒的特征数据,每列的名称都是对应的特征名称  
# y是一个Series,包含了葡萄酒的类型(目标变量)  
  
# 显示前几行数据以验证  
 
import matplotlib.pyplot as plt
plt.style.use('seaborn-darkgrid')
dataset.plot(kind='box',subplots=True,layout=(3,5),sharex=False,sharey=False)
p=dataset.boxplot(return_type='dict')
for i in range(13):
    y=p['fliers'][i].get_ydata()  #查找异常数据
    print('a',i+1,':',y)
plt.show()
a 1 : []
a 2 : [5.8  5.51 5.65]
a 3 : [1.36 3.22 3.23]
a 4 : [10.6 30.  28.5 28.5]
a 5 : [151. 139. 136. 162.]
a 6 : []
a 7 : []
a 8 : []
a 9 : [3.28 3.58]
a 10 : [10.8  13.   11.75 10.68]
a 11 : [1.71]
a 12 : []
a 13 : []

在这里插入图片描述

# 导入sklearn库中的preprocessing模块,该模块提供了数据预处理的功能  
from sklearn import preprocessing  
  
# 假设dataset是一个已经加载好的pandas DataFrame,这里从dataset中提取前178行,第2列到第13列的数据作为特征集(data1)  
# 注意:在pandas中,行和列的索引是从0开始的,所以range(1,13)实际上是选取了第2列到第13列  
data1 = dataset.iloc[range(0,178),range(1,13)]  
  
# 接着,从dataset中提取前178行的第1列(索引为0的列)作为目标变量(target1)  
# 注意:这里的reshape(1,178)[0]操作实际上是多余的,因为values已经是一个一维数组(长度为178),但这样做会先将数组转置成1行178列的二维数组,然后再取其第一行,结果仍然是原一维数组  
# 更简洁的写法是直接使用:target1 = dataset.iloc[0:178, 0].values  
target1 = dataset.iloc[range(0,178),range(0,1)].values.reshape(1,178)[0]  
  
# 使用StandardScaler对特征集(data1)进行标准化处理  
# fit_transform方法首先计算数据的均值和标准差(即拟合),然后使用这些参数将数据转换为标准正态分布(均值为0,标准差为1)  
cdata = preprocessing.StandardScaler().fit_transform(data1)  
  
# 打印标准化后的特征集(cdata)  
print(cdata)
[[-0.5622498   0.23205254 -1.16959318 ...  0.36217728  1.84791957
   1.01300893]
 [-0.49941338 -0.82799632 -2.49084714 ...  0.40605066  1.1134493
   0.96524152]
 [ 0.02123125  1.10933436 -0.2687382  ...  0.31830389  0.78858745
   1.39514818]
 ...
 [ 1.74474449 -0.38935541  0.15166079 ... -1.61212515 -1.48544548
   0.28057537]
 [ 0.22769377  0.01273209  0.15166079 ... -1.56825176 -1.40069891
   0.29649784]
 [ 1.58316512  1.36520822  1.50294326 ... -1.52437837 -1.42894777
  -0.59516041]]
import matplotlib.pyplot as plt
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
x,y = cdata,target1
x_train,x_test,y_train,y_test = train_test_split(x,y,random_state=0)
k_range= range(1,12)
k_error = []
for k in k_range:
    model=KNeighborsClassifier(n_neighbors=k)
    scores = cross_val_score(model, x, y, cv=5, scoring='accuracy')
    k_error.append(1-scores.mean())
plt.plot(k_range,k_error,'r-')
plt.show()
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py:542: FutureWarning: From version 0.22, errors during fit will result in a cross validation score of NaN by default. Use error_score='raise' if you want an exception raised or error_score=np.nan to adopt the behavior from version 0.22.
  FutureWarning)



---------------------------------------------------------------------------

ValueError                                Traceback (most recent call last)

<ipython-input-13-9adeb1bba86d> in <module>
     10 for k in k_range:
     11     model=KNeighborsClassifier(n_neighbors=k)
---> 12     scores = cross_val_score(model, x, y, cv=5, scoring='accuracy')
     13     k_error.append(1-scores.mean())
     14 plt.plot(k_range,k_error,'r-')


C:\ProgramData\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py in cross_val_score(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch, error_score)
    400                                 fit_params=fit_params,
    401                                 pre_dispatch=pre_dispatch,
--> 402                                 error_score=error_score)
    403     return cv_results['test_score']
    404 


C:\ProgramData\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py in cross_validate(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch, return_train_score, return_estimator, error_score)
    238             return_times=True, return_estimator=return_estimator,
    239             error_score=error_score)
--> 240         for train, test in cv.split(X, y, groups))
    241 
    242     zipped_scores = list(zip(*scores))


C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self, iterable)
    915             # remaining jobs.
    916             self._iterating = False
--> 917             if self.dispatch_one_batch(iterator):
    918                 self._iterating = self._original_iterator is not None
    919 


C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in dispatch_one_batch(self, iterator)
    757                 return False
    758             else:
--> 759                 self._dispatch(tasks)
    760                 return True
    761 


C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in _dispatch(self, batch)
    714         with self._lock:
    715             job_idx = len(self._jobs)
--> 716             job = self._backend.apply_async(batch, callback=cb)
    717             # A job can complete so quickly than its callback is
    718             # called before we get here, causing self._jobs to


C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\_parallel_backends.py in apply_async(self, func, callback)
    180     def apply_async(self, func, callback=None):
    181         """Schedule a func to be run"""
--> 182         result = ImmediateResult(func)
    183         if callback:
    184             callback(result)


C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\_parallel_backends.py in __init__(self, batch)
    547         # Don't delay the application, to avoid keeping the input
    548         # arguments in memory
--> 549         self.results = batch()
    550 
    551     def get(self):


C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self)
    223         with parallel_backend(self._backend, n_jobs=self._n_jobs):
    224             return [func(*args, **kwargs)
--> 225                     for func, args, kwargs in self.items]
    226 
    227     def __len__(self):


C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in <listcomp>(.0)
    223         with parallel_backend(self._backend, n_jobs=self._n_jobs):
    224             return [func(*args, **kwargs)
--> 225                     for func, args, kwargs in self.items]
    226 
    227     def __len__(self):


C:\ProgramData\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py in _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, return_train_score, return_parameters, return_n_test_samples, return_times, return_estimator, error_score)
    526             estimator.fit(X_train, **fit_params)
    527         else:
--> 528             estimator.fit(X_train, y_train, **fit_params)
    529 
    530     except Exception as e:


C:\ProgramData\Anaconda3\lib\site-packages\sklearn\neighbors\base.py in fit(self, X, y)
    903             self.outputs_2d_ = True
    904 
--> 905         check_classification_targets(y)
    906         self.classes_ = []
    907         self._y = np.empty(y.shape, dtype=np.int)


C:\ProgramData\Anaconda3\lib\site-packages\sklearn\utils\multiclass.py in check_classification_targets(y)
    169     if y_type not in ['binary', 'multiclass', 'multiclass-multioutput',
    170                       'multilabel-indicator', 'multilabel-sequences']:
--> 171         raise ValueError("Unknown label type: %r" % y_type)
    172 
    173 


ValueError: Unknown label type: 'continuous'
print(x.shape)  # 打印 x 的形状  
assert len(x.shape) == 2, "x 必须是二维的"  
assert isinstance(x, (np.ndarray, pd.DataFrame, ...)), "x 必须是数组或类似数组的对象"  
# 注意:... 表示您可能需要根据实际情况添加其他可能的类型
print(y.shape)  # 打印 y 的形状  
assert len(y.shape) == 1, "y 必须是一维的"  
assert y.shape[0] == x.shape[0], "y 的长度必须与 x 的行数相同"  
assert isinstance(y, (np.ndarray, pd.Series, list)), "y 必须是数组、序列或类似一维数组的对象"
(178, 12)
(178,)


C:\ProgramData\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py:542: FutureWarning: From version 0.22, errors during fit will result in a cross validation score of NaN by default. Use error_score='raise' if you want an exception raised or error_score=np.nan to adopt the behavior from version 0.22.
  FutureWarning)



---------------------------------------------------------------------------

ValueError                                Traceback (most recent call last)

<ipython-input-12-d873070a37bb> in <module>
      2 for k in k_range:
      3     model=KNeighborsClassifier(n_neighbors=k)
----> 4     scores = cross_val_score(model, x, y, cv=5, scoring='accuracy')
      5     k_error.append(1-scores.mean())
      6 plt.plot(k_range,k_error,'r-')


C:\ProgramData\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py in cross_val_score(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch, error_score)
    400                                 fit_params=fit_params,
    401                                 pre_dispatch=pre_dispatch,
--> 402                                 error_score=error_score)
    403     return cv_results['test_score']
    404 


C:\ProgramData\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py in cross_validate(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch, return_train_score, return_estimator, error_score)
    238             return_times=True, return_estimator=return_estimator,
    239             error_score=error_score)
--> 240         for train, test in cv.split(X, y, groups))
    241 
    242     zipped_scores = list(zip(*scores))


C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self, iterable)
    915             # remaining jobs.
    916             self._iterating = False
--> 917             if self.dispatch_one_batch(iterator):
    918                 self._iterating = self._original_iterator is not None
    919 


C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in dispatch_one_batch(self, iterator)
    757                 return False
    758             else:
--> 759                 self._dispatch(tasks)
    760                 return True
    761 


C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in _dispatch(self, batch)
    714         with self._lock:
    715             job_idx = len(self._jobs)
--> 716             job = self._backend.apply_async(batch, callback=cb)
    717             # A job can complete so quickly than its callback is
    718             # called before we get here, causing self._jobs to


C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\_parallel_backends.py in apply_async(self, func, callback)
    180     def apply_async(self, func, callback=None):
    181         """Schedule a func to be run"""
--> 182         result = ImmediateResult(func)
    183         if callback:
    184             callback(result)


C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\_parallel_backends.py in __init__(self, batch)
    547         # Don't delay the application, to avoid keeping the input
    548         # arguments in memory
--> 549         self.results = batch()
    550 
    551     def get(self):


C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self)
    223         with parallel_backend(self._backend, n_jobs=self._n_jobs):
    224             return [func(*args, **kwargs)
--> 225                     for func, args, kwargs in self.items]
    226 
    227     def __len__(self):


C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in <listcomp>(.0)
    223         with parallel_backend(self._backend, n_jobs=self._n_jobs):
    224             return [func(*args, **kwargs)
--> 225                     for func, args, kwargs in self.items]
    226 
    227     def __len__(self):


C:\ProgramData\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py in _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, return_train_score, return_parameters, return_n_test_samples, return_times, return_estimator, error_score)
    526             estimator.fit(X_train, **fit_params)
    527         else:
--> 528             estimator.fit(X_train, y_train, **fit_params)
    529 
    530     except Exception as e:


C:\ProgramData\Anaconda3\lib\site-packages\sklearn\neighbors\base.py in fit(self, X, y)
    903             self.outputs_2d_ = True
    904 
--> 905         check_classification_targets(y)
    906         self.classes_ = []
    907         self._y = np.empty(y.shape, dtype=np.int)


C:\ProgramData\Anaconda3\lib\site-packages\sklearn\utils\multiclass.py in check_classification_targets(y)
    169     if y_type not in ['binary', 'multiclass', 'multiclass-multioutput',
    170                       'multilabel-indicator', 'multilabel-sequences']:
--> 171         raise ValueError("Unknown label type: %r" % y_type)
    172 
    173 


ValueError: Unknown label type: 'continuous'

  • 2
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值