什么是交叉验证?
以五折交叉验证为例:数据被均分为五份,每一份轮流作为测试集、其余四份作为训练集。这样所有的数据都参与了训练,也都参与了验证;总共训练了五次、测试了五次,将这五次得到的分数求平均,得到的评估结果更有说服力。
全部代码
# Imports and data loading
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn import datasets
# model_selection: model-selection utilities
# cross_val_score -- cross: cross, validation: validation (testing)
# cross-validation
from sklearn.model_selection import cross_val_score

# BUG FIX: `return_X_y` must be passed by keyword -- positional boolean
# arguments to load_iris were deprecated in sklearn 0.23 and removed in 1.1,
# so `load_iris(True)` raises TypeError on current versions.
X, y = datasets.load_iris(return_X_y=True)
X.shape
# Reference: a common rule of thumb caps K near sqrt(n_samples) = sqrt(150).
150 ** 0.5
# K will be searched over 1..13
# cross_val_score: use cross-validation to screen for suitable parameters
# Demo: basic usage of cross-validation
estimator = KNeighborsClassifier()
cv_scores = cross_val_score(estimator, X, y, scoring='accuracy', cv=6)
cv_scores.mean()
#应用cross_val_score去筛选最合适的邻居数量
erros = []
for k in range(1,14):
knn = KNeighborsClassifier(n_neighbors=k)
score = cross_val_score(knn,X,y,scoring='accuracy',cv = 6).mean()
#误差越小,说明k选择越合适,越好
erros.append(1 - score)
import matplotlib.pyplot as plt
%matplotlib inline
#k = 11时,误差最小,说明k = 11对鸢尾花来说,最合适的k值
plt.plot(np.arange(1,14),erros)
# Compare the two weighting schemes at the chosen neighbor count.
weights = ['uniform', 'distance']
for scheme in weights:
    classifier = KNeighborsClassifier(n_neighbors=11, weights=scheme)
    mean_accuracy = cross_val_score(classifier, X, y, scoring='accuracy', cv=6).mean()
    print(scheme, mean_accuracy)
# Grid-search over parameter combinations with cross_val_score to find
# the best (n_neighbors, weights) pair -- this is basic hyper-parameter tuning.
result = {}
for k in range(1, 14):
    for w in weights:
        knn = KNeighborsClassifier(n_neighbors=k, weights=w)
        sm = cross_val_score(knn, X, y, scoring='accuracy', cv=6).mean()
        # Key encodes the combination, e.g. 'uniform11'.
        result[w + str(k)] = sm
result
max(result.values())
np.array(list(result.values())).argmax()
# BUG FIX: look up the best key by its score instead of hard-coding
# index 20 (`list(result)[20]`), which silently breaks whenever the
# search grid or fold count changes. `max(..., key=result.get)` returns
# the first key with the maximum score, matching argmax's tie-breaking.
max(result, key=result.get)