from numpy import *
from sklearn.datasets import load_iris
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
# Load the iris dataset, keeping only the first two features so the
# samples can be plotted in 2-D.
iris = load_iris()
data, label = iris.data[:, :2], iris.target
# Scatter plot of the raw samples, one marker/colour per class.
for cls, mk, col in ((0, 'x', 'r'), (1, 'o', 'b'), (2, 's', 'g')):
    idx = where(label == cls)  # indices of the samples in this class
    # s=25 sets the marker size.
    plt.scatter(data[idx, 0], data[idx, 1],
                marker=mk, color=col, label=str(cls), s=25)
plt.xlabel('X1')
plt.ylabel('X2')
plt.legend(loc='upper left')
plt.show()
Output:
# Hold out 20% of the data as the final test set (fixed seed for
# reproducibility).
x_train, x_test, y_train, y_test = train_test_split(
    data, label, test_size=0.2, random_state=1)

'''Cross-validation for the k-nearest-neighbour classifier.'''
folds = 4  # 4-fold cross-validation
k_choices = [1, 3, 5, 7, 9, 11, 13, 15, 17, 21, 23, 25]  # candidate k values
# Split the training data into `folds` equal parts: vsplit cuts the 2-D
# x_train row-wise, hsplit cuts the 1-D y_train along its only axis.
# (The original also pre-assigned X_folds=[] / Y_folds=[]; those were dead
# assignments, immediately overwritten, and are removed here.)
X_folds = vsplit(x_train, folds)
Y_folds = hsplit(y_train, folds)
accuracy_of_k = {}  # maps k -> list of per-fold validation accuracies
# Initialise one accuracy list per candidate k.
# (Loop-body indentation was lost in the pasted original — as written it
# is a syntax error; the structure below restores the intended nesting.)
for k in k_choices:
    accuracy_of_k[k] = []

# Cross-validation: each fold serves once as the validation set while
# the remaining folds are stacked back together as the training set.
for i in range(folds):
    X_train = vstack(X_folds[:i] + X_folds[i + 1:])  # CV training features
    X_val = X_folds[i]                               # CV validation features
    Y_train = hstack(Y_folds[:i] + Y_folds[i + 1:])  # CV training labels
    Y_val = Y_folds[i]                               # CV validation labels
    # Evaluate every candidate k on this fold.
    for k in k_choices:
        knn = KNeighborsClassifier(n_neighbors=k)
        knn.fit(X_train, Y_train)
        Y_val_pred = knn.predict(X_val)
        accuracy = mean(Y_val_pred == Y_val)  # fraction of correct predictions
        accuracy_of_k[k].append(accuracy)
# Report every fold's accuracy for each candidate k, in ascending k order.
# (Indentation restored — the pasted original had the loop bodies flush
# left, which is a syntax error.)
for k in sorted(k_choices):
    for accuracy in accuracy_of_k[k]:
        print("k=%d,accuracy=%f" % (k, accuracy))
Output:
k=1,accuracy=0.700000
k=1,accuracy=0.766667
k=1,accuracy=0.666667
k=1,accuracy=0.800000
k=3,accuracy=0.600000
k=3,accuracy=0.766667
k=3,accuracy=0.766667
k=3,accuracy=0.733333
k=5,accuracy=0.600000
k=5,accuracy=0.766667
k=5,accuracy=0.733333
k=5,accuracy=0.800000
k=7,accuracy=0.666667
k=7,accuracy=0.766667
k=7,accuracy=0.800000
k=7,accuracy=0.666667
k=9,accuracy=0.766667
k=9,accuracy=0.766667
k=9,accuracy=0.766667
k=9,accuracy=0.766667
k=11,accuracy=0.833333
k=11,accuracy=0.800000
k=11,accuracy=0.766667
k=11,accuracy=0.733333
k=13,accuracy=0.800000
k=13,accuracy=0.833333
k=13,accuracy=0.766667
k=13,accuracy=0.733333
k=15,accuracy=0.800000
k=15,accuracy=0.766667
k=15,accuracy=0.766667
k=15,accuracy=0.733333
k=17,accuracy=0.800000
k=17,accuracy=0.833333
k=17,accuracy=0.766667
k=17,accuracy=0.733333
k=21,accuracy=0.800000
k=21,accuracy=0.800000
k=21,accuracy=0.766667
k=21,accuracy=0.700000
k=23,accuracy=0.800000
k=23,accuracy=0.833333
k=23,accuracy=0.733333
k=23,accuracy=0.733333
k=25,accuracy=0.800000
k=25,accuracy=0.833333
k=25,accuracy=0.733333
k=25,accuracy=0.733333
# Visualise the cross-validation accuracies: one point per fold at each k.
# (Indentation of the scatter call restored — lost in the pasted original.)
for k in k_choices:
    plt.scatter([k] * len(accuracy_of_k[k]), accuracy_of_k[k])
# Error-bar plot of mean +/- std accuracy across folds for each k.
# sorted(accuracy_of_k.items()) iterates k in the same ascending order
# as k_choices, so the arrays line up with the x positions.
accuracies_mean = array([mean(v) for k, v in sorted(accuracy_of_k.items())])
accuracies_std = array([std(v) for k, v in sorted(accuracy_of_k.items())])
plt.errorbar(k_choices, accuracies_mean, accuracies_std)
plt.xlabel('k')
plt.ylabel('accuracy')
plt.show()
Output:
'''Cross-validation above selects k = 13 as the best value.'''
best_k = 13
# Retrain on the full training set with the chosen k and evaluate on the
# held-out test set.
knn = KNeighborsClassifier(n_neighbors=best_k)
knn.fit(x_train, y_train)
y_test_pred = knn.predict(x_test)
# Fixed NameError: the original referenced a misspelled `y_teat_pred`.
correctNum = sum(y_test_pred == y_test)     # number of correct predictions
accuracyRate = mean(y_test_pred == y_test)  # test-set accuracy
print("正确的个数为:%d,准确率为:%f"%(correctNum,accuracyRate))
Output:
正确的个数为:24,准确率为:0.800000