案例来源:《Python机器学习实战》
from sklearn import datasets
from sklearn.neighbors import KNeighborsClassifier
# from sklearn import cross_validation
from sklearn import model_selection
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
# Prepare the dataset and split it into training and validation sets.
iris = datasets.load_iris()  # Iris dataset bundled with scikit-learn
X, Y = iris.data, iris.target

validation_size = 0.20
seed = 1
# Hold out a random 20% of the samples as the validation set; passing a fixed
# random_state keeps the split reproducible from run to run.
X_train, X_validation, Y_train, Y_validation = model_selection.train_test_split(
    X,
    Y,
    test_size=validation_size,
    random_state=seed,
)
# Create a KNN classifier with default hyperparameters and fit it on the
# training split (fit() returns the estimator itself, so the call is chained).
knn = KNeighborsClassifier().fit(X_train, Y_train)
# Predict on the validation set, then print the accuracy score, the confusion
# matrix and the classification report, in that order.
predictions = knn.predict(X_validation)
for metric in (accuracy_score, confusion_matrix, classification_report):
    print(metric(Y_validation, predictions))
原书中调用的是sklearn.cross_validation模块,该模块自scikit-learn 0.18起被弃用,并已在0.20版本中移除,新版本中无法再导入。应改用其下方导入的sklearn.model_selection模块代替,其中train_test_split的用法保持不变。
运行结果