"""
@author: Vincnet_Sheng
@file: sklearn-cross_validation-1.py
@time: 2018/1/4 0004 下午 8:17
#-*- coding: utf-8 -*
"""
# Goals: 1) use cross-validation to draw different train/test splits from the same samples
# 2) show how the value of k in KNN relates to prediction accuracy, and visualize it
from sklearn.datasets import load_iris
from sklearn.model_selection import cross_val_score
from sklearn.neighbors import KNeighborsClassifier
import matplotlib.pyplot as plt
# Load the iris dataset: feature matrix X and class labels y.
iris = load_iris()
X = iris.data
y = iris.target

# For each k in 1..30, compute the mean cross-validated accuracy of KNN.
k_range = range(1, 31)
k_scores = []  # mean accuracy for each k, in the same order as k_range
for k in k_range:
    knn = KNeighborsClassifier(n_neighbors=k)
    # 10-fold cross-validation (cv=10) scored by accuracy; cross_val_score
    # returns one score per fold, which is averaged below.
    scores = cross_val_score(knn, X, y, cv=10, scoring='accuracy')
    k_scores.append(scores.mean())

# Visualize the relationship between k and the cross-validated accuracy.
plt.plot(k_range, k_scores)
plt.xlabel('Value of K for KNN')
plt.ylabel('Cross_Validation Accuracy')
plt.show()
# Output figure: