import numpy as np
from sklearn import datasets
# sklearn.cross_validation was deprecated in 0.18 and removed in 0.20;
# train_test_split now lives in sklearn.model_selection.
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

# Load the iris dataset (150 samples, 4 features, 3 classes).
iris = datasets.load_iris()
iris_X = iris.data
iris_Y = iris.target
# print(iris_X[:5, :])
# print(iris_Y)

# Hold out part of the data for testing (default test_size = 0.25);
# random_state=4 makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(iris_X, iris_Y, random_state=4)
# print(y_train)

# Fit a 3-nearest-neighbours classifier and report accuracy on the hold-out set.
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, y_train)
# y_pre = knn.predict(X_test)
print(knn.score(X_test, y_test))
不进行交叉验证时,测试集上的准确率为:0.973684210526
"""
Created on Wed Nov 9 15:47:35 2016
功能:交叉验证(cross validation)
分类器:k邻近
数据:莺尾花
@author: haoming
"""
import numpy as np
from sklearn import datasets
from sklearn.cross_validation import train_test_split
from sklearn.neighbors import KNeighborsClassifier
iris = datasets.load_iris()
iris_X =iris.data
iris_Y = iris.target
from sklearn.cross_validation import cross_val_score
knn=KNeighborsClassifier(n_neighbors=5)
scores=cross_val_score(knn,iris_X,iris_Y,cv=5,scoring='accuracy')
'''
交叉验证将测试数据分为五组 cv = 5,最后输出平均值mean()
'''
print (scores)
print (scores.mean())
[ 0.96666667 1. 0.93333333 0.96666667 1. ]
0.973333333333
以下是一段为 k 近邻算法选择参数 k 的方法
from sklearn import datasets
from sklearn.neighbors import KNeighborsClassifier
# sklearn.cross_validation was removed in 0.20; use sklearn.model_selection.
from sklearn.model_selection import cross_val_score
import matplotlib.pyplot as plt

# Load the iris dataset (150 samples, 4 features, 3 classes).
iris = datasets.load_iris()
iris_X = iris.data
iris_Y = iris.target

# Evaluate k = 1..30 with 10-fold cross validation and record the mean
# accuracy for each k, so the best k can be read off the resulting curve.
k_range = range(1, 31)
k_score = []
for k in k_range:
    knn = KNeighborsClassifier(n_neighbors=k)
    scores = cross_val_score(knn, iris_X, iris_Y, cv=10, scoring='accuracy')
    # loss = cross_val_score(knn, iris_X, iris_Y, cv=10, scoring='mean_squared_error')
    k_score.append(scores.mean())

# Plot mean accuracy against k.
plt.plot(k_range, k_score)
plt.xlabel('Value of K for KNN')
plt.ylabel('cross validation accuracy')
plt.show()
结果:
从图中我们可以看出,选择怎样的 k 能使正确率比较高
以下是一段为 KNN 回归选择参数 k 的代码
from sklearn import datasets
from sklearn.neighbors import KNeighborsClassifier
# sklearn.cross_validation was removed in 0.20; use sklearn.model_selection.
from sklearn.model_selection import cross_val_score
import matplotlib.pyplot as plt

# Load the iris dataset (150 samples, 4 features, 3 classes).
iris = datasets.load_iris()
iris_X = iris.data
iris_Y = iris.target

# Select k by minimising the cross-validated mean squared error.
# NOTE(review): the surrounding text calls this "KNN regression", but the
# estimator is still KNeighborsClassifier — for true regression use
# KNeighborsRegressor. Kept as-is to preserve the original behaviour.
k_range = range(1, 31)
k_score = []
for k in k_range:
    knn = KNeighborsClassifier(n_neighbors=k)
    # scores = cross_val_score(knn, iris_X, iris_Y, cv=10, scoring='accuracy')
    # k_score.append(scores.mean())
    # The 'mean_squared_error' scorer was renamed 'neg_mean_squared_error'
    # in sklearn 0.18 (old name later removed); it returns the *negated* MSE,
    # so negate it again to obtain the positive error for plotting.
    loss = -cross_val_score(knn, iris_X, iris_Y, cv=10, scoring='neg_mean_squared_error')
    k_score.append(loss.mean())

# Plot mean squared error against k (lower is better).
plt.plot(k_range, k_score)
plt.xlabel('Value of K for KNN')
plt.ylabel('cross validation mean squared error')  # was mislabelled 'accuracy'
plt.show()
运行结果: