scikit-learn——交叉验证

import numpy as np
from sklearn import datasets
# sklearn.cross_validation was removed in scikit-learn 0.20; the same helpers
# now live in sklearn.model_selection.
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

# Baseline without cross-validation: a single train/test split of the iris
# data, a 3-nearest-neighbours classifier, and its score on the held-out part.
iris = datasets.load_iris()
iris_X = iris.data
iris_Y = iris.target

# A fixed random_state keeps the split (and therefore the printed score)
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    iris_X, iris_Y, random_state=4)

knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, y_train)
print(knn.score(X_test, y_test))


没有进行交叉验证结果为:0.973684210526

"""
Created on Wed Nov  9 15:47:35 2016
功能:交叉验证(cross validation)
分类器:k近邻
数据:鸢尾花
@author: haoming
"""

import numpy as np
from sklearn import datasets
# sklearn.cross_validation was removed in scikit-learn 0.20; the same helpers
# now live in sklearn.model_selection.
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

iris = datasets.load_iris()
iris_X = iris.data
iris_Y = iris.target

knn = KNeighborsClassifier(n_neighbors=5)

# cv=5 splits the whole dataset into five folds; each fold is used once as
# the validation set, giving one accuracy value per fold.
scores = cross_val_score(knn, iris_X, iris_Y, cv=5, scoring='accuracy')

# Print the per-fold accuracies, then their mean as the overall estimate.
print(scores)
print(scores.mean())


输出结果:

[ 0.96666667  1.          0.93333333  0.96666667  1.        ]
0.973333333333



以下是一段通过交叉验证选择k近邻算法参数k的代码

from sklearn import datasets
from sklearn.neighbors import KNeighborsClassifier
# sklearn.cross_validation was removed in scikit-learn 0.20; cross_val_score
# now lives in sklearn.model_selection.
from sklearn.model_selection import cross_val_score
import matplotlib.pyplot as plt

iris = datasets.load_iris()
iris_X = iris.data
iris_Y = iris.target

# Candidate neighbour counts: 1..30 inclusive.
k_range = range(1, 31)

# For every candidate k, run 10-fold cross-validation and keep the mean
# accuracy over the folds.
k_score = [
    cross_val_score(
        KNeighborsClassifier(n_neighbors=k),
        iris_X, iris_Y, cv=10, scoring='accuracy',
    ).mean()
    for k in k_range
]

# Plot mean accuracy against k so a good value of k can be read off the curve.
plt.plot(k_range, k_score)
plt.xlabel('Value of K for KNN')
plt.ylabel('cross validation accuracy')
plt.show()

结果:



从图中我们可以看出,选择什么样的k值时正确率比较高


下面这段代码改用均方误差(mean squared error,常用于回归问题的损失指标)来选择KNN的参数k,误差越小越好

from sklearn import datasets
from sklearn.neighbors import KNeighborsClassifier
# sklearn.cross_validation was removed in scikit-learn 0.20; cross_val_score
# now lives in sklearn.model_selection.
from sklearn.model_selection import cross_val_score
import matplotlib.pyplot as plt

iris = datasets.load_iris()
iris_X = iris.data
iris_Y = iris.target

# Candidate neighbour counts: 1..30 inclusive.
k_range = range(1, 31)
k_score = []
for k in k_range:
    knn = KNeighborsClassifier(n_neighbors=k)
    # The scorer is named 'neg_mean_squared_error' (the old
    # 'mean_squared_error' name no longer exists). It yields the negated MSE
    # so that "greater is better" holds for every scorer; negate it back to
    # obtain a positive loss.
    loss = -cross_val_score(knn, iris_X, iris_Y, cv=10,
                            scoring='neg_mean_squared_error')
    k_score.append(loss.mean())

# The curve shows a loss (mean squared error), so lower is better and the
# axis is labelled accordingly.
plt.plot(k_range, k_score)
plt.xlabel('Value of K for KNN')
plt.ylabel('cross validation mean squared error')
plt.show()

运行结果:




评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值