模型原型
sklearn.neighbors.KNeighborsRegressor(n_neighbors=5, weights='uniform',
algorithm='auto', leaf_size=30, p=2, metric='minkowski', metric_params=None, n_jobs=1, **kwargs)
参数
- n_neighbors
- weights
- algorithm
- leaf_size
- p
- n_jobs
方法
- fit(X,y)
- predict(X)
- score(X,y)
- predict_proba(X)（注：KNeighborsRegressor 没有此方法，它属于分类器 KNeighborsClassifier）
- kneighbors([X,n_neighbors,return_distance])
- kneighbors_graph([X,n_neighbors,mode])
import numpy as np
import matplotlib.pyplot as plt
from sklearn import neighbors,datasets,cross_validation
def create_regression_data(n):
    """Generate a noisy 1-D regression dataset and split it 75/25.

    The targets are y = sin(x) for x uniform in [0, 5); every 5th target
    additionally gets uniform noise drawn from [-0.5, 0.5).

    Parameters
    ----------
    n : int
        Number of samples to generate.

    Returns
    -------
    X_train, X_test, y_train, y_test : arrays
        Train/test split with test_size=0.25 and random_state=0.
    """
    X = 5 * np.random.rand(n, 1)
    y = np.sin(X).ravel()
    # Perturb every 5th target with uniform noise in [-0.5, 0.5).
    y[::5] += 1 * (0.5 - np.random.rand(int(n / 5)))
    # sklearn.cross_validation was removed in scikit-learn 0.20; prefer
    # model_selection and fall back to the legacy module on old installs.
    try:
        from sklearn.model_selection import train_test_split
    except ImportError:  # scikit-learn < 0.18
        from sklearn.cross_validation import train_test_split
    return train_test_split(X, y, test_size=0.25,
                            random_state=0)
使用KNeighborsRegressor
def test_KNeighborsRegressor(*data):
    """Fit a default KNeighborsRegressor and print train/test R^2 scores.

    data : tuple of (X_train, X_test, y_train, y_test).
    """
    X_train, X_test, y_train, y_test = data
    model = neighbors.KNeighborsRegressor()
    model.fit(X_train, y_train)
    print('Training Score:%f' % model.score(X_train, y_train))
    print('Testing Score:%f' % model.score(X_test, y_test))
# Build a 75/25 split of 1000 noisy sin samples, then run the default-parameter demo.
X_train,X_test,y_train,y_test=create_regression_data(1000)
test_KNeighborsRegressor(X_train,X_test,y_train,y_test)
k值以及投票策略的影响
def test_KNeighborsRegressor_k_w(*data):
    """Plot train/test R^2 of KNeighborsRegressor versus k for both weightings.

    Sweeps 100 values of n_neighbors in [1, n_train) for
    weights in {'uniform', 'distance'} and shows the score curves.

    data : tuple of (X_train, X_test, y_train, y_test).
    """
    X_train, X_test, y_train, y_test = data
    # 100 integer k values evenly spaced in [1, n_train); endpoint excluded
    # so n_neighbors never exceeds the training-set size.
    Ks = np.linspace(1, y_train.size, num=100, endpoint=False, dtype='int')
    weights = ['uniform', 'distance']
    # plot
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    for weight in weights:
        training_scores = []
        testing_scores = []
        for K in Ks:
            regr = neighbors.KNeighborsRegressor(weights=weight,
                                                 n_neighbors=K)
            # BUGFIX: the original cast y to int via astype('int'), which
            # truncates the continuous sin-valued targets and makes the
            # regression scores meaningless. Fit and score on the original
            # float targets, as the p-value sweep does.
            regr.fit(X_train, y_train)
            testing_scores.append(regr.score(X_test, y_test))
            training_scores.append(regr.score(X_train, y_train))
        ax.plot(Ks, testing_scores, label='testing score:weight=%s' % weight)
        ax.plot(Ks, training_scores, label='training score:weight=%s' % weight)
    ax.legend(loc='best')
    ax.set_xlabel('K')
    ax.set_ylabel('score')
    ax.set_ylim(0, 1.05)
    ax.set_title('KNeighborsRegressor')
    plt.show()
test_KNeighborsRegressor_k_w(X_train,X_test,y_train,y_test)
p值的影响
def test_KNeighborsRegressor_k_p(*data):
    """Plot train/test R^2 of KNeighborsRegressor versus k for p in {1, 2, 10}.

    Sweeps 50 values of n_neighbors in [1, n_train) for each Minkowski
    exponent p and shows the resulting score curves.

    data : tuple of (X_train, X_test, y_train, y_test).
    """
    X_train, X_test, y_train, y_test = data
    # Integer k values evenly spaced in [1, n_train), endpoint excluded.
    Ks = np.linspace(1, y_train.size, endpoint=False, dtype='int')
    Ps = [1, 2, 10]
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    for P in Ps:
        train_scores, test_scores = [], []
        for K in Ks:
            estimator = neighbors.KNeighborsRegressor(p=P, n_neighbors=K)
            estimator.fit(X_train, y_train)
            test_scores.append(estimator.score(X_test, y_test))
            train_scores.append(estimator.score(X_train, y_train))
        ax.plot(Ks, test_scores, label='testing score:p=%d' % P)
        ax.plot(Ks, train_scores, label='training score:p=%d' % P)
    ax.legend(loc='best')
    ax.set_xlabel("K")
    ax.set_ylabel('score')
    ax.set_ylim(0, 1.05)
    ax.set_title('KNeighborsRegressor')
    plt.show()
test_KNeighborsRegressor_k_p(X_train,X_test,y_train,y_test)