# Baseline: fit a k-NN classifier on the digits dataset with hand-picked
# hyperparameters, before tuning them with a grid search below.
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

# Load the bundled handwritten-digits dataset: X holds the flattened pixel
# features, y the digit labels.
digits = datasets.load_digits()
X = digits.data
y = digits.target

# Hold out 20% for evaluation; fix the seed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=666
)

# Baseline model: k=4 neighbors, every neighbor's vote weighted equally.
sk_knn_clf = KNeighborsClassifier(n_neighbors=4, weights="uniform")
sk_knn_clf.fit(X_train, y_train)
sk_knn_clf.score(X_test, y_test)
# Hyperparameter grid for the k-NN search.  Two sub-grids are explored:
#  * uniform voting: only the neighbor count k varies;
#  * distance-weighted voting: k and the Minkowski exponent p both vary.
param_grid = [
    {
        'weights': ['uniform'],
        'n_neighbors': list(range(1, 11)),
    },
    {
        'weights': ['distance'],
        'n_neighbors': list(range(1, 11)),
        'p': list(range(1, 6)),
    },
]

knn_clf = KNeighborsClassifier()
from sklearn.model_selection import GridSearchCV

grid_search = GridSearchCV(knn_clf, param_grid)

# NOTE: the original used the IPython cell magic %%time, which is a syntax
# error in a plain .py file; time the fit with the standard library instead.
import time

_t0 = time.perf_counter()
grid_search.fit(X_train, y_train)
print(f"grid search took {time.perf_counter() - _t0:.1f}s")
运行结果:
Wall time: 1min 47s
GridSearchCV(estimator=KNeighborsClassifier(),
param_grid=[{'n_neighbors': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
'weights': ['uniform']},
{'n_neighbors': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
'p': [1, 2, 3, 4, 5], 'weights': ['distance']}])
# In Python "//" is floor division, not a comment marker — the original line
# floor-divided by an undefined name and would raise NameError.
grid_search.best_estimator_  # the best classifier (with its parameters) found by the search
运行结果:
KNeighborsClassifier(n_neighbors=1)
# "//" replaced with "#": floor division by an undefined name raised NameError.
grid_search.best_score_  # cross-validated accuracy of the best classifier
运行结果:0.9860820751064653
# "//" replaced with "#": floor division by an undefined name raised NameError.
grid_search.best_params_  # the winning parameter combination out of the search grids
运行结果:
{'n_neighbors': 1, 'weights': 'uniform'}
# Adopt the best model found by the search and evaluate it on the held-out set.
knn_clf = grid_search.best_estimator_
knn_clf.predict(X_test)  # predicted labels for the test samples
knn_clf.score(X_test, y_test)  # accuracy on the held-out 20%
运行结果:0.9833333333333333
# Repeat the search in parallel with progress output.
# n_jobs controls how many CPU cores the search may use; -1 means all cores.
# (The original comment had a typo: "n_iobs".)
# verbose makes the search print progress while it runs; larger values give
# more detailed output.
grid_search = GridSearchCV(knn_clf, param_grid, n_jobs=-1, verbose=2)

# %%time is an IPython-only cell magic and a syntax error in a plain .py file;
# use the standard library to time the fit instead.
import time

_t0 = time.perf_counter()
grid_search.fit(X_train, y_train)
print(f"parallel grid search took {time.perf_counter() - _t0:.1f}s")
运行结果:
GridSearchCV(estimator=KNeighborsClassifier(n_neighbors=1), n_jobs=-1,
param_grid=[{'n_neighbors': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
'weights': ['uniform']},
{'n_neighbors': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
'p': [1, 2, 3, 4, 5], 'weights': ['distance']}],
verbose=2)