from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.preprocessing import label_binarize, StandardScaler
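# The implementation is as follows:
# Use scikit-learn's KNN to classify the iris dataset and find the best number of neighbors (hyperparameter).
# Load the data and import the required libraries.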
from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.preprocessing import label_binarize, StandardScaler
# Train a KNN classifier on the iris dataset, tune n_neighbors with a
# cross-validated grid search, and report accuracy on a held-out test set.
df = load_iris()
x = df.data
y = df.target

# Split BEFORE scaling so the test set stays unseen during preprocessing.
# A fixed random_state makes the printed scores reproducible, and stratify
# keeps the class proportions the same in the train and test splits.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, random_state=42, stratify=y
)

# Feature scaling: fit the scaler on the training data only, then apply the
# same transformation to the test data, so no test-set statistics leak into
# the model.
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

# Grid search with 5-fold cross-validation over the number of neighbors.
knn = KNeighborsClassifier()
model = GridSearchCV(knn, param_grid={'n_neighbors': [5, 6, 7, 8, 9]}, cv=5)
model.fit(x_train, y_train)

# Print the tuned model's accuracy on the test set and the best hyperparameters.
print(model.score(x_test, y_test))
print(model.best_params_)

# GridSearchCV (refit=True by default) has already retrained a classifier
# with the best parameters on the whole training set, so reuse it instead of
# fitting a second, identical model.
model = model.best_estimator_
y_pre = model.predict(x_test)

# Print the final model's accuracy on the test set.
print(model.score(x_test, y_test))