from sklearn import datasets
from sklearn.model_selection import train_test_split, GridSearchCV
# Load the iris dataset.
iris = datasets.load_iris()
# Pull out the feature matrix and the class labels in one step.
X, y = iris.data, iris.target
# Hold out 20% of the samples as a test set; the fixed random_state keeps the
# split reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
# Report the shape of the feature matrix in each split.
print("训练集特征形状:", X_train.shape)
print("测试集特征形状:", X_test.shape)
# 导入相应模块,创建算法并训练
from sklearn.neighbors import KNeighborsClassifier
import numpy as np
# Base K-nearest-neighbours estimator whose n_neighbors is tuned below.
knn = KNeighborsClassifier()
# Candidate values of K (number of neighbours): 1 through 30 inclusive.
param_grid = dict(n_neighbors=range(1, 31))
# Score every candidate by 5-fold cross-validated accuracy on the training
# split; fit() returns the search object itself, so the call can be chained.
grid_search = GridSearchCV(
    estimator=knn,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
).fit(X_train, y_train)
# Read back the winning K and report it.
best_k = grid_search.best_params_['n_neighbors']
print("最优K值:", best_k)
# Final model: a K-nearest-neighbours classifier using the best K, trained on
# the full training split. GridSearchCV is constructed without a refit
# argument, so its default (refit=True) applies and it has already fitted
# exactly this model; reuse it instead of training a duplicate from scratch.
knn = grid_search.best_estimator_
# 评估模型
from sklearn.metrics import accuracy_score
# Predict class labels for the held-out test samples.
y_pred = knn.predict(X_test)
# Classification accuracy on the test set (the model's score), computed from
# the true and predicted labels.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("K近邻算法模型的分类准确率:", accuracy)