# 导入所需库
from sklearn.datasets import load_iris # 加载鸢尾花数据集
from sklearn.model_selection import train_test_split # 划分训练集和测试集
from sklearn.neighbors import KNeighborsClassifier # K近邻分类器
import matplotlib.pyplot as plt # 绘图库
# Configure matplotlib for the Chinese axis labels and titles used further down.
plt.rcParams.update({
    'font.family': 'KaiTi',       # use the KaiTi typeface for all text
    'axes.unicode_minus': False,  # works around minus signs on the axes not displaying
})

# Load the iris dataset and pull out the feature matrix and the label vector.
dataset = load_iris()
x, y = dataset.data, dataset.target

# Hold out 25% of the samples as the test set. No random_state is passed,
# so the split (and therefore the printed accuracy) varies between runs.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25)

# Build a K-nearest-neighbours classifier with k=3 and train it on the
# training split (fit() returns the estimator itself, so chaining is safe).
model = KNeighborsClassifier(n_neighbors=3).fit(x_train, y_train)

# Predicted class labels for the held-out samples.
pred_y = model.predict(x_test)

# Print the classifier's accuracy on the test set.
accuracy = model.score(x_test, y_test)
print(accuracy)
def _scatter_by_class(ax, features, labels, styles):
    """Draw one labelled scatter series per class on *ax*.

    Args:
        ax: Matplotlib axes to draw on.
        features: 2-D feature array; column 0 is plotted on the x axis and
            column 2 on the y axis.
        labels: 1-D array of integer class ids, one per row of *features*.
        styles: Sequence of ``(colour, legend_label)`` pairs; the pair at
            position ``k`` is used for the samples whose label equals ``k``.
    """
    for class_id, (colour, legend_label) in enumerate(styles):
        mask = labels == class_id
        if not mask.any():
            # A class with no samples draws nothing and must not get a
            # legend entry.
            continue
        # One scatter call per class: every point of the class shares the
        # same colour/marker, and the class gets exactly one legend entry.
        ax.scatter(features[mask, 0], features[mask, 2],
                   c=colour, marker='o', label=legend_label)


# Side-by-side comparison of predicted vs. true classes on the test set.
fig = plt.figure(figsize=(10, 6))

# Left subplot: test samples coloured by the class the model predicted.
ax1 = fig.add_subplot(121)
_scatter_by_class(
    ax1, x_test, pred_y,
    [('black', 'class0'), ('m', 'class1'), ('purple', 'class2')],
)
ax1.set_xlabel('花萼长度', fontsize=14)
ax1.set_ylabel('花瓣长度', fontsize=14)
ax1.set_title('预测分类预测情况', fontsize=18, color='red')
ax1.legend()

# Right subplot: the same samples coloured by their true class.
ax2 = fig.add_subplot(122)
_scatter_by_class(
    ax2, x_test, y_test,
    [('red', 'class00'), ('green', 'class11'), ('blue', 'class22')],
)
ax2.set_xlabel('花萼长度', fontsize=14)
ax2.set_ylabel('花瓣长度', fontsize=14)
ax2.set_title('真实值分类预测情况', fontsize=18, color='red')
ax2.legend()

# Show the figure.
plt.show()
# KNN algorithm code example
# (Web page residue: "latest recommended article published 2024-07-29 14:37:54")