The code is as follows:
# Load the iris dataset
from sklearn import datasets
from sklearn.neighbors import KNeighborsClassifier  # KNN model
from sklearn.metrics import accuracy_score  # accuracy evaluation
from sklearn.model_selection import train_test_split  # train/test split
import matplotlib.pyplot as plt

iris = datasets.load_iris()
# x holds the input features; y holds the target labels
x = iris.data
y = iris.target
def basic_test():
    """
    Basic inspection of the dataset.
    :return: None
    """
    # Show the iris feature matrix
    print(iris.data)
    # Feature names
    print(iris.feature_names)
    # Target labels
    print(iris.target)
    # Target class names
    print(iris.target_names)
    # Confirm the dimensions
    print(iris.data.shape)
    print(iris.target.shape)
    print(x)
    print(y)
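
# A small extra sketch (not in the original listing; the helper name is illustrative):
# with scikit-learn >= 0.23 and pandas installed, load_iris(as_frame=True) returns the
# data as a DataFrame, which makes quick inspection easier.
def basic_test_frame():
    iris_frame = datasets.load_iris(as_frame=True)
    # .frame combines the feature columns and the target column in one DataFrame
    print(iris_frame.frame.head())
    print(iris_frame.frame.describe())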
def KNN_example():
    """
    KNN (k-nearest neighbors) classification example.
    :return: None
    """
    # Create the estimator instance
    knn = KNeighborsClassifier(n_neighbors=1)
    print(knn)
    # Train the model
    knn.fit(x, y)
    # Predict on new samples
    print(knn.predict([[1, 2, 3, 4]]))
    x_test = [[1, 2, 3, 4], [2, 4, 1, 2]]
    print(knn.predict(x_test))
    # Build a second KNN model with k=5
    knn_5 = KNeighborsClassifier(n_neighbors=5)
    knn_5.fit(x, y)
    print(knn_5.predict(x_test))
    # Confirm the model's configuration
    print(knn_5)
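
# A small follow-up sketch (not in the original listing; the helper name is illustrative):
# predict() returns integer class labels, so indexing iris.target_names with the result
# gives readable species names instead of 0/1/2.
def KNN_named_prediction():
    knn_5 = KNeighborsClassifier(n_neighbors=5)
    knn_5.fit(x, y)
    pred = knn_5.predict([[1, 2, 3, 4], [2, 4, 1, 2]])
    print(iris.target_names[pred])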
def KNN_assess():
    """
    Model training and in-sample evaluation.
    :return: None
    """
    knn_5 = KNeighborsClassifier(n_neighbors=5)
    knn_5.fit(x, y)
    # Predict on the same data used for training
    y_pred = knn_5.predict(x)
    print(y_pred)
    print(y_pred.shape)
    # Accuracy on the training data
    print(accuracy_score(y, y_pred))
    # Repeat the prediction with k=1
    knn_1 = KNeighborsClassifier(n_neighbors=1)
    knn_1.fit(x, y)
    y_pred = knn_1.predict(x)
    print(accuracy_score(y, y_pred))
    # Compare the accuracy of the two k values to look for the better one
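
# A cautionary sketch (not in the original listing; the helper name is illustrative):
# accuracy measured on the training data always favours k=1, which simply memorizes
# every sample, so it is a poor way to choose k. Cross-validation gives a fairer comparison.
def KNN_cross_validation():
    from sklearn.model_selection import cross_val_score
    for k in (1, 5):
        knn = KNeighborsClassifier(n_neighbors=k)
        # mean 5-fold cross-validated accuracy on the full dataset
        scores = cross_val_score(knn, x, y, cv=5)
        print(k, scores.mean())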
def data_split():
    """
    Train/test data split.
    :return: None
    """
    print(x.shape)
    print(y.shape)
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.4)  # 40% of the data is held out for testing
    # Confirm the dimensions after the split
    print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)
    # Train and evaluate on the split data
    knn_5_s = KNeighborsClassifier(n_neighbors=5)
    knn_5_s.fit(x_train, y_train)
    y_train_pred = knn_5_s.predict(x_train)
    y_test_pred = knn_5_s.predict(x_test)
    # Accuracy of the model after the split
    print("k=5 train:", accuracy_score(y_train, y_train_pred))
    print("k=5 test:", accuracy_score(y_test, y_test_pred))
    # Repeat the same evaluation with k=1
    # Train and evaluate on the split data
    knn_1_s = KNeighborsClassifier(n_neighbors=1)
    knn_1_s.fit(x_train, y_train)
    y_train_pred = knn_1_s.predict(x_train)
    y_test_pred = knn_1_s.predict(x_test)
    # Accuracy of the model after the split
    print("k=1 train:", accuracy_score(y_train, y_train_pred))
    print("k=1 test:", accuracy_score(y_test, y_test_pred))
    # Search for the best value of k
    k_range = list(range(1, 26))
    score_train = []
    score_test = []
    for k in k_range:
        knn = KNeighborsClassifier(n_neighbors=k)
        knn.fit(x_train, y_train)
        y_train_pred = knn.predict(x_train)
        y_test_pred = knn.predict(x_test)
        score_train.append(accuracy_score(y_train, y_train_pred))
        score_test.append(accuracy_score(y_test, y_test_pred))
    for k in k_range:
        print(k, "train accuracy:", score_train[k - 1])
        print(k, "test accuracy:", score_test[k - 1])
    # Visualization
    # Relationship between k and training-set accuracy
    plt.figure()
    plt.plot(k_range, score_train)
    plt.xlabel("K (KNN model)")
    plt.ylabel("Training Accuracy")
    # Same plot for the test set
    plt.figure()
    plt.plot(k_range, score_test)
    plt.xlabel("K (KNN model)")
    plt.ylabel("Testing Accuracy")
    plt.show()
    # Predict a new sample with k=11
    knn_11 = KNeighborsClassifier(n_neighbors=11)
    knn_11.fit(x_train, y_train)
    print(knn_11.predict([[1, 2, 3, 4]]))
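
# An automated sketch of the same k search (not in the original listing; the helper name
# is illustrative): GridSearchCV cross-validates every candidate k and reports the best
# one, which avoids relying on a single random train/test split.
def knn_grid_search():
    from sklearn.model_selection import GridSearchCV
    param_grid = {"n_neighbors": list(range(1, 26))}
    grid = GridSearchCV(KNeighborsClassifier(), param_grid, cv=5)
    grid.fit(x, y)
    print(grid.best_params_)
    print(grid.best_score_)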
Finally, the plot of k versus test-set accuracy is shown below: