# 如有问题,欢迎大家批评指正,谢谢。
import numpy as np


class KNN:
    """Brute-force k-nearest-neighbours classifier.

    A query point is assigned the most frequent label among its ``k``
    closest training samples, where closeness is Euclidean distance.
    When several labels are equally frequent, the smallest label (in
    sorted order) wins.
    """

    def __init__(self, k):
        """Create a classifier that votes among ``k`` neighbours.

        Args:
            k: Number of neighbours consulted per prediction. If it exceeds
                the number of training samples, all samples vote.

        Raises:
            ValueError: If ``k`` is not a positive integer.
        """
        # A non-positive k would either leave no voters (k == 0) or silently
        # slice "all but the last |k|" neighbours (k < 0), so reject it early.
        if not isinstance(k, (int, np.integer)) or k < 1:
            raise ValueError(f"k must be a positive integer, got {k!r}")
        self.k = k

    def fit(self, X, y):
        """Store the training data; no model is built ahead of time.

        Args:
            X: Training samples, one row per sample.
            y: Label of each training sample, in the same order as ``X``.

        Raises:
            ValueError: If the training set is empty or ``X`` and ``y``
                have different lengths.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        if len(X) == 0:
            raise ValueError("training set must not be empty")
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same length, got {len(X)} and {len(y)}"
            )
        self.X_train = X
        self.y_train = y

    def euclidean_distance(self, x1, x2):
        """Return the Euclidean distance between two points."""
        x1 = np.asarray(x1)
        x2 = np.asarray(x2)
        return np.sqrt(np.sum((x1 - x2) ** 2))

    def predict(self, X):
        """Predict a label for every row of ``X``.

        Args:
            X: Query samples, one row per sample.

        Returns:
            A NumPy array holding one predicted label per query sample.
        """
        return np.array([self._predict(x) for x in np.asarray(X)])

    def _predict(self, x):
        """Return the majority label among the ``k`` nearest neighbours of ``x``."""
        # Re-wrap in case the attributes were assigned directly rather than
        # through fit(); this is a no-op for arrays.
        X_train = np.asarray(self.X_train)
        y_train = np.asarray(self.y_train)

        # Distance from the query point to every training point in one
        # vectorized pass (same formula as euclidean_distance).
        distances = np.sqrt(np.sum((X_train - np.asarray(x)) ** 2, axis=1))

        # Indices of the k closest training points. A stable sort keeps the
        # training-set order among equidistant points, so results are
        # deterministic.
        k_indices = np.argsort(distances, kind="stable")[:self.k]

        # Majority vote. np.unique returns labels in sorted order and argmax
        # returns the first maximum, so ties go to the smallest label. Unlike
        # np.bincount, this also works for negative, float and string labels.
        labels, counts = np.unique(y_train[k_indices], return_counts=True)
        return labels[np.argmax(counts)]


if __name__ == "__main__":
    # Demo-only dependencies are imported here so that importing KNN from
    # this module requires nothing beyond NumPy. When the file is run as a
    # script these names are still bound at module level.
    from sklearn import datasets
    from sklearn.model_selection import train_test_split
    # NOTE(review): matplotlib recommends matplotlib.pyplot over pylab;
    # only pyplot-level functions are used below.
    import matplotlib.pylab as plt

    # Load the Iris data set.
    iris = datasets.load_iris()
    X = iris.data
    y = iris.target

    # Split into training and test sets.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # Create the classifier with the chosen k and hand it the training data.
    knn = KNN(k=3)
    knn.fit(X_train, y_train)

    # Predict on the test set and report the accuracy as a percentage.
    print("ytest", y_test)
    predictions = knn.predict(X_test)
    correct = int((y_test == predictions).sum())
    acc = 100 * correct / len(y_test)
    print(f"Accuracy: {acc:.2f}%")

    # Plot feature columns 2 and 3 of the training samples, one colour per
    # class.
    plt.figure(figsize=(8, 6))
    for label, color in enumerate(("g", "y", "b")):
        mask = y_train == label
        plt.scatter(
            X_train[mask, 2],
            X_train[mask, 3],
            color=color,
            label=f"Class {label}",
        )
    # The red points are the test samples themselves; they are not coloured
    # by their predicted class.
    plt.scatter(X_test[:, 2], X_test[:, 3], color='r', label='Predict')
    plt.title('Iris Data Visualization')
    plt.xlabel('Petal Length')
    plt.ylabel('Petal Width')
    plt.legend()
    plt.show()