"""Train and evaluate a k-nearest-neighbours classifier on ``data.csv``.

The script reads the ``radius_mean`` and ``texture_mean`` columns as features
and the ``diagnosis`` column ('M' / 'B') as the label, fits the classifier on
a 70/30 train/test split, and prints the predictions and accuracy scores.
"""
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier as KNN
from sklearn.metrics import accuracy_score
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# SimHei is a CJK font; presumably configured so Chinese labels render in
# matplotlib figures (no plotting happens in this part of the script).
plt.rcParams['font.sans-serif'] = ['SimHei']


# Load the dataset and select the two feature columns.
data = pd.read_csv('data.csv')
x = np.array(data[['radius_mean', 'texture_mean']])

# Encode the label: 'M' -> 1, 'B' -> 0.  Series.map turns every value that is
# missing from the mapping into NaN, so reject unexpected labels explicitly
# instead of letting NaN reach the classifier.
labels = data['diagnosis'].map({'M': 1, 'B': 0})
if labels.isna().any():
    raise ValueError("diagnosis column contains values other than 'M' or 'B'")
data['diagnosis'] = labels
y = np.array(data['diagnosis'])

# Hold out 30% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=24)

# Fit the model with k = 3.  An odd k is used because, with two classes, an
# even k allows 1-1 tie votes between the neighbours.
knn = KNN(n_neighbors=3)
knn.fit(X_train, y_train)
acc = knn.score(X_test, y_test)      # mean accuracy on the test split
acc1 = knn.score(X_train, y_train)   # mean accuracy on the training split

# Predict labels for the held-out rows and show them next to the truth.
pred_label = knn.predict(X_test)
print(f'预测结果:{pred_label}')
print(f'真实结果:{y_test}')

# Class-probability estimates for a single sample point
# (radius_mean=0.9, texture_mean=0.3).
P_x1 = knn.predict_proba([[0.9, .3]])
print(P_x1)

# Accuracy of the predictions; accuracy_score expects (y_true, y_pred).
accuracy = accuracy_score(y_test, pred_label)
print("k-近邻测试集得分:", acc)
print("k-近邻训练集得分:", acc1)
print(f'模型预测精确度:{accuracy}')
# K-近邻算法 (k-nearest neighbours algorithm)
# 最新推荐文章于 2025-04-03 19:48:35 发布