from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier as KNN
from sklearn.metrics import accuracy_score
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load the data (a CSV with radius_mean, texture_mean and a diagnosis column)
data = pd.read_csv('data.csv')
# Use two features as the input matrix
x = np.array(data[['radius_mean', 'texture_mean']])
# Encode the diagnosis label: M (malignant) -> 1, B (benign) -> 0
data['diagnosis'] = data['diagnosis'].map({'M': 1, 'B': 0})
y = np.array(data['diagnosis'])
# Split into training and test sets (70% train / 30% test)
X_train, X_test, y_train, y_test = \
    train_test_split(x, y, test_size=0.3, random_state=24)
# Fit the model with k = 3
knn = KNN(n_neighbors=3)
knn.fit(X_train, y_train)
acc = knn.score(X_test, y_test)     # mean accuracy on the test set
acc1 = knn.score(X_train, y_train)  # mean accuracy on the training set
# Model prediction
pred_label = knn.predict(X_test)
print(f'Predicted labels: {pred_label}')
print(f'True labels: {y_test}')
# Probability estimates for a new sample: the fraction of the k nearest
# neighbours belonging to each class, in the order [benign (0), malignant (1)]
P_x1 = knn.predict_proba([[0.9, 0.3]])
print(P_x1)
# Mean accuracy of the predicted labels against the true test labels,
# i.e. the model's test score
accuracy = accuracy_score(y_test, pred_label)
print("KNN test-set score:", acc)
print("KNN training-set score:", acc1)
print(f'Model prediction accuracy: {accuracy}')
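
The choice of k is the main hyperparameter of the model. As a rough sketch (an addition to the script above, reusing X_train, X_test, y_train, y_test and the matplotlib import), the loop below scores a range of k values on the same split and plots the test accuracy:

ks = range(1, 21)
test_scores = []
for k in ks:
    # Refit the classifier for each candidate k and record its test accuracy
    model = KNN(n_neighbors=k).fit(X_train, y_train)
    test_scores.append(model.score(X_test, y_test))

plt.plot(list(ks), test_scores, marker='o')
plt.xlabel('k (number of neighbours)')
plt.ylabel('test-set accuracy')
plt.title('Choosing k for the KNN classifier')
plt.show()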
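
Because KNN relies on Euclidean distances, features measured on very different scales can dominate the neighbour search. A minimal sketch of adding standardisation with scikit-learn's StandardScaler and Pipeline (not part of the original post; it reuses the split from above):

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

scaled_knn = Pipeline([
    ('scale', StandardScaler()),  # rescale both features to zero mean, unit variance
    ('knn', KNN(n_neighbors=3)),
])
scaled_knn.fit(X_train, y_train)
print("Scaled KNN test-set score:", scaled_knn.score(X_test, y_test))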