from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Use the SimHei font so the Chinese title and legend labels used in the plot
# render as text instead of missing-glyph boxes.
plt.rcParams['font.sans-serif'] = ['SimHei']

# Load the dataset; this script reads its 'radius_mean', 'texture_mean' and
# 'diagnosis' columns.
data = pd.read_csv('data.csv')
# Array snapshot of the whole table, taken before 'diagnosis' is re-encoded.
# Nothing in the visible script reads it; kept in case later code does.
datas = np.array(data)

# Two-column feature matrix, one row per sample.
x = np.array(data[['radius_mean', 'texture_mean']])

# Encode the diagnosis as integers: 'M' -> 1, 'B' -> 0.
# Series.map turns every value missing from the mapping into NaN, which would
# quietly make the labels float and surface far from the cause, so reject
# unexpected labels here with an explicit message.
encoded_diagnosis = data['diagnosis'].map({'M': 1, 'B': 0})
unmapped = encoded_diagnosis.isna()
if unmapped.any():
    unexpected = data.loc[unmapped, 'diagnosis'].unique().tolist()
    raise ValueError(
        f"'diagnosis' must contain only 'M' or 'B'; found {unexpected}"
    )
data['diagnosis'] = encoded_diagnosis
y = np.array(data['diagnosis'])

# Hold out 20% of the samples for testing; the fixed random_state makes the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=45
)
# Gaussian naive Bayes classifier.
from sklearn.naive_bayes import GaussianNB

# Create the classifier with the library's default settings.
clf = GaussianNB()
# Fit on the training split.
clf.fit(X_train, y_train)
# Classifier's own score on the held-out test split.
acc = clf.score(X_test, y_test)

# Predict a label for every test sample and print the predictions next to
# the true labels for a quick visual comparison.
pred = clf.predict(X_test)
print(f'预测结果:{pred}')
print(f'真实结果:{y_test}')
print(f'长度 = {pred.size}')

# Report the test result twice: the classifier's score and accuracy_score
# computed from the predictions. accuracy_score is already imported at the
# top of the file; its documented argument order is (y_true, y_pred).
print("贝叶斯测试集得分:",acc)
print(f'贝叶斯分类器预测精确度:{accuracy_score(y_test, pred)}')
# Draw the classifier's decision regions with the samples on top.

# Plot window: the data's extent in each feature, padded by 1 on every side.
l, r = x[:, 0].min() - 1, x[:, 0].max() + 1
b, t = x[:, 1].min() - 1, x[:, 1].max() + 1
n = 500  # grid points per axis; a finer grid gives a smoother boundary

# Expand the two coordinate vectors into coordinate matrices covering the
# whole plot window.
grid_x, grid_y = np.meshgrid(np.linspace(l, r, n), np.linspace(b, t, n))

# Classify every grid point with the fitted classifier, then fold the flat
# predictions back into the grid's shape.
mesh_x = np.column_stack((grid_x.ravel(), grid_y.ravel()))
pred_mesh_y = clf.predict(mesh_x)
grid_z = pred_mesh_y.reshape(grid_x.shape)

# Shade the background by predicted class. The mesh gets no label, so the
# legend lists only the two sample classes.
plt.pcolormesh(grid_x, grid_y, grid_z, cmap='gray')
# Overlay all samples, coloured by their true class (0 = benign, 1 = malignant).
plt.scatter(x[y == 0, 0], x[y == 0, 1], s=30, c='r', label='良性乳腺癌')
plt.scatter(x[y == 1, 0], x[y == 1, 1], s=30, c='g', label='恶性乳腺癌')
plt.title('良恶性乳腺癌-贝叶斯', fontsize=16)
plt.xlabel('radius_mean', fontsize=12)
plt.ylabel('texture_mean', fontsize=12)
plt.xlim(l, r)
plt.ylim(b, t)
plt.legend()
plt.show()
预测结果:[0 1 1 0 0 0 0 0 0 0 0 1 0 0 0 1 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0
0 0 1 0 0 0 1 0 0 0 1 0 0 0 0 1 0 0 1 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 1 1 0
0 1 0 1 1 0 1 0 1 0 1 0 1 0 1 1 1 1 0 0 1 1 0 0 0 0 0 0 1 1 0 1 0 0 0 0 0
0 1 1]
真实结果:[0 1 1 0 0 0 0 0 0 0 0 1 0 0 0 1 1 0 0 0 0 0 0 0 1 0 0 0 1 0 1 0 0 1 0 0 0
0 1 1 1 0 0 1 0 0 0 1 0 0 0 0 1 0 0 1 0 0 1 0 0 1 0 0 0 0 1 0 0 1 1 1 1 0
0 1 0 1 0 0 1 1 1 0 1 0 1 0 1 1 1 0 0 0 1 1 1 0 0 0 0 0 1 1 0 1 1 0 0 0 0
0 1 1]
长度 = 114
贝叶斯测试集得分: 0.8947368421052632
贝叶斯分类器预测精确度:0.8947368421052632
注意:仅供参考