import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import sklearn.model_selection as ms
import matplotlib.pyplot as plt
import numpy as np
# Use the SimHei font so the Chinese title and legend labels can be rendered.
plt.rcParams['font.sans-serif'] = ['SimHei']

# Load the local dataset.
data = pd.read_csv('data.csv')
# NOTE(review): `datas` is not referenced anywhere in the visible script; it is
# kept so that anything relying on this module-level name keeps working.
datas = np.array(data)

# Only two feature columns are used, so the decision regions can be drawn in 2-D.
x = np.array(data[['radius_mean', 'texture_mean']])
# Encode the label column numerically: 'M' -> 1, 'B' -> 0.
data['diagnosis'] = data['diagnosis'].map({'M': 1, 'B': 0})
y = np.array(data['diagnosis'])

# Split into training and test sets (20% held out, fixed seed for repeatability).
X_train, X_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=82
)

# Fit a support vector classifier with a linear kernel.
model = SVC(kernel='linear')
model.fit(X_train, y_train)

# Accuracy on the held-out test set, computed two ways: via the estimator's
# own score() and via accuracy_score on explicit predictions.
acc = model.score(X_test, y_test)
y_pred1 = model.predict(X_test)
print("支持向量机测试集得分:", acc)
# accuracy_score takes (y_true, y_pred) in that order.
print("支持向量机预测精准度:", accuracy_score(y_test, y_pred1))

# Plot range: the extent of each feature padded by 1 on both sides.
x_min, x_max = x[:, 0].min() - 1, x[:, 0].max() + 1
y_min, y_max = x[:, 1].min() - 1, x[:, 1].max() + 1
n = 500  # grid resolution per axis; higher values give a smoother boundary

# Build an n-by-n coordinate grid that spans the whole plot range.
grid_x, grid_y = np.meshgrid(
    np.linspace(x_min, x_max, n),
    np.linspace(y_min, y_max, n),
)
# Flatten the grid into an (n*n, 2) array of points, predict a class for each
# point, then reshape the predictions back onto the grid.
mesh_x = np.column_stack((grid_x.ravel(), grid_y.ravel()))
pred_mesh_y = model.predict(mesh_x)
grid_z = pred_mesh_y.reshape(grid_x.shape)

# Shade the predicted class regions. The mesh carries no label so that the
# legend lists only the two scatter classes.
plt.pcolormesh(grid_x, grid_y, grid_z, cmap='gray')
# Overlay every sample, coloured by its true class.
plt.scatter(x[y == 0, 0], x[y == 0, 1], s=30, c='r', label='良性乳腺癌')
plt.scatter(x[y == 1, 0], x[y == 1, 1], s=30, c='g', label='恶性乳腺癌')
plt.title('良恶性乳腺癌-支持向量机', fontsize=16)
plt.xlabel('radius_mean', fontsize=12)
plt.ylabel('texture_mean', fontsize=12)
plt.xlim(x_min, x_max)
plt.ylim(y_min, y_max)
plt.legend()
plt.show()
# Sample output:
# 支持向量机测试集得分: 0.9210526315789473
# 支持向量机预测精准度: 0.9210526315789473
# For reference only: adjust the algorithm parameters yourself as needed.