先上代码:
from matplotlib import pyplot as plt
from sklearn.datasets import load_breast_cancer
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
import numpy as np
import seaborn as sns
# Load scikit-learn's built-in breast-cancer dataset (features in .data, labels in .target).
cancers = load_breast_cancer()
def sign(w, x, b):
    """Return the sign of the linear score ``w . x + b``.

    Args:
        w: Weight vector.
        x: Feature vector of one sample (same length as ``w``).
        b: Scalar bias.

    Returns:
        ``np.sign`` of the score: -1, 0 or 1 (0 means the sample lies
        exactly on the decision boundary).
    """
    return np.sign(np.dot(w, x) + b)
def train1(feature, label, w, b, num, learning_rate):
    """Train a perceptron with the per-sample update rule.

    Each epoch visits every sample in order. A sample counts as
    misclassified when ``sign(w . x + b) * y <= 0``; in that case the
    parameters are moved towards it:
    ``w += learning_rate * y * x`` and ``b += learning_rate * y``.

    Args:
        feature: 2-D NumPy array, one sample per row.
        label: 1-D array of labels, one per row of ``feature``. Labels are
            expected to be -1 / +1; a label of 0 makes both update terms
            zero, so such a sample could never be corrected.
        w: Initial weight vector (one entry per feature column).
        b: Initial scalar bias.
        num: Maximum number of epochs (full passes over the data).
        learning_rate: Step size of each update.

    Returns:
        Tuple ``(w, b)`` with both values rounded to 2 decimal places.
    """
    # Before any epoch has run, no sample has been confirmed as correctly
    # classified; this also keeps `count` defined when num == 0.
    count = feature.shape[0]
    for _ in range(num):
        count = 0  # misclassified samples seen in this epoch
        for x_i, y_i in zip(feature, label):
            # <= 0 means misclassified (or on the boundary); > 0 means correct
            if np.sign(np.dot(w, x_i) + b) * y_i <= 0:
                print("分类错误!误分类点为:", x_i)
                w = w + learning_rate * y_i * x_i
                b = b + learning_rate * y_i
                count += 1
        if count == 0:
            # A full pass without mistakes: further epochs would change nothing.
            break
    w = np.around(w, 2)  # keep 2 decimal places
    b = np.around(b, 2)
    print("最终权重 w:", w, "最终偏置 b:", b, "分类错误个数", count)
    return w, b
def heatmap(cancers):
    """Train a perceptron on the dataset and plot its test confusion matrix.

    The data is split 80/20 into train and test sets, labels 0 are remapped
    to -1, a perceptron is trained with ``train1`` for at most 100 epochs,
    and the confusion matrix of the test predictions is shown as a heatmap.

    Args:
        cancers: Dataset object exposing ``data`` (feature matrix) and
            ``target`` (labels), as returned by ``load_breast_cancer``.
    """
    x = cancers.data  # feature matrix
    y = cancers.target  # labels
    learning_rate = 0.1
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)
    # With labels 0/1 the weight update is a no-op for label 0
    # (learning_rate * 0 * x == 0), so map 0 -> -1 before training.
    y_test = np.where(y_test == 0, -1, y_test)
    y_train = np.where(y_train == 0, -1, y_train)
    print(y_train)
    weight = np.zeros(x.shape[1])
    bias = 0
    w, b = train1(x_train, y_train, weight, bias, 100, learning_rate)
    # Predict every test sample at once: sign of the linear score.
    y_testhat = np.sign(np.dot(x_test, w) + b)
    result = confusion_matrix(y_test, y_testhat)
    sns.set(font_scale=1.5)
    # A heatmap visualises the relationships in 2-D data
    sns.heatmap(data=result, square=True)
    plt.xlabel('Predict label', fontsize=18)
    plt.ylabel('True label', fontsize=18)
    plt.show()  # display the heatmap
# Script entry: train on the loaded dataset and show the confusion-matrix heatmap.
heatmap(cancers)
代码解释:
1.在train1函数中,理想的终止条件是一轮遍历中不再出现误分类样本;但实际运行了很久也没有达到该条件(可能是因为该数据集在原始特征下并非线性可分,感知机无法收敛),因此改为设置固定的最大迭代次数。
2.在获取标签后将0换成了-1;否则标签为0的样本被误分类时,更新量 learning_rate * label[i] * feature[i] 和 learning_rate * label[i] 都恒为0,权重和偏置不会发生任何变化。
运行结果: