参考文章
【深度学习】神经网络入门(最通俗的理解神经网络)
神经网络分类总结
机器学习算法(八):基于BP神经网络的预测(乳腺癌分类实践)
参考实现代码(基于 sklearn 的 MLPClassifier)
# Step1:库函数导入
# 导入乳腺癌数据集
from sklearn.datasets import load_breast_cancer
# 导入BP模型
from sklearn.neural_network import MLPClassifier
# 导入训练集分割方法
from sklearn.model_selection import train_test_split
# 导入预测指标计算函数和混淆矩阵计算函数
from sklearn.metrics import classification_report, confusion_matrix
# 导入绘图包
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
# Step 2: load the data
# load_breast_cancer() returns a dict-like container holding the feature
# matrix, the labels and descriptive metadata.
cancer = load_breast_cancer()

# Step 3: quick look at the dataset
# NOTE: len() of this container is its number of keys, not the number of
# samples; the sample count is the first entry of cancer_data.shape below.
print('breast_cancer数据集的长度为:', len(cancer))
print('breast_cancer数据集的类型为:', type(cancer))

# Pull out the feature matrix, the labels and the metadata
cancer_data = cancer['data']
print('cancer_data数据维度为:', cancer_data.shape)
cancer_target = cancer['target']
print('cancer_target标签维度为:', cancer_target.shape)
cancer_names = cancer['feature_names']
cancer_desc = cancer['DESCR']

# Split into training and test sets (80% / 20%).
# Features and labels go through ONE call so the rows are guaranteed to stay
# aligned; two separate calls only match as long as both use the same
# random_state and the same number of samples.
(cancer_data_train, cancer_data_test,
 cancer_target_train, cancer_target_test) = train_test_split(
    cancer_data, cancer_target, test_size=0.2, random_state=42)
# Step 4: train the BP (multi-layer perceptron) network on the breast-cancer
# data and predict with it.
# Three hidden layers of 64, 32 and 32 units with ReLU activations, optimised
# with Adam; random_state pins the run so it can be repeated.
# fit() hands back the estimator itself, so construction and training chain.
BP = MLPClassifier(
    hidden_layer_sizes=(64, 32, 32),
    activation='relu',
    solver='adam',
    alpha=1e-3,
    max_iter=1000,
    random_state=1,
).fit(cancer_data_train, cancer_target_train)

# Predicted labels for the training samples (used by the plots that follow)
predict_train_labels = BP.predict(cancer_data_train)
# Visualise the training samples in the space of their first three features:
# one figure coloured by the true labels, one by the model's predictions.
#
# The 3D axes are created through the figure (projection='3d'). An axes
# object built directly with Axes3D(fig, ...) is no longer attached to the
# figure automatically by recent Matplotlib releases, which leaves the
# window blank.
for point_labels, plot_title in (
    (cancer_target_train, 'True Label Map'),
    (predict_train_labels, 'Cancer with BP Model'),
):
    fig = plt.figure()
    # rect (0, 0, 1, 1): the axes fill the whole figure, as in the original
    ax = fig.add_axes((0, 0, 1, 1), projection='3d')
    ax.view_init(elev=20, azim=20)
    ax.scatter(
        cancer_data_train[:, 0],
        cancer_data_train[:, 1],
        cancer_data_train[:, 2],
        marker='o',
        c=point_labels,
    )
    plt.title(plot_title)
    plt.show()
# Accuracy on the held-out test set
test_accuracy = BP.score(cancer_data_test, cancer_target_test)
print(f"预测准确率: {test_accuracy:.4f}")

# Class predictions for the test set, shown next to the true labels
predict_test_labels = BP.predict(cancer_data_test)
print("测试集的真实标签:\n", cancer_target_test)
print("测试集的预测标签:\n", predict_test_labels)

# Per-class precision, recall and F1 score
print(classification_report(cancer_target_test, predict_test_labels))

# Confusion matrix: computed, printed, then drawn as a heat map
confusion_mat = confusion_matrix(cancer_target_test, predict_test_labels)
print(confusion_mat)

sns.set()
figure, ax = plt.subplots()
sns.heatmap(confusion_mat, annot=True, cmap="YlGnBu_r", ax=ax)
# Title, x axis = predicted class, y axis = true class
ax.set(title='confusion matrix', xlabel='predict', ylabel='true')
plt.show()
自己的代码(使用 PyTorch 实现)
'''
网络模型分类
'''
# Step1:库函数导入
# 导入乳腺癌数据集
from sklearn.datasets import load_breast_cancer
import torch
# 导入训练集分割方法
from sklearn.model_selection import train_test_split
# 导入预测指标计算函数和混淆矩阵计算函数
from sklearn.metrics import classification_report, confusion_matrix
# 导入绘图包
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
# Select the 'kaiti' font family for figure text.
# NOTE(review): assumes this font is installed on the machine — confirm.
matplotlib.rc("font", family='kaiti')

# Step 2: load the data
cancer = load_breast_cancer()

# Step 3: quick look at the dataset
# NOTE: len() of this container is its number of keys, not the number of
# samples; the sample count is the first entry of cancer_data.shape below.
print('breast_cancer数据集的长度为:', len(cancer))
print('breast_cancer数据集的类型为:', type(cancer))

# Pull out the feature matrix, the labels and the metadata
cancer_data = cancer['data']
print('cancer_data数据维度为:', cancer_data.shape)
cancer_target = cancer['target']
print('cancer_target标签维度为:', cancer_target.shape)
cancer_names = cancer['feature_names']
cancer_desc = cancer['DESCR']

# Reuse the arrays loaded above rather than calling load_breast_cancer()
# two more times just to read the same fields.
x = cancer_data
y = cancer_target

# Split into training and test sets (80% / 20%).
# The split is done on the NumPy arrays and the pieces are converted to
# tensors afterwards, so scikit-learn only ever handles NumPy data.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42)
# float32 features for the Linear layers, int64 class indices for the loss
x_train, x_test = (
    torch.as_tensor(part, dtype=torch.float32) for part in (x_train, x_test))
y_train, y_test = (
    torch.as_tensor(part, dtype=torch.int64) for part in (y_train, y_test))
# Build the model: 30 input features -> 50 -> 50 -> 2 output scores, with a
# sigmoid after each of the two hidden layers.
# NOTE(review): the features are fed in unscaled; standardising them first
# may help the sigmoid layers train — worth checking.
net = torch.nn.Sequential(
    torch.nn.Linear(30, 50),
    torch.nn.Sigmoid(),
    torch.nn.Linear(50, 50),
    torch.nn.Sigmoid(),
    torch.nn.Linear(50, 2),
)
optimizer = torch.optim.SGD(net.parameters(), lr=0.05)
# The loss is applied to the raw network outputs, so there is no softmax
# layer at the end of the model.
loss_func = torch.nn.CrossEntropyLoss()

# 4. Training: 500 gradient steps, each over the whole training set
for t in range(500):
    out = net(x_train)              # forward pass: inputs -> class scores
    loss = loss_func(out, y_train)  # compare the scores with the labels
    optimizer.zero_grad()           # clear gradients from the previous step
    loss.backward()                 # back-propagation: compute gradients
    optimizer.step()                # update the weights
# Predict on the training set.
# Inference runs under torch.no_grad() so no autograd graph is recorded, and
# the results can be turned into NumPy arrays without the legacy `.data`
# accessor.
with torch.no_grad():
    out = net(x_train)   # raw class scores; a softmax would give probabilities
pred = out.argmax(dim=1)  # index of the highest score = predicted class
target_train = y_train.numpy()
pred_train = pred.numpy()

# Visualise the training samples in the space of their first three features:
# one figure coloured by the true labels, one by the model's predictions.
#
# The 3D axes are created through the figure (projection='3d'). An axes
# object built directly with Axes3D(fig, ...) is no longer attached to the
# figure automatically by recent Matplotlib releases, which leaves the
# window blank.
train_features = x_train.numpy()
for point_labels, plot_title in (
    (target_train, 'True Label Map'),
    (pred_train, 'Cancer with BP Model'),
):
    fig = plt.figure()
    # rect (0, 0, 1, 1): the axes fill the whole figure, as in the original
    ax = fig.add_axes((0, 0, 1, 1), projection='3d')
    ax.view_init(elev=20, azim=20)
    ax.scatter(
        train_features[:, 0],
        train_features[:, 1],
        train_features[:, 2],
        marker='o',
        c=point_labels,
    )
    plt.title(plot_title)
    plt.show()
# 5. Evaluate on the test set.
# Inference runs under torch.no_grad() so no autograd graph is recorded, and
# the results can be turned into NumPy arrays without the legacy `.data`
# accessor.
with torch.no_grad():
    out = net(x_test)          # raw class scores for the test samples
prediction = out.argmax(dim=1)  # index of the highest score = predicted class
target_test = y_test.numpy()
pred_test = prediction.numpy()

# Accuracy = fraction of test samples whose prediction matches the label
accuracy = float((pred_test == target_test).mean())
print("准确率", accuracy)

# True labels next to the predicted labels
print("测试集的真实标签:\n", target_test)
print("测试集的预测标签:\n", pred_test)

# Per-class precision, recall and F1 score
print(classification_report(target_test, pred_test))

# Confusion matrix: computed, printed, then drawn as a heat map
confusion_mat = confusion_matrix(target_test, pred_test)
print(confusion_mat)

sns.set()
figure, ax = plt.subplots()
sns.heatmap(confusion_mat, cmap="YlGnBu_r", annot=True, ax=ax)
ax.set_title('confusion matrix')
ax.set_xlabel('predict')  # x axis: predicted class
ax.set_ylabel('true')     # y axis: true class
plt.show()