import numpy as np
import torchvision
from torch.utils.data import DataLoader
class BP():
    def __init__(self, nodes_num):
        self.nodes_num = nodes_num          # number of nodes in each layer
        self.layout_nums = len(nodes_num)   # number of layers
        # weights[i] has shape (n_out, n_in); bias[i] has shape (1, n_out)
        self.weights = [np.random.randn(n, m) for (m, n) in zip(self.nodes_num[:-1], self.nodes_num[1:])]
        self.bias = [np.random.randn(1, n) for n in self.nodes_num[1:]]
        self.before_activation = []         # cached pre-activations z, one entry per layer
        self.after_activation = []          # cached activations a = sigmoid(z), one entry per layer
        self.loss_func = "MSE"
        self.eta = 3                        # learning rate

    def sigmoid(self, x):
        return 1.0 / (1.0 + np.exp(-x))

    def sigmoid_grad(self, x):
        # derivative of the sigmoid: sigmoid(x) * (1 - sigmoid(x))
        return self.sigmoid(x) * (1.0 - self.sigmoid(x))
    def forward(self, x):
        # Forward pass: cache the pre-activations (z) and activations (a)
        # for backprop. The input x itself is stored as the layer-0 "activation".
        value = x
        self.before_activation = [x]
        self.after_activation = [x]
        for (weight, bias) in zip(self.weights, self.bias):
            z = np.matmul(weight, value.T).T + bias
            value = self.sigmoid(z)
            self.before_activation.append(z)
            self.after_activation.append(value)
        return value
    def backwards(self, y):
        # Backward pass for one sample; y is the ground-truth one-hot label.
        # forward() must have been called first so the caches are populated.
        delta_new_weight = [np.zeros(w.shape) for w in self.weights]
        delta_new_bias = [np.zeros(b.shape) for b in self.bias]
        if self.loss_func == "MSE":
            # Output-layer error: dL/dz = -(y - a) * sigmoid'(z)
            delta = -(y - self.after_activation[-1]) * self.sigmoid_grad(self.before_activation[-1])
            # delta = -(y - self.after_activation[-1])  --> if the last layer were left unactivated
        # Gradient for the last layer's weights
        delta_new_weight[-1] = np.matmul(delta.T, self.after_activation[-2])
        delta_new_bias[-1] = delta
        for i in range(2, self.layout_nums):
            # Careful with the indices here (printing the shapes helps):
            # the activation caches have one more element than self.weights.
            delta = np.matmul(self.weights[-i + 1].T, delta.T).T * self.sigmoid_grad(self.before_activation[-i])
            delta_new_weight[-i] = np.matmul(delta.T, self.after_activation[-i - 1])
            delta_new_bias[-i] = delta
        return delta_new_weight, delta_new_bias
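As a quick sanity check (a sketch, not part of the original script), the analytic gradients from backwards() can be compared against a central finite difference of the MSE loss on a tiny random network; mse_loss below is a hypothetical helper added just for this check:

# Sketch: numerical gradient check for backwards(), assuming the BP class above.
np.random.seed(0)
net = BP([4, 3, 2])                   # tiny network so the check is cheap
x = np.random.randn(1, 4)
y = np.array([[1.0, 0.0]])

def mse_loss(net, x, y):              # hypothetical helper: 0.5 * ||y - output||^2
    out = net.forward(x)
    return 0.5 * np.sum((y - out) ** 2)

net.forward(x)                        # populate the cached activations
grad_w, grad_b = net.backwards(y)     # analytic gradients

eps = 1e-6
i, j = 1, 2                           # probe one arbitrary weight entry
net.weights[0][i, j] += eps
loss_plus = mse_loss(net, x, y)
net.weights[0][i, j] -= 2 * eps
loss_minus = mse_loss(net, x, y)
net.weights[0][i, j] += eps           # restore the weight
numeric = (loss_plus - loss_minus) / (2 * eps)
print(numeric, grad_w[0][i, j])       # the two numbers should agree closely

If the two printed values match to several decimal places, the index bookkeeping in backwards() is consistent with the forward pass.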
Load the data with torchvision:
myBP = BP([784,30,10])
dataset_train = torchvision.datasets.MNIST(root="./MNIST",train=True,transform=torchvision.transforms.ToTensor(),download=True)
dataset_test = torchvision.datasets.MNIST(root="./MNIST",train=False,transform=torchvision.transforms.ToTensor(),download=True)
def one_hot_label(label):
    # Convert a class index into a 1x10 one-hot row vector
    mlabel = np.array([[0.0] * 10])
    mlabel[0][label] = 1.0
    return mlabel
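For example (illustration only), label 3 becomes a row vector with a single 1.0:

print(one_hot_label(3))   # [[0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]]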
train_loader = DataLoader(dataset=dataset_train,batch_size=10,shuffle=True,num_workers=0,drop_last=False)
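A quick way to see what the loader yields (a sketch, not in the original script): with batch_size=10 and ToTensor, each minibatch is a pair of a 10x1x28x28 float image tensor and a length-10 label tensor.

imgs, labels = next(iter(train_loader))   # peek at one minibatch
print(imgs.shape, labels.shape)           # torch.Size([10, 1, 28, 28]) torch.Size([10])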
train_epochs = 30
for epoch in range(train_epochs):
    for minibatch in train_loader:
        batch_delta_new_weight = [np.zeros(w.shape) for w in myBP.weights]
        batch_delta_new_bias = [np.zeros(b.shape) for b in myBP.bias]
        imgs, labels = minibatch
        for i in range(len(imgs)):
            label = one_hot_label(labels[i])
            img = np.array(imgs[i]).reshape(1, 784)   # flatten 1x28x28 to 1x784
            predict_label = myBP.forward(img)          # forward pass
            delta_new_weight, delta_new_bias = myBP.backwards(label)  # backward pass
            # Accumulate the per-sample gradients over the minibatch
            batch_delta_new_weight = [total + item for total, item in zip(batch_delta_new_weight, delta_new_weight)]
            batch_delta_new_bias = [total + item for total, item in zip(batch_delta_new_bias, delta_new_bias)]
        # Minibatch gradient update: step by the batch-averaged gradient
        myBP.weights = [w - (myBP.eta / len(imgs)) * batch_new_weight for w, batch_new_weight in zip(myBP.weights, batch_delta_new_weight)]
        myBP.bias = [b - (myBP.eta / len(imgs)) * batch_new_bias for b, batch_new_bias in zip(myBP.bias, batch_delta_new_bias)]
    # Evaluate on the test set after each epoch
    correct = 0
    for i in range(len(dataset_test)):
        img, label = dataset_test[i]
        img = np.array(img).reshape(1, 784)
        predict_label = np.argmax(myBP.forward(img)[0])
        if predict_label == label:
            correct += 1
    print("Epoch {}: accuracy {}%".format(epoch, correct / len(dataset_test) * 100))
This exercise introduces the minibatch concept: the weights are updated once per batch of batch_size samples, and the applied gradient is the average gradient over that batch of training data.
With batch_size = 1 (pure stochastic gradient descent), each update is cheap and training moves quickly, but the noisy per-sample gradients may keep the network from converging to the optimum.
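In symbols, for a minibatch of $m$ samples the update applied in the loop above is

$$w \leftarrow w - \frac{\eta}{m}\sum_{k=1}^{m}\nabla_w L^{(k)},\qquad b \leftarrow b - \frac{\eta}{m}\sum_{k=1}^{m}\nabla_b L^{(k)}$$

where $\eta$ is myBP.eta and $\nabla L^{(k)}$ is the gradient returned by the $k$-th sample's backward pass.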
The loss function here is not cross-entropy. Instead, the ten output nodes of the final fully connected layer are passed through the sigmoid, so each output lands in (0, 1); the true label is converted to a one-hot encoding (a vector of 10 elements), and the ten outputs are fitted elementwise against it.
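Concretely, with one-hot target $y$, pre-activation $z$, and output $a = \sigma(z)$, the per-sample loss and the output-layer error used in backwards() are

$$L = \frac{1}{2}\lVert y - a\rVert^2,\qquad \delta = \frac{\partial L}{\partial z} = -(y - a)\,\sigma'(z),\qquad \sigma'(z) = \sigma(z)\bigl(1-\sigma(z)\bigr)$$

which is exactly the delta computed at the top of the backward pass.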
Results:
Performance of the model on the test set:
Using only fully connected layers, handwritten digit recognition accuracy tops out at around 94%.