Introduction
A single-layer softmax regression network implemented entirely in numpy. The code is mainly meant for learning how to compute gradients and perform backpropagation.
Model Code
import numpy as np

from ._base_network import _baseNetwork


class SoftmaxRegression(_baseNetwork):
    def __init__(self, input_size=28 * 28, num_classes=10):
        # super().__init__ already stores input_size/num_classes and creates
        # the weights/gradients dicts, so no extra bookkeeping is needed here.
        # Since we inherit from _baseNetwork, the activation and loss helpers
        # are called directly on self instead of on a second instance.
        super().__init__(input_size, num_classes)
        self._weight_init()

    def _weight_init(self):
        # Fixed seed for reproducibility; small random weights, zero gradients
        np.random.seed(1024)
        self.weights['W1'] = 0.001 * np.random.randn(self.input_size, self.num_classes)
        self.gradients['W1'] = np.zeros((self.input_size, self.num_classes))

    def forward(self, X, y, mode='train'):
        # Flatten each input into a vector: (N, input_size)
        X = X.reshape(-1, self.input_size)
        N = X.shape[0]
        # Linear layer -> ReLU -> Softmax
        X1 = np.dot(X, self.weights['W1'])  # (N, num_classes)
        X2 = self.ReLU(X1)
        Y = self.softmax(X2)
        loss = self.cross_entropy_loss(Y, y)
        accuracy = self.compute_accuracy(Y, y)
        if mode == 'train':
            # Chain rule: dL/dW1 = X^T @ (dL/dX2 * dX2/dX1), averaged over the batch.
            # cross_entropy_dev fuses softmax + cross-entropy, so it takes the
            # pre-softmax activations X2; ReLU_dev masks units where X1 <= 0.
            self.gradients['W1'] = np.dot(X.T, self.cross_entropy_dev(X2, y) * self.ReLU_dev(X1)) / N
        return loss, accuracy
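As a quick illustration of how the class might be used, here is a minimal training-loop sketch. The synthetic data, the learning rate lr, and the plain gradient-descent update are all assumptions for illustration; they are not part of the original code.

import numpy as np

# Hypothetical usage sketch: fit the model on random data with vanilla
# gradient descent. Assumes SoftmaxRegression (above) is importable; all
# hyperparameters below are assumed, not prescribed.
model = SoftmaxRegression(input_size=28 * 28, num_classes=10)

rng = np.random.RandomState(0)
X = rng.randn(64, 28 * 28)        # fake batch of 64 flattened "images"
y = rng.randint(0, 10, size=64)   # fake integer labels in [0, 10)

lr = 0.1                          # assumed learning rate
for step in range(100):
    loss, acc = model.forward(X, y, mode='train')      # fills gradients['W1']
    model.weights['W1'] -= lr * model.gradients['W1']  # gradient-descent step

print(loss, acc)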
Backbone Network
import numpy as np


class _baseNetwork:
    def __init__(self, input_size=28 * 28, num_classes=10):
        self.input_size = input_size
        self.num_classes = num_classes
        self.weights = dict()
        self.gradients = dict()

    def _weight_init(self):
        pass

    def forward(self):
        pass

    def softmax(self, scores):
        # Row-wise softmax. Shifting each row by its max does not change the
        # result but keeps np.exp from overflowing for large scores.
        prob = np.zeros(scores.shape)
        for i in range(scores.shape[0]):
            shifted = scores[i] - np.max(scores[i])
            denom = np.sum(np.exp(shifted))
            for j in range(scores.shape[1]):
                prob[i, j] = np.exp(shifted[j]) / denom
        return prob

    def cross_entropy_loss(self, x_pred, y):
        # Mean negative log-likelihood of the true class
        loss = 0
        for i in range(x_pred.shape[0]):
            loss += -np.log(x_pred[i, y[i]])
        loss = loss / x_pred.shape[0]
        return loss

    def compute_accuracy(self, x_pred, y):
        # Fraction of samples whose argmax prediction matches the label
        right = 0
        for i in range(x_pred.shape[0]):
            if y[i] == np.argmax(x_pred[i]):
                right += 1
        acc = right / x_pred.shape[0]
        return acc

    def sigmoid(self, X):
        # Element-wise logistic function
        out = np.zeros(X.shape)
        for i in range(X.shape[0]):
            for j in range(X.shape[1]):
                out[i, j] = 1 / (1 + np.exp(-X[i, j]))
        return out

    def sigmoid_dev(self, x):
        """
        The analytical derivative of the sigmoid function at x
        :param x: Input data
        :return: The derivative of the sigmoid function at x
        """
        ds = self.sigmoid(x) * (1 - self.sigmoid(x))
        return ds

    def ReLU(self, X):
        # Element-wise max(0, x)
        out = np.zeros(X.shape)
        for i in range(X.shape[0]):
            for j in range(X.shape[1]):
                if X[i, j] > 0:
                    out[i, j] = X[i, j]
        return out

    def ReLU_dev(self, X):
        # Derivative of ReLU: 1 where the input is positive, 0 elsewhere
        out = np.zeros(X.shape)
        for i in range(X.shape[0]):
            for j in range(X.shape[1]):
                if X[i, j] > 0:
                    out[i, j] = 1.
        return out

    def cross_entropy_dev(self, x_pred, y):
        # One-hot encode the integer labels
        yy = []
        for i in range(x_pred.shape[0]):
            y0 = np.zeros(x_pred.shape[1])
            y0[y[i]] = 1
            yy.append(y0)
        yy = np.array(yy)
        sm_out = self.softmax(x_pred)
        # Note: cross-entropy and softmax are treated as one fused module, so
        # the gradient is taken through both at once -- which is also simpler.
        out = sm_out - yy
        return out
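As a sanity check, the loop-based softmax and cross_entropy_loss above can be compared against the standard vectorized NumPy expressions of the same formulas. This is a minimal sketch on toy random data, assuming _baseNetwork from the code above is in scope; the vectorized one-liners are textbook equivalents, not part of the original code.

import numpy as np

net = _baseNetwork()
rng = np.random.RandomState(0)
scores = rng.randn(5, 10)          # toy logits: 5 samples, 10 classes
y = rng.randint(0, 10, size=5)

# Vectorized softmax: shift each row by its max, exponentiate, normalize
shifted = scores - scores.max(axis=1, keepdims=True)
probs = np.exp(shifted) / np.exp(shifted).sum(axis=1, keepdims=True)
assert np.allclose(probs, net.softmax(scores))

# Vectorized cross-entropy: mean negative log-probability of the true class
loss = -np.log(probs[np.arange(len(y)), y]).mean()
assert np.isclose(loss, net.cross_entropy_loss(probs, y))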
Notes
- Forward propagation is straightforward. During backpropagation, keep in mind that the gradient of the loss with respect to any weight or bias must have exactly the same shape as that weight or bias!
- Consequently, when the shapes disagree, there are generally two ways to reconcile them: ① matrix-multiply the two adjacent gradient matrices from the chain rule so that the shapes line up; ② for a bias, if the computed gradient has the wrong shape, say (128, 10), while by the point above it must match the bias shape of (128,), average or sum away the extra axis (e.g. with np.mean or np.sum).
- Pay special attention to the backward pass through the cross-entropy loss + Softmax module: the two are conventionally differentiated together as one unit (see the derivation below), so there is no need to compute the gradients of the cross-entropy module and the Softmax module separately. Instead, use the formula gradient of module = sm_out - yy, where "gradient of module" is the gradient of cross-entropy + Softmax with respect to the logits, yy is the one-hot embedding of the labels, and sm_out is the result of passing the logits through Softmax. Both points are checked numerically in the sketch after this list.
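The sketch below (toy shapes and the step size eps are assumptions, with _baseNetwork from above in scope) verifies the fused sm_out - yy formula against a central finite-difference gradient of the composed loss, and then applies the axis-reduction trick from point ② to obtain a bias-shaped gradient.

import numpy as np

net = _baseNetwork()
rng = np.random.RandomState(0)
logits = rng.randn(4, 10)          # toy logits: batch of 4, 10 classes
y = rng.randint(0, 10, size=4)

def loss_fn(z):
    # Cross-entropy of softmax(z), summed over the batch so that the exact
    # per-sample gradient is softmax(z) - one_hot(y)
    return net.cross_entropy_loss(net.softmax(z), y) * z.shape[0]

analytic = net.cross_entropy_dev(logits, y)   # sm_out - yy

# Central finite differences, one coordinate at a time
numeric = np.zeros_like(logits)
eps = 1e-5
for i in range(logits.shape[0]):
    for j in range(logits.shape[1]):
        zp, zm = logits.copy(), logits.copy()
        zp[i, j] += eps
        zm[i, j] -= eps
        numeric[i, j] = (loss_fn(zp) - loss_fn(zm)) / (2 * eps)
assert np.allclose(analytic, numeric, atol=1e-6)

# Point ②: a per-sample bias gradient of shape (N, C) is reduced over the
# batch axis so it matches a bias of shape (C,)
bias_grad = analytic.sum(axis=0)   # or .mean(axis=0) for an averaged loss
assert bias_grad.shape == (10,)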
Derivation of the Simplified Cross-Entropy + Softmax Gradient