[NumPy-based] Implementing a Simple Softmax_Regression Neural Network for Handwritten Digit Recognition (Part 1)

Introduction

This post implements a single-layer softmax regression network using only numpy. The code is mainly intended for learning how to compute gradients and perform backpropagation.

Model Code

import numpy as np
from ._base_network import _baseNetwork


class SoftmaxRegression(_baseNetwork):
    def __init__(self, input_size=28 * 28, num_classes=10):
        super().__init__(input_size, num_classes)
        self._weight_init()

    def _weight_init(self):
        # Small random initial weights and a zero-initialized gradient buffer
        np.random.seed(1024)
        self.weights['W1'] = 0.001 * np.random.randn(self.input_size, self.num_classes)
        self.gradients['W1'] = np.zeros((self.input_size, self.num_classes))

    def forward(self, X, y, mode='train'):
        # Flatten each image into a 784-dimensional row vector
        X = X.reshape(-1, 28 * 28)
        N, _ = X.shape

        # Forward pass: linear layer -> ReLU -> softmax
        X1 = np.dot(X, self.weights['W1'])
        X2 = self.ReLU(X1)
        Y = self.softmax(X2)

        loss = self.cross_entropy_loss(Y, y)
        accuracy = self.compute_accuracy(Y, y)

        if mode == 'train':
            # Backward pass: (softmax + cross-entropy) gradient, masked by the
            # ReLU derivative, then projected back onto W1 through the input X
            gradient = np.dot(X.T, self.cross_entropy_dev(X2, y) * self.ReLU_dev(X1)) / N
            self.gradients['W1'] = gradient

        return loss, accuracy
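
For reference, here is a minimal sketch of how the stored gradient could be used in a training loop. The random `train_images`/`train_labels` arrays, the batch size, and the learning rate are illustrative assumptions, not part of the original code; only the `SoftmaxRegression` interface above is taken from it.

import numpy as np

# Hypothetical data: replace with real MNIST images of shape (N, 28, 28)
# and integer labels of shape (N,)
train_images = np.random.rand(512, 28, 28)
train_labels = np.random.randint(0, 10, size=512)

model = SoftmaxRegression()
learning_rate, batch_size, epochs = 0.5, 64, 5

for epoch in range(epochs):
    for start in range(0, len(train_images), batch_size):
        X = train_images[start:start + batch_size]
        y = train_labels[start:start + batch_size]
        # forward() also fills model.gradients['W1'] when mode='train'
        loss, acc = model.forward(X, y, mode='train')
        # Plain SGD step against the stored gradient
        model.weights['W1'] -= learning_rate * model.gradients['W1']
    print(f"epoch {epoch}: loss={loss:.4f}, acc={acc:.4f}")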

Backbone Network

import numpy as np


class _baseNetwork:
    def __init__(self, input_size=28 * 28, num_classes=10):
        self.input_size = input_size
        self.num_classes = num_classes

        self.weights = dict()
        self.gradients = dict()

    def _weight_init(self):
        pass

    def forward(self):
        pass

    def softmax(self, scores):
        # Row-wise softmax: prob[i, j] = exp(scores[i, j]) / sum_k exp(scores[i, k])
        prob = np.zeros(scores.shape)
        row_sums = []

        for i in range(scores.shape[0]):
            row_sums.append(np.sum(np.exp(scores[i])))
        for i in range(scores.shape[0]):
            for j in range(scores.shape[1]):
                prob[i, j] = np.exp(scores[i, j]) / row_sums[i]

        return prob

    def cross_entropy_loss(self, x_pred, y):
        # Mean negative log-likelihood of the true class over the batch
        loss = 0
        for i in range(x_pred.shape[0]):
            loss += -np.log(x_pred[i, y[i]])
        loss = loss / x_pred.shape[0]

        return loss


    def compute_accuracy(self, x_pred, y):
        # Fraction of samples whose predicted class (argmax) matches the label
        right = 0
        for i in range(x_pred.shape[0]):
            if y[i] == np.argmax(x_pred[i]):
                right += 1

        acc = right / x_pred.shape[0]

        return acc

    def sigmoid(self, X):
        # Element-wise logistic function: 1 / (1 + exp(-x))
        out = np.zeros(X.shape)
        for i in range(X.shape[0]):
            for j in range(X.shape[1]):
                out[i, j] = 1 / (1 + np.exp(-X[i, j]))
        return out

    def sigmoid_dev(self, x):
        """
        The analytical derivative of sigmoid function at x
        :param x: Input data
        :return: The derivative of sigmoid function at x
        """
        ds = self.sigmoid(x) * (1 - self.sigmoid(x))
        return ds

    def ReLU(self, X):
        # Element-wise ReLU: max(x, 0)
        out = np.zeros(X.shape)
        for i in range(X.shape[0]):
            for j in range(X.shape[1]):
                if X[i, j] > 0:
                    out[i, j] = X[i, j]
                else:
                    out[i, j] = 0
        return out

    def ReLU_dev(self, X):
        # Element-wise derivative of ReLU: 1 where the input is positive, else 0
        out = np.zeros(X.shape)
        inr = self.ReLU(X)
        for i in range(X.shape[0]):
            for j in range(X.shape[1]):
                if inr[i, j] > 0:
                    out[i, j] = 1.
                else:
                    out[i, j] = 0
        return out



    def cross_entropy_dev(self, x_pred, y):
        # Gradient of the fused (softmax + cross-entropy) module w.r.t. the logits.
        # Note: cross-entropy and Softmax are always chained together here, so their
        # gradient is computed in one step, which is also simpler: softmax(x_pred) - one_hot(y)
        yy = []
        for i in range(x_pred.shape[0]):
            y0 = np.zeros(x_pred.shape[1])
            y0[y[i]] = 1
            yy.append(y0)
        yy = np.array(yy)
        sm_out = self.softmax(x_pred)
        out = sm_out - yy

        return out
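
As a sanity check on the backward pass, the analytic gradient stored in `gradients['W1']` can be compared against a centered finite-difference estimate. This helper is my own sketch (not from the original post); it assumes `SoftmaxRegression` is importable alongside `_baseNetwork` and perturbs a few randomly chosen weight entries.

import numpy as np

def grad_check(model, X, y, eps=1e-5, num_checks=5):
    # Compare the analytic gradient with (loss(w + eps) - loss(w - eps)) / (2 * eps)
    model.forward(X, y, mode='train')            # fills model.gradients['W1']
    analytic = model.gradients['W1'].copy()
    rng = np.random.default_rng(0)
    for _ in range(num_checks):
        i = rng.integers(model.weights['W1'].shape[0])
        j = rng.integers(model.weights['W1'].shape[1])
        old = model.weights['W1'][i, j]
        model.weights['W1'][i, j] = old + eps
        loss_plus, _ = model.forward(X, y, mode='eval')   # any mode other than 'train' skips the update
        model.weights['W1'][i, j] = old - eps
        loss_minus, _ = model.forward(X, y, mode='eval')
        model.weights['W1'][i, j] = old                   # restore the original weight
        numeric = (loss_plus - loss_minus) / (2 * eps)
        print(f"W1[{i}, {j}]: analytic={analytic[i, j]:.6e}, numeric={numeric:.6e}")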

Notes

  1. The forward pass is fairly simple. For backpropagation, keep in mind that when taking the gradient of the loss with respect to any weight or bias, the gradient's shape must match the shape of that weight/bias!
  2. Therefore, whenever the shapes do not match, there are generally two ways to reconcile them: ① use the chain rule and matrix-multiply the two adjacent gradient matrices so that the shapes work out; ② for a bias whose shapes disagree, e.g. the computed bias gradient has shape (128, 10) but, per point 1, the bias gradient must have the same shape as the bias itself, namely (128,), average or sum over one axis (e.g. np.mean or np.sum) to collapse the extra dimension.
  3. Pay particular attention to the backward pass of the cross-entropy-loss + Softmax block: its gradient is normally derived as one fused module (see the derivation sketch below), so instead of differentiating the cross-entropy module and the Softmax module separately, you can directly use the formula gradient of module = sm_out - yy, where "gradient of module" is the gradient of cross-entropy + Softmax with respect to the logits, yy is the one-hot embedding of the label, and sm_out is the result of passing the logits through Softmax.
    Derivation of the simplified cross-entropy + Softmax gradient
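
A compact version of that derivation (my own sketch of the standard result, using the same names as the code: z are the logits, p = softmax(z), c is the true class, so yy is the one-hot vector for c and sm_out is p):

% Softmax followed by cross-entropy, differentiated w.r.t. the logits z
\[
p_j = \frac{e^{z_j}}{\sum_k e^{z_k}}, \qquad L = -\log p_c
\]
\[
\frac{\partial L}{\partial z_j}
  = -\frac{1}{p_c}\,\frac{\partial p_c}{\partial z_j}
  = -\frac{1}{p_c}\, p_c\bigl(\mathbf{1}[j=c] - p_j\bigr)
  = p_j - \mathbf{1}[j=c]
\]
% In vector form this is exactly sm_out - yy, as returned by cross_entropy_dev.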