Building a Simple Neural Network with NumPy

Model Code

Common Base Class

All of the following layer classes inherit from this base class and implement its methods.

from abc import abstractmethod
import numpy as np

class Module(object):
    """Base class for every layer: subclasses implement forward (and, where
    needed, backward); calling an instance dispatches to forward."""
    def __init__(self) -> None:
        super(Module, self).__init__()

    @abstractmethod
    def forward(self, *args, **kwds):
        pass

    def __call__(self, *args, **kwds):
        return self.forward(*args, **kwds)
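
A quick illustration of the pattern (the Scale layer below is a hypothetical example, not part of the network built later): a subclass only needs to define forward, and the instance can then be called like a function.

# Hypothetical layer, only to illustrate the Module pattern.
class Scale(Module):
    def __init__(self, factor):
        super(Scale, self).__init__()
        self.factor = factor

    def forward(self, X):
        # forward is what __call__ dispatches to
        return X * self.factor

scale = Scale(2.0)
print(scale(np.array([1.0, 2.0])))  # [2. 4.]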

Fully Connected (Linear) Layer

Mathematical derivation:
  • Forward pass:

    $Y_{n\times q}=X_{n\times p} \cdot W_{p\times q} + b_{1\times q}$

  • Backward pass (writing $\delta Y$ for the gradient flowing back from the next layer):

    $\frac{\partial L}{\partial X_{n\times p}}=\delta Y\cdot W^T$

    $\frac{\partial L}{\partial W_{p\times q}}=X^T\cdot \delta Y$

    $\frac{\partial L}{\partial b_{1\times q}}=1_{1\times n}\cdot \delta Y$ (the column-wise sum of $\delta Y$; the code below uses the batch average)

Code:
class Linear(Module):
    def __init__(self, in_features, out_features, bias=False) -> None:
        super(Linear, self).__init__()
        self.W = np.random.normal(size=(in_features, out_features))
        self.X = None
        self.bias = None
        if bias:
            self.bias = np.random.normal(size=(out_features))
    
    def forward(self, X):
        """
        X: (batch_size, in_features)
        """
        self.X = X
        Y = np.dot(X, self.W)
        if self.bias is not None:
            Y = Y + self.bias
        return Y
    
    def backward(self, delta_Y, lr):
        """
        delta_Y: (batch_size, out_features)
        """
        # gradient w.r.t. the input, computed before W is updated
        delta_Y_ = np.dot(delta_Y, self.W.transpose())
        # SGD update: dL/dW = X^T . delta_Y
        self.W = self.W - np.dot(self.X.transpose(), delta_Y) * lr
        if self.bias is not None:
            # bias gradient is the batch-averaged column sum of delta_Y
            self.bias = self.bias - np.average(delta_Y, axis=0) * lr
        return delta_Y_
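
As a quick sanity check of the shapes and of the backward formula, the following sketch compares the returned input gradient with a finite-difference estimate (passing lr=0 so the parameters are left untouched); the variable names are illustrative only.

# Sanity check: Linear.backward should return dL/dX for L = sum(Y).
np.random.seed(0)
layer = Linear(in_features=3, out_features=2, bias=True)
X = np.random.normal(size=(4, 3))
Y = layer(X)
grad_X = layer.backward(np.ones_like(Y), lr=0.0)  # lr=0: no parameter update

h = 1e-6
numeric = np.zeros_like(X)
for i in range(X.shape[0]):
    for j in range(X.shape[1]):
        Xp, Xm = X.copy(), X.copy()
        Xp[i, j] += h
        Xm[i, j] -= h
        numeric[i, j] = (layer(Xp).sum() - layer(Xm).sum()) / (2 * h)
print(np.allclose(grad_X, numeric, atol=1e-6))  # True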

Tanh Activation Layer

Mathematical derivation:
  • Forward pass:

    $Y_{n\times m}=\frac{e^{X_{n\times m}}-e^{-X_{n\times m}}}{e^{X_{n\times m}}+e^{-X_{n\times m}}}$

  • Backward pass:

    $\frac{\partial Y}{\partial X_{n\times m}}=1-\tanh(X_{n\times m})^2$

Code:
class Tanh(Module):
    def __init__(self) -> None:
        super(Tanh, self).__init__()
        self.Y = None

    def forward(self, X):
        self.Y = np.tanh(X)
        return self.Y

    def backward(self, delta_Y):
        return np.multiply((1 - self.Y ** 2), delta_Y)
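
A one-off numerical check of the derivative formula (purely illustrative, not part of the original network):

# Compare the analytic Tanh gradient with a central finite difference.
tanh_layer = Tanh()
x = np.array([[0.5, -1.2]])
_ = tanh_layer(x)                                 # forward caches tanh(x)
analytic = tanh_layer.backward(np.ones_like(x))   # 1 - tanh(x)^2
h = 1e-6
numeric = (np.tanh(x + h) - np.tanh(x - h)) / (2 * h)
print(np.allclose(analytic, numeric, atol=1e-6))  # True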

Softmax Layer

Mathematical derivation:
  • Forward pass:

    $Y_{i,j}=\frac{e^{X_{i,j}}}{\sum_{k=1}^m e^{X_{i,k}}}$

  • Backward pass (per row $i$, the Jacobian entries are):

    $\frac{\partial Y_{i,k}}{\partial X_{i,j}}=\begin{cases}Y_{i,j}(1-Y_{i,j}) & k=j\\ -Y_{i,k}\,Y_{i,j} & k\ne j\end{cases}$

    so, by the chain rule, the gradient reaching $X_{i,j}$ is $\sum_{k\ne j}-Y_{i,k}Y_{i,j}\,\delta Y_{i,k}+Y_{i,j}(1-Y_{i,j})\,\delta Y_{i,j}$.

Code:
class Softmax(Module):
    def __init__(self) -> None:
        super(Softmax, self).__init__()
        self.exps = None
        self.exps_sum = None
    
    def forward(self, X):
        """
        X: (batch_size, features)
        """
        C = np.max(X, axis=1, keepdims=True)  # per-row max for numerical stability
        self.exps = np.exp(X - C)
        self.exps_sum = np.sum(self.exps, axis=1).reshape((-1, 1))
        return np.divide(self.exps, self.exps_sum)

    def backward(self, delta_Y):
        """
        delta_Y: (batch_size, features)
        """
        exps_sum_square = self.exps_sum ** 2
        # Y_j part of the diagonal; the -Y_j^2 part is covered by ij_matrix below
        ii_matrix = np.multiply(self.exps, self.exps_sum) / exps_sum_square # (batch_size, features)
        # -Y_k * Y_j for every pair (k, j), including k == j
        ij_matrix = - np.matmul(self.exps[:, :, np.newaxis], self.exps[:, np.newaxis, :]) / exps_sum_square[:, :, np.newaxis] # (batch_size, features, features)
        ij_Y = np.multiply(delta_Y[:, :, np.newaxis], ij_matrix).sum(axis=1) # (batch_size, features)
        delta_Y = ij_Y + np.multiply(delta_Y, ii_matrix) # (batch_size, features)
        return delta_Y
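
The vectorized backward pass above can be checked against an explicit per-sample Jacobian; the following is a small sketch with illustrative names, not part of the original post.

# Verify the batched Jacobian-vector product against an explicit Jacobian.
np.random.seed(0)
softmax_layer = Softmax()
X = np.random.normal(size=(2, 4))
Y = softmax_layer(X)
delta_Y = np.random.normal(size=Y.shape)
fast = softmax_layer.backward(delta_Y)

slow = np.zeros_like(X)
for b in range(X.shape[0]):
    J = np.diag(Y[b]) - np.outer(Y[b], Y[b])  # Jacobian of the softmax row
    slow[b] = J @ delta_Y[b]
print(np.allclose(fast, slow))  # True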

Log Layer

Mathematical derivation:
  • Forward pass:

    $Y_{i,j}=\ln(X_{i,j})$

  • Backward pass:

    $\frac{\partial Y_{i,j}}{\partial X_{i,j}}=\frac{1}{X_{i,j}}$

Code:
class Log(Module):
    def __init__(self) -> None:
        super(Log, self).__init__()
        self.eps = 1e-10  # small constant to keep log(0) and 1/0 finite
        self.X = None

    def forward(self, X):
        self.X = X
        return np.log(X + self.eps)

    def backward(self, delta_Y):
        return np.multiply(1 / (self.X + self.eps), delta_Y)
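
A short usage note (illustrative only): the epsilon keeps both the forward and the backward pass finite when a probability is exactly zero.

log_layer = Log()
p = np.array([[0.7, 0.3, 0.0]])
print(log_layer(p))                          # last entry ~ log(1e-10), not -inf
print(log_layer.backward(np.ones_like(p)))   # elementwise 1 / (p + eps)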

Loss Layer (NLL Loss)

Mathematical derivation:
  • Forward pass:

    $Y=\frac{\sum_{i=1}^n\sum_{j=1}^m -X_{i,j}\,T_{i,j}}{n}$

    where:

    • $X_{i,j}$: the predicted log-probability that sample $i$ belongs to class $j$ (the output of the Log layer);
    • $T_{i,j}$: the true probability that sample $i$ belongs to class $j$, either 0 or 1 (one-hot).
  • Backward pass:

    $\frac{\partial Y}{\partial X_{i,j}}=-\frac{T_{i,j}}{n}$

Code:
class NLLloss(Module):
    def __init__(self) -> None:
        super(NLLloss, self).__init__()
        self.target = None
        self.n = None
        self.loss = None

    def forward(self, Y, target):
        """
        Y: (batch_size, features)  -- log-probabilities
        target: (batch_size)       -- integer class labels
        """
        self.n = len(target)
        # build the one-hot matrix T from the integer labels
        self.target = np.zeros(shape=Y.shape)
        for i, j in enumerate(target):
            self.target[i, j] = 1
        self.loss = -np.sum(np.multiply(Y, self.target)) / self.n
        return self.loss

    def backward(self):
        # dY/dX_{i,j} = -T_{i,j} / n
        return - self.target / self.n
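
Putting Softmax, Log and NLLloss together on a tiny batch should reproduce the usual average cross-entropy; the sketch below (illustrative names, not from the original post) checks exactly that.

# Softmax -> Log -> NLLloss equals the average cross-entropy.
np.random.seed(0)
logits = np.random.normal(size=(3, 5))
labels = np.array([0, 2, 4])

softmax_layer, log_layer, nll = Softmax(), Log(), NLLloss()
probs = softmax_layer(logits)
loss = nll(log_layer(probs), labels)

# reference: -mean(log p[i, labels[i]]), with the same epsilon as the Log layer
reference = -np.mean(np.log(probs[np.arange(3), labels] + 1e-10))
print(np.isclose(loss, reference))  # True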

Network Architecture

Model structure

(Figure: model architecture diagram)

Model code:

# Model
linear1 = Linear(in_features=28 * 28, out_features=1024, bias=True)
tanh1 = Tanh()
linear2 = Linear(in_features=1024, out_features=10, bias=True)
softmax = Softmax()
log = Log()
nllloss = NLLloss()
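
The layers are plain objects, so a forward pass is just function composition; below is a minimal sketch (the predict helper is illustrative, not part of the original post) of how they chain together for inference.

# Illustrative helper: chain the layers above into one forward pass.
def predict(X_batch):
    h = linear1(X_batch.reshape((len(X_batch), -1)))  # flatten 28x28 -> 784
    h = tanh1(h)
    h = linear2(h)                                    # (batch, 10) logits
    probs = softmax(h)
    return np.argmax(probs, axis=1)                   # predicted class ids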

Experiments

Experiment code

from tqdm import tqdm

# Training hyperparameters
epochs = 30
batch_size_train = 64
batch_size_test = 1000
lr = 0.0003

# Model
linear1 = Linear(in_features=28 * 28, out_features=1024, bias=True)
tanh1 = Tanh()
linear2 = Linear(in_features=1024, out_features=10, bias=True)
softmax = Softmax()
log = Log()
nllloss = NLLloss()

for epoch in range(epochs):
    """ train """
    process_bar = tqdm(range(len(X_train) // batch_size_train), ncols=150)
    for itor in process_bar:
        X = X_train[itor * batch_size_train: itor * batch_size_train + batch_size_train]
        y = y_train[itor * batch_size_train: itor * batch_size_train + batch_size_train]
        """ 前向传播 """
        tmp = X.reshape((batch_size_train, -1))
        tmp = linear1(tmp)
        tmp = tanh1(tmp)
        tmp = linear2(tmp)
        tmp = softmax(tmp)
        pre = np.argmax(tmp, axis=1)

        train_acc = np.sum(y == pre)
        train_total = len(y)
        
        tmp = log(tmp)
        loss = nllloss(tmp, y)
        """ 反向传播 """
        Y = nllloss.backward()
        Y = log.backward(Y)
        Y = softmax.backward(Y)
        Y = linear2.backward(Y, lr)
        Y = tanh1.backward(Y)
        Y = linear1.backward(Y, lr)

        process_bar.set_description('Train epoch:{} '.format(epoch + 1))
        process_bar.set_postfix_str('loss: {:.4f}  Acc:{:.2f}%'.format(
                                    loss, 100. * train_acc / train_total))
    
    """ test """
    test_total = 0
    test_acc = 0
    test_process_bar = tqdm(range(len(X_test) // batch_size_test), ncols=150)
    for itor in test_process_bar:
        X = X_test[itor * batch_size_test: itor * batch_size_test + batch_size_test]
        y = y_test[itor * batch_size_test: itor * batch_size_test + batch_size_test]

        tmp = X.reshape((batch_size_test, -1))
        tmp = linear1(tmp)
        tmp = tanh1(tmp)
        tmp = linear2(tmp)
        Y = softmax(tmp)
        Y = np.argmax(Y, axis=1)
        test_total += len(y)
        test_acc += np.sum(y == Y)
        test_process_bar.set_description('Test epoch:{} '.format(epoch + 1))
        test_process_bar.set_postfix_str('Acc [{}/{} ({:.2f}%)]'.format(
                                        test_acc, test_total, 100. * test_acc/test_total))
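
The loop assumes that X_train, y_train, X_test and y_test are already in memory as 28x28 image arrays with integer labels; one way to obtain them (assuming TensorFlow is installed; any MNIST-format arrays would do) is sketched below.

# Illustrative data loading; any source of 28x28 images + integer labels works.
from tensorflow.keras.datasets import mnist

(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train = X_train.astype(np.float64) / 255.0  # scale pixels to [0, 1]
X_test = X_test.astype(np.float64) / 255.0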

Experiment results

(Figure: experiment results)
