8月8日 PyTorch 笔记——NumPy 实现 Backpropagation

最新推荐文章于 2024-08-29 22:28:56 发布

Ashen_0nee

最新推荐文章于 2024-08-29 22:28:56 发布

阅读量229

点赞数

文章标签： numpy pytorch python

本文链接：https://blog.csdn.net/Ashen_0nee/article/details/126223285

版权

该博客介绍了如何使用 NumPy 从头实现一个多层感知机（MLP），包括前向传播和反向传播算法。在前向传播中，每一层先用权重和偏置计算 z = wx + b，再通过 sigmoid 激活函数得到该层输出。在反向传播部分，先计算平方误差损失，再从输出层开始逐层反向传播误差，得到各层权重和偏置的梯度；随后按 mini-batch 对梯度取平均，用梯度下降更新权重和偏置。最后，训练函数展示了如何在 MNIST 数据集上进行训练，并提供了评估功能。

摘要由CSDN通过智能技术生成

前言

本文为8月8日 PyTorch 笔记，用 NumPy 实现 Backpropagation（反向传播）。

一、Numpy 实现 Backpropagation

代码如下：

import numpy as np
import random

def sigmoid(z):
    """Return the logistic sigmoid ``1 / (1 + exp(-z))`` element-wise.

    The exponential is only ever taken of a non-positive number, so very
    negative inputs cannot overflow ``np.exp`` (the naive formula emits a
    RuntimeWarning there).

    :param z: scalar or array of pre-activations
    :return: sigmoid of ``z``, same shape as ``z``
    """
    z = np.asarray(z)
    # exp(-|z|) lies in (0, 1]: it may underflow to 0 but never overflows.
    e = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + e^-z);  z < 0: e^z / (1 + e^z) -- algebraically equal.
    return np.where(z >= 0, 1.0, e) / (1.0 + e)

def sigmoid_prime(z):
    """Return the derivative of the sigmoid at ``z``: ``s * (1 - s)`` with ``s = sigmoid(z)``."""
    activation = sigmoid(z)
    return activation * (1 - activation)

class MLP_np:
    """Fully connected sigmoid network trained with mini-batch gradient descent.

    Every layer computes ``sigmoid(w @ x + b)``.  Inputs, activations and
    targets are column vectors, e.g. ``[784, 1]`` in and ``[10, 1]`` out for
    ``sizes = [784, 30, 10]``.
    """

    def __init__(self, sizes):
        """
        Draw every weight and bias from a standard normal distribution.

        :param sizes: units per layer, input layer first, e.g. [784, 30, 10]
        """
        self.sizes = sizes
        # Number of weight layers (the input layer has no parameters).
        self.num_layers = len(sizes) - 1

        # w: [ch_out, ch_in], e.g. [30, 784] and [10, 30]
        self.weights = [np.random.randn(ch_out, ch_in)
                        for ch_in, ch_out in zip(sizes[:-1], sizes[1:])]
        # b: [ch_out, 1], added to w @ x, e.g. [30, 1] and [10, 1]
        self.biases = [np.random.randn(ch_out, 1) for ch_out in sizes[1:]]

    def forward(self, x):
        """
        Propagate one input through all layers.

        :param x: input column vector, e.g. [784, 1]
        :return: activation of the last layer, e.g. [10, 1]
        """
        for b, w in zip(self.biases, self.weights):
            # [30, 784] @ [784, 1] + [30, 1] ==> [30, 1]
            x = sigmoid(np.dot(w, x) + b)

        return x

    def backward(self, x, y):
        """
        Compute the gradients for a single sample by backpropagation.

        The reported loss is ``sum((a - y) ** 2)``.  The gradients omit the
        factor 2 of its derivative, i.e. they are the gradients of
        ``0.5 * sum((a - y) ** 2)``.

        :param x: input column vector, e.g. [784, 1]
        :param y: one-hot target column vector, e.g. [10, 1]
        :return: (nabla_w, nabla_b, loss); the two lists hold one gradient per
                 layer, shaped like ``self.weights`` / ``self.biases``
        """
        # 1. forward pass, keeping the activation of every layer
        activations = [x]
        for b, w in zip(self.biases, self.weights):
            activations.append(sigmoid(np.dot(w, activations[-1]) + b))

        output = activations[-1]
        loss = np.power(output - y, 2).sum()

        # 2. backward pass; every slot below is filled exactly once
        nabla_w = [None] * self.num_layers
        nabla_b = [None] * self.num_layers

        # 2.1 output layer, using sigmoid'(z) = a * (1 - a)
        # [10, 1] * [10, 1] ==> [10, 1]
        delta = output * (1 - output) * (output - y)
        nabla_b[-1] = delta
        # [10, 1] @ [1, 30] ==> [10, 30]
        nabla_w[-1] = np.dot(delta, activations[-2].T)

        # 2.2 hidden layers, walking from the last hidden layer to the first
        for l in range(2, self.num_layers + 1):
            a = activations[-l]
            # [10, 30].T @ [10, 1] ==> [30, 1], then * [30, 1]
            delta = np.dot(self.weights[-l + 1].T, delta) * a * (1 - a)

            nabla_b[-l] = delta
            # [30, 1] @ [784, 1].T ==> [30, 784]
            nabla_w[-l] = np.dot(delta, activations[-l - 1].T)

        return nabla_w, nabla_b, loss

    def train(self, training_data, epochs, batchsz, lr, test_data=None):
        """
        Run mini-batch gradient descent and print progress after every epoch.

        With test data the line printed per epoch is
        ``Epoch <j>: <correct> / <n_test> <loss>`` where ``<loss>`` is the
        mean loss of the last mini-batch of that epoch (NaN if there was no
        mini-batch).

        :param training_data: iterable of (x, y) with one-hot y
        :param epochs: number of passes over the training data, e.g. 1000
        :param batchsz: mini-batch size, e.g. 10
        :param lr: learning rate, e.g. 0.01
        :param test_data: optional iterable of (x, y); evaluated after each epoch
        :return: None
        """
        # Work on private lists: shuffling must not reorder the caller's data,
        # and one-shot iterators (e.g. zip objects) need len() and re-use.
        training_data = list(training_data)
        test_data = list(test_data) if test_data else []
        n_test = len(test_data)
        n = len(training_data)

        for j in range(epochs):
            random.shuffle(training_data)
            mini_batches = [
                training_data[k: k + batchsz]
                for k in range(0, n, batchsz)
            ]

            # Defined even when there is nothing to train on, so the report
            # below cannot hit an unbound name.
            loss = float("nan")
            for mini_batch in mini_batches:
                loss = self.update_mini_batch(mini_batch, lr)

            if test_data:
                print("Epoch {0}: {1} / {2}".format(
                    j, self.evaluate(test_data), n_test
                ), loss)
            else:
                print("Epoch {0} complete".format(j))

    def update_mini_batch(self, batch, lr):
        """
        Apply one gradient-descent step using the mean gradient of a batch.

        :param batch: list of (x, y)
        :param lr: learning rate, e.g. 0.01
        :return: mean loss over the batch; 0.0 (and no update) for an empty batch
        """
        size = len(batch)
        if size == 0:
            # Nothing to average over; avoid dividing by zero.
            return 0.0

        nabla_w = [np.zeros(w.shape) for w in self.weights]
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        loss = 0

        # Sum the per-sample gradients and losses.
        for x, y in batch:
            nabla_w_, nabla_b_, loss_ = self.backward(x, y)
            nabla_w = [accu + cur for accu, cur in zip(nabla_w, nabla_w_)]
            nabla_b = [accu + cur for accu, cur in zip(nabla_b, nabla_b_)]
            loss += loss_

        # w = w - lr * mean(nabla_w), likewise for b
        self.weights = [w - lr * (nabla / size)
                        for w, nabla in zip(self.weights, nabla_w)]
        self.biases = [b - lr * (nabla / size)
                       for b, nabla in zip(self.biases, nabla_b)]

        return loss / size

    def evaluate(self, test_data):
        """
        Count the samples whose most activated output unit matches the label.

        :param test_data: iterable of (x, y); y is either a class index or a
                          one-hot vector
        :return: number of correctly classified samples
        """
        correct = 0
        for x, y in test_data:
            pred = np.argmax(self.forward(x))
            label = np.asarray(y)
            # A vector label is one-hot: reduce it to its class index.
            target = np.argmax(label) if label.size > 1 else label.item()
            correct += int(pred == target)

        return correct


def main():
    """Train a 784-30-10 network on MNIST, printing test accuracy after every epoch."""
    import mnist_loader

    # loading MNIST data
    training_data, validation_data, test_data = mnist_loader.load_data_wrapper()
    # NOTE(review): some versions of load_data_wrapper appear to return one-shot
    # iterators (zip objects) rather than lists -- confirm against the installed
    # mnist_loader.  Materialising is harmless for lists and makes the len() and
    # indexing below work either way.
    training_data = list(training_data)
    test_data = list(test_data)
    print(len(training_data), training_data[0][0].shape, training_data[0][1].shape)
    print(len(test_data), test_data[0][0].shape, test_data[0][1].shape)

    # Set up a network with 30 hidden neurons
    net = MLP_np([784, 30, 10])
    # Use SGD to learn from the MNIST training data
    # 1000 epochs, with a mini-batch size of 10, lr=0.1
    net.train(training_data, 1000, 10, 0.1, test_data=test_data)


# Run the training demo only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()

运行结果（最后一个 epoch 的输出）：

>>> Epoch 999: 9436 / 10000 0.0002412281437943844

即 10000 个测试样本中有 9436 个分类正确（准确率 94.36%）；末尾的数值是该 epoch 最后一个 mini-batch 的平均损失。