MNIST Handwritten Digit Recognition in NumPy (BP / MLP)

Requirements:
TensorFlow 2.0 (used only to load the dataset)
The input is a list of (x, y) pairs
Input layer: 784
Hidden layer: 30
Output layer: 10
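
For reference, one element of training_data / test_data would look roughly like this (a minimal sketch; the values are placeholders):

import numpy as np

x = np.random.rand(784)              # a flattened 28x28 image, scaled to [0, 1]
y = np.zeros((10, 1)); y[3] = 1.0    # one-hot column vector for the label 3
train_sample = (x, y)                # one element of the training list
test_sample = (x, 3)                 # test samples keep the raw integer label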

from tensorflow.keras.datasets import mnist
import numpy as np
import random


def sigmoid(inX):
    return 1.0 / (1 + np.exp(-inX))


def dsigmoid(z):
    # derivative of the sigmoid: sigmoid(z) * (1 - sigmoid(z))
    return sigmoid(z) * (1 - sigmoid(z))
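
# Quick sanity check (a sketch): the analytic derivative should match a central
# finite difference of sigmoid at an arbitrary point, e.g. z = 0.5.
_z, _eps = 0.5, 1e-6
assert np.isclose(dsigmoid(_z), (sigmoid(_z + _eps) - sigmoid(_z - _eps)) / (2 * _eps))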


class MLP():

    def __init__(self, sizes):
        """
        :param sizes: [784, 30, 10]
        """
        self.sizes = sizes
        self.num_layers = len(sizes) - 1
        # sizes: [784, 30, 10]
        # w: [out, in]
        # b: [out, 1]
        self.weight = [np.random.randn(ch2, ch1)
                       for ch1, ch2 in zip(sizes[:-1], sizes[1:])]
        # layer pairs (784, 30), (30, 10);  z = w @ x + b
        self.bias = [np.random.randn(s, 1) for s in sizes[1:]]
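        # resulting shapes for sizes = [784, 30, 10]:
        #   weight[0]: (30, 784), weight[1]: (10, 30)
        #   bias[0]:   (30, 1),   bias[1]:   (10, 1)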

    def forward(self, x):
        """

        :param x: [784,1]
        :return: [10, 1]
        """
        for b, w in zip(self.bias, self.weight):
            # [30,784]@[784,1]->[30,1]+[30,1]=[30,1]
            z = np.dot(w, x) + b
            x = sigmoid(z)

        return x
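
    # Usage sketch (hypothetical, outside the class):
    #   net = MLP([784, 30, 10])
    #   out = net.forward(np.random.rand(784, 1))   # out.shape == (10, 1)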

    def backprop(self, x, y):
        """

        :param x: [784,1]
        :param y: [10,1]
        :return:
        """
        x = x.reshape(784, 1)

        nabla_w = [np.zeros(w.shape) for w in self.weight]
        nabla_b = [np.zeros(b.shape) for b in self.bias]

        # 1. forward pass
        # store the activations of every layer
        activations = [x]
        # store the intermediate z of every layer
        zs = []
        activation = x
        for b, w in zip(self.bias, self.weight):
            z = np.dot(w, activation) + b
            activation = sigmoid(z)

            zs.append(z)
            activations.append(activation)

        loss = np.power(activations[-1] - y, 2).sum()
        # 2. backward pass
        # 2.1 gradient of the output layer
        # [10,1] * [10,1] * [10,1] -> [10,1]
        delta = activations[-1] * (1 - activations[-1]) * (activations[-1] - y)
        nabla_b[-1] = delta
        # [10,1]@[1,30] -> [10,30]
        # activation:[30,1]
        nabla_w[-1] = np.dot(delta, activations[-2].T)

        # 2.2 compute the hidden-layer gradients
        for l in range(2, self.num_layers+1):
            l = -l

            z = zs[l]
            a = activations[l]

            # delta_j
            # [10,30]T @ [10,1]  =>  [30,10] @ [10,1] =>[30,1] *[30,1] =>[30,1]
            delta = np.dot(self.weight[l + 1].T, delta) * a * (1 - a)

            nabla_b[l] = delta
            # [30,1] @ [784,1]T => [30,784]
            nabla_w[l] = np.dot(delta, activations[l - 1].T)

        return nabla_w, nabla_b, loss
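
    # Gradient sanity check (a sketch, not part of training). The deltas above
    # follow the 0.5 * sum((a - y)**2) convention, so one entry of nabla_w can be
    # compared against a central finite difference:
    #   net = MLP([784, 30, 10])
    #   x = np.random.rand(784, 1)
    #   y = np.zeros((10, 1)); y[3] = 1.0
    #   nabla_w, nabla_b, _ = net.backprop(x, y)
    #   eps, i, j = 1e-5, 0, 0
    #   net.weight[-1][i, j] += eps
    #   lp = 0.5 * np.power(net.forward(x) - y, 2).sum()
    #   net.weight[-1][i, j] -= 2 * eps
    #   lm = 0.5 * np.power(net.forward(x) - y, 2).sum()
    #   net.weight[-1][i, j] += eps
    #   # (lp - lm) / (2 * eps) should be close to nabla_w[-1][i, j]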

    def train(self, training_data, epoches, batchsz, lr, test_data):
        """

        :param training_data: list of (x,y)
        :param epoches: 1000
        :param batchsz: 10
        :param lr: 0.1
        :param test_data: list of (x,y)
        :return:
        """
        n = len(training_data)
        for j in range(epoches):
            random.shuffle(training_data)
            mini_batches = [
                training_data[k:k + batchsz]
                for k in range(0, n, batchsz)]

            # for every mini-batch in the current epoch
            for mini_batch in mini_batches:
                loss = self.update_mini_batch(mini_batch, lr)
            if test_data:
                n_test = len(test_data)
                print("Epoch {0}:{1}/{2}".format(j, self.evaluate(test_data), n_test),loss)
            else:
                print("Epoch {0} complete".format(j))

    def update_mini_batch(self, batch, lr):
        """

        :param batch: list of (x,y)
        :param lr: 0.01
        :return:
        """
        nabla_w = [np.zeros(w.shape) for w in self.weight]
        nabla_b = [np.zeros(b.shape) for b in self.bias]
        loss = 0
        # for every sample in current batch
        for x, y in batch:
            # list of every w/b gradient
            # [w1, w2, w3]
            nabla_w_, nabla_b_, loss_ = self.backprop(x, y)
            nabla_w = [accu + cur for accu, cur in zip(nabla_w, nabla_w_)]
            nabla_b = [accu + cur for accu, cur in zip(nabla_b, nabla_b_)]
            loss += loss_
        nabla_w = [w / len(batch) for w in nabla_w]
        nabla_b = [b / len(batch) for b in nabla_b]
        loss = loss / len(batch)
        # w = w - lr * nabla_w
        self.weight = [w - lr * nabla for w, nabla in zip(self.weight, nabla_w)]
        self.bias = [b - lr * nabla for b, nabla in zip(self.bias, nabla_b)]

        return loss

    def evaluate(self, test_data):
        """

        :param test_data: list of (x,y)
        :return:
        """
        result = [(np.argmax(self.forward(x.reshape([784,1]))), y)
                  for x, y in test_data]

        correct = sum(int(pred == y) for pred, y in result)

        return correct
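
    # Note (a sketch): evaluate() returns a raw count of correct predictions,
    # so accuracy would be net.evaluate(test_data) / len(test_data).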


def convert_to_one_hot(y, C):
    # map an integer label to a one-hot column vector of shape [C, 1]
    return np.eye(C)[y.reshape(-1)].T
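
# Sanity check (a sketch): a label of 3 should become a [10, 1] one-hot column.
assert convert_to_one_hot(np.array(3), 10).shape == (10, 1)
assert convert_to_one_hot(np.array(3), 10)[3, 0] == 1.0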


if __name__ == '__main__':
    (train_x, train_y), (test_x, test_y) = mnist.load_data()
    train_data = []
    train_x = train_x.reshape([60000, 784])
    for i in range(train_x.shape[0]):
        # print(convert_to_one_hot(train_y[i],10).shape)
        train_data.append([train_x[i]/255, convert_to_one_hot(train_y[i], 10)])

    test_data = []
    test_x = test_x.reshape([10000, 784])
    for i in range(10000):
        test_data.append([test_x[i]/255, test_y[i]])

    net = MLP([784, 30, 10])
    net.train(train_data, 1000, 10, 0.1, test_data=test_data)
