# three_layer_net.py
import sys, os
import numpy as np  # explicit import: do not rely on the star-import below re-exporting np
from common.functions import *
from common.gradient import numerical_gradient


class ThreeLayerNet:
    """Fully connected 3-layer network: input -> sigmoid -> sigmoid -> softmax.

    All parameters live in ``self.params`` under keys
    'W1', 'b1', 'W2', 'b2', 'W3', 'b3'.
    """

    def __init__(self, input_size, hidden_size1, hidden_size2, output_size,
                 weight_init_std=0.01):
        # Initialize weights with small Gaussian noise and biases with zeros.
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size1)
        self.params['b1'] = np.zeros(hidden_size1)
        self.params['W2'] = weight_init_std * np.random.randn(hidden_size1, hidden_size2)
        self.params['b2'] = np.zeros(hidden_size2)
        self.params['W3'] = weight_init_std * np.random.randn(hidden_size2, output_size)
        self.params['b3'] = np.zeros(output_size)

    def predict(self, x):
        """Forward pass: return softmax class probabilities for the batch x."""
        W1, W2, W3 = self.params['W1'], self.params['W2'], self.params['W3']
        b1, b2, b3 = self.params['b1'], self.params['b2'], self.params['b3']

        a1 = np.dot(x, W1) + b1
        z1 = sigmoid(a1)
        a2 = np.dot(z1, W2) + b2
        z2 = sigmoid(a2)
        a3 = np.dot(z2, W3) + b3
        y = softmax(a3)

        return y

    # x: input data, t: supervision (label) data
    def loss(self, x, t):
        """Cross-entropy loss of the prediction for x against labels t."""
        y = self.predict(x)
        return cross_entropy_error(y, t)

    def accuracy(self, x, t):
        """Fraction of samples whose argmax prediction equals the label.

        NOTE(review): assumes t is one-hot encoded (argmax over axis=1) —
        confirm callers load labels with one_hot_label=True.
        """
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        t = np.argmax(t, axis=1)

        accuracy = np.sum(y == t) / float(x.shape[0])
        return accuracy

    # x: input data, t: supervision data
    def numerical_gradient(self, x, t):
        """Gradients of the loss w.r.t. every parameter via numerical differentiation.

        Slow; intended only to cross-check the analytic backprop in
        ``gradient``. The calls below resolve to the module-level
        ``numerical_gradient`` imported from common.gradient, not to this
        method (method names are class attributes, not globals).
        """
        loss_W = lambda W: self.loss(x, t)

        grads = {}
        grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
        grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
        grads['W2'] = numerical_gradient(loss_W, self.params['W2'])
        grads['b2'] = numerical_gradient(loss_W, self.params['b2'])
        grads['W3'] = numerical_gradient(loss_W, self.params['W3'])
        grads['b3'] = numerical_gradient(loss_W, self.params['b3'])

        return grads

    def gradient(self, x, t):
        """Analytic gradients via backpropagation (fast version).

        y: network output, t: supervision data. Returns a dict with the
        same keys as ``self.params``.
        """
        W1, W2, W3 = self.params['W1'], self.params['W2'], self.params['W3']
        b1, b2, b3 = self.params['b1'], self.params['b2'], self.params['b3']
        grads = {}

        batch_num = x.shape[0]

        # forward
        a1 = np.dot(x, W1) + b1
        z1 = sigmoid(a1)
        a2 = np.dot(z1, W2) + b2
        z2 = sigmoid(a2)
        a3 = np.dot(z2, W3) + b3
        y = softmax(a3)

        # backward: softmax + cross-entropy combine to dL/da3 = (y - t) / batch
        dy = (y - t) / batch_num
        grads['W3'] = np.dot(z2.T, dy)
        grads['b3'] = np.sum(dy, axis=0)

        da2 = np.dot(dy, W3.T)
        dz2 = sigmoid_grad(a2) * da2
        grads['W2'] = np.dot(z1.T, dz2)
        grads['b2'] = np.sum(dz2, axis=0)

        da1 = np.dot(dz2, W2.T)
        dz1 = sigmoid_grad(a1) * da1
        grads['W1'] = np.dot(x.T, dz1)
        grads['b1'] = np.sum(dz1, axis=0)

        return grads
# train_three.py
import numpy as np
from three_layer_net import ThreeLayerNet
from dataset.mnist import load_mnist
import matplotlib.pylab as plt
import pickle

# Load MNIST as normalized images with one-hot labels
# (ThreeLayerNet.accuracy argmaxes the labels, so one-hot is required).
(x_train, t_train), (x_test, t_test) = load_mnist(one_hot_label=True, normalize=True)

train_loss_list = []
train_acc_list = []
test_acc_list = []
epoch = 0

# Hyperparameters
iters_num = 10000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.5

network = ThreeLayerNet(784, 50, 100, 10)

# Iterations per epoch. Integer division (was float `/`) so the
# `i % iter_per_epoch` test below is an exact int modulo.
iter_per_epoch = max(train_size // batch_size, 1)

for i in range(iters_num):
    # print(i)  # progress monitoring
    # Sample a random mini-batch.
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # Compute gradients.
    # grad = network.numerical_gradient(x_batch, t_batch)  # slow, for checking
    grad = network.gradient(x_batch, t_batch)  # fast backprop version

    # SGD parameter update.
    for key in ('W1', 'b1', 'W2', 'b2', 'W3', 'b3'):
        network.params[key] -= learning_rate * grad[key]

    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)

    if i % iter_per_epoch == 0:
        # One epoch has passed: record accuracy on the full train/test sets.
        epoch += 1
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print('epoch: ' + str(epoch) + ' train acc, test acc | '
              + str(train_acc) + ', ' + str(test_acc))

# Plot recognition accuracy per epoch.
x = range(len(train_acc_list))
y1 = train_acc_list
y2 = test_acc_list
plt.plot(x, y1, label='train_acc')
plt.plot(x, y2, linestyle='--', label='test_acc')
plt.xlabel('epochs')
plt.ylabel('accuracy')  # fixed typo: was 'accurancy'
plt.legend()
plt.show()

# Plot the loss curve over all iterations.
x = range(iters_num)
y = train_loss_list
plt.plot(x, y)
plt.xlabel('learning time')
plt.ylabel('value of loss function')
plt.show()

# Persist the trained weights (protocol -1 = highest pickle protocol).
file_name = 'sample_weight3.pkl'
with open(file_name, 'wb') as f:
    pickle.dump(network.params, f, -1)