深度学习入门 (八)：完整地实现全连接层并进行手写数字识别

最新推荐文章于 2023-09-24 21:07:11 发布

连理o

最新推荐文章于 2023-09-24 21:07:11 发布

阅读量868

点赞数 2

分类专栏：深度学习　文章标签：神经网络、深度学习

本文链接：https://blog.csdn.net/weixin_42437114/article/details/106466153

版权

深度学习专栏收录该内容

27 篇文章 18 订阅

订阅专栏

本文为《深度学习入门 – 基于 Python 的理论与实现》的读书笔记

全连接多层神经网络类的实现

import os
import sys

# Resolve this file's location as an absolute, forward-slash path. Using
# abspath/dirname instead of slicing at rfind('/') avoids chopping the last
# character off the name when __file__ carries no directory component.
file_path = os.path.abspath(__file__).replace('\\', '/')
dir_path = os.path.dirname(file_path)  # folder that contains this file
pardir_path = os.path.dirname(dir_path)  # parent of that folder
# Make the parent folder importable (presumably where the `func` and `layer`
# packages imported below live); skip it when it is already registered.
if pardir_path not in sys.path:
    sys.path.append(pardir_path)

import numpy as np
from func.gradient import numerical_gradient, gradient_check
from layer.common import *
from collections import OrderedDict
import os
import pickle

class MultiLayerNet:
    """Fully connected multi-layer neural network.

    Every hidden block is assembled in this order:
    Affine -> [BatchNormalization] -> activation -> [Dropout].
    The output layer is a single Affine layer and the loss layer is
    SoftmaxWithLoss.

    Parameters
    ----------
    input_size : size of the input (784 for MNIST)
    hidden_size_list : number of neurons of each hidden layer
        (e.g. [100, 100, 100])
    output_size : size of the output (10 for MNIST)
    activation : 'relu' or 'sigmoid'
    weight_init_std : standard deviation of the initial weights (e.g. 0.01)
        'relu' or 'he' selects the "He initial value"
        'sigmoid' or 'xavier' selects the "Xavier initial value"
    weight_decay_lambda : strength of the weight decay (L2 norm), used to
        suppress overfitting
    use_dropout : whether to insert a Dropout layer after the activation of
        every hidden layer (never after the output layer)
    dropout_ration : dropout ratio; the same value is given to every
        Dropout layer
    use_batchnorm : whether to insert a BatchNormalization layer between the
        Affine layer and the activation of every hidden layer
    pretrain_flag : when truthy and pkl_file_name points to an existing
        file, params / layers / last_layer are restored from that pickle
        instead of being freshly created
    pkl_file_name : path of the pickled model, or None when there is none
    """
    def __init__(self, input_size, hidden_size_list, output_size,
                 activation='relu', weight_init_std='relu', weight_decay_lambda=0,
                 use_dropout=False, dropout_ration=0.5, use_batchnorm=False,
                 pretrain_flag=True, pkl_file_name=None):
        self.input_size = input_size
        self.output_size = output_size
        self.hidden_size_list = hidden_size_list
        self.hidden_layer_num = len(hidden_size_list)
        self.use_dropout = use_dropout
        self.weight_decay_lambda = weight_decay_lambda
        self.use_batchnorm = use_batchnorm
        self.pkl_file_name = pkl_file_name
        self.params = {}

        # os.path.exists(None) raises TypeError, so the path must be checked
        # for None first; otherwise the default arguments cannot be used.
        has_saved_model = pkl_file_name is not None and os.path.exists(pkl_file_name)
        if pretrain_flag and has_saved_model:
            self.load_pretrain_model()
            return

        # Initialise the weights
        self.__init_weight(weight_init_std)

        # Build the hidden layers
        activation_layer = {'sigmoid': Sigmoid, 'relu': Relu}
        self.layers = OrderedDict()
        for idx in range(1, self.hidden_layer_num + 1):
            tag = str(idx)
            self.layers['Affine' + tag] = Affine(self.params['W' + tag],
                                                 self.params['b' + tag])
            if self.use_batchnorm:
                # One scale (gamma) and one shift (beta) per hidden neuron.
                self.params['gamma' + tag] = np.ones(hidden_size_list[idx - 1])
                self.params['beta' + tag] = np.zeros(hidden_size_list[idx - 1])
                self.layers['BatchNorm' + tag] = BatchNormalization(
                    self.params['gamma' + tag], self.params['beta' + tag])

            self.layers['Activation_function' + tag] = activation_layer[activation]()

            if self.use_dropout:
                self.layers['Dropout' + tag] = Dropout(dropout_ration)

        # Output layer
        tag = str(self.hidden_layer_num + 1)
        self.layers['Affine' + tag] = Affine(self.params['W' + tag], self.params['b' + tag])

        self.last_layer = SoftmaxWithLoss()

    def load_pretrain_model(self):
        """Restore params, layers and last_layer from self.pkl_file_name.

        NOTE: pickle.load can execute arbitrary code while unpickling; only
        load model files that come from a trusted source.
        """
        with open(self.pkl_file_name, 'rb') as f:
            model = pickle.load(f)
        # Plain attribute copy; no need to build and exec source strings.
        for key in ('params', 'layers', 'last_layer'):
            setattr(self, key, getattr(model, key))
        print('params loaded!')

    def __init_weight(self, weight_init_std):
        """Set the initial values of the weights and biases.

        Parameters
        ----------
        weight_init_std : standard deviation of the initial weights (e.g. 0.01)
            'relu' or 'he' selects the "He initial value"
            'sigmoid' or 'xavier' selects the "Xavier initial value"
        """
        # list(...) lets hidden_size_list be any sequence (e.g. a tuple).
        all_size_list = [self.input_size] + list(self.hidden_size_list) + [self.output_size]
        init_name = str(weight_init_std).lower()
        for idx in range(1, len(all_size_list)):
            fan_in = all_size_list[idx - 1]
            scale = weight_init_std
            if init_name in ('relu', 'he'):
                scale = np.sqrt(2.0 / fan_in)  # recommended when using ReLU
            elif init_name in ('sigmoid', 'xavier'):
                scale = np.sqrt(1.0 / fan_in)  # recommended when using sigmoid
            self.params['W' + str(idx)] = scale * np.random.randn(fan_in, all_size_list[idx])
            self.params['b' + str(idx)] = np.zeros(all_size_list[idx])

    def predict(self, x, train_flg=False):
        """Run x through every layer in self.layers and return the output.

        train_flg is forwarded only to the Dropout and BatchNorm layers.
        """
        for key, layer in self.layers.items():
            if "Dropout" in key or "BatchNorm" in key:
                x = layer.forward(x, train_flg)
            else:
                x = layer.forward(x)

        return x

    def loss(self, x, t, train_flg=False):
        """Return the last layer's loss for (x, t) plus the weight-decay term."""
        y = self.predict(x, train_flg)

        weight_decay = 0
        # Weight decay covers the hidden layers as well as the output layer.
        for idx in range(1, self.hidden_layer_num + 2):
            W = self.params['W' + str(idx)]
            weight_decay += 0.5 * self.weight_decay_lambda * np.sum(W**2)

        return self.last_layer.forward(y, t) + weight_decay

    def accuracy(self, X, T):
        """Return the fraction of rows of X whose arg-max prediction matches T.

        T may hold class indices (1-D) or one-hot rows (2-D).
        """
        Y = self.predict(X, train_flg=False)
        Y = np.argmax(Y, axis=1)
        if T.ndim != 1:
            T = np.argmax(T, axis=1)

        accuracy = np.sum(Y == T) / float(X.shape[0])
        return accuracy

    def numerical_gradient(self, X, T):
        """Compute the gradients by numerical differentiation.

        Returns
        -------
        Dictionary with the gradient of every parameter
            grads['W1'], grads['W2'], ... gradients of the weights
            grads['b1'], grads['b2'], ... gradients of the biases
            grads['gamma1'], grads['beta1'], ... only when use_batchnorm is set
        """
        # The argument is ignored: the helper perturbs the parameter array
        # in place and this closure simply re-evaluates the loss.
        loss_W = lambda W: self.loss(X, T, train_flg=True)

        grads = {}
        for idx in range(1, self.hidden_layer_num+2):
            grads['W' + str(idx)] = numerical_gradient(loss_W, self.params['W' + str(idx)])
            grads['b' + str(idx)] = numerical_gradient(loss_W, self.params['b' + str(idx)])

            # The output layer has no BatchNorm layer.
            if self.use_batchnorm and idx != self.hidden_layer_num+1:
                grads['gamma' + str(idx)] = numerical_gradient(loss_W, self.params['gamma' + str(idx)])
                grads['beta' + str(idx)] = numerical_gradient(loss_W, self.params['beta' + str(idx)])

        return grads

    def gradient(self, x, t):
        """Compute the gradients by backpropagation.

        Returns a dictionary with the same keys as numerical_gradient().
        """
        # forward
        self.loss(x, t, train_flg=True)

        # backward
        dout = 1
        dout = self.last_layer.backward(dout)
        for layer_name in reversed(self.layers):
            dout = self.layers[layer_name].backward(dout)

        # Collect the gradients stored on the layers
        grads = {}
        for idx in range(1, self.hidden_layer_num+2):
            affine = self.layers['Affine' + str(idx)]
            # Derivative of the 0.5 * lambda * sum(W**2) term added in loss().
            grads['W' + str(idx)] = affine.dW + self.weight_decay_lambda * self.params['W' + str(idx)]
            grads['b' + str(idx)] = affine.db

            # The output layer has no BatchNorm layer.
            if self.use_batchnorm and idx != self.hidden_layer_num+1:
                grads['gamma' + str(idx)] = self.layers['BatchNorm' + str(idx)].dgamma
                grads['beta' + str(idx)] = self.layers['BatchNorm' + str(idx)].dbeta

        return grads

训练类的实现

import os
import sys

# Resolve this file's location as an absolute, forward-slash path. Using
# abspath/dirname instead of slicing at rfind('/') avoids chopping the last
# character off the name when __file__ carries no directory component.
file_path = os.path.abspath(__file__).replace('\\', '/')
dir_path = os.path.dirname(file_path)  # folder that contains this file
pardir_path = os.path.dirname(dir_path)  # parent of that folder
# Make the parent folder importable (presumably where the `optimizer`
# package imported below lives); skip it when it is already registered.
if pardir_path not in sys.path:
    sys.path.append(pardir_path)

import numpy as np
from optimizer.optimizer import *
import pickle, shelve
import os
import matplotlib.pyplot as plt

class Trainer:
    """Train a network with mini-batches and record loss and accuracy.

    Parameters
    ----------
    network : object providing gradient(), loss(), accuracy() and a params
        dictionary
    x_train, t_train, x_test, t_test : training / test inputs and labels
    epochs : number of passes over the training set
    mini_batch_size : number of samples drawn for each training step
    optimizer : optimizer name, case-insensitive ('SGD', 'Momentum',
        'Nesterov', 'AdaGrad', 'RMSprop', 'AdaDelta', 'Adam')
    optimizer_param : keyword arguments for the optimizer; None means
        {'lr': 0.01}
    save_model_flag : pickle the network to pkl_file_name whenever the loss
        on the evaluation samples of the test set improves
    pkl_file_name : where to save the network; nothing is saved when None
    plot_flag : draw the accuracy / loss curves once training has finished
    fig_name : where to save the figure; the figure is not saved when None
    evaluate_sample_num_per_epoch : number of samples (taken from the start
        of the training and test sets) used for the accuracy evaluation at
        the end of each epoch; None (default) evaluates on all samples
    verbose : print progress information
    """
    def __init__(self, network, x_train, t_train, x_test, t_test,
                 epochs=20, mini_batch_size=100, optimizer='SGD', optimizer_param=None,
                 save_model_flag=True, pkl_file_name=None, plot_flag=True, fig_name=None,
                 evaluate_sample_num_per_epoch=None, verbose=True):
        self.network = network
        self.verbose = verbose
        self.x_train = x_train
        self.t_train = t_train
        self.x_test = x_test
        self.t_test = t_test
        self.epochs = epochs
        self.save_model_flag = save_model_flag
        self.pkl_file_name = pkl_file_name
        self.plot_flag = plot_flag
        self.fig_name = fig_name
        self.best_loss = 1e10  # lowest loss seen so far on the test-set evaluation samples
        self.batch_size = mini_batch_size
        self.evaluate_sample_num_per_epoch = evaluate_sample_num_per_epoch

        # optimizer
        # 'rmsprpo' is the original misspelled key; it is kept so that
        # existing callers keep working alongside the correct 'rmsprop'.
        optimizer_class_dict = {'sgd': SGD, 'momentum': Momentum, 'nesterov': Nesterov,
                                'adagrad': AdaGrad, 'rmsprop': RMSprop, 'rmsprpo': RMSprop,
                                'adadelta': AdaDelta, 'adam': Adam}
        if optimizer_param is None:
            # Created per instance so no dictionary is shared between trainers.
            optimizer_param = {'lr': 0.01}
        self.optimizer = optimizer_class_dict[optimizer.lower()](**optimizer_param)

        self.train_size = x_train.shape[0]
        self.iter_per_epoch = max(self.train_size / mini_batch_size, 1)
        self.max_iter = int(epochs * self.iter_per_epoch)
        self.current_iter = 0
        self.current_epoch = 0

        self.train_loss_list = []
        self.train_acc_list = []
        self.test_acc_list = []

    def _is_epoch_boundary(self, iteration):
        """Return True when `iteration` is the first step of a new epoch.

        Integer arithmetic is used on purpose: the float test
        `iteration % iter_per_epoch == 0` only fires reliably when the
        training-set size is an exact multiple of the batch size. For exact
        multiples both tests select the same iterations.
        """
        if iteration == 0:
            return True
        # An epoch never lasts less than one step (mirrors the max(..., 1)
        # applied to iter_per_epoch).
        samples_per_epoch = max(self.train_size, self.batch_size)
        epochs_now = (iteration * self.batch_size) // samples_per_epoch
        epochs_before = ((iteration - 1) * self.batch_size) // samples_per_epoch
        return epochs_now > epochs_before

    def train_step(self):
        """Run one optimisation step; evaluate accuracy at epoch boundaries."""
        batch_mask = np.random.choice(self.train_size, self.batch_size)
        x_batch = self.x_train[batch_mask]
        t_batch = self.t_train[batch_mask]

        grads = self.network.gradient(x_batch, t_batch)
        self.optimizer.update(self.network.params, grads)

        # Loss on the same batch, measured after the parameter update.
        loss = self.network.loss(x_batch, t_batch)
        self.train_loss_list.append(loss)
        if self.verbose:
            print("train loss:" + str(loss))

        is_last_iter = self.current_iter == self.max_iter - 1
        if self._is_epoch_boundary(self.current_iter) or is_last_iter:
            x_train_sample, t_train_sample = self.x_train, self.t_train
            x_test_sample, t_test_sample = self.x_test, self.t_test
            if self.evaluate_sample_num_per_epoch is not None:
                t = self.evaluate_sample_num_per_epoch
                x_train_sample, t_train_sample = self.x_train[:t], self.t_train[:t]
                x_test_sample, t_test_sample = self.x_test[:t], self.t_test[:t]

            train_acc = self.network.accuracy(x_train_sample, t_train_sample)
            test_acc = self.network.accuracy(x_test_sample, t_test_sample)
            self.train_acc_list.append(train_acc)
            self.test_acc_list.append(test_acc)

            if self.verbose:
                print("=== epoch:" + str(self.current_epoch) + ", train acc:" + str(train_acc) + ", test acc:" + str(test_acc) + " ===")

            # Without a target path there is nowhere to save to; open(None)
            # would raise TypeError.
            can_save = self.save_model_flag and self.pkl_file_name is not None
            if can_save and self.current_epoch > 0:
                current_loss = self.network.loss(x_test_sample, t_test_sample)
                if self.best_loss > current_loss:
                    self.best_loss = current_loss
                    with open(self.pkl_file_name, 'wb') as f:
                        pickle.dump(self.network, f)
                    if self.verbose:
                        print('net params saved!')

            self.current_epoch += 1

        self.current_iter += 1

    def plot_acc_loss_list(self):
        """Plot accuracy per evaluation and loss per iteration side by side.

        The figure is written to self.fig_name; saving is skipped when no
        file name was given.
        """
        fig, axes = plt.subplots(1, 2)

        x = np.arange(len(self.train_acc_list))
        axes[0].plot(x, self.train_acc_list, 'r', label='train acc')
        axes[0].plot(x, self.test_acc_list, 'g--', label='test acc')

        axes[0].set_xlabel("epochs")
        axes[0].set_ylabel("accuracy")
        axes[0].set_ylim(0, 1.0)
        axes[0].legend(loc='best')

        x = np.arange(len(self.train_loss_list))
        axes[1].plot(x, self.train_loss_list, 'r', label='train loss')
        axes[1].set_xlabel("iters")
        axes[1].set_ylabel("loss")
        axes[1].legend(loc='best')

        if self.fig_name is not None:
            plt.savefig(self.fig_name)
            print('fig {0} saved!'.format(self.fig_name))

    def train(self):
        """Run all training steps, then report the final test accuracy."""
        for i in range(self.max_iter):
            self.train_step()

        test_acc = self.network.accuracy(self.x_test, self.t_test)

        if self.verbose:
            print("=============== Final Test Accuracy ===============")
            print("test acc:" + str(test_acc))

        if self.plot_flag:
            self.plot_acc_loss_list()

MNIST 数据集

数据集简介

这里使用的数据集是 MNIST 手写数字图像集。MNIST 是机器学习领域最有名的数据集之一，被应用于从简单的实验到发表的论文研究等各种场合。MNIST 数据集是由 0 到 9 的数字图像构成的。训练图像有 6 万张，测试图像有 1 万张。
MNIST 的图像数据是 $28$ 像素 $\times$ $28$ 像素的灰度图像（1 通道），各个像素的取值在 0 到 255 之间。每个图像数据都相应地标有 “7” “2” “1” 等标签。数据集中的每张图片都事先经过了大小归一化和居中处理，因此需注意用该数据集训练出的网络在预测手写数字时图片也须经过大小归一化和居中处理

数据集下载及预处理

数据集下载地址：http://yann.lecun.com/exdb/mnist/ 。需要下载其中的 4 个文件：训练图像、训练标签、测试图像、测试标签。
将下载好的文件与下面读取数据集的代码放在同一个文件夹下即可（文件名需与代码中 key_file 里的设置保持一致）

import gzip
import pickle
import numpy as np
import os
from PIL import Image

IMG_SIZE = 784  # 28 * 28 pixels per image

# Archive names expected next to this file.
# NOTE(review): the image archives carry a doubled '.gz.gz' suffix while the
# label archives use a single '.gz' — confirm these match the files on disk.
key_file = {
    'train_img':'train-images-idx3-ubyte.gz.gz',
    'train_label':'train-labels-idx1-ubyte.gz',
    'test_img':'t10k-images-idx3-ubyte.gz.gz',
    'test_label':'t10k-labels-idx1-ubyte.gz'
}

# Absolute, forward-slash location of this file. abspath/dirname replaces the
# rfind('/') slicing, which drops the last character of the name when
# __file__ has no directory component.
file_path = os.path.abspath(__file__).replace('\\', '/')
dataset_path = os.path.dirname(file_path)  # folder that contains this file
save_file = dataset_path + "/mnist.pkl"  # cache holding the decoded arrays

def _load_label(file_name):
    """Read a gzip-compressed label file from dataset_path.

    Returns the file content after the first 8 bytes as a uint8 array
    (presumably those 8 bytes are the file header — confirm against the
    data format).
    """
    label_file = dataset_path + '/' + file_name

    with gzip.open(label_file, 'rb') as stream:
        raw = stream.read()
    labels = np.frombuffer(raw, np.uint8, offset=8)

    print(file_name, "loaded")
    return labels

def _load_img(file_name):
    """Read a gzip-compressed image file from dataset_path.

    Returns the file content after the first 16 bytes as a uint8 array with
    one row of IMG_SIZE values per image (presumably those 16 bytes are the
    file header — confirm against the data format).
    """
    image_file = dataset_path + '/' + file_name

    with gzip.open(image_file, 'rb') as stream:
        raw = stream.read()
    pixels = np.frombuffer(raw, np.uint8, offset=16)
    images = pixels.reshape(-1, IMG_SIZE)

    print(file_name, "loaded")
    return images

def _convert_numpy():
    """Load the four archives listed in key_file into a dictionary of arrays.

    The keys are 'train_img', 'train_label', 'test_img' and 'test_label',
    loaded in that order.
    """
    loaders = (
        ('train_img', _load_img),
        ('train_label', _load_label),
        ('test_img', _load_img),
        ('test_label', _load_label),
    )
    return {name: loader(key_file[name]) for name, loader in loaders}

def init_mnist():
    """Decode the archives and cache the resulting arrays in save_file."""
    arrays = _convert_numpy()
    with open(save_file, 'wb') as cache:
        # -1 selects the highest pickle protocol available.
        pickle.dump(arrays, cache, protocol=-1)
    print("Done!")

def _change_one_hot_label(x):
    t = np.zeros((x.size, 10))
    for idx, row in enumerate(t):
        row[x[idx]] = 1
        
    return t

def shuffle_dataset(x, t):
    """Shuffle samples and labels with one shared random permutation.

    Parameters
    ----------
    x : array of samples; the first axis indexes the samples, any number of
        further axes is allowed
    t : array of labels with the same length along the first axis

    Returns
    -------
    (x, t) reordered along the first axis; every sample stays paired with
    its label.
    """
    permutation = np.random.permutation(x.shape[0])
    # Indexing the first axis alone works for every rank, not only 2-D/4-D.
    return x[permutation], t[permutation]

def load_mnist(normalize=True, flatten=False, one_hot_label=True, shuffle_data=True):
    """Load the MNIST dataset, building the pickle cache on first use.

    Parameters
    ----------
    normalize : scale the pixel values of the images to 0.0 ~ 1.0
    flatten : keep every image as a one-dimensional array; when False the
        images are reshaped to (-1, 1, 28, 28)
    one_hot_label :
        when True the labels are returned as one-hot arrays,
        i.e. arrays such as [0,0,1,0,0,0,0,0,0,0]
    shuffle_data : shuffle the training set (the test set is left as is)

    Returns
    -------
    (training images, training labels), (test images, test labels)
    """
    if not os.path.exists(save_file):
        init_mnist()

    with open(save_file, 'rb') as cache:
        data = pickle.load(cache)

    image_keys = ('train_img', 'test_img')
    label_keys = ('train_label', 'test_label')

    if normalize:
        for name in image_keys:
            scaled = data[name].astype(np.float32)
            scaled /= 255.0
            data[name] = scaled

    if one_hot_label:
        for name in label_keys:
            data[name] = _change_one_hot_label(data[name])

    if not flatten:
        for name in image_keys:
            data[name] = data[name].reshape(-1, 1, 28, 28)

    if shuffle_data:
        shuffled = shuffle_dataset(data['train_img'], data['train_label'])
        data['train_img'], data['train_label'] = shuffled

    train_set = (data['train_img'], data['train_label'])
    test_set = (data['test_img'], data['test_label'])
    return train_set, test_set

def img_show(img):
    """Display an image array with PIL.

    Accepts (H, W) and (H, W, C) arrays as before, and additionally a single
    channel-first image of shape (1, H, W) such as one sample returned by
    load_mnist(flatten=False).
    """
    pixels = np.uint8(img)
    # Image.fromarray reads a 3-D array as (H, W, C) with at most 4 channels,
    # so (1, 28, 28) is rejected. Drop the leading channel axis in that case;
    # arrays PIL can already interpret are left untouched.
    if pixels.ndim == 3 and pixels.shape[0] == 1 and pixels.shape[2] > 4:
        pixels = pixels[0]
    pil_img = Image.fromarray(pixels)
    pil_img.show()

if __name__ == '__main__':
    # Demo: load the raw (non-normalized) data, print the array shapes and
    # the first training label, then display the first training image.
    train_set, test_set = load_mnist(normalize=False)
    (x_train, t_train), (x_test, t_test) = train_set, test_set
    print(x_train.shape, t_train.shape, x_test.shape, t_test.shape)
    img, label = x_train[0], t_train[0]
    print(label)
    img_show(img)

第一次运行的代码输出(第一次运行会将压缩文件中的内容转换成numpy的ndarray类型后存储到.pkl文件中，之后运行就只需要读取.pkl文件即可)：

train-images-idx3-ubyte.gz.gz loaded
train-labels-idx1-ubyte.gz loaded
t10k-images-idx3-ubyte.gz.gz loaded
t10k-labels-idx1-ubyte.gz loaded
Done!
(60000, 1, 28, 28) (60000, 10) (10000, 1, 28, 28) (10000, 10)

图像可视化使用 PIL（Python Imaging Library）模块：

from PIL import Image

def img_show(img):
    """Display an image array with PIL.

    Accepts (H, W) and (H, W, C) arrays as before, and additionally a single
    channel-first image of shape (1, H, W) such as one sample returned by
    load_mnist(flatten=False).
    """
    pixels = np.uint8(img)
    # Image.fromarray reads a 3-D array as (H, W, C) with at most 4 channels,
    # so (1, 28, 28) is rejected. Drop the leading channel axis in that case;
    # arrays PIL can already interpret are left untouched.
    if pixels.ndim == 3 and pixels.shape[0] == 1 and pixels.shape[2] > 4:
        pixels = pixels[0]
    pil_img = Image.fromarray(pixels)
    pil_img.show()

if __name__ == '__main__':
    # Demo: load the raw (non-normalized) data, print the array shapes and
    # the first training label, then display the first training image.
    train_set, test_set = load_mnist(normalize=False)
    (x_train, t_train), (x_test, t_test) = train_set, test_set
    print(x_train.shape, t_train.shape, x_test.shape, t_test.shape)
    img, label = x_train[0], t_train[0]
    print(label)
    img_show(img)

图像输出：

训练神经网络并进行预测

if __name__ == '__main__':
    from dataset.mnist import load_mnist
    from trainer.trainer import Trainer

    # Normalized, flattened images with one-hot labels; training set shuffled.
    (x_train, t_train), (x_test, t_test) = load_mnist(
        normalize=True, flatten=True, one_hot_label=True, shuffle_data=True)

    # Settings
    train_flag = 1  # truthy: train the network; falsy: only report accuracy
    gradcheck_flag = 0  # 1: run a gradient check on the network first

    pkl_file_name = dir_path + '/multi_layer_net.pkl'
    fig_name = dir_path + '/multi_layer_net.png'

    # Seven hidden layers of 100 neurons each, with Batch Normalization.
    hidden_sizes = [100] * 7
    net = MultiLayerNet(
        784, hidden_sizes, 10,
        activation='relu', weight_init_std='relu', weight_decay_lambda=0,
        use_dropout=False, dropout_ration=0.5, use_batchnorm=True,
        pretrain_flag=False, pkl_file_name=pkl_file_name)

    trainer = Trainer(
        net, x_train, t_train, x_test, t_test,
        epochs=20, mini_batch_size=100,
        optimizer='SGD', optimizer_param={'lr':0.01},
        save_model_flag=True, pkl_file_name=pkl_file_name,
        plot_flag=True, fig_name=fig_name,
        evaluate_sample_num_per_epoch=None, verbose=True)

    if gradcheck_flag == 1:
        # To check an already trained network, call net.load_pretrain_model()
        # before this line.
        first_x = x_train[0].reshape(1,-1)
        first_t = t_train[0].reshape(1,-1)
        gradient_check(net, first_x, first_t)

    if not train_flag:
        acc = net.accuracy(x_train, t_train)
        print('accuracy:', acc)
    else:
        trainer.train()

将神经网络设置为 7 个隐藏层，每个隐藏层 100 个神经元，并且使用 Batch Norm

=============== Final Test Accuracy ===============
test acc:0.9689

在训练 20 个 epoch 后，测试精度达到了 0.9689。同时也可以看到网络出现了过拟合现象

连理o

关注

2
点赞
踩
5

收藏

觉得还不错? 一键收藏
0
评论
复制链接

分享到 QQ

分享到新浪微博

扫一扫

专栏目录