- 本文为《深度学习入门 – 基于 Python 的理论与实现》的读书笔记
全连接多层神经网络类的实现
import sys
import os

# Resolve to an absolute path first: a relative __file__ such as "script.py"
# contains no separator, so slicing at rfind('/') == -1 would silently drop
# the last character instead of yielding a directory.
file_path = os.path.abspath(__file__).replace('\\', '/')
dir_path = os.path.dirname(file_path)  # directory containing this file
pardir_path = os.path.dirname(dir_path)  # parent of that directory
sys.path.append(pardir_path)  # lets the `func` and `layer` packages be imported
import numpy as np
from func.gradient import numerical_gradient, gradient_check
from layer.common import *
from collections import OrderedDict
import os
import pickle
class MultiLayerNet:
    """Fully connected multi-layer neural network.

    Each hidden layer is built as Affine -> [BatchNormalization] ->
    activation -> [Dropout]; a final Affine layer feeds SoftmaxWithLoss.

    Parameters
    ----------
    input_size : input width (784 for MNIST)
    hidden_size_list : number of units of each hidden layer,
        e.g. [100, 100, 100]
    output_size : output width (10 for MNIST)
    activation : 'relu' or 'sigmoid'
    weight_init_std : standard deviation of the initial weights (e.g. 0.01);
        'relu' or 'he' selects the He initial value,
        'sigmoid' or 'xavier' selects the Xavier initial value
    weight_decay_lambda : strength of the L2 weight decay
    use_dropout : insert a Dropout layer after the activation of every
        hidden layer
    dropout_ration : ratio handed to every Dropout layer (one shared value)
    use_batchnorm : insert a BatchNormalization layer between the Affine
        layer and the activation of every hidden layer
    pretrain_flag : when equal to True/1 and `pkl_file_name` names an
        existing file, `params`, `layers` and `last_layer` are restored from
        that pickle instead of being freshly initialised
    pkl_file_name : path of the pickled model, or None
    """

    def __init__(self, input_size, hidden_size_list, output_size,
                 activation='relu', weight_init_std='relu', weight_decay_lambda=0,
                 use_dropout=False, dropout_ration=0.5, use_batchnorm=False,
                 pretrain_flag=True, pkl_file_name=None):
        self.input_size = input_size
        self.output_size = output_size
        self.hidden_size_list = hidden_size_list
        self.hidden_layer_num = len(hidden_size_list)
        self.use_dropout = use_dropout
        self.weight_decay_lambda = weight_decay_lambda
        self.use_batchnorm = use_batchnorm
        self.pkl_file_name = pkl_file_name
        self.params = {}

        # os.path.exists(None) raises TypeError, so the file system is only
        # probed when a file name was actually supplied. Without this guard
        # the default arguments (pretrain_flag=True, pkl_file_name=None)
        # made the constructor crash.
        restore = (pretrain_flag == 1
                   and pkl_file_name is not None
                   and os.path.exists(pkl_file_name))
        if restore:
            self.load_pretrain_model()
        else:
            self.__init_weight(weight_init_std)
            self._build_layers(activation, dropout_ration)

    def _build_layers(self, activation, dropout_ration):
        """Create `self.layers` and `self.last_layer` from `self.params`."""
        activation_layer = {'sigmoid': Sigmoid, 'relu': Relu}
        self.layers = OrderedDict()
        for idx in range(1, self.hidden_layer_num + 1):
            suffix = str(idx)
            self.layers['Affine' + suffix] = Affine(self.params['W' + suffix],
                                                    self.params['b' + suffix])
            if self.use_batchnorm:
                width = self.hidden_size_list[idx - 1]
                self.params['gamma' + suffix] = np.ones(width)
                self.params['beta' + suffix] = np.zeros(width)
                self.layers['BatchNorm' + suffix] = BatchNormalization(
                    self.params['gamma' + suffix], self.params['beta' + suffix])
            self.layers['Activation_function' + suffix] = activation_layer[activation]()
            if self.use_dropout:
                self.layers['Dropout' + suffix] = Dropout(dropout_ration)

        # output layer
        suffix = str(self.hidden_layer_num + 1)
        self.layers['Affine' + suffix] = Affine(self.params['W' + suffix],
                                                self.params['b' + suffix])
        self.last_layer = SoftmaxWithLoss()

    def load_pretrain_model(self):
        """Copy `params`, `layers` and `last_layer` from the pickled model.

        NOTE: pickle.load can execute arbitrary code; only load files that
        this program (or another trusted source) wrote.
        """
        with open(self.pkl_file_name, 'rb') as f:
            model = pickle.load(f)
        for key in ('params', 'layers', 'last_layer'):
            setattr(self, key, getattr(model, key))
        print('params loaded!')

    def __init_weight(self, weight_init_std):
        """Fill `self.params` with initial weights 'W<i>' and biases 'b<i>'.

        Parameters
        ----------
        weight_init_std : standard deviation of the weights (e.g. 0.01);
            'relu' or 'he' selects the He initial value,
            'sigmoid' or 'xavier' selects the Xavier initial value
        """
        # list(...) so that a tuple of hidden sizes is accepted as well
        all_size_list = [self.input_size] + list(self.hidden_size_list) + [self.output_size]
        init_kind = str(weight_init_std).lower()
        for idx in range(1, len(all_size_list)):
            fan_in = all_size_list[idx - 1]
            scale = weight_init_std
            if init_kind in ('relu', 'he'):
                scale = np.sqrt(2.0 / fan_in)  # recommended with ReLU
            elif init_kind in ('sigmoid', 'xavier'):
                scale = np.sqrt(1.0 / fan_in)  # recommended with sigmoid
            self.params['W' + str(idx)] = scale * np.random.randn(fan_in, all_size_list[idx])
            self.params['b' + str(idx)] = np.zeros(all_size_list[idx])

    def predict(self, x, train_flg=False):
        """Run `x` through every layer in `self.layers` and return the result.

        Dropout and BatchNorm layers additionally receive `train_flg`.
        """
        for key, layer in self.layers.items():
            if "Dropout" in key or "BatchNorm" in key:
                x = layer.forward(x, train_flg)
            else:
                x = layer.forward(x)
        return x

    def loss(self, x, t, train_flg=False):
        """Return the loss-layer output for (x, t) plus the L2 weight decay."""
        y = self.predict(x, train_flg)
        weight_decay = 0
        # weight decay covers the hidden layers and the output layer
        for idx in range(1, self.hidden_layer_num + 2):
            W = self.params['W' + str(idx)]
            weight_decay += 0.5 * self.weight_decay_lambda * np.sum(W ** 2)
        return self.last_layer.forward(y, t) + weight_decay

    def accuracy(self, X, T):
        """Return the fraction of rows of `X` whose arg-max equals the label.

        `T` may be a 1-D array of class indices or a 2-D one-hot array.
        """
        Y = self.predict(X, train_flg=False)
        Y = np.argmax(Y, axis=1)
        if T.ndim != 1:
            T = np.argmax(T, axis=1)
        accuracy = np.sum(Y == T) / float(X.shape[0])
        return accuracy

    def numerical_gradient(self, X, T):
        """Compute the gradients by numerical differentiation.

        Returns
        -------
        dict with grads['W1'], grads['W2'], ... for the weights,
        grads['b1'], grads['b2'], ... for the biases and, with batch
        normalisation, grads['gamma<i>'] / grads['beta<i>'] for every hidden
        layer.
        """
        # The argument is ignored; presumably the module-level helper perturbs
        # the parameter array it is given in place - verify against
        # func.gradient.
        loss_W = lambda W: self.loss(X, T, train_flg=True)
        grads = {}
        for idx in range(1, self.hidden_layer_num + 2):
            suffix = str(idx)
            grads['W' + suffix] = numerical_gradient(loss_W, self.params['W' + suffix])
            grads['b' + suffix] = numerical_gradient(loss_W, self.params['b' + suffix])
            if self.use_batchnorm and idx != self.hidden_layer_num + 1:
                grads['gamma' + suffix] = numerical_gradient(loss_W, self.params['gamma' + suffix])
                grads['beta' + suffix] = numerical_gradient(loss_W, self.params['beta' + suffix])
        return grads

    def gradient(self, x, t):
        """Compute the gradients by back-propagation.

        Returns a dict with the same keys as `numerical_gradient`.
        """
        # forward
        self.loss(x, t, train_flg=True)

        # backward
        dout = 1
        dout = self.last_layer.backward(dout)
        for layer in reversed(list(self.layers.values())):
            dout = layer.backward(dout)

        # collect the per-layer gradients
        grads = {}
        for idx in range(1, self.hidden_layer_num + 2):
            suffix = str(idx)
            affine = self.layers['Affine' + suffix]
            # the weight-decay term contributes lambda * W to dW
            grads['W' + suffix] = affine.dW + self.weight_decay_lambda * self.params['W' + suffix]
            grads['b' + suffix] = affine.db
            if self.use_batchnorm and idx != self.hidden_layer_num + 1:
                grads['gamma' + suffix] = self.layers['BatchNorm' + suffix].dgamma
                grads['beta' + suffix] = self.layers['BatchNorm' + suffix].dbeta
        return grads
训练类的实现
import sys
import os

# Resolve to an absolute path first: a relative __file__ such as "trainer.py"
# contains no separator, so slicing at rfind('/') == -1 would silently drop
# the last character instead of yielding a directory.
file_path = os.path.abspath(__file__).replace('\\', '/')
dir_path = os.path.dirname(file_path)  # directory containing this file
pardir_path = os.path.dirname(dir_path)  # parent of that directory
sys.path.append(pardir_path)  # lets the `optimizer` package be imported
import numpy as np
from optimizer.optimizer import *
import pickle, shelve
import os
import matplotlib.pyplot as plt
class Trainer:
    """Runs mini-batch training of a network and records loss / accuracy.

    Parameters
    ----------
    network : object providing `gradient`, `loss`, `accuracy` and `params`
    x_train, t_train, x_test, t_test : training and test data / labels
    epochs : number of epochs to train
    mini_batch_size : number of samples drawn per training step
    optimizer : optimizer name (case-insensitive): 'sgd', 'momentum',
        'nesterov', 'adagrad', 'rmsprop', 'adadelta' or 'adam'
    optimizer_param : keyword arguments for the optimizer; defaults to
        {'lr': 0.01}
    save_model_flag : pickle the network whenever the evaluation loss
        improves (needs `pkl_file_name`)
    pkl_file_name : destination of the pickled network, or None
    plot_flag : draw the accuracy / loss curves when training ends
    fig_name : file the figure is saved to; when None the figure is shown
        instead of being saved
    evaluate_sample_num_per_epoch : number of leading samples of each split
        used for the per-epoch accuracy; None evaluates every sample
    verbose : print progress messages
    """

    def __init__(self, network, x_train, t_train, x_test, t_test,
                 epochs=20, mini_batch_size=100, optimizer='SGD', optimizer_param=None,
                 save_model_flag=True, pkl_file_name=None, plot_flag=True, fig_name=None,
                 evaluate_sample_num_per_epoch=None, verbose=True):
        # A dict literal as default would be shared between all calls.
        if optimizer_param is None:
            optimizer_param = {'lr': 0.01}

        self.network = network
        self.verbose = verbose
        self.x_train = x_train
        self.t_train = t_train
        self.x_test = x_test
        self.t_test = t_test
        self.epochs = epochs
        self.save_model_flag = save_model_flag
        self.pkl_file_name = pkl_file_name
        self.plot_flag = plot_flag
        self.fig_name = fig_name
        self.best_loss = 1e10  # lowest loss seen so far on the evaluation (test) sample
        self.batch_size = mini_batch_size
        self.evaluate_sample_num_per_epoch = evaluate_sample_num_per_epoch

        # optimizer; the misspelt 'rmsprpo' key is kept so existing callers
        # that relied on it keep working
        optimizer_class_dict = {'sgd': SGD, 'momentum': Momentum, 'nesterov': Nesterov,
                                'adagrad': AdaGrad, 'rmsprop': RMSprop, 'rmsprpo': RMSprop,
                                'adadelta': AdaDelta, 'adam': Adam}
        self.optimizer = optimizer_class_dict[optimizer.lower()](**optimizer_param)

        self.train_size = x_train.shape[0]
        # Integer division: with a float value the `%` test in train_step
        # only hits an epoch boundary when train_size is an exact multiple of
        # the batch size.
        self.iter_per_epoch = max(self.train_size // mini_batch_size, 1)
        self.max_iter = int(epochs * self.iter_per_epoch)
        self.current_iter = 0
        self.current_epoch = 0

        self.train_loss_list = []
        self.train_acc_list = []
        self.test_acc_list = []

    def train_step(self):
        """Run one optimisation step; evaluate (and maybe save) once per epoch."""
        batch_mask = np.random.choice(self.train_size, self.batch_size)
        x_batch = self.x_train[batch_mask]
        t_batch = self.t_train[batch_mask]

        grads = self.network.gradient(x_batch, t_batch)
        self.optimizer.update(self.network.params, grads)

        loss = self.network.loss(x_batch, t_batch)
        self.train_loss_list.append(loss)
        if self.verbose:
            print("train loss:" + str(loss))

        at_epoch_boundary = self.current_iter % self.iter_per_epoch == 0
        at_last_iter = self.current_iter == self.max_iter - 1
        if at_epoch_boundary or at_last_iter:
            self._evaluate_epoch()
            self.current_epoch += 1
        self.current_iter += 1

    def _evaluate_epoch(self):
        """Record train / test accuracy and save the network if it improved."""
        x_train_sample, t_train_sample = self.x_train, self.t_train
        x_test_sample, t_test_sample = self.x_test, self.t_test
        if self.evaluate_sample_num_per_epoch is not None:
            t = self.evaluate_sample_num_per_epoch
            x_train_sample, t_train_sample = self.x_train[:t], self.t_train[:t]
            x_test_sample, t_test_sample = self.x_test[:t], self.t_test[:t]

        train_acc = self.network.accuracy(x_train_sample, t_train_sample)
        test_acc = self.network.accuracy(x_test_sample, t_test_sample)
        self.train_acc_list.append(train_acc)
        self.test_acc_list.append(test_acc)
        if self.verbose:
            print("=== epoch:" + str(self.current_epoch) + ", train acc:" + str(train_acc) + ", test acc:" + str(test_acc) + " ===")

        # Without a file name there is nowhere to save to; open(None) would
        # raise TypeError.
        can_save = self.save_model_flag and self.pkl_file_name is not None
        if can_save and self.current_epoch > 0:
            current_loss = self.network.loss(x_test_sample, t_test_sample)
            if self.best_loss > current_loss:
                self.best_loss = current_loss
                with open(self.pkl_file_name, 'wb') as f:
                    pickle.dump(self.network, f)
                if self.verbose:
                    print('net params saved!')

    def plot_acc_loss_list(self):
        """Plot accuracy per evaluation and loss per iteration side by side."""
        fig, axes = plt.subplots(1, 2)

        x = np.arange(len(self.train_acc_list))
        axes[0].plot(x, self.train_acc_list, 'r', label='train acc')
        axes[0].plot(x, self.test_acc_list, 'g--', label='test acc')
        axes[0].set_xlabel("epochs")
        axes[0].set_ylabel("accuracy")
        axes[0].set_ylim(0, 1.0)
        axes[0].legend(loc='best')

        x = np.arange(len(self.train_loss_list))
        axes[1].plot(x, self.train_loss_list, 'r', label='train loss')
        axes[1].set_xlabel("iters")
        axes[1].set_ylabel("loss")
        axes[1].legend(loc='best')

        if self.fig_name is None:
            # No target file was given: show the figure rather than failing
            # inside savefig(None).
            plt.show()
        else:
            plt.savefig(self.fig_name)
            print('fig {0} saved!'.format(self.fig_name))

    def train(self):
        """Run all training steps, report the final test accuracy, then plot."""
        for _ in range(self.max_iter):
            self.train_step()

        test_acc = self.network.accuracy(self.x_test, self.t_test)
        if self.verbose:
            print("=============== Final Test Accuracy ===============")
            print("test acc:" + str(test_acc))
        if self.plot_flag:
            self.plot_acc_loss_list()
MNIST 数据集
数据集简介
- 这里使用的数据集是 MNIST 手写数字图像集。MNIST 是机器学习领域最有名的数据集之一,被应用于从简单的实验到发表的论文研究等各种场合。MNIST数据集是由 0 到 9 的数字图像构成的。训练图像有 6 万张,测试图像有 1 万张
- MNIST 的图像数据是 28 像素 × 28 像素的灰度图像(1 通道),各个像素的取值在 0 到 255 之间。每个图像数据都相应地标有 “7” “2” “1” 等标签。数据集中的每张图片都事先经过了大小归一化和居中处理,因此需注意:用该数据集训练出的网络在预测手写数字时,输入图片也须经过同样的大小归一化和居中处理
数据集下载及预处理
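- 数据集下载地址:http://yann.lecun.com/exdb/mnist/, 需要下载其中的 4 个文件:训练图像(train-images-idx3-ubyte.gz)、训练标签(train-labels-idx1-ubyte.gz)、测试图像(t10k-images-idx3-ubyte.gz)、测试标签(t10k-labels-idx1-ubyte.gz)
将下载的文件与下面读取数据集的代码放在同一个文件夹下即可(注意文件名需与代码中 key_file 里的值保持一致)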
import gzip
import pickle
import numpy as np
import os
from PIL import Image
IMG_SIZE = 784  # 28*28
# Archive file names, looked up next to this module.
# NOTE(review): the two image archives carry a doubled '.gz.gz' suffix while
# the label archives do not - confirm this matches the files on disk.
key_file = {
    'train_img':'train-images-idx3-ubyte.gz.gz',
    'train_label':'train-labels-idx1-ubyte.gz',
    'test_img':'t10k-images-idx3-ubyte.gz.gz',
    'test_label':'t10k-labels-idx1-ubyte.gz'
}
# Resolve to an absolute path first: a relative __file__ without a separator
# would make slicing at rfind('/') == -1 drop the last character instead of
# yielding a directory.
file_path = os.path.abspath(__file__).replace('\\', '/')
dataset_path = os.path.dirname(file_path)  # directory containing this file
save_file = dataset_path + "/mnist.pkl"  # cache holding the converted arrays
def _load_label(file_name):
    """Return the labels stored in a gzip archive as a uint8 array.

    `file_name` is resolved relative to `dataset_path`.
    """
    archive_path = '/'.join((dataset_path, file_name))
    with gzip.open(archive_path, 'rb') as archive:
        raw = archive.read()
    # The first 8 bytes are skipped (presumably the file header).
    labels = np.frombuffer(raw, np.uint8, offset=8)
    print(file_name, "loaded")
    return labels
def _load_img(file_name):
    """Return the images stored in a gzip archive, one flattened row each.

    `file_name` is resolved relative to `dataset_path`; the result is a
    uint8 array with IMG_SIZE columns.
    """
    archive_path = '/'.join((dataset_path, file_name))
    with gzip.open(archive_path, 'rb') as archive:
        raw = archive.read()
    # The first 16 bytes are skipped (presumably the file header).
    pixels = np.frombuffer(raw, np.uint8, offset=16)
    images = pixels.reshape(-1, IMG_SIZE)
    print(file_name, "loaded")
    return images
def _convert_numpy():
    """Load all four archives named in `key_file` into a dict of arrays."""
    # Order matters only for the progress messages printed by the loaders.
    loaders = (
        ('train_img', _load_img),
        ('train_label', _load_label),
        ('test_img', _load_img),
        ('test_label', _load_label),
    )
    return {name: loader(key_file[name]) for name, loader in loaders}
def init_mnist():
    """Convert the archives to arrays and cache them in `save_file`."""
    arrays = _convert_numpy()
    with open(save_file, 'wb') as cache:
        # a negative protocol number selects the highest available protocol
        pickle.dump(arrays, cache, -1)
    print("Done!")
def _change_one_hot_label(x):
t = np.zeros((x.size, 10))
for idx, row in enumerate(t):
row[x[idx]] = 1
return t
def shuffle_dataset(x, t):
    """Shuffle samples and labels with one shared random permutation.

    Parameters
    ----------
    x : array whose first axis indexes the samples (any number of dimensions)
    t : array of labels with the same first-axis length

    Returns
    -------
    (x, t) reordered along the first axis; row i of both still belongs together
    """
    permutation = np.random.permutation(x.shape[0])
    # Indexing the first axis only works for every rank, not just 2-D / 4-D.
    return x[permutation], t[permutation]
def load_mnist(normalize=True, flatten=False, one_hot_label=True, shuffle_data=True):
    """Load the MNIST data set.

    The pickle cache `save_file` is created through `init_mnist()` when it
    does not exist yet.

    Parameters
    ----------
    normalize : scale the pixel values to 0.0~1.0 (as float32)
    one_hot_label : return the labels as one-hot arrays such as
        [0,0,1,0,0,0,0,0,0,0]
    flatten : keep every image as a one-dimensional array; when False the
        images are reshaped to (N, 1, 28, 28)
    shuffle_data : shuffle the training set

    Returns
    -------
    (train images, train labels), (test images, test labels)
    """
    if not os.path.exists(save_file):
        init_mnist()
    with open(save_file, 'rb') as cache:
        data = pickle.load(cache)

    image_keys = ('train_img', 'test_img')
    label_keys = ('train_label', 'test_label')

    if normalize:
        for name in image_keys:
            data[name] = data[name].astype(np.float32) / 255.0

    if one_hot_label:
        for name in label_keys:
            data[name] = _change_one_hot_label(data[name])

    if not flatten:
        for name in image_keys:
            data[name] = data[name].reshape(-1, 1, 28, 28)

    if shuffle_data:
        shuffled = shuffle_dataset(data['train_img'], data['train_label'])
        data['train_img'], data['train_label'] = shuffled

    train_pair = (data['train_img'], data['train_label'])
    test_pair = (data['test_img'], data['test_label'])
    return train_pair, test_pair
def img_show(img):
    """Display an image array with PIL.

    Singleton axes are dropped first, so a (1, 28, 28) image - the shape
    obtained from `load_mnist(flatten=False)` - is shown as a 28x28 picture
    instead of being rejected by `Image.fromarray`.
    """
    pixels = np.squeeze(np.uint8(img))
    pil_img = Image.fromarray(pixels)
    pil_img.show()
if __name__ == '__main__':
    # Demo: load the raw (un-normalised) data, report the array shapes, then
    # print the label of the first training sample and display its image.
    (x_train, t_train), (x_test, t_test) = load_mnist(normalize=False)
    shapes = (x_train.shape, t_train.shape, x_test.shape, t_test.shape)
    print(*shapes)

    img, label = x_train[0], t_train[0]
    print(label)
    img_show(img)
第一次运行的代码输出(第一次运行会将压缩文件中的内容转换成numpy的ndarray类型后存储到.pkl文件中,之后运行就只需要读取.pkl文件即可):
train-images-idx3-ubyte.gz.gz loaded
train-labels-idx1-ubyte.gz loaded
t10k-images-idx3-ubyte.gz.gz loaded
t10k-labels-idx1-ubyte.gz loaded
Done!
(60000, 1, 28, 28) (60000, 10) (10000, 1, 28, 28) (10000, 10)
- 图像可视化使用PIL(Python Image Library)模块:
from PIL import Image
def img_show(img):
    """Display an image array with PIL.

    Singleton axes are dropped first, so a (1, 28, 28) image - the shape
    obtained from `load_mnist(flatten=False)` - is shown as a 28x28 picture
    instead of being rejected by `Image.fromarray`.
    """
    pixels = np.squeeze(np.uint8(img))
    pil_img = Image.fromarray(pixels)
    pil_img.show()
if __name__ == '__main__':
    # Demo: load the raw (un-normalised) data, report the array shapes, then
    # print the label of the first training sample and display its image.
    (x_train, t_train), (x_test, t_test) = load_mnist(normalize=False)
    shapes = (x_train.shape, t_train.shape, x_test.shape, t_test.shape)
    print(*shapes)

    img, label = x_train[0], t_train[0]
    print(label)
    img_show(img)
- 图像输出:
训练神经网络并进行预测
if __name__ == '__main__':
    from dataset.mnist import load_mnist
    from trainer.trainer import Trainer

    (x_train, t_train), (x_test, t_test) = load_mnist(
        normalize=True, flatten=True, one_hot_label=True, shuffle_data=True)

    # settings
    train_flag = 1  # train (truthy) or only evaluate (falsy)
    gradcheck_flag = 0  # 1 runs a gradient check on the network first
    pkl_file_name = '/'.join((dir_path, 'multi_layer_net.pkl'))
    fig_name = '/'.join((dir_path, 'multi_layer_net.png'))

    # seven hidden layers of 100 units each, with batch normalisation
    net = MultiLayerNet(
        784, [100] * 7, 10,
        activation='relu', weight_init_std='relu', weight_decay_lambda=0,
        use_dropout=False, dropout_ration=0.5, use_batchnorm=True,
        pretrain_flag=False, pkl_file_name=pkl_file_name)

    trainer = Trainer(
        net, x_train, t_train, x_test, t_test,
        epochs=20, mini_batch_size=100,
        optimizer='SGD', optimizer_param={'lr':0.01},
        save_model_flag=True, pkl_file_name=pkl_file_name,
        plot_flag=True, fig_name=fig_name,
        evaluate_sample_num_per_epoch=None, verbose=True)

    if gradcheck_flag == 1:
        # net.load_pretrain_model()
        first_x = x_train[0].reshape(1, -1)
        first_t = t_train[0].reshape(1, -1)
        gradient_check(net, first_x, first_t)

    if train_flag:
        trainer.train()
    else:
        acc = net.accuracy(x_train, t_train)
        print('accuracy:', acc)
- 将神经网络设置为 7 个隐藏层,每个隐藏层 100 个神经元,并且使用 Batch Norm
=============== Final Test Accuracy ===============
test acc:0.9689
- 在训练 20 个 epoch 后,测试精度达到了 0.9689。同时也可以看到网络出现了过拟合现象