1. Experimental Objectives
Master the design principles of neural networks, become proficient in neural network training and inference methods, and be able to use the Python language to implement a three-layer fully connected neural network model for training and using a handwritten digit classifier.
1) Implement a three-layer neural network model for handwritten digit classification, building a simple yet complete neural network project. Through this experiment, understand the role of the basic modules in a neural network and the relationships between them, laying the foundation for building more complex networks later (such as style transfer).
2) Use the high-level programming language Python to implement the forward propagation and backward propagation computations of the basic units of a neural network, deepening the understanding of these units, including the fully connected layer, activation function, and loss function.
3) Use the high-level programming language Python to implement the gradient descent algorithm used for neural network training, deepening the understanding of the training process (a minimal sketch of the update rule follows this list).
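To make objective 3) concrete: the gradient descent update moves each parameter a small step against its gradient. A minimal sketch with illustrative values (the real update lives in update_param in section 3):

import numpy as np

lr = 0.001                          # learning rate
w = np.array([0.5, -0.3])           # a toy parameter vector
d_w = np.array([0.2, -0.1])         # gradient of the loss w.r.t. w
w = w - lr * d_w                    # gradient descent step: w <- w - lr * dw
print(w)                            # [ 0.4998 -0.2999]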
2. Experimental Content
Design a three-layer neural network for handwritten digit image classification. The network contains two hidden layers and one output layer: the number of input neurons is determined by the dimensionality of the input data, the number of output neurons is determined by the number of classes in the dataset, and the number of neurons in the two hidden layers can be set freely as hyperparameters. For handwritten digit classification the input is a digit image; a raw image is generally represented as a 2-D matrix (grayscale) or a 3-D matrix (color) and is reshaped into a 1-D vector before being fed into the network. The number of classes is normally fixed in advance: handwritten digits cover the 10 classes 0 through 9, so the network has 10 output neurons. A shape sketch follows.
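To make the shapes concrete, a minimal sketch (illustrative values only; the real loading code is in section 3):

import numpy as np

# A 28x28 grayscale digit flattens to a 784-dimensional input vector.
image = np.zeros((28, 28))          # stand-in for one MNIST image
x = image.reshape(1, -1)            # shape (1, 784): one sample, 784 features
print(x.shape)                      # (1, 784)
# With hidden sizes 32 and 16 and 10 classes, the layers map
# 784 -> 32 -> 16 -> 10.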
3. Experimental Code
Main script (trains the network and evaluates it on the test set; the layer implementations are in layers_1.py below):
# coding=utf-8
import numpy as np
import struct
import os
import time
import matplotlib.pyplot as plt
from layers_1 import FullyConnectedLayer, ReLULayer, SoftmaxLossLayer, SigmoidLayer

MNIST_DIR = "../mnist_data"
TRAIN_DATA = "train-images-idx3-ubyte"
TRAIN_LABEL = "train-labels-idx1-ubyte"
TEST_DATA = "t10k-images-idx3-ubyte"
TEST_LABEL = "t10k-labels-idx1-ubyte"


class MNIST_MLP(object):
    # Lowering the learning rate from 0.01 to 0.001 improved the results.
    def __init__(self, batch_size=100, input_size=784, hidden1=32, hidden2=16,
                 out_classes=10, lr=0.001, max_epoch=1, print_iter=100):
        self.batch_size = batch_size  # self is a reference to the current instance
        self.input_size = input_size
        self.hidden1 = hidden1
        self.hidden2 = hidden2
        self.out_classes = out_classes
        self.lr = lr
        self.max_epoch = max_epoch
        self.print_iter = print_iter

    def display_prediction(self, i):
        # Fetch the image and label of the i-th test sample
        image = self.test_data[i, :-1].reshape(28, 28)
        true_label = self.test_data[i, -1]
        # Run the model to get a prediction
        prob = self.forward(self.test_data[i, :-1].reshape(1, -1))
        pred_label = np.argmax(prob, axis=1)[0]
        # Show the image together with the true and predicted labels
        plt.imshow(image, cmap='gray')
        plt.title(f"True Label: {int(true_label)}, Predicted Label: {pred_label}")
        plt.show()

    def load_mnist(self, file_dir, is_images=True):
        # Read binary data
        bin_file = open(file_dir, 'rb')
        bin_data = bin_file.read()
        bin_file.close()
        # Parse the file header
        if is_images:  # Read images
            fmt_header = '>iiii'
            magic, num_images, num_rows, num_cols = struct.unpack_from(fmt_header, bin_data, 0)
        else:  # Read labels
            fmt_header = '>ii'
            magic, num_images = struct.unpack_from(fmt_header, bin_data, 0)
            num_rows, num_cols = 1, 1
        data_size = num_images * num_rows * num_cols
        mat_data = struct.unpack_from('>' + str(data_size) + 'B', bin_data, struct.calcsize(fmt_header))
        mat_data = np.reshape(mat_data, [num_images, num_rows * num_cols])
        print('Load images from %s, number: %d, data shape: %s' % (file_dir, num_images, str(mat_data.shape)))
        return mat_data

    def load_data(self):
        # Call load_mnist to read and preprocess the MNIST training and test images and labels
        print('Loading MNIST data from files...')
        train_images = self.load_mnist(os.path.join(MNIST_DIR, TRAIN_DATA), True)
        train_labels = self.load_mnist(os.path.join(MNIST_DIR, TRAIN_LABEL), False)
        test_images = self.load_mnist(os.path.join(MNIST_DIR, TEST_DATA), True)
        test_labels = self.load_mnist(os.path.join(MNIST_DIR, TEST_LABEL), False)
        # Labels are already shaped [num_images, 1], so append them as the last column (axis 1)
        self.train_data = np.append(train_images, train_labels, axis=1)
        self.test_data = np.append(test_images, test_labels, axis=1)

    def shuffle_data(self):
        print('Randomly shuffle MNIST data...')
        np.random.shuffle(self.train_data)  # shuffle the training data in place

    def build_model(self):
        # Build the three-layer network structure
        print('Building multi-layer perception model...')
        self.fc1 = FullyConnectedLayer(self.input_size, self.hidden1)
        # ReLULayer also works here; this experiment uses sigmoid activations
        self.sig1 = SigmoidLayer()
        self.fc2 = FullyConnectedLayer(self.hidden1, self.hidden2)
        self.sig2 = SigmoidLayer()
        self.fc3 = FullyConnectedLayer(self.hidden2, self.out_classes)
        self.softmax = SoftmaxLossLayer()
        self.update_layer_list = [self.fc1, self.fc2, self.fc3]

    def init_model(self):
        print('Initializing parameters of each layer in MLP...')
        for layer in self.update_layer_list:  # every layer whose parameters need updating
            layer.init_param()  # initialize that layer's parameters

    def load_model(self, param_dir):
        print('Loading parameters from file ' + param_dir)
        # allow_pickle=True is needed to load a dict saved with np.save
        params = np.load(param_dir, allow_pickle=True).item()
        self.fc1.load_param(params['w1'], params['b1'])
        self.fc2.load_param(params['w2'], params['b2'])
        self.fc3.load_param(params['w3'], params['b3'])

    def save_model(self, param_dir):
        print('Saving parameters to file ' + param_dir)
        params = {}
        params['w1'], params['b1'] = self.fc1.save_param()
        params['w2'], params['b2'] = self.fc2.save_param()
        params['w3'], params['b3'] = self.fc3.save_param()
        np.save(param_dir, params)

    def forward(self, input):
        # Forward propagation through the network
        h1 = self.fc1.forward(input)
        h1 = self.sig1.forward(h1)
        h2 = self.fc2.forward(h1)
        h2 = self.sig2.forward(h2)
        h3 = self.fc3.forward(h2)
        prob = self.softmax.forward(h3)
        return prob

    def backward(self):
        # Backward propagation through the network, in reverse layer order
        dloss = self.softmax.backward()
        dh3 = self.fc3.backward(dloss)
        dh2 = self.sig2.backward(dh3)
        dh1 = self.fc2.backward(dh2)
        dh1 = self.sig1.backward(dh1)
        dh1 = self.fc1.backward(dh1)

    def update(self, lr):
        for layer in self.update_layer_list:
            layer.update_param(lr)

    def train(self):
        max_batch = self.train_data.shape[0] // self.batch_size
        print('Start training...')
        for idx_epoch in range(self.max_epoch):
            self.shuffle_data()
            for idx_batch in range(max_batch):
                batch_images = self.train_data[idx_batch*self.batch_size:(idx_batch+1)*self.batch_size, :-1]
                batch_labels = self.train_data[idx_batch*self.batch_size:(idx_batch+1)*self.batch_size, -1]
                prob = self.forward(batch_images)
                loss = self.softmax.get_loss(batch_labels)
                self.backward()
                self.update(self.lr)
                if idx_batch % self.print_iter == 0:
                    print('Epoch %d, iter %d, loss: %.6f' % (idx_epoch, idx_batch, loss))

    def evaluate(self):
        pred_results = np.zeros([self.test_data.shape[0]])
        # range() requires an integer argument; when shape[0] is not divisible by
        # batch_size, plain division returns a float and raises TypeError,
        # so use integer floor division instead.
        for idx in range(self.test_data.shape[0] // self.batch_size):
            batch_images = self.test_data[idx * self.batch_size:(idx + 1) * self.batch_size, :-1]
            start = time.time()
            prob = self.forward(batch_images)
            end = time.time()
            print("inferencing time: %f" % (end - start))
            pred_labels = np.argmax(prob, axis=1)
            pred_results[idx * self.batch_size:(idx + 1) * self.batch_size] = pred_labels
        accuracy = np.mean(pred_results == self.test_data[:, -1])
        print('Accuracy in test set: %f' % accuracy)


def build_mnist_mlp(param_dir='weight.npy'):
    h1, h2, e = 1000, 640, 10
    mlp = MNIST_MLP(hidden1=h1, hidden2=h2, max_epoch=e)
    mlp.load_data()
    mlp.build_model()
    mlp.init_model()
    mlp.train()
    mlp.save_model('mlp-%d-%d-%depoch.npy' % (h1, h2, e))
    # mlp.load_model('mlp-%d-%d-%depoch.npy' % (h1, h2, e))
    return mlp


if __name__ == '__main__':
    mlp = build_mnist_mlp()
    mlp.evaluate()
    # Show the recognition result for the i-th digit, e.g. i = 5
    i = 5
    mlp.display_prediction(i)
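Once the weights have been saved, the trained model can be rebuilt and evaluated without retraining by calling load_model in place of train. A usage sketch based only on the methods above (the .npy filename is the one produced by the training run):

# Reload saved weights instead of retraining.
mlp = MNIST_MLP(hidden1=1000, hidden2=640, max_epoch=10)
mlp.load_data()
mlp.build_model()
mlp.init_model()
mlp.load_model('mlp-1000-640-10epoch.npy')
mlp.evaluate()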
layers_1.py (the filename must match the import in the main script):
# coding=utf-8
import numpy as np


class FullyConnectedLayer(object):
    def __init__(self, num_input, num_output):
        # Fully connected layer initialization
        self.num_input = num_input
        self.num_output = num_output
        print('\tFully connected layer with input %d, output %d.' % (self.num_input, self.num_output))

    def init_param(self, std=0.01):
        # Parameter initialization; std is the standard deviation of the weight initializer (default 0.01)
        self.weight = np.random.normal(loc=0.0, scale=std, size=(self.num_input, self.num_output))
        self.bias = np.zeros([1, self.num_output])

    def forward(self, input):
        # Forward computation: output = input @ weight + bias
        self.input = input
        self.output = np.dot(self.input, self.weight) + self.bias
        return self.output

    def backward(self, top_diff):
        # Backward computation: the forward pass computes the loss, the backward pass
        # propagates the error. Compute the parameter gradients and this layer's error.
        self.d_weight = np.dot(self.input.T, top_diff)
        self.d_bias = np.sum(top_diff, axis=0)
        bottom_diff = np.dot(top_diff, self.weight.T)
        return bottom_diff  # error signal for this layer's input

    def update_param(self, learning_rate):
        self.weight -= learning_rate * self.d_weight
        self.bias -= learning_rate * self.d_bias

    def load_param(self, weight, bias):
        # Required by MNIST_MLP.load_model; restores saved parameters
        self.weight = weight
        self.bias = bias

    def save_param(self):
        return self.weight, self.bias


class ReLULayer(object):
    # ReLU passes positive inputs through unchanged and outputs 0 elsewhere
    def __init__(self):
        print('\tReLU layer.')

    def forward(self, input):
        self.input = input
        in_mask = input <= 0
        output = input.copy()      # equivalent to np.maximum(0, input)
        output[in_mask] = 0
        return output

    def backward(self, top_diff):
        bottom_diff = top_diff.copy()
        bottom_diff[self.input <= 0] = 0  # gradient is 0 where the input was non-positive
        return bottom_diff


class SigmoidLayer(object):
    def __init__(self):
        self.output = None
        print('\tSigmoid layer.')

    def forward(self, input):
        # sigma(x) = 1 / (1 + exp(-x))
        output = 1.0 / (1.0 + np.exp(-input))
        self.output = output
        return output

    def backward(self, top_diff):
        # sigma'(x) = sigma(x) * (1 - sigma(x)), reusing the cached forward output
        sigma = self.output
        bottom_diff = top_diff * sigma * (1 - sigma)
        return bottom_diff


class SoftmaxLossLayer(object):
    def __init__(self):
        print('\tSoftmax loss layer.')

    def forward(self, input):
        # Subtract the per-row maximum before exponentiating for numerical stability
        input_max = np.max(input, axis=1, keepdims=True)
        input_exp = np.exp(input - input_max)
        self.prob = input_exp / np.sum(input_exp, axis=1, keepdims=True)
        return self.prob

    def get_loss(self, label):
        # Cross-entropy loss averaged over the batch
        self.batch_size = self.prob.shape[0]
        self.label_onehot = np.zeros_like(self.prob)
        # labels arrive as floats from the combined data matrix, so cast to int before indexing
        self.label_onehot[np.arange(self.batch_size), label.astype(int)] = 1.0
        loss = -np.sum(np.log(self.prob) * self.label_onehot) / self.batch_size
        return loss

    def backward(self):
        # For softmax combined with cross-entropy, the gradient w.r.t. the logits
        # simplifies to prob - onehot (the 1/batch_size factor of the averaged
        # loss is folded into the learning rate here)
        bottom_diff = self.prob - self.label_onehot
        return bottom_diff
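A quick way to sanity-check the backward passes above is a finite-difference gradient check on a single layer. A sketch, not part of the lab code (the indices and the 1e-6 tolerance are illustrative choices):

import numpy as np
from layers_1 import FullyConnectedLayer

# Compare the analytic weight gradient against a numerical estimate.
np.random.seed(0)
layer = FullyConnectedLayer(4, 3)
layer.init_param()
x = np.random.randn(2, 4)               # toy batch of 2 samples
top_diff = np.random.randn(2, 3)        # pretend upstream gradient

layer.forward(x)
layer.backward(top_diff)                # fills layer.d_weight

eps = 1e-5
i, j = 1, 2                             # check one arbitrary weight entry
layer.weight[i, j] += eps
loss_plus = np.sum(layer.forward(x) * top_diff)
layer.weight[i, j] -= 2 * eps
loss_minus = np.sum(layer.forward(x) * top_diff)
layer.weight[i, j] += eps               # restore the original weight

numeric = (loss_plus - loss_minus) / (2 * eps)
print(abs(numeric - layer.d_weight[i, j]) < 1e-6)  # should print True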
4. Personal Learning Notes
The main takeaway is a better understanding of object-oriented design.

class MNIST_MLP(object):   # class defines a class; it inherits from the parent class object and can add to or override the parent's behavior
    def __init__(self, batch_size=100, hidden1=32, hidden2=16, ...):   # the initializer, which assigns initial values
        self.batch_size = batch_size   # self is a reference to the current instance

Here the batch_size on the left is an instance attribute, while the batch_size on the right of = is the batch size passed in from outside. Each instance therefore gets its own independent batch_size attribute instead of sharing a single variable (a key feature of object-oriented design). Creating a new instance of the MNIST_MLP class and assigning it to the variable mlp works the same way: any parameter that is not passed explicitly keeps the default value set in __init__.
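A minimal sketch illustrating the point above: each instance keeps its own attribute values, and any omitted argument falls back to the default from __init__ (assuming MNIST_MLP from section 3 is importable):

mlp_a = MNIST_MLP(batch_size=100)
mlp_b = MNIST_MLP(batch_size=50, hidden1=64)
print(mlp_a.batch_size, mlp_b.batch_size)   # 100 50 -- independent per instance
print(mlp_b.hidden2)                        # 16 -- unset argument keeps its default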