Layer
# coding=utf-8
import numpy as np
import struct
import os
import time
class FullyConnectedLayer(object):
    """Fully connected (affine) layer: ``output = input @ weight + bias``.

    ``weight`` has shape ``(num_input, num_output)`` and ``bias`` has shape
    ``(1, num_output)``. Parameters exist only after ``init_param`` or
    ``load_param`` has been called.
    """

    def __init__(self, num_input, num_output):
        """Record the layer dimensions and announce the layer on stdout."""
        self.num_input = num_input
        self.num_output = num_output
        print('\tFully connected layer with input %d, output %d.' % (self.num_input, self.num_output))

    def init_param(self, std=0.01):
        """Draw weights from a zero-mean normal with scale ``std``; zero the bias."""
        self.weight = np.random.normal(loc=0.0, scale=std, size=(self.num_input, self.num_output))
        self.bias = np.zeros([1, self.num_output])

    def forward(self, input):
        """Compute and return ``input @ weight + bias``.

        The input is cached on ``self.input`` because ``backward`` needs it
        to compute the weight gradient.
        """
        self.input = input
        self.output = np.matmul(input, self.weight) + self.bias
        return self.output

    def backward(self, top_diff):
        """Compute parameter gradients and return the gradient w.r.t. the input.

        ``top_diff`` is the gradient of the loss w.r.t. this layer's output.
        Stores ``d_weight`` and ``d_bias`` for a later ``update_param`` call.
        """
        self.d_weight = np.matmul(self.input.T, top_diff)
        # Summing over axis 0 drops that axis; the result still broadcasts
        # against the (1, num_output) bias in update_param.
        self.d_bias = np.sum(top_diff, axis=0)
        bottom_diff = np.matmul(top_diff, self.weight.T)
        return bottom_diff

    def update_param(self, lr):
        """Apply one plain gradient-descent step with learning rate ``lr``."""
        self.weight = self.weight - lr * self.d_weight
        self.bias = self.bias - lr * self.d_bias

    def load_param(self, weight, bias):
        """Install externally supplied parameters.

        Shapes are validated against the layer dimensions rather than against
        previously initialised parameters, so this works on a freshly
        constructed layer without a prior ``init_param`` call.

        Raises:
            ValueError: if ``weight`` is not ``(num_input, num_output)`` or
                ``bias`` is not ``(1, num_output)``.
        """
        expected_weight_shape = (self.num_input, self.num_output)
        expected_bias_shape = (1, self.num_output)
        if weight.shape != expected_weight_shape:
            raise ValueError('weight shape %s does not match expected %s'
                             % (str(weight.shape), str(expected_weight_shape)))
        if bias.shape != expected_bias_shape:
            raise ValueError('bias shape %s does not match expected %s'
                             % (str(bias.shape), str(expected_bias_shape)))
        self.weight = weight
        self.bias = bias

    def save_param(self):
        """Return the current ``(weight, bias)`` pair."""
        return self.weight, self.bias
class ReLULayer(object):
    """Element-wise rectified linear unit: negative inputs become zero."""

    def __init__(self):
        """Announce the layer on stdout; the layer has no parameters."""
        print('\tReLU layer.')

    def forward(self, input):
        """Return a copy of ``input`` with all negative entries set to zero.

        The input is cached on ``self.input`` so ``backward`` can rebuild the
        same mask.
        """
        self.input = input
        output = input.copy()
        output[input < 0] = 0
        return output

    def backward(self, top_diff):
        """Return ``top_diff`` with entries zeroed where the forward input was negative.

        Works on a copy so the caller's gradient array is never modified
        in place.
        """
        bottom_diff = top_diff.copy()
        bottom_diff[self.input < 0] = 0
        return bottom_diff
class SoftmaxLossLayer(object):
    """Softmax activation combined with a cross-entropy loss.

    Call order is ``forward`` -> ``get_loss`` -> ``backward``: ``get_loss``
    reads the probabilities stored by ``forward``, and ``backward`` reads the
    one-hot labels and batch size stored by ``get_loss``.
    """

    def __init__(self):
        """Announce the layer on stdout; the layer has no parameters."""
        print('\tSoftmax loss layer.')

    def forward(self, input):
        """Return row-wise softmax probabilities of ``input``.

        The row maximum is subtracted before exponentiating so that large
        scores do not overflow ``np.exp``.
        """
        input_max = np.max(input, axis=1, keepdims=True)
        input_exp = np.exp(input - input_max)
        self.prob = input_exp / np.sum(input_exp, axis=1, keepdims=True)
        return self.prob

    def get_loss(self, label):
        """Return the mean cross-entropy of the stored probabilities.

        ``label`` holds one integer class index per row of ``self.prob``.
        Also stores ``batch_size`` and ``label_onehot`` for ``backward``.
        """
        self.batch_size = self.prob.shape[0]
        rows = np.arange(self.batch_size)
        self.label_onehot = np.zeros_like(self.prob)
        self.label_onehot[rows, label] = 1.0
        # Take the log of the true-class probabilities only. Masking
        # log(prob) with the one-hot matrix evaluates -inf * 0 = NaN whenever
        # a non-target class probability underflows to exactly zero.
        true_class_prob = self.prob[rows, label]
        loss = -np.sum(np.log(true_class_prob)) / self.batch_size
        return loss

    def backward(self):
        """Return the loss gradient w.r.t. the softmax input.

        This is ``(prob - onehot) / batch_size``, matching the batch-mean loss
        computed in ``get_loss``.
        """
        bottom_diff = (self.prob - self.label_onehot) / self.batch_size
        return bottom_diff
cpu
# coding=utf-8
import numpy as np
import struct
import os
import time
from layers_1 import FullyConnectedLayer, ReLULayer, SoftmaxLossLayer
# Directory holding the MNIST files; joined with the file names below in
# MNIST_MLP.load_data.
MNIST_DIR = "./mnist_data"
# File names of the training images / labels and the test images / labels.
TRAIN_DATA = "train-images-idx3-ubyte"
TRAIN_LABEL = "train-labels-idx1-ubyte"
TEST_DATA = "t10k-images-idx3-ubyte"
TEST_LABEL = "t10k-labels-idx1-ubyte"
def show_matrix(mat, name):
    """Debug hook for a named matrix; currently a no-op that returns ``None``.

    The statistics print is kept disabled below, so neither argument is
    inspected.
    """
    # print(name + str(mat.shape) + ' mean %f, std %f' % (mat.mean(), mat.std()))
    return None
class MNIST_MLP(object):
    """Multi-layer perceptron for MNIST built from the hand-written layers.

    Architecture (see ``build_model``):
    ``input_size -> hidden1 -> hidden2 -> hidden3 -> out_classes`` with a ReLU
    after each hidden layer and a softmax loss on top.

    Typical use: ``load_data`` -> ``build_model`` -> ``init_model`` ->
    ``train`` -> ``evaluate``.
    """

    def __init__(self, batch_size=100, input_size=784, hidden1=32, hidden2=16, hidden3=8, out_classes=10, lr=0.01, max_epoch=2, print_iter=100):
        """Store the hyper-parameters; no data is loaded and no layer is built here."""
        self.batch_size = batch_size
        self.input_size = input_size
        self.hidden1 = hidden1
        self.hidden2 = hidden2
        self.hidden3 = hidden3
        self.out_classes = out_classes
        self.lr = lr
        self.max_epoch = max_epoch
        self.print_iter = print_iter

    def load_mnist(self, file_dir, is_images=True):
        """Read an MNIST IDX file and return its payload as a 2-D integer array.

        Args:
            file_dir: path of the file to read.
            is_images: truthy for an image file (header: magic, count, rows,
                cols); falsy for a label file (header: magic, count).

        Returns:
            Array of shape ``(count, rows * cols)`` for images or
            ``(count, 1)`` for labels.
        """
        with open(file_dir, 'rb') as bin_file:
            bin_data = bin_file.read()
        # Header fields are big-endian 32-bit integers.
        if is_images:
            fmt_header = '>iiii'
            magic, num_images, num_rows, num_cols = struct.unpack_from(fmt_header, bin_data, 0)
        else:
            fmt_header = '>ii'
            magic, num_images = struct.unpack_from(fmt_header, bin_data, 0)
            num_rows, num_cols = 1, 1
        data_size = num_images * num_rows * num_cols
        # View the payload bytes directly instead of unpacking them into a
        # huge tuple of Python ints, then widen to the default integer dtype
        # (the dtype the tuple-based conversion used to produce).
        raw = np.frombuffer(bin_data, dtype=np.uint8, count=data_size,
                            offset=struct.calcsize(fmt_header))
        mat_data = raw.astype(int).reshape(num_images, num_rows * num_cols)
        print('Load images from %s, number: %d, data shape: %s' % (file_dir, num_images, str(mat_data.shape)))
        return mat_data

    def load_data(self):
        """Load the train and test sets, appending each label as the last column."""
        print('Loading MNIST data from files...')
        train_images = self.load_mnist(os.path.join(MNIST_DIR, TRAIN_DATA), True)
        train_labels = self.load_mnist(os.path.join(MNIST_DIR, TRAIN_LABEL), False)
        test_images = self.load_mnist(os.path.join(MNIST_DIR, TEST_DATA), True)
        test_labels = self.load_mnist(os.path.join(MNIST_DIR, TEST_LABEL), False)
        self.train_data = np.append(train_images, train_labels, axis=1)
        self.test_data = np.append(test_images, test_labels, axis=1)

    def shuffle_data(self):
        """Shuffle the training rows in place (image and label stay together)."""
        print('Randomly shuffle MNIST data...')
        np.random.shuffle(self.train_data)

    def build_model(self):
        """Instantiate the four fully connected layers, the ReLUs and the loss layer."""
        print('Building multi-layer perception model...')
        self.fc1 = FullyConnectedLayer(self.input_size, self.hidden1)
        self.relu1 = ReLULayer()
        self.fc2 = FullyConnectedLayer(self.hidden1, self.hidden2)
        self.relu2 = ReLULayer()
        self.fc3 = FullyConnectedLayer(self.hidden2, self.hidden3)
        self.relu3 = ReLULayer()
        self.fc4 = FullyConnectedLayer(self.hidden3, self.out_classes)
        self.softmax = SoftmaxLossLayer()
        # Only layers with trainable parameters take part in init/update.
        self.update_layer_list = [self.fc1, self.fc2, self.fc3, self.fc4]

    def init_model(self):
        """Initialise the parameters of every trainable layer."""
        print('Initializing parameters of each layer in MLP...')
        for layer in self.update_layer_list:
            layer.init_param()

    def load_model(self, param_dir):
        """Load the weights of all four fully connected layers from ``param_dir``."""
        print('Loading parameters from file ' + param_dir)
        # NOTE(security): allow_pickle=True is needed because save_model stores
        # a dict, but unpickling can execute arbitrary code. Only load
        # parameter files from trusted sources.
        params = np.load(param_dir, allow_pickle=True).item()
        self.fc1.load_param(params['w1'], params['b1'])
        self.fc2.load_param(params['w2'], params['b2'])
        self.fc3.load_param(params['w3'], params['b3'])
        # The 'w4'/'b4' entries belong to the output layer fc4, not fc3.
        self.fc4.load_param(params['w4'], params['b4'])

    def save_model(self, param_dir):
        """Save the weights of all four fully connected layers to ``param_dir``."""
        print('Saving parameters to file ' + param_dir)
        params = {}
        params['w1'], params['b1'] = self.fc1.save_param()
        params['w2'], params['b2'] = self.fc2.save_param()
        params['w3'], params['b3'] = self.fc3.save_param()
        # Store the output layer fc4 under 'w4'/'b4' so it is not lost.
        params['w4'], params['b4'] = self.fc4.save_param()
        np.save(param_dir, params)

    def forward(self, input):
        """Run the full forward pass and return the softmax layer's output."""
        h1 = self.fc1.forward(input)
        h1 = self.relu1.forward(h1)
        h2 = self.fc2.forward(h1)
        h2 = self.relu2.forward(h2)
        h3 = self.fc3.forward(h2)
        h3 = self.relu3.forward(h3)
        h4 = self.fc4.forward(h3)
        prob = self.softmax.forward(h4)
        return prob

    def backward(self):
        """Back-propagate from the loss layer down to fc1.

        Each fully connected layer keeps its own parameter gradients, so
        nothing is returned.
        """
        dloss = self.softmax.backward()
        dh4 = self.fc4.backward(dloss)
        dh3 = self.relu3.backward(dh4)
        dh3 = self.fc3.backward(dh3)
        dh2 = self.relu2.backward(dh3)
        dh2 = self.fc2.backward(dh2)
        dh1 = self.relu1.backward(dh2)
        self.fc1.backward(dh1)

    def update(self, lr):
        """Apply one parameter update with learning rate ``lr`` to every trainable layer."""
        for layer in self.update_layer_list:
            layer.update_param(lr)

    def train(self):
        """Train for ``max_epoch`` epochs of mini-batch gradient descent.

        The data is reshuffled every epoch; a trailing partial batch (when the
        sample count is not a multiple of ``batch_size``) is skipped.
        """
        max_batch = self.train_data.shape[0] // self.batch_size
        print('Start training...')
        for idx_epoch in range(self.max_epoch):
            self.shuffle_data()
            print(idx_epoch)
            for idx_batch in range(max_batch):
                begin = idx_batch * self.batch_size
                end = begin + self.batch_size
                # The last column is the label, everything before it the pixels.
                batch_images = self.train_data[begin:end, :-1]
                batch_labels = self.train_data[begin:end, -1]
                self.forward(batch_images)
                loss = self.softmax.get_loss(batch_labels)
                self.backward()
                self.update(self.lr)
                if idx_batch % self.print_iter == 0:
                    print('Epoch %d, iter %d, loss: %.6f' % (idx_epoch, idx_batch, loss))

    def evaluate(self):
        """Predict every test sample, print timing and accuracy, return the accuracy.

        Batches are taken with stride ``batch_size`` up to the end of the data,
        so a final partial batch is evaluated as well instead of being left
        with default predictions of 0.
        """
        num_samples = self.test_data.shape[0]
        pred_results = np.zeros([num_samples])
        start_time = time.time()
        for begin in range(0, num_samples, self.batch_size):
            end = begin + self.batch_size
            batch_images = self.test_data[begin:end, :-1]
            prob = self.forward(batch_images)
            pred_results[begin:end] = np.argmax(prob, axis=1)
        print("All evaluate time: %f"%(time.time()-start_time))
        accuracy = np.mean(pred_results == self.test_data[:,-1])
        print('Accuracy in test set: %f' % accuracy)
        return accuracy
if __name__ == '__main__':
    # Hidden-layer widths and epoch count for this run; the remaining
    # MNIST_MLP hyper-parameters keep their defaults.
    width1, width2, width3 = 64, 32, 16
    epochs = 10
    # Parameter file written after training and read back before evaluation.
    param_file = 'mlp-%d-%d-%depoch.npy' % (width1, width2, epochs)

    net = MNIST_MLP(hidden1=width1, hidden2=width2, hidden3=width3, max_epoch=epochs)
    net.load_data()
    net.build_model()
    net.init_model()

    train_begin = time.time()
    net.train()
    print("All train time: %f"%(time.time()-train_begin))

    net.save_model(param_file)
    net.load_model(param_file)
    net.evaluate()