# three_layer_net.py
import sys, os
import numpy as np  # explicit import: do not rely on the star-import below re-exporting np
from common.functions import *
from common.gradient import numerical_gradient


class ThreeLayerNet:
    """Fully connected 3-layer network: input -> sigmoid -> sigmoid -> softmax.

    All parameters live in ``self.params`` under keys
    'W1', 'b1', 'W2', 'b2', 'W3', 'b3'.
    """

    def __init__(self, input_size, hidden_size1, hidden_size2, output_size,
                 weight_init_std=0.01):
        # Initialize weights with small Gaussian noise and biases with zeros.
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size1)
        self.params['b1'] = np.zeros(hidden_size1)
        self.params['W2'] = weight_init_std * np.random.randn(hidden_size1, hidden_size2)
        self.params['b2'] = np.zeros(hidden_size2)
        self.params['W3'] = weight_init_std * np.random.randn(hidden_size2, output_size)
        self.params['b3'] = np.zeros(output_size)

    def predict(self, x):
        """Forward pass: return softmax class probabilities for the batch x."""
        W1, W2, W3 = self.params['W1'], self.params['W2'], self.params['W3']
        b1, b2, b3 = self.params['b1'], self.params['b2'], self.params['b3']

        a1 = np.dot(x, W1) + b1
        z1 = sigmoid(a1)
        a2 = np.dot(z1, W2) + b2
        z2 = sigmoid(a2)
        a3 = np.dot(z2, W3) + b3
        y = softmax(a3)

        return y

    # x: input data, t: supervision (label) data
    def loss(self, x, t):
        """Cross-entropy loss of the prediction for x against labels t."""
        y = self.predict(x)
        return cross_entropy_error(y, t)

    def accuracy(self, x, t):
        """Fraction of samples whose argmax prediction equals the label.

        NOTE(review): assumes t is one-hot encoded (argmax over axis=1) —
        confirm callers load labels with one_hot_label=True.
        """
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        t = np.argmax(t, axis=1)

        accuracy = np.sum(y == t) / float(x.shape[0])
        return accuracy

    # x: input data, t: supervision data
    def numerical_gradient(self, x, t):
        """Gradients of the loss w.r.t. every parameter via numerical differentiation.

        Slow; intended only to cross-check the analytic backprop in
        ``gradient``. The calls below resolve to the module-level
        ``numerical_gradient`` imported from common.gradient, not to this
        method (method names are class attributes, not globals).
        """
        loss_W = lambda W: self.loss(x, t)

        grads = {}
        grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
        grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
        grads['W2'] = numerical_gradient(loss_W, self.params['W2'])
        grads['b2'] = numerical_gradient(loss_W, self.params['b2'])
        grads['W3'] = numerical_gradient(loss_W, self.params['W3'])
        grads['b3'] = numerical_gradient(loss_W, self.params['b3'])

        return grads

    def gradient(self, x, t):
        """Analytic gradients via backpropagation (fast version).

        y: network output, t: supervision data. Returns a dict with the
        same keys as ``self.params``.
        """
        W1, W2, W3 = self.params['W1'], self.params['W2'], self.params['W3']
        b1, b2, b3 = self.params['b1'], self.params['b2'], self.params['b3']
        grads = {}

        batch_num = x.shape[0]

        # forward
        a1 = np.dot(x, W1) + b1
        z1 = sigmoid(a1)
        a2 = np.dot(z1, W2) + b2
        z2 = sigmoid(a2)
        a3 = np.dot(z2, W3) + b3
        y = softmax(a3)

        # backward: softmax + cross-entropy combine to dL/da3 = (y - t) / batch
        dy = (y - t) / batch_num
        grads['W3'] = np.dot(z2.T, dy)
        grads['b3'] = np.sum(dy, axis=0)

        da2 = np.dot(dy, W3.T)
        dz2 = sigmoid_grad(a2) * da2
        grads['W2'] = np.dot(z1.T, dz2)
        grads['b2'] = np.sum(dz2, axis=0)

        da1 = np.dot(dz2, W2.T)
        dz1 = sigmoid_grad(a1) * da1
        grads['W1'] = np.dot(x.T, dz1)
        grads['b1'] = np.sum(dz1, axis=0)

        return grads
# train_three.py
import numpy as np
from three_layer_net import ThreeLayerNet
from dataset.mnist import load_mnist
import matplotlib.pylab as plt
import pickle

# Load MNIST as normalized images with one-hot labels
# (ThreeLayerNet.accuracy argmaxes the labels, so one-hot is required).
(x_train, t_train), (x_test, t_test) = load_mnist(one_hot_label=True, normalize=True)

train_loss_list = []
train_acc_list = []
test_acc_list = []
epoch = 0

# Hyperparameters
iters_num = 10000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.5

network = ThreeLayerNet(784, 50, 100, 10)

# Iterations per epoch. Integer division (was float `/`) so the
# `i % iter_per_epoch` test below is an exact int modulo.
iter_per_epoch = max(train_size // batch_size, 1)

for i in range(iters_num):
    # print(i)  # progress monitoring
    # Sample a random mini-batch.
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # Compute gradients.
    # grad = network.numerical_gradient(x_batch, t_batch)  # slow, for checking
    grad = network.gradient(x_batch, t_batch)  # fast backprop version

    # SGD parameter update.
    for key in ('W1', 'b1', 'W2', 'b2', 'W3', 'b3'):
        network.params[key] -= learning_rate * grad[key]

    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)

    if i % iter_per_epoch == 0:
        # One epoch has passed: record accuracy on the full train/test sets.
        epoch += 1
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print('epoch: ' + str(epoch) + ' train acc, test acc | '
              + str(train_acc) + ', ' + str(test_acc))

# Plot recognition accuracy per epoch.
x = range(len(train_acc_list))
y1 = train_acc_list
y2 = test_acc_list
plt.plot(x, y1, label='train_acc')
plt.plot(x, y2, linestyle='--', label='test_acc')
plt.xlabel('epochs')
plt.ylabel('accuracy')  # fixed typo: was 'accurancy'
plt.legend()
plt.show()

# Plot the loss curve over all iterations.
x = range(iters_num)
y = train_loss_list
plt.plot(x, y)
plt.xlabel('learning time')
plt.ylabel('value of loss function')
plt.show()

# Persist the trained weights (protocol -1 = highest pickle protocol).
file_name = 'sample_weight3.pkl'
with open(file_name, 'wb') as f:
    pickle.dump(network.params, f, -1)