前言
在上一章节《Numpy批次训练》中提到要聊优化器,那么本章就带来第一个优化器——Momentum。
import numpy as np
import time
from sklearn.datasets import make_moons
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import starknn#导入自己写的模块
一、数据准备
# Prepare the dataset: two interleaving half-moons for binary classification.
X, y = make_moons(n_samples = 1000, noise=0.3)  # features (N, 2) and integer labels
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)  # split train:test = 9:1
y_train = starknn.idx2onehot(y_train)  # convert integer labels to one-hot encoding
二、Momentum介绍
在介绍Momentum之前,先了解指数加权平均。假如我们有一周的天气数据,我们怎么根据一周的数据画出走势图呢?答案是,根据前一天的天气与当天的天气加权得到。需要注意的是,严格的指数加权平均是递归计算的——用的是前一天的"平均值"而不是前一天的原始温度,下面的代码为了演示做了简化。
def exponent_prediction(curr_temp, pre_temp, beta):
    """Exponentially weighted blend of a previous and a current value.

    Returns ``beta * pre_temp + (1 - beta) * curr_temp`` — the previous
    value weighted by beta, the current one by (1 - beta). Works
    element-wise on NumPy arrays as well as on scalars.
    """
    weighted_previous = beta * pre_temp
    weighted_current = (1 - beta) * curr_temp
    return weighted_previous + weighted_current
# One week of temperature readings.
week_temp = np.array([34, 32, 31, 30, 29, 18, 17])
# Shift the series left by one day and pad with 20 so both vectors keep the
# same length; any padding value would do for the demo.
# NOTE(review): this pairs each day with the NEXT day's reading rather than
# the previous one — acceptable for illustration, but not a strict EWA.
week_offset = np.append(week_temp[1:], 20)
week_prediction = exponent_prediction(week_temp, week_offset, 0.9)
week_day = np.arange(1, 8)  # day indices 1..7 for the x-axis
绘图如下。
# Plot the raw temperatures (red dots) against the smoothed prediction
# (green line) and save the figure to disk.
fig, ax = plt.subplots(figsize=(7,4))
ax.scatter(week_day, week_temp, c='r')
ax.plot(week_day, week_prediction, c='g')
ax.legend(["temperature", "prediction"])
ax.set_xlabel('day')
ax.set_ylabel('degree')
plt.savefig('exponent.png')
Momentum也是一样,只是把温度换成梯度。根据之前的梯度和当前的梯度,计算新的梯度值。
三、超参数设置
# Training hyperparameters.
batch_size = 32        # samples per mini-batch
learning_rate = 0.01   # step size for parameter updates
beta = 0.9             # momentum coefficient (exponential-average weight)
epochs = 10000         # number of training iterations

# Network architecture: (in_features, out_features, activation) per layer,
# mapping the 2-D input through hidden layers to a 2-class output.
_layer_specs = [
    (2, 25, "relu"),
    (25, 50, "relu"),
    (50, 50, "relu"),
    (50, 25, "relu"),
    (25, 2, "sigmoid"),
]
nn_cfg = [{"in_features": fan_in, "out_features": fan_out, "activation": act}
          for fan_in, fan_out, act in _layer_specs]
四、代码实现
def calc_momentum(pre_value, curr_value, beta):
    """Exponentially weighted average of the previous and current value.

    The previous value is weighted by ``beta``, the current one by
    ``(1 - beta)``; works element-wise on NumPy arrays.
    """
    return (1 - beta) * curr_value + beta * pre_value
def momentum_optimizer(curr_grads, pre_grads, beta):
    """Blend the current gradients with the previous smoothed gradients.

    ``curr_grads`` and ``pre_grads`` are dicts mapping layer names to
    gradient arrays. On the very first step ``pre_grads`` is empty and
    the current gradients are returned unchanged.
    """
    if not pre_grads:
        # First gradient step: no history yet, use the raw gradients.
        return curr_grads
    return {layer: calc_momentum(pre_grads[layer], grad, beta)
            for layer, grad in curr_grads.items()}
# Mini-batch training driven by the Momentum optimizer.
def momentum_train(X, Y, nn_cfg, epochs, learning_rate, batch_size, beta, train=True):
    """Train the network with mini-batch gradient descent plus Momentum.

    Parameters
    ----------
    X, Y : ndarray
        Training features and one-hot labels; rows are samples.
    nn_cfg : list of dict
        Per-layer configuration forwarded to the starknn helpers.
    epochs : int
        Number of iterations; each iteration processes one mini-batch.
    learning_rate : float
        Step size for parameter updates.
    batch_size : int
        Samples per mini-batch.
    beta : float
        Momentum coefficient for the exponentially weighted gradients.
    train : bool
        Forwarded to ``starknn.calc_accuracy``.

    Returns
    -------
    tuple
        ``(params, acc_history, cost_history)``.
    """
    params = starknn.init_layers(nn_cfg, 2)
    num_batch = X.shape[0] // batch_size  # number of full mini-batches
    acc_history = []
    cost_history = []
    pre_grads = {}  # previous smoothed gradients; empty before the first step
    for i in range(epochs):
        batch_idx = i % num_batch  # cycle through the batches
        # BUG FIX: the slice start must also be scaled by batch_size
        # (it was bare `offset_idx` before), otherwise batches overlap
        # and have the wrong size.
        start = batch_idx * batch_size
        X_batch = X[start: start + batch_size, :]
        Y_batch = Y[start: start + batch_size, :]
        # Forward pass
        Y_hat, memory = starknn.forward_full_layer(X_batch, params, nn_cfg)
        # Track accuracy and loss for this batch
        accuracy = starknn.calc_accuracy(Y_hat, Y_batch, train=train)
        cost = starknn.calc_cost(Y_hat, Y_batch)
        acc_history.append(accuracy)
        cost_history.append(cost)
        # Backward pass
        curr_grads = starknn.full_backward_propagation(Y_hat, Y_batch, memory, params, nn_cfg)
        grads = momentum_optimizer(curr_grads, pre_grads, beta)
        # BUG FIX: carry the smoothed gradients forward so momentum actually
        # accumulates; previously pre_grads stayed {} forever, so the
        # optimizer silently degenerated to plain SGD.
        pre_grads = grads
        # Parameter update
        params = starknn.update(params, grads, nn_cfg, learning_rate)
    return params, acc_history, cost_history
# Time the full training run.
start = time.time()
params, acc_history, cost_history = momentum_train(x_train, y_train, nn_cfg, epochs, learning_rate, batch_size, beta)
end = time.time()
print('The momentum optimizer time is {:.2f} second.'.format(end-start))
# Evaluate on the held-out test set.
y_hat, _ = starknn.forward_full_layer(x_test, params, nn_cfg)
test_accuracy = starknn.calc_accuracy(y_hat, y_test, train=False)
print('The accuracy of this test dataset is {}%.'.format(test_accuracy * 100))
The momentum optimizer time is 37.38 second.
The accuracy of this test dataset is 94.0%.
总结
Momentum优化器比《Numpy批次训练》中的SGD要好很多,很多优点我就不展开了。下一章节将会继续讨论其他优化器,敬请期待。