Task description
The author had never done this kind of work before, so this post records the experience of optimizing a model from scratch.
The initial input is 2-dimensional and the output 1-dimensional, with a nonlinear mapping between them. (The actual task calls for a 3-dimensional input and a 1-dimensional output; the third dimension is restored in step 8 of the Process section below, i.e. the original task. Only two dimensions are used at first because one of the three is naturally discrete, and dropping it seemed (?) likely to make the problem easier to handle.) In other words, the network is fitted as a regressor I = f(T, U), and later I = f(N, T, U), where N is the layer count, T the TiO2 layer thickness, U the applied voltage, and I the photocurrent.
Later inspection also showed that the dataset is highly non-uniform; a scatter plot of it is shown below:
Initial model structure
'''The first three columns of the imported CSV file are the number of layers, the TiO2 layer thickness,
and the applied voltage; these are the inputs of the neural network.
The fourth column is the magnitude of the excited photocurrent; this is the network's output (i.e. the quantity to predict).'''
import csv
import numpy as np
import matplotlib.pyplot as plt
from keras.layers import Layer
from keras import backend as K
from keras.layers import Dense
from keras.models import Sequential
from keras.optimizers import SGD
from keras.optimizers import adam
from keras.layers import Activation
import keras
from pylab import *
from mpl_toolkits.mplot3d import Axes3D
'''Load the CSV files'''
csv_train = np.loadtxt(open("./data/2D_in_without_N/U&T&I_1L.csv","rb"),delimiter=",",skiprows=0) # returns an ndarray
print('train.csv loaded successfully!')
# The test set is set aside for now; if the model cannot even converge on the raw data, a test set is pointless anyway (doge)
csv_test = np.loadtxt(open("./data/2D_in_without_N/U&T&I_1L.csv","rb"),delimiter=",",skiprows=0) # returns an ndarray
print('test.csv loaded successfully!')
# data_shape = csv_data.shape  # dimensions of the data
# data_dim = csv_data.ndim  # rank of the ndarray
# [m, n] = data_shape  # number of rows and columns
# print("csv_data.dim = ", data_dim)
# print("csv_data.shape = ", data_shape)
# print("csv_data m ={0}, csv_data n ={1}".format(m, n))
'''Select the training and test sets'''
# Training set
X_train = csv_train[:,0:2] # [:,np.newaxis]
Y_train = csv_train[:,2]
# Test set
X_test = csv_test[:,0:2]
Y_test = csv_test[:,2]
'''Check input shapes'''
# print("==============================================================================")
# print(X_train[:, 0].shape)
# print(X_train)
# print("==============================================================================")
# print(Y_train)
# print(Y_train.shape)
'''Plot the raw data'''
# # print(Z_pred.shape)
# fig = figure(figsize=(16, 9))
# # ax = Axes3D(fig)
# ax = Axes3D(fig,auto_add_to_figure=False)
# fig.add_axes(ax)
# X = csv_train[:, 0]
# Y = csv_train[:, 1]
# Z = csv_train[:, 2]
# '''Slice the data (to reduce computation)'''
# s = slice(0, 1420, 2)
# x = X[s]
# y = Y[s]
# z = Z[s]
# z_ = Z_pred[s]
# # print(x.shape)
# # Build the mesh grid
# X, Y = np.meshgrid(X, Y)
# '''Shape check'''
# # print("==============================================================================")
# # print(X.shape)
# # print(Y.shape)
# # print(Z.shape)
# # print("==============================================================================")
# '''Plot'''
# ax.scatter(x, y, z)
# # ax.plot_surface(x, y, z_, rstride=1, cstride=1, cmap=plt.get_cmap('rainbow'))
# # Add axis labels
# ax.set_zlabel('I', fontdict={'size': 15, 'color': 'red'})
# ax.set_ylabel('U', fontdict={'size': 15, 'color': 'blue'})
# ax.set_xlabel('Thickness', fontdict={'size': 15, 'color': 'green'})
# plt.show()
'''Define the network structure'''
# Custom RBF layer
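# Each RBF unit j responds with exp(-gamma * ||x - mu_j||^2), a Gaussian bump
# centred at a trainable vector mu_j; the layer maps (batch, input_dim) to (batch, units).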
class RBFLayer(Layer):
    def __init__(self, units, gamma, **kwargs):
        super(RBFLayer, self).__init__(**kwargs)
        self.units = units
        self.gamma = K.cast_to_floatx(gamma)

    def build(self, input_shape):
        self.mu = self.add_weight(name='mu',
                                  shape=(int(input_shape[1]), self.units),
                                  initializer='uniform',
                                  trainable=True)
        super(RBFLayer, self).build(input_shape)

    def call(self, inputs):
        diff = K.expand_dims(inputs) - self.mu
        l2 = K.sum(K.pow(diff, 2), axis=1)
        res = K.exp(-1 * self.gamma * l2)
        return res

    def compute_output_shape(self, input_shape):
        return (input_shape[0], self.units)
model = Sequential()
'''First layer'''
model.add(Dense(units=128, input_dim = 2, kernel_initializer=keras.initializers.random_normal(stddev=0.01)))
model.add(keras.layers.normalization.BatchNormalization())
# model.add(Activation(keras.layers.advanced_activations.LeakyReLU(alpha=0.2)))
# Adding LeakyReLU noticeably lowers the loss
model.add(keras.layers.advanced_activations.LeakyReLU(alpha=0.2))
'''Second layer'''
model.add(Dense(units=32, input_dim = 128, kernel_initializer=keras.initializers.random_normal(stddev=0.01)))
model.add(keras.layers.normalization.BatchNormalization())
model.add(keras.layers.advanced_activations.LeakyReLU(alpha=0.2))
# model.add(Activation('tanh'))
'''Third layer'''
# model.add(RBFLayer(32, 0.5))
# Dropout severely degraded the regression results
# model.add(keras.layers.Dropout(0.5))
model.add(Dense(units=8, input_dim=32, kernel_initializer=keras.initializers.random_normal(stddev=0.01)))
# model.add(Activation('tanh'))
'''Fourth layer'''
model.add(Dense(units=1, input_dim = 8, kernel_initializer=keras.initializers.random_normal(stddev=0.01)))
model.add(keras.layers.normalization.BatchNormalization())
'''Output activation'''
# model.add(Activation('elu')) # stage one converges at <cost = 120>
# model.add(Activation('selu')) # stage one converges at <cost = 111>
# model.add(Activation('softplus')) # stage one converges at <cost = 131>
# model.add(Activation('softsign')) # stage one converges at <cost = 284>
# model.add(Activation('relu')) # stage one converges at <cost = 131>
# model.add(Activation('tanh')) # stage one converges at <cost = 281>
# model.add(Activation('sigmoid')) # stage one converges at <cost = 293>
# model.add(Activation('hard_sigmoid')) # stage one converges at <cost = 293>
# model.add(Activation('exponential')) # stage one converges at <cost = 131>
model.add(Activation('linear')) # stage one converges at <cost = 0.24>
'''Show the network structure'''
model.summary()
'''Choose an optimizer'''
Adam = keras.optimizers.Adam(lr=0.01, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=True) # stage two converges at <cost = 10> around <step = 100000>
# SGD updates very frequently, which causes severe oscillation of the cost function
sgd = SGD(lr=5) # NaN
RMSprop = keras.optimizers.RMSprop(lr=0.001, rho=0.9, epsilon=None, decay=0.0) # stage two converges at <cost = 10000> around <step = 100000>
Adagrad = keras.optimizers.Adagrad(lr=0.01, epsilon=None, decay=0.0) # stage two converges at <cost = 50000> around <step = 100000>
Adadelta = keras.optimizers.Adadelta(lr=1.0, rho=0.95, epsilon=None, decay=0.0) # stage two converges at <cost = 250> around <step = 70000>
Adamax = keras.optimizers.Adamax(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0) # stage two converges at <cost = 22> around <step = 100000>
Nadam = keras.optimizers.Nadam(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=None, schedule_decay=0.004) # stage two converges at <cost = 30> around <step = 110000>
# Set the optimizer and loss function
model.compile(optimizer=Adam, loss='mse')
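# Note: each train_on_batch call below is fed the entire training set in one go,
# i.e. full-batch gradient descent; one "step" therefore means one pass over all samples.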
'''Training'''
for step in range(50001):
    loss = model.train_on_batch(X_train, Y_train)
    # loss, accuracy = model.evaluate(X_test, Y_test, verbose=0)
    # print(model.evaluate(X_test, Y_test))
    if step % 500 == 0:
        print("The step is ", step, "......................................" + '[loss]:', loss)
        # "............." + '[cost, acc_train]:', train_cost_acc,
        # "............." + '[lost, acc_test]:', loss, accuracy)
'''Plotting with plt'''
# # 2D
# # y_pred = model.predict(X_train)
# # plt.figure(figsize=(16, 9))
# # plt.subplot(1,3,1)
# # plt.scatter(X_train[:, 0], Y_train)
# # plt.plot(X_train[:, 0], y_pred, 'r-', lw=3)
# # plt.subplot(1,3,2)
# # plt.scatter(X_train[:, 1], Y_train)
# # plt.plot(X_train[:, 1], y_pred, 'r-', lw=3)
# # plt.subplot(1,3,3)
# # plt.scatter(X_train[:, 2], Y_train)
# # plt.plot(X_train[:, 2], y_pred, 'r-', lw=3)
# # plt.show()
# 3D
# Z_pred = model.predict(X_train)
# # print(Z_pred.shape)
# fig = figure(figsize=(16, 9))
# # ax = Axes3D(fig)
# ax = Axes3D(fig,auto_add_to_figure=False)
# fig.add_axes(ax)
# X = X_train[:, 0]
# Y = X_train[:, 1]
# Z = csv_train[:, 2]
# '''Slice the data (to reduce computation)'''
# s = slice(0, 1420, 2)
# x = X[s]
# y = Y[s]
# z = Z[s]
# z_ = Z_pred[s]
# # print(x.shape)
# # Build the mesh grid
# X, Y = np.meshgrid(X, Y)
# '''Shape check'''
# # print("==============================================================================")
# # print(X.shape)
# # print(Y.shape)
# # print(Z.shape)
# # print("==============================================================================")
# '''Plot'''
# ax.scatter(x, y, z)
# # ax.plot_surface(x, y, z_, rstride=1, cstride=1, cmap=plt.get_cmap('rainbow'))
# # Add axis labels
# ax.set_zlabel('I', fontdict={'size': 15, 'color': 'red'})
# ax.set_ylabel('U', fontdict={'size': 15, 'color': 'blue'})
# ax.set_xlabel('Thickness', fontdict={'size': 15, 'color': 'green'})
# plt.show()
# ax.set_zlabel('I', fontdict={'size': 15, 'color': 'red'})
# ax.set_ylabel('Thickness', fontdict={'size': 15, 'color': 'blue'})
# ax.set_xlabel('layer', fontdict={'size': 15, 'color': 'green'})
# show()
'''Save the model'''
save_path = r'D:\anaconda\project\Spectral_analysis\model_save\3D\U&T&I_1L_weight.h5'
model.save_weights(save_path)
# model.save(save_path) # creates the HDF5 file 'my_model.h5'
# model = load_model('my_model.h5')
# '''Model visualization (Graphviz)'''
# from keras.utils import plot_model
# plot_model(model, to_file='model.png')
Process
Around step = 100000:
1. Added Dropout
The loss rose sharply.
2. Replaced LeakyReLU with plain ReLU
No noticeable effect.
3. Added an RBFLayer
No noticeable effect.
4. Added more layers
No noticeable effect.
5. Switched optimizers
(1) RMSprop
The loss fluctuates a lot, but its overall trend converges, settling around 30 by step 100000.
Having tried the whole brute-force list, the model had clearly hit a bottleneck, and presumably only a structural change could make further progress. (Speaking here as my future self, let me grumble at the me of two days ago: why didn't you take a proper look at Adagrad... it really is a great fit QAQ)
6. Deepened the network and increased the parameter count
From:
To:
It did not actually converge...
7. Added tanh activation layers to the first few layers
After 130k steps the model converged to loss = 89; convergence slowed considerably, but whether it would end up stuck in a local optimum as before was not yet clear.
After 200k steps it converged to loss = 30 and seemed to have room left, so a third run of 100k steps was started.
After 250k steps the model converged to loss = 11.
After two further runs of 500k steps, it is essentially confirmed to converge to about 8.9.
8. Consulting the literature showed that the Adagrad optimizer tends to perform well on unevenly distributed data, since it accumulates each parameter's squared gradients and scales its updates by their inverse square root, so rarely updated parameters keep relatively large effective learning rates.
The optimizer was therefore changed, and the network was rebuilt with a simpler structure: a multilayer perceptron with BN layers and LeakyReLU activations. The full code follows (the input is now three-dimensional, i.e. the complete regression model):
'''The first three columns of the imported CSV file are the number of layers, the TiO2 layer thickness,
and the applied voltage; these are the inputs of the neural network.
The fourth column is the magnitude of the excited photocurrent; this is the network's output (i.e. the quantity to predict).'''
import csv
import numpy as np
import matplotlib.pyplot as plt
from keras.layers import Layer
from keras import backend as K
from keras.layers import Dense
from keras.models import Sequential
from keras.optimizers import SGD
from keras.optimizers import adam
from keras.layers import Activation
import keras
from pylab import *
from mpl_toolkits.mplot3d import Axes3D
# Path for saving the model
path = './model_save/4D/'
'''Load the CSV files'''
csv_train = np.loadtxt(open("./data/mess/train1.csv","rb"),delimiter=",",skiprows=0) # returns an ndarray
print('train.csv loaded successfully!')
csv_test = np.loadtxt(open("./data/mess/test.csv","rb"),delimiter=",",skiprows=0) # returns an ndarray
print('test.csv loaded successfully!')
# data_shape = csv_data.shape  # dimensions of the data
# data_dim = csv_data.ndim  # rank of the ndarray
# [m, n] = data_shape  # number of rows and columns
# print("csv_data.dim = ", data_dim)
# print("csv_data.shape = ", data_shape)
# print("csv_data m ={0}, csv_data n ={1}".format(m, n))
'''Select the training and test sets'''
# Training set
X_train = csv_train[:,0:3] # [:,np.newaxis]
Y_train = csv_train[:,3]
# Test set
X_test = csv_test[:,0:3]
Y_test = csv_test[:,3]
'''Check input shapes'''
# print("==============================================================================")
# print(X_train[:, 0].shape)
# print(X_train)
# print("==============================================================================")
# print(Y_train)
# print(Y_train.shape)
'''Plot the raw data'''
# plt.scatter(X_train,Y_train, c='r', marker='x')
# plt.xlabel('X_train')
# plt.ylabel('Y_train')
# plt.show()
'''Define the network structure'''
# Custom RBF layer (kept for reference but disabled by the string quotes below)
'''
class RBFLayer(Layer):
    def __init__(self, units, gamma, **kwargs):
        super(RBFLayer, self).__init__(**kwargs)
        self.units = units
        self.gamma = K.cast_to_floatx(gamma)

    def build(self, input_shape):
        self.mu = self.add_weight(name='mu',
                                  shape=(int(input_shape[1]), self.units),
                                  initializer='uniform',
                                  trainable=True)
        super(RBFLayer, self).build(input_shape)

    def call(self, inputs):
        diff = K.expand_dims(inputs) - self.mu
        l2 = K.sum(K.pow(diff, 2), axis=1)
        res = K.exp(-1 * self.gamma * l2)
        return res

    def compute_output_shape(self, input_shape):
        return (input_shape[0], self.units)
'''
model = Sequential()
'''First layer'''
model.add(Dense(units=128, input_dim = 3))
model.add(keras.layers.normalization.BatchNormalization())
model.add(Activation(keras.layers.advanced_activations.LeakyReLU(alpha=0.2)))
'''Second layer'''
model.add(Dense(units=32, input_dim = 128))
model.add(keras.layers.normalization.BatchNormalization())
model.add(Activation(keras.layers.advanced_activations.LeakyReLU(alpha=0.2)))
# model.add(Activation('tanh'))
'''Third layer'''
# model.add(RBFLayer(20, 0.5))
# model.add(keras.layers.Dropout(0.5))
model.add(Dense(units=32, input_dim=32))
model.add(keras.layers.normalization.BatchNormalization())
model.add(Activation(keras.layers.advanced_activations.LeakyReLU(alpha=0.2)))
# model.add(Activation('tanh'))
'''Fourth layer'''
model.add(Dense(units=8, input_dim = 32, kernel_initializer=keras.initializers.random_normal(stddev=0.01)))
model.add(keras.layers.normalization.BatchNormalization())
model.add(Activation(keras.layers.advanced_activations.LeakyReLU(alpha=0.2)))
'''Fifth layer'''
model.add(Dense(units=1, input_dim = 8, kernel_initializer=keras.initializers.random_normal(stddev=0.01)))
# model.add(keras.layers.normalization.BatchNormalization())
# model.add(Activation(keras.layers.advanced_activations.LeakyReLU(alpha=0.2)))
'''Output activation'''
# model.add(Activation('elu')) # stage one converges at <cost = 120>
# model.add(Activation('selu')) # stage one converges at <cost = 111>
# model.add(Activation('softplus')) # stage one converges at <cost = 131>
# model.add(Activation('softsign')) # stage one converges at <cost = 284>
# model.add(Activation('relu')) # stage one converges at <cost = 131>
# model.add(Activation('tanh')) # stage one converges at <cost = 281>
# model.add(Activation('sigmoid')) # stage one converges at <cost = 293>
# model.add(Activation('hard_sigmoid')) # stage one converges at <cost = 293>
# model.add(Activation('exponential')) # stage one converges at <cost = 131>
model.add(Activation('linear')) # stage one converges at <cost = 0.24>
'''Show the network structure'''
model.summary()
'''Choose an optimizer'''
Adam = keras.optimizers.Adam(lr=0.01, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=True) # stage two converges at <cost = 22> around <step = 100000>
# SGD updates very frequently, which causes severe oscillation of the cost function
sgd = SGD(lr=5) # NaN
RMSprop = keras.optimizers.RMSprop(lr=0.001, rho=0.9, epsilon=None, decay=0.0) # stage two converges at <cost = 10000> around <step = 100000>
Adagrad = keras.optimizers.Adagrad(lr=0.00001, epsilon=None, decay=0.0) # stage two converges at <cost = 50000> around <step = 100000>
Adadelta = keras.optimizers.Adadelta(lr=1.0, rho=0.95, epsilon=None, decay=0.0) # stage two converges at <cost = 250> around <step = 70000>
Adamax = keras.optimizers.Adamax(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0) # stage two converges at <cost = 22> around <step = 100000>
Nadam = keras.optimizers.Nadam(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=None, schedule_decay=0.004) # stage two converges at <cost = 30> around <step = 110000>
# Set the optimizer and loss function
model.compile(optimizer=Adagrad, loss='mse')
# Load previously saved weights
model.load_weights(path + 'loss=4.202981233596802.h5')
'''Training'''
for step in range(10001):
    loss_train = model.train_on_batch(X_train, Y_train)
    loss_test = model.evaluate(X_test, Y_test, verbose=0)
    # print(model.evaluate(X_test, Y_test))
    if step % 100 == 0:
        print("The step is ", step, "..............." + '[loss_train]:', loss_train)
        print("The step is ", step, "..............................................." + '[loss_test]:', loss_test)
    if loss_test < 3:
        save_path = path + 'step=' + str(step) + '&loss=' + str(loss_test) + '.h5'
        model.save_weights(save_path)
'''Save the model'''
save_path = path + 'loss=' + str(loss_test) + '.h5'
model.save_weights(save_path)
'''Plotting with plt'''
# # 2D
# # y_pred = model.predict(X_train)
# # plt.figure(figsize=(16, 9))
# # plt.subplot(1,3,1)
# # plt.scatter(X_train[:, 0], Y_train)
# # plt.plot(X_train[:, 0], y_pred, 'r-', lw=3)
# # plt.subplot(1,3,2)
# # plt.scatter(X_train[:, 1], Y_train)
# # plt.plot(X_train[:, 1], y_pred, 'r-', lw=3)
# # plt.subplot(1,3,3)
# # plt.scatter(X_train[:, 2], Y_train)
# # plt.plot(X_train[:, 2], y_pred, 'r-', lw=3)
# # plt.show()
# # 3D
# Z_pred = model.predict(X_train)
# # print(Z_pred.shape)
# fig = figure(figsize=(16, 9))
# ax = Axes3D(fig)
# X = X_train[:, 0]
# Y = X_train[:, 1]
# Z = csv_train[:, 3]
# # X, Y = np.meshgrid(X, Y)
# '''Shape check'''
# # print("==============================================================================")
# # print(X.shape)
# # print(Y.shape)
# # print(Z.shape)
# # print("==============================================================================")
# '''Slice the data'''
# s = slice(0, 2200, 8)
# x = X[s]
# y = Y[s]
# z = Z[s]
# z_ = Z_pred[s]
# # print(x.shape)
# '''Plot'''
# ax.scatter(x, y, z)
# ax.plot_surface(x, y, z_, rstride=1, cstride=1, cmap='hot')
# # Add axis labels
# ax.set_zlabel('I', fontdict={'size': 15, 'color': 'red'})
# ax.set_ylabel('Thickness', fontdict={'size': 15, 'color': 'blue'})
# ax.set_xlabel('layer', fontdict={'size': 15, 'color': 'green'})
# show()
# '''Model visualization (Graphviz)'''
# # from keras.utils import plot_model
# # plot_model(model, to_file='model.png')
With lr initially set to 0.1, the loss fell from roughly 20k to around 0.1k within 20000 steps; at that point the learning rate can be tuned repeatedly to keep driving the model toward convergence. After roughly several hundred thousand steps and several order-of-magnitude reductions of lr, the results are as follows:
The model does converge, but slowly; perhaps adding more perceptron layers could optimize it further.
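The manual schedule just described can also be written out explicitly. The sketch below is illustrative rather than the script actually used: lr_stages and steps_per_stage are made-up names and values, and it assumes model, X_train and Y_train are defined as in the code above. Recompiling swaps in a smaller Adagrad learning rate while keeping the learned weights.
# Hypothetical sketch of the staged learning-rate reduction described above
lr_stages = [0.1, 0.01, 0.001]  # one order of magnitude per stage (illustrative values)
steps_per_stage = 20000         # illustrative stage length
for lr_value in lr_stages:
    # Recompiling keeps the model weights but resets the optimizer with the new lr
    model.compile(optimizer=keras.optimizers.Adagrad(lr=lr_value), loss='mse')
    for step in range(steps_per_stage):
        loss = model.train_on_batch(X_train, Y_train)
        if step % 1000 == 0:
            print('lr =', lr_value, 'step =', step, 'loss =', loss)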
9. Keep the current block structure and add more layers
'''This is TF2 code, run on the Alibaba Cloud Tianchi Lab; it is used only for logic verification and structural analysis'''
import csv
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.layers import Layer
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Activation
from pylab import *
from mpl_toolkits.mplot3d import Axes3D
'''Set the paths'''
data_path = './Data/'
save_path = './Model_save/'
'''Load the CSV files'''
csv_train = np.loadtxt(open(data_path + "train1.csv","rb"),delimiter=",",skiprows=0) # returns an ndarray
print('train.csv loaded successfully!')
csv_test = np.loadtxt(open(data_path + "test.csv","rb"),delimiter=",",skiprows=0) # returns an ndarray
print('test.csv loaded successfully!')
# data_shape = csv_data.shape  # dimensions of the data
# data_dim = csv_data.ndim  # rank of the ndarray
# [m, n] = data_shape  # number of rows and columns
# print("csv_data.dim = ", data_dim)
# print("csv_data.shape = ", data_shape)
# print("csv_data m ={0}, csv_data n ={1}".format(m, n))
'''Select the training and test sets'''
# Training set
X_train = csv_train[:,0:3] # [:,np.newaxis]
Y_train = csv_train[:,3]
# Test set
X_test = csv_test[:,0:3]
Y_test = csv_test[:,3]
'''Shape check'''
print(X_train.shape)
print(Y_train.shape)
print(X_test.shape)
print(Y_test.shape)
'''Define the network structure'''
model = Sequential()
model.add(Dense(units=256, input_dim = 3, use_bias=True, kernel_initializer='glorot_uniform',bias_initializer='zeros'))
model.add(tf.keras.layers.BatchNormalization())
# model.add(Activation(keras.layers.advanced_activations.LeakyReLU(alpha=0.2)))
# Adding LeakyReLU noticeably lowers the loss
model.add(tf.keras.layers.LeakyReLU(alpha=0.2))
model.add(Dense(units=128, input_dim = 256, use_bias=True, kernel_initializer='glorot_uniform',bias_initializer='zeros'))
model.add(tf.keras.layers.BatchNormalization())
# model.add(Activation(keras.layers.advanced_activations.LeakyReLU(alpha=0.2)))
# Adding LeakyReLU noticeably lowers the loss
model.add(tf.keras.layers.LeakyReLU(alpha=0.2))
model.add(Dense(units=64, input_dim = 128, use_bias=True, kernel_initializer='glorot_uniform',bias_initializer='zeros'))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.LeakyReLU(alpha=0.2))
# model.add(Activation('tanh'))
model.add(Dense(units=32, input_dim = 64, use_bias=True, kernel_initializer='glorot_uniform',bias_initializer='zeros'))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.LeakyReLU(alpha=0.2))
# model.add(RBFLayer(32, 0.5))
# Dropout severely degraded the regression results
# model.add(keras.layers.Dropout(0.5))
model.add(Dense(units=16, input_dim=32, use_bias=True, kernel_initializer='glorot_uniform',bias_initializer='zeros'))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.LeakyReLU(alpha=0.2))
# model.add(Activation('tanh'))
model.add(Dense(units=8, input_dim=16, use_bias=True, kernel_initializer='glorot_uniform',bias_initializer='zeros'))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.LeakyReLU(alpha=0.2))
model.add(Dense(units=4, input_dim=8, use_bias=True, kernel_initializer='glorot_uniform',bias_initializer='zeros'))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.LeakyReLU(alpha=0.2))
model.add(Dense(units=2, input_dim=4, use_bias=True, kernel_initializer='glorot_uniform',bias_initializer='zeros'))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.LeakyReLU(alpha=0.2))
model.add(Dense(units=1, input_dim = 2, use_bias=False, kernel_initializer='glorot_uniform',bias_initializer='zeros'))
# model.add(keras.layers.normalization.BatchNormalization())
'''Output activation'''
# model.add(Activation('elu')) # stage one converges at <cost = 120>
# model.add(Activation('selu')) # stage one converges at <cost = 111>
# model.add(Activation('softplus')) # stage one converges at <cost = 131>
# model.add(Activation('softsign')) # stage one converges at <cost = 284>
# model.add(Activation('relu')) # stage one converges at <cost = 131>
# model.add(Activation('tanh')) # stage one converges at <cost = 281>
# model.add(Activation('sigmoid')) # stage one converges at <cost = 293>
# model.add(Activation('hard_sigmoid')) # stage one converges at <cost = 293>
# model.add(Activation('exponential')) # stage one converges at <cost = 131>
# model.add(Activation('linear')) # stage one converges at <cost = 0.24>
'''Show the network structure'''
model.summary()
'''Choose an optimizer (disabled)
# SGD updates very frequently, which causes severe oscillation of the cost function
sgd = SGD(lr=5)
Adam = tf.keras.optimizers.Adam(lr=0.005, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=True) # stage two converges at <cost = 10> around <step = 100000> # NaN
RMSprop = tf.keras.optimizers.RMSprop(lr=0.001, rho=0.9, epsilon=None, decay=0.0) # stage two converges at <cost = 10000> around <step = 100000>
Adagrad = tf.keras.optimizers.Adagrad(lr=0.001, epsilon=None, decay=0.0) # stage two converges at <cost = 50000> around <step = 100000>
Adadelta = tf.keras.optimizers.Adadelta(lr=1.0, rho=0.95, epsilon=None, decay=0.0) # stage two converges at <cost = 250> around <step = 70000>
Adamax = tf.keras.optimizers.Adamax(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0) # stage two converges at <cost = 22> around <step = 100000>
Nadam = tf.keras.optimizers.Nadam(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=None, schedule_decay=0.004) # stage two converges at <cost = 30> around <step = 110000>
# Set the optimizer and loss function
model.compile(optimizer=Adagrad, loss='mse')
'''
# Load previously saved weights (disabled)
# model.load_weights(path + '&loss=11.725552.h5')
Adagrad = tf.keras.optimizers.Adagrad(lr=0.1, epsilon=None, decay=0.0)
model.compile(optimizer=Adagrad, loss='mse')
'''Training'''
for step in range(20001):
    loss_train = model.train_on_batch(X_train, Y_train)
    loss_test = model.evaluate(X_test, Y_test, verbose=0)
    # print(model.evaluate(X_test, Y_test))
    if step % 200 == 0:
        print("The step is ", step, "..............." + '[loss_train]:', loss_train)
        print("The step is ", step, "..............................................." + '[loss_test]:', loss_test)
        # "............." + '[cost, acc_train]:', train_cost_acc,
        # "............." + '[lost, acc_test]:', loss, accuracy)
    if loss_test < 10:
        break  # As an aside: it is better to sweep a few more passes before deciding to lower the learning rate, rather than breaking the moment the threshold is hit, since in complex cases this can fall into the local-optimum trap. I only break here because the structure of the data is known (see the 3D plot above and the clearly decreasing trend of the discrete fourth dimension).
# path = './model_weight' + 'step=' + str(step) + '&loss=' + str(loss_test) + '.h5'
# model.save_weights(path)
The output of model.summary() is as follows:
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
dense (Dense) (None, 256) 1024
_________________________________________________________________
batch_normalization (BatchNo (None, 256) 1024
_________________________________________________________________
leaky_re_lu (LeakyReLU) (None, 256) 0
_________________________________________________________________
dense_1 (Dense) (None, 128) 32896
_________________________________________________________________
batch_normalization_1 (Batch (None, 128) 512
_________________________________________________________________
leaky_re_lu_1 (LeakyReLU) (None, 128) 0
_________________________________________________________________
dense_2 (Dense) (None, 64) 8256
_________________________________________________________________
batch_normalization_2 (Batch (None, 64) 256
_________________________________________________________________
leaky_re_lu_2 (LeakyReLU) (None, 64) 0
_________________________________________________________________
dense_3 (Dense) (None, 32) 2080
_________________________________________________________________
batch_normalization_3 (Batch (None, 32) 128
_________________________________________________________________
leaky_re_lu_3 (LeakyReLU) (None, 32) 0
_________________________________________________________________
dense_4 (Dense) (None, 16) 528
_________________________________________________________________
batch_normalization_4 (Batch (None, 16) 64
_________________________________________________________________
leaky_re_lu_4 (LeakyReLU) (None, 16) 0
_________________________________________________________________
dense_5 (Dense) (None, 8) 136
_________________________________________________________________
batch_normalization_5 (Batch (None, 8) 32
_________________________________________________________________
leaky_re_lu_5 (LeakyReLU) (None, 8) 0
_________________________________________________________________
dense_6 (Dense) (None, 4) 36
_________________________________________________________________
batch_normalization_6 (Batch (None, 4) 16
_________________________________________________________________
leaky_re_lu_6 (LeakyReLU) (None, 4) 0
_________________________________________________________________
dense_7 (Dense) (None, 2) 10
_________________________________________________________________
batch_normalization_7 (Batch (None, 2) 8
_________________________________________________________________
leaky_re_lu_7 (LeakyReLU) (None, 2) 0
_________________________________________________________________
dense_8 (Dense) (None, 1) 2
=================================================================
Total params: 47,008
Trainable params: 45,988
Non-trainable params: 1,020
_________________________________________________________________
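As a sanity check on this table: the first Dense layer has 3 × 256 weights plus 256 biases = 1024 parameters, and each BatchNormalization layer carries 4 parameters per channel (gamma, beta, moving mean, moving variance), of which the two moving statistics are not trained. The BN layers hold 2040 parameters in total, so the non-trainable count is 2040 / 2 = 1020, matching the summary.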
First, open up a large learning rate to search for a good region.
The results are as follows:
The step is 0 ...............[loss_train]: 114611.71
The step is 0 ...............................................[loss_test]: 172391.3114601418
The step is 200 ...............[loss_train]: 83820.58
The step is 200 ...............................................[loss_test]: 203178.7780597828
The step is 400 ...............[loss_train]: 56734.734
The step is 400 ...............................................[loss_test]: 107056.16631260625
The step is 600 ...............[loss_train]: 39401.14
The step is 600 ...............................................[loss_test]: 105926.1129466516
The step is 800 ...............[loss_train]: 26855.354
The step is 800 ...............................................[loss_test]: 37229.10588156735
The step is 1000 ...............[loss_train]: 18211.143
The step is 1000 ...............................................[loss_test]: 25702.297907793964
The step is 1200 ...............[loss_train]: 12521.096
The step is 1200 ...............................................[loss_test]: 20885.482175332527
The step is 1400 ...............[loss_train]: 8289.523
The step is 1400 ...............................................[loss_test]: 20825.459784613715
The step is 1600 ...............[loss_train]: 5598.1313
The step is 1600 ...............................................[loss_test]: 7263.409276891638
The step is 1800 ...............[loss_train]: 3779.045
The step is 1800 ...............................................[loss_test]: 11323.699178059896
The step is 2000 ...............[loss_train]: 2734.4216
The step is 2000 ...............................................[loss_test]: 10499.821085611979
The step is 2200 ...............[loss_train]: 2066.2224
The step is 2200 ...............................................[loss_test]: 11231.576365152994
The step is 2400 ...............[loss_train]: 1399.5436
The step is 2400 ...............................................[loss_test]: 4327.667751736111
The step is 2600 ...............[loss_train]: 942.9171
The step is 2600 ...............................................[loss_test]: 999.5631623444734
The step is 2800 ...............[loss_train]: 690.4152
The step is 2800 ...............................................[loss_test]: 958.5449817798756
The step is 3000 ...............[loss_train]: 552.9158
The step is 3000 ...............................................[loss_test]: 1946.2712932162815
The step is 3200 ...............[loss_train]: 438.0123
The step is 3200 ...............................................[loss_test]: 322.64597264042607
The step is 3400 ...............[loss_train]: 381.48965
The step is 3400 ...............................................[loss_test]: 878.8487818682635
The step is 3600 ...............[loss_train]: 327.04526
The step is 3600 ...............................................[loss_test]: 386.65829693829573
The step is 3800 ...............[loss_train]: 286.7559
The step is 3800 ...............................................[loss_test]: 161.48108376397028
The step is 4000 ...............[loss_train]: 271.89587
The step is 4000 ...............................................[loss_test]: 94.23103749310529
The step is 4200 ...............[loss_train]: 280.47284
The step is 4200 ...............................................[loss_test]: 539.7342062702885
The step is 4400 ...............[loss_train]: 228.70996
The step is 4400 ...............................................[loss_test]: 480.16451263427734
The step is 4600 ...............[loss_train]: 210.3707
The step is 4600 ...............................................[loss_test]: 138.39223879354972
The step is 4800 ...............[loss_train]: 211.48886
The step is 4800 ...............................................[loss_test]: 1558.2459713971173
The step is 5000 ...............[loss_train]: 202.70021
The step is 5000 ...............................................[loss_test]: 610.9397009390372
The step is 5200 ...............[loss_train]: 189.43668
The step is 5200 ...............................................[loss_test]: 189.83002500180845
The step is 5400 ...............[loss_train]: 170.43565
The step is 5400 ...............................................[loss_test]: 263.44775955765334
The step is 5600 ...............[loss_train]: 167.36116
The step is 5600 ...............................................[loss_test]: 199.97994754932546
The step is 5800 ...............[loss_train]: 154.43687
The step is 5800 ...............................................[loss_test]: 114.9971363456161
The step is 6000 ...............[loss_train]: 154.35269
The step is 6000 ...............................................[loss_test]: 268.57427427503796
The step is 6200 ...............[loss_train]: 138.18042
The step is 6200 ...............................................[loss_test]: 98.96797434488933
The step is 6400 ...............[loss_train]: 132.2034
The step is 6400 ...............................................[loss_test]: 60.25527487860786
The step is 6600 ...............[loss_train]: 126.0921
The step is 6600 ...............................................[loss_test]: 230.91299975359883
The step is 6800 ...............[loss_train]: 120.14387
The step is 6800 ...............................................[loss_test]: 179.14771440294055
The step is 7000 ...............[loss_train]: 113.09017
The step is 7000 ...............................................[loss_test]: 274.64273615236635
The step is 7200 ...............[loss_train]: 106.633606
The step is 7200 ...............................................[loss_test]: 75.57680504410355
The step is 7400 ...............[loss_train]: 102.05847
The step is 7400 ...............................................[loss_test]: 164.7102032414189
The step is 7600 ...............[loss_train]: 98.3386
The step is 7600 ...............................................[loss_test]: 60.37751501577872
The step is 7800 ...............[loss_train]: 93.270706
The step is 7800 ...............................................[loss_test]: 87.26740183653655
The step is 8000 ...............[loss_train]: 90.41298
The step is 8000 ...............................................[loss_test]: 87.52722210354275
The step is 8200 ...............[loss_train]: 86.37759
The step is 8200 ...............................................[loss_test]: 37.38890177232248
This basically confirms that the model can converge to loss < 50 within 10000 steps; from here, keep lowering the learning rate step by step to close in on the optimum. The record follows.
Learning rate lowered to 0.01, with the step count set to 10000.
The results are as follows:
The step is 0 ...............[loss_train]: 86.42706
The step is 0 ...............................................[loss_test]: 176584.0962818287
The step is 200 ...............[loss_train]: 55.15376
The step is 200 ...............................................[loss_test]: 36.63536894762957
The step is 400 ...............[loss_train]: 43.339535
The step is 400 ...............................................[loss_test]: 25.461906759827226
The step is 600 ...............[loss_train]: 35.98868
The step is 600 ...............................................[loss_test]: 22.652673491725213
The step is 800 ...............[loss_train]: 30.745687
The step is 800 ...............................................[loss_test]: 21.072192280380815
The step is 1000 ...............[loss_train]: 26.83507
The step is 1000 ...............................................[loss_test]: 20.187618140821105
The step is 1200 ...............[loss_train]: 23.76442
The step is 1200 ...............................................[loss_test]: 19.8514552337152
The step is 1400 ...............[loss_train]: 21.318758
The step is 1400 ...............................................[loss_test]: 18.98354634090706
The step is 1600 ...............[loss_train]: 19.318905
The step is 1600 ...............................................[loss_test]: 18.71392482298392
The step is 1800 ...............[loss_train]: 17.67323
The step is 1800 ...............................................[loss_test]: 17.41641167799632
The step is 2000 ...............[loss_train]: 16.340149
The step is 2000 ...............................................[loss_test]: 17.108064192312735
The step is 2200 ...............[loss_train]: 15.204404
The step is 2200 ...............................................[loss_test]: 16.666790767952246
The step is 2400 ...............[loss_train]: 14.216499
The step is 2400 ...............................................[loss_test]: 16.24462084196232
The step is 2600 ...............[loss_train]: 13.379973
The step is 2600 ...............................................[loss_test]: 15.805169149681374
The step is 2800 ...............[loss_train]: 12.676864
The step is 2800 ...............................................[loss_test]: 15.627896874039262
The step is 3000 ...............[loss_train]: 12.05489
The step is 3000 ...............................................[loss_test]: 15.302822227831241
The step is 3200 ...............[loss_train]: 11.531205
The step is 3200 ...............................................[loss_test]: 14.878443188137478
The step is 3400 ...............[loss_train]: 11.06205
The step is 3400 ...............................................[loss_test]: 14.79662839130119
The step is 3600 ...............[loss_train]: 10.656072
The step is 3600 ...............................................[loss_test]: 14.529879874653286
The step is 3800 ...............[loss_train]: 10.294985
The step is 3800 ...............................................[loss_test]: 14.38862990449976
The step is 4000 ...............[loss_train]: 9.962561
The step is 4000 ...............................................[loss_test]: 14.064813642590135
The step is 4200 ...............[loss_train]: 9.668994
The step is 4200 ...............................................[loss_test]: 13.895457245685437
The step is 4400 ...............[loss_train]: 9.412963
The step is 4400 ...............................................[loss_test]: 13.285678810543484
The step is 4600 ...............[loss_train]: 9.184889
The step is 4600 ...............................................[loss_test]: 12.950146304236519
The step is 4800 ...............[loss_train]: 8.988347
The step is 4800 ...............................................[loss_test]: 12.570631839610913
The step is 5000 ...............[loss_train]: 8.812597
The step is 5000 ...............................................[loss_test]: 12.327483592209992
The step is 5200 ...............[loss_train]: 8.647274
The step is 5200 ...............................................[loss_test]: 12.095677896782204
The step is 5400 ...............[loss_train]: 8.49862
The step is 5400 ...............................................[loss_test]: 11.935940117747695
The step is 5600 ...............[loss_train]: 8.360638
The step is 5600 ...............................................[loss_test]: 11.762868885640744
The step is 5800 ...............[loss_train]: 8.252315
The step is 5800 ...............................................[loss_test]: 11.65299234787623
The step is 6000 ...............[loss_train]: 8.142995
The step is 6000 ...............................................[loss_test]: 11.606743737503335
The step is 6200 ...............[loss_train]: 8.045058
The step is 6200 ...............................................[loss_test]: 11.445402070328042
The step is 6400 ...............[loss_train]: 7.9554386
The step is 6400 ...............................................[loss_test]: 11.466319931877983
The step is 6600 ...............[loss_train]: 7.8543243
The step is 6600 ...............................................[loss_test]: 12.178227601227936
The step is 6800 ...............[loss_train]: 7.764409
The step is 6800 ...............................................[loss_test]: 11.409689333703783
The step is 7000 ...............[loss_train]: 7.699924
The step is 7000 ...............................................[loss_test]: 12.132889319349218
The step is 7200 ...............[loss_train]: 7.6150947
The step is 7200 ...............................................[loss_test]: 11.036518141075417
The step is 7400 ...............[loss_train]: 7.551992
The step is 7400 ...............................................[loss_test]: 11.125180884643838
The step is 7600 ...............[loss_train]: 7.474767
The step is 7600 ...............................................[loss_test]: 10.238509796283862
The step is 7800 ...............[loss_train]: 7.432384
The step is 7800 ...............................................[loss_test]: 10.522431135177612
The step is 8000 ...............[loss_train]: 7.380323
The step is 8000 ...............................................[loss_test]: 10.418629246729392
The step is 8200 ...............[loss_train]: 7.3203545
The step is 8200 ...............................................[loss_test]: 9.820853198016131
The step is 8400 ...............[loss_train]: 7.278713
The step is 8400 ...............................................[loss_test]: 9.517102943526375
The step is 8600 ...............[loss_train]: 7.2392435
The step is 8600 ...............................................[loss_test]: 10.838831124482331
The step is 8800 ...............[loss_train]: 7.1980114
The step is 8800 ...............................................[loss_test]: 10.582803063922459
The step is 9000 ...............[loss_train]: 7.160175
The step is 9000 ...............................................[loss_test]: 10.492271679419058
The step is 9200 ...............[loss_train]: 7.1177893
The step is 9200 ...............................................[loss_test]: 9.498841475557398
The step is 9400 ...............[loss_train]: 7.0815673
The step is 9400 ...............................................[loss_test]: 9.490555502750256
The step is 9600 ...............[loss_train]: 7.0470953
The step is 9600 ...............................................[loss_test]: 9.387842151853773
The step is 9800 ...............[loss_train]: 7.004253
The step is 9800 ...............................................[loss_test]: 9.185716176474536
The step is 10000 ...............[loss_train]: 6.983739
The step is 10000 ...............................................[loss_test]: 8.704513786015687
The model is still clearly converging, so keep lr unchanged and run another 10000 steps. The results are as follows:
The step is 0 ...............[loss_train]: 6.9829164
The step is 0 ...............................................[loss_test]: 124498.90631216543
The step is 200 ...............[loss_train]: 8.10008
The step is 200 ...............................................[loss_test]: 25.2269318457003
The step is 400 ...............[loss_train]: 7.5502853
The step is 400 ...............................................[loss_test]: 18.174842097141124
The step is 600 ...............[loss_train]: 7.345558
The step is 600 ...............................................[loss_test]: 12.517702800256234
The step is 800 ...............[loss_train]: 7.216028
The step is 800 ...............................................[loss_test]: 11.535340238500524
The step is 1000 ...............[loss_train]: 7.089348
The step is 1000 ...............................................[loss_test]: 11.44769095932996
The step is 1200 ...............[loss_train]: 7.011389
The step is 1200 ...............................................[loss_test]: 10.270000312063429
The step is 1400 ...............[loss_train]: 6.9484954
The step is 1400 ...............................................[loss_test]: 8.921192584214387
The step is 1600 ...............[loss_train]: 6.8936195
The step is 1600 ...............................................[loss_test]: 8.472521826072976
The step is 1800 ...............[loss_train]: 6.8382845
The step is 1800 ...............................................[loss_test]: 8.96128922480124
The step is 2000 ...............[loss_train]: 6.7917943
The step is 2000 ...............................................[loss_test]: 8.723530018771136
The step is 2200 ...............[loss_train]: 6.748647
The step is 2200 ...............................................[loss_test]: 8.844898210631477
The step is 2400 ...............[loss_train]: 6.711681
The step is 2400 ...............................................[loss_test]: 8.574164946873983
The step is 2600 ...............[loss_train]: 6.6803555
The step is 2600 ...............................................[loss_test]: 8.301508846106353
The step is 2800 ...............[loss_train]: 6.6487784
The step is 2800 ...............................................[loss_test]: 7.853043971238313
The step is 3000 ...............[loss_train]: 6.620329
The step is 3000 ...............................................[loss_test]: 7.830964693316707
The step is 3200 ...............[loss_train]: 6.5954256
The step is 3200 ...............................................[loss_test]: 7.630005408216406
The step is 3400 ...............[loss_train]: 6.570092
The step is 3400 ...............................................[loss_test]: 7.443931003411611
The step is 3600 ...............[loss_train]: 6.5469995
The step is 3600 ...............................................[loss_test]: 7.332441828869007
The step is 3800 ...............[loss_train]: 6.525951
The step is 3800 ...............................................[loss_test]: 7.098393744892544
The step is 4000 ...............[loss_train]: 6.505974
The step is 4000 ...............................................[loss_test]: 6.934731631367295
The step is 4200 ...............[loss_train]: 6.487751
The step is 4200 ...............................................[loss_test]: 6.873793182549654
The step is 4400 ...............[loss_train]: 6.470075
The step is 4400 ...............................................[loss_test]: 6.820729240223214
The step is 4600 ...............[loss_train]: 6.4532843
The step is 4600 ...............................................[loss_test]: 6.749174435933431
The step is 4800 ...............[loss_train]: 6.437518
The step is 4800 ...............................................[loss_test]: 6.657903397524798
The step is 5000 ...............[loss_train]: 6.420953
The step is 5000 ...............................................[loss_test]: 6.6118698252571955
The step is 5200 ...............[loss_train]: 6.4070225
The step is 5200 ...............................................[loss_test]: 6.562360595773767
The step is 5400 ...............[loss_train]: 6.393041
The step is 5400 ...............................................[loss_test]: 6.535672415185858
The step is 5600 ...............[loss_train]: 6.380245
The step is 5600 ...............................................[loss_test]: 6.494779123200311
The step is 5800 ...............[loss_train]: 6.365892
The step is 5800 ...............................................[loss_test]: 6.502580373375504
The step is 6000 ...............[loss_train]: 6.3532867
The step is 6000 ...............................................[loss_test]: 6.425201546262811
The step is 6200 ...............[loss_train]: 6.3409605
The step is 6200 ...............................................[loss_test]: 6.394948252925166
The step is 6400 ...............[loss_train]: 6.3265924
The step is 6400 ...............................................[loss_test]: 6.300970377745451
The step is 6600 ...............[loss_train]: 6.313385
The step is 6600 ...............................................[loss_test]: 6.31483358365518
The step is 6800 ...............[loss_train]: 6.300455
The step is 6800 ...............................................[loss_test]: 6.294367648937084
The step is 7000 ...............[loss_train]: 6.2876782
The step is 7000 ...............................................[loss_test]: 6.195465485254924
The step is 7200 ...............[loss_train]: 6.2750783
The step is 7200 ...............................................[loss_test]: 6.167593545383877
The step is 7400 ...............[loss_train]: 6.2630615
The step is 7400 ...............................................[loss_test]: 6.127459212585732
The step is 7600 ...............[loss_train]: 6.251576
The step is 7600 ...............................................[loss_test]: 6.081679441310741
The step is 7800 ...............[loss_train]: 6.238767
The step is 7800 ...............................................[loss_test]: 6.118286468364574
The step is 8000 ...............[loss_train]: 6.2280426
The step is 8000 ...............................................[loss_test]: 6.024300456047058
The step is 8200 ...............[loss_train]: 6.21642
The step is 8200 ...............................................[loss_test]: 5.953339185979631
The step is 8400 ...............[loss_train]: 6.2062006
The step is 8400 ...............................................[loss_test]: 5.910111387570699
The step is 8600 ...............[loss_train]: 6.1953826
The step is 8600 ...............................................[loss_test]: 5.871508668970178
The step is 8800 ...............[loss_train]: 6.1841655
The step is 8800 ...............................................[loss_test]: 5.8409152472460715
The step is 9000 ...............[loss_train]: 6.171872
The step is 9000 ...............................................[loss_test]: 5.822817193137275
The step is 9200 ...............[loss_train]: 6.160478
The step is 9200 ...............................................[loss_test]: 5.7752892661977695
The step is 9400 ...............[loss_train]: 6.1494985
The step is 9400 ...............................................[loss_test]: 5.740575900784245
The step is 9600 ...............[loss_train]: 6.1369925
The step is 9600 ...............................................[loss_test]: 5.748277935716841
The step is 9800 ...............[loss_train]: 6.125249
The step is 9800 ...............................................[loss_test]: 5.827146790645741
The step is 10000 ...............[loss_train]: 6.11406
The step is 10000 ...............................................[loss_test]: 5.662381264898512
Still converging clearly, so time for one more refill (step += 10000).
[Slightly regretting dropping the learning rate a whole order of magnitude in one go QAQ; next time, training locally, it may be worth switching to lr = 0.02 and running 30000 steps.]
The results are as follows:
The step is 0 ...............[loss_train]: 6.113809
The step is 0 ...............................................[loss_test]: 74464.96134867491
The step is 200 ...............[loss_train]: 7.4602675
The step is 200 ...............................................[loss_test]: 29.92432438885724
The step is 400 ...............[loss_train]: 6.777582
The step is 400 ...............................................[loss_test]: 13.023062992978979
The step is 600 ...............[loss_train]: 6.553012
The step is 600 ...............................................[loss_test]: 12.073077700756214
The step is 800 ...............[loss_train]: 6.4018126
The step is 800 ...............................................[loss_test]: 9.314167848339787
The step is 1000 ...............[loss_train]: 6.307288
The step is 1000 ...............................................[loss_test]: 7.409950070910984
The step is 1200 ...............[loss_train]: 6.2511005
The step is 1200 ...............................................[loss_test]: 6.793920146094428
The step is 1400 ...............[loss_train]: 6.194964
The step is 1400 ...............................................[loss_test]: 6.177425419842756
The step is 1600 ...............[loss_train]: 6.159448
The step is 1600 ...............................................[loss_test]: 5.941014484122947
The step is 1800 ...............[loss_train]: 6.114718
The step is 1800 ...............................................[loss_test]: 5.90687581344887
The step is 2000 ...............[loss_train]: 6.086493
The step is 2000 ...............................................[loss_test]: 5.755285660425822
The step is 2200 ...............[loss_train]: 6.066393
The step is 2200 ...............................................[loss_test]: 5.590765710230227
The step is 2400 ...............[loss_train]: 6.0466113
The step is 2400 ...............................................[loss_test]: 5.527593824598524
The step is 2600 ...............[loss_train]: 6.0272145
The step is 2600 ...............................................[loss_test]: 5.481305769196263
The step is 2800 ...............[loss_train]: 6.009251
The step is 2800 ...............................................[loss_test]: 5.39416327741411
The step is 3000 ...............[loss_train]: 5.986756
The step is 3000 ...............................................[loss_test]: 5.324613496109292
The step is 3200 ...............[loss_train]: 5.9663205
The step is 3200 ...............................................[loss_test]: 5.252431878337154
The step is 3400 ...............[loss_train]: 5.944916
The step is 3400 ...............................................[loss_test]: 5.21808111888391
The step is 3600 ...............[loss_train]: 5.925144
The step is 3600 ...............................................[loss_test]: 5.162776152292888
The step is 3800 ...............[loss_train]: 5.9260387
The step is 3800 ...............................................[loss_test]: 5.124254010341786
The step is 4000 ...............[loss_train]: 5.907517
The step is 4000 ...............................................[loss_test]: 5.053566848790204
The step is 4200 ...............[loss_train]: 5.896279
The step is 4200 ...............................................[loss_test]: 5.008880564460048
The step is 4400 ...............[loss_train]: 5.8747034
The step is 4400 ...............................................[loss_test]: 4.971017649880162
The step is 4600 ...............[loss_train]: 5.867584
The step is 4600 ...............................................[loss_test]: 4.97145022727825
The step is 4800 ...............[loss_train]: 5.8515553
The step is 4800 ...............................................[loss_test]: 4.9247304885475724
The step is 5000 ...............[loss_train]: 5.853008
The step is 5000 ...............................................[loss_test]: 4.873373799853855
The step is 5200 ...............[loss_train]: 5.8374043
The step is 5200 ...............................................[loss_test]: 4.84392174968013
The step is 5400 ...............[loss_train]: 5.8179545
The step is 5400 ...............................................[loss_test]: 4.825992456188908
The step is 5600 ...............[loss_train]: 5.816919
The step is 5600 ...............................................[loss_test]: 4.960121079727456
The step is 5800 ...............[loss_train]: 5.815625
The step is 5800 ...............................................[loss_test]: 4.864058117071788
The step is 6000 ...............[loss_train]: 5.794406
The step is 6000 ...............................................[loss_test]: 4.727302915520138
The step is 6200 ...............[loss_train]: 5.7906203
The step is 6200 ...............................................[loss_test]: 4.630874850131847
The step is 6400 ...............[loss_train]: 5.782653
The step is 6400 ...............................................[loss_test]: 4.634813489737334
The step is 6800 ...............[loss_train]: 5.771205
The step is 6800 ...............................................[loss_test]: 4.5297522279951306
The step is 7000 ...............[loss_train]: 5.7540708
The step is 7000 ...............................................[loss_test]: 4.593969795438978
The step is 7200 ...............[loss_train]: 5.7515583
The step is 7200 ...............................................[loss_test]: 4.738888449139065
The step is 7400 ...............[loss_train]: 5.7449126
The step is 7400 ...............................................[loss_test]: 4.690183379031994
The step is 7600 ...............[loss_train]: 5.735485
The step is 7600 ...............................................[loss_test]: 4.6057106256484985
The step is 7800 ...............[loss_train]: 5.7293587
The step is 7800 ...............................................[loss_test]: 4.497223915877165
The step is 8000 ...............[loss_train]: 5.725987
The step is 8000 ...............................................[loss_test]: 4.648918041476497
The step is 8200 ...............[loss_train]: 5.715697
The step is 8200 ...............................................[loss_test]: 4.566120553899695
The step is 8400 ...............[loss_train]: 5.709465
The step is 8400 ...............................................[loss_test]: 4.5503166958137795
The step is 8600 ...............[loss_train]: 5.700506
The step is 8600 ...............................................[loss_test]: 4.492288523250156
The step is 8800 ...............[loss_train]: 5.6951985
The step is 8800 ...............................................[loss_test]: 4.453176644113329
The step is 9000 ...............[loss_train]: 5.6912746
The step is 9000 ...............................................[loss_test]: 4.381203682334335
The step is 9200 ...............[loss_train]: 5.6852994
The step is 9200 ...............................................[loss_test]: 4.302691402258696
The step is 9400 ...............[loss_train]: 5.6860147
The step is 9400 ...............................................[loss_test]: 4.259407895582694
The step is 9600 ...............[loss_train]: 5.6731906
The step is 9600 ...............................................[loss_test]: 4.197927722224483
The step is 9800 ...............[loss_train]: 5.6625967
The step is 9800 ...............................................[loss_test]: 4.210703657733069
The step is 10000 ...............[loss_train]: 5.6571145
The step is 10000 ...............................................[loss_test]: 4.1902374558978615
lr = 0.01 has now run for 40000 steps... let's look at the results.
The step is 0 ...............[loss_train]: 5.656614
The step is 0 ...............................................[loss_test]: 63664.60655848185
The step is 200 ...............[loss_train]: 7.11886
The step is 200 ...............................................[loss_test]: 52.09049730830722
The step is 400 ...............[loss_train]: 6.4600625
The step is 400 ...............................................[loss_test]: 9.230565380167079
The step is 600 ...............[loss_train]: 6.1862617
The step is 600 ...............................................[loss_test]: 7.453303297360738
The step is 800 ...............[loss_train]: 6.0408626
The step is 800 ...............................................[loss_test]: 5.200981718522531
The step is 1000 ...............[loss_train]: 5.965068
The step is 1000 ...............................................[loss_test]: 4.875279819523847
The step is 1200 ...............[loss_train]: 5.908588
The step is 1200 ...............................................[loss_test]: 4.743478435057181
The step is 1400 ...............[loss_train]: 5.8607306
The step is 1400 ...............................................[loss_test]: 4.672507754078618
The step is 1600 ...............[loss_train]: 5.8314376
The step is 1600 ...............................................[loss_test]: 4.563611352885211
The step is 1800 ...............[loss_train]: 5.8040433
The step is 1800 ...............................................[loss_test]: 4.489517644599632
The step is 2000 ...............[loss_train]: 5.786041
The step is 2000 ...............................................[loss_test]: 4.4442360401153564
The step is 2200 ...............[loss_train]: 5.7753854
The step is 2200 ...............................................[loss_test]: 4.408069910826506
The step is 2400 ...............[loss_train]: 5.754279
The step is 2400 ...............................................[loss_test]: 4.439098349323979
The step is 2600 ...............[loss_train]: 5.741812
The step is 2600 ...............................................[loss_test]: 4.3816952308019
The step is 2800 ...............[loss_train]: 5.7278595
The step is 2800 ...............................................[loss_test]: 4.320185467048928
The step is 3000 ...............[loss_train]: 5.7178035
The step is 3000 ...............................................[loss_test]: 4.267243645809315
The step is 3200 ...............[loss_train]: 5.7073455
The step is 3200 ...............................................[loss_test]: 4.231665818779557
The step is 3400 ...............[loss_train]: 5.6986403
The step is 3400 ...............................................[loss_test]: 4.206608145325272
The step is 3600 ...............[loss_train]: 5.6875467
The step is 3600 ...............................................[loss_test]: 4.169648960784629
The step is 3800 ...............[loss_train]: 5.677865
The step is 3800 ...............................................[loss_test]: 4.150309984330778
The step is 4000 ...............[loss_train]: 5.6691937
The step is 4000 ...............................................[loss_test]: 4.1209083354031595
The step is 4200 ...............[loss_train]: 5.66471
The step is 4200 ...............................................[loss_test]: 4.105338900177567
The step is 4400 ...............[loss_train]: 5.6526175
The step is 4400 ...............................................[loss_test]: 4.073122616167422
The step is 4600 ...............[loss_train]: 5.647765
The step is 4600 ...............................................[loss_test]: 4.0576327862562955
The step is 4800 ...............[loss_train]: 5.6425214
The step is 4800 ...............................................[loss_test]: 4.033924257313764
The step is 5000 ...............[loss_train]: 5.6352935
The step is 5000 ...............................................[loss_test]: 4.021246839452673
The step is 5200 ...............[loss_train]: 5.6252394
The step is 5200 ...............................................[loss_test]: 3.984381507944178
The step is 5400 ...............[loss_train]: 5.6231766
The step is 5400 ...............................................[loss_test]: 3.9664067780530012
The step is 5600 ...............[loss_train]: 5.6154747
The step is 5600 ...............................................[loss_test]: 3.94403839552844
The step is 5800 ...............[loss_train]: 5.60753
The step is 5800 ...............................................[loss_test]: 3.9218364424175687
The step is 6000 ...............[loss_train]: 5.597889
The step is 6000 ...............................................[loss_test]: 3.901428116692437
The step is 6200 ...............[loss_train]: 5.593694
The step is 6200 ...............................................[loss_test]: 3.8766607929159096
The step is 6400 ...............[loss_train]: 5.585779
The step is 6400 ...............................................[loss_test]: 3.861407372686598
The step is 6600 ...............[loss_train]: 5.5833907
The step is 6600 ...............................................[loss_test]: 3.847714212205675
The step is 6800 ...............[loss_train]: 5.5749383
The step is 6800 ...............................................[loss_test]: 3.841452404304787
The step is 7000 ...............[loss_train]: 5.5737963
The step is 7000 ...............................................[loss_test]: 3.8388270183845803
The step is 7200 ...............[loss_train]: 5.566976
The step is 7200 ...............................................[loss_test]: 3.8146474493874445
The step is 7400 ...............[loss_train]: 5.561958
The step is 7400 ...............................................[loss_test]: 3.806270864274767
The step is 7600 ...............[loss_train]: 5.5528736
The step is 7600 ...............................................[loss_test]: 3.7607763166780823
The step is 7800 ...............[loss_train]: 5.551815
The step is 7800 ...............................................[loss_test]: 3.772628837161594
The step is 8000 ...............[loss_train]: 5.545028
The step is 8000 ...............................................[loss_test]: 3.766728259898998
The step is 8200 ...............[loss_train]: 5.5421486
The step is 8200 ...............................................[loss_test]: 3.7382217424887196
The step is 8400 ...............[loss_train]: 5.5393515
The step is 8400 ...............................................[loss_test]: 3.708954506450229
The step is 8600 ...............[loss_train]: 5.53146
The step is 8600 ...............................................[loss_test]: 3.685774811991939
The step is 8800 ...............[loss_train]: 5.5273795
The step is 8800 ...............................................[loss_test]: 3.668190426296658
The step is 9000 ...............[loss_train]: 5.5238485
The step is 9000 ...............................................[loss_test]: 3.6644358811555087
The step is 9200 ...............[loss_train]: 5.519959
The step is 9200 ...............................................[loss_test]: 3.6394993552455195
The step is 9400 ...............[loss_train]: 5.515499
The step is 9400 ...............................................[loss_test]: 3.6224264016857854
The step is 9600 ...............[loss_train]: 5.5084605
The step is 9600 ...............................................[loss_test]: 3.5936923887994556
The step is 9800 ...............[loss_train]: 5.5047555
The step is 9800 ...............................................[loss_test]: 3.5875721088162176
The step is 10000 ...............[loss_train]: 5.5000877
The step is 10000 ...............................................[loss_test]: 3.568454658543622
Another 10000 steps.
...The campus network just crashed, but the roughly 9000 steps that did run gave middling results (converging to around loss = 3.1). Keep shrinking lr; in principle, continuing this way should converge to loss < 1 or lower (i.e. within the acceptable error range).
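The shrink-lr-by-hand loop used throughout could in principle be automated. As one possible alternative (an assumption on my part, not what was actually run here), tf.keras offers the ReduceLROnPlateau callback when training through model.fit; a minimal sketch reusing the TF2 model and data defined above, with illustrative factor, patience, and epoch values:
# Hypothetical automation of the manual lr schedule above
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',  # watch the test loss
    factor=0.1,          # drop lr one order of magnitude at a time, as done by hand above
    patience=20,         # only after 20 epochs without improvement
    min_lr=1e-6)
model.compile(optimizer=tf.keras.optimizers.Adagrad(learning_rate=0.1), loss='mse')
model.fit(X_train, Y_train, epochs=2000, batch_size=len(X_train),
          validation_data=(X_test, Y_test), callbacks=[reduce_lr], verbose=0)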
The author will later write a complete post recording the whole task, from data extraction through the finished material work; this post pauses here for now.