Notes: Tuning the Structure of a Multivariate Nonlinear Regression Network Model

Task Description

I had not done this kind of work before, so I decided to record this model-optimization effort from scratch.
The initial input is 2-dimensional and the output is 1-dimensional, with a nonlinear mapping between them. (The actual task calls for a 3-dimensional input and a 1-dimensional output; in step 8 the model is switched back to the full 3-dimensional input, i.e. the original task. Only two dimensions are considered at first because one of the three dimensions is naturally discrete, and dropping it might make the problem easier to handle(?).)

In addition, later inspection showed that the data set is quite non-uniform; its scatter plot is shown below:
[Figure: scatter plot of the raw data]

Initial Model Structure

'''For this 2-D version, the first two columns of the imported CSV file are the TiO2 layer thickness
   and the applied voltage; these are the inputs of the neural network.
   The third column is the magnitude of the excited photocurrent; this is the network output (the quantity to predict).'''

import csv
import numpy as np
import matplotlib.pyplot as plt
from keras.layers import Layer
from keras import backend as K
from keras.layers import Dense
from keras.models import Sequential
from keras.optimizers import SGD
from keras.optimizers import adam
from keras.layers import Activation
import keras

from pylab import *
from mpl_toolkits.mplot3d import Axes3D


'''Load the CSV files'''
csv_train = np.loadtxt(open("./data/2D_in_without_N/U&T&I_1L.csv","rb"),delimiter=",",skiprows=0)  # the returned data is an ndarray
print('train.csv loaded successfully!')
# The test set is set aside for now; if the model cannot even converge well on the raw data, there is no point in having a test set (doge)
csv_test = np.loadtxt(open("./data/2D_in_without_N/U&T&I_1L.csv","rb"),delimiter=",",skiprows=0)  # the returned data is an ndarray
print('test.csv loaded successfully!')
# data_shape = csv_data.shape  # dimensions of the data
# data_dim = csv_data.ndim  # rank of the ndarray
# [m, n] = data_shape  # number of rows and columns
# print("csv_data.dim = ", data_dim)
# print("csv_data.shape = ", data_shape)
# print("csv_data m ={0}, csv_data n ={1}".format(m, n))

'''Select the training and test sets'''
# training set
X_train = csv_train[:,0:2]# [:,np.newaxis]
Y_train = csv_train[:,2]
# test set
X_test = csv_test[:,0:2]
Y_test = csv_test[:,2]

'''Check the input shapes'''
# print("==============================================================================")
# print(X_train[:, 0].shape)
# print(X_train)
# print("==============================================================================")
# print(Y_train)
# print(Y_train.shape)

'''Plot the raw data'''
# # print(Z_pred.shape)

# fig = figure(figsize=(16, 9))
# # ax = Axes3D(fig)
# ax = Axes3D(fig,auto_add_to_figure=False)
# fig.add_axes(ax)

# X = csv_train[:, 0]
# Y = csv_train[:, 1]
# Z = csv_train[:, 2]

# '''Slice the data (to reduce computation)'''
# s = slice(0, 1420, 2)
# x = X[s]
# y = Y[s]
# z = Z[s]
# z_ = Z_pred[s]
# # print(x.shape)

# # build the mesh grid
# X, Y = np.meshgrid(X, Y)

# '''shape check'''
# # print("==============================================================================")
# # print(X.shape)
# # print(Y.shape)
# # print(Z.shape)
# # print("==============================================================================")


# '''plot'''
# ax.scatter(x, y, z)
# # ax.plot_surface(x, y, z_, rstride=1, cstride=1, cmap=plt.get_cmap('rainbow'))

# # add axis labels
# ax.set_zlabel('I', fontdict={'size': 15, 'color': 'red'})
# ax.set_ylabel('U', fontdict={'size': 15, 'color': 'blue'})
# ax.set_xlabel('Thickness', fontdict={'size': 15, 'color': 'green'})

# plt.show()



'''Define the network structure'''
# Custom RBF layer
class RBFLayer(Layer):
    def __init__(self, units, gamma, **kwargs):
        super(RBFLayer, self).__init__(**kwargs)
        self.units = units
        self.gamma = K.cast_to_floatx(gamma)

    def build(self, input_shape):
        # mu: trainable centers of the radial basis functions, one column per unit
        self.mu = self.add_weight(name='mu',
                                  shape=(int(input_shape[1]), self.units),
                                  initializer='uniform',
                                  trainable=True)
        super(RBFLayer, self).build(input_shape)

    def call(self, inputs):
        # Gaussian RBF response: exp(-gamma * ||x - mu_j||^2) for each center j
        diff = K.expand_dims(inputs) - self.mu
        l2 = K.sum(K.pow(diff,2), axis=1)
        res = K.exp(-1 * self.gamma * l2)
        return res

    def compute_output_shape(self, input_shape):
        return (input_shape[0], self.units)

model = Sequential()

'''Layer 1'''
model.add(Dense(units=128, input_dim = 2, kernel_initializer=keras.initializers.random_normal(stddev=0.01)))
model.add(keras.layers.normalization.BatchNormalization())
# model.add(Activation(keras.layers.advanced_activations.LeakyReLU(alpha=0.2)))
# adding LeakyReLU significantly lowers the loss
model.add(keras.layers.advanced_activations.LeakyReLU(alpha=0.2))

'''Layer 2'''
model.add(Dense(units=32, input_dim = 128, kernel_initializer=keras.initializers.random_normal(stddev=0.01)))
model.add(keras.layers.normalization.BatchNormalization())
model.add(keras.layers.advanced_activations.LeakyReLU(alpha=0.2))
# model.add(Activation('tanh'))

'''Layer 3'''
# model.add(RBFLayer(32, 0.5))
# dropout severely degraded the regression results
# model.add(keras.layers.Dropout(0.5))
model.add(Dense(units=8, input_dim=32, kernel_initializer=keras.initializers.random_normal(stddev=0.01)))
# model.add(Activation('tanh'))

'''Layer 4'''
model.add(Dense(units=1, input_dim = 8, kernel_initializer=keras.initializers.random_normal(stddev=0.01)))
model.add(keras.layers.normalization.BatchNormalization())

'''Output activation'''
# model.add(Activation('elu'))            # stage 1 converges at <cost = 120>
# model.add(Activation('selu'))           # stage 1 converges at <cost = 111>
# model.add(Activation('softplus'))       # stage 1 converges at <cost = 131>
# model.add(Activation('softsign'))       # stage 1 converges at <cost = 284>
# model.add(Activation('relu'))           # stage 1 converges at <cost = 131>
# model.add(Activation('tanh'))           # stage 1 converges at <cost = 281>
# model.add(Activation('sigmoid'))        # stage 1 converges at <cost = 293>
# model.add(Activation('hard_sigmoid'))   # stage 1 converges at <cost = 293>
# model.add(Activation('exponential'))    # stage 1 converges at <cost = 131>
model.add(Activation('linear'))           # stage 1 converges at <cost = 0.24>

'''Print the network structure'''
model.summary()

'''Choose an optimizer'''
Adam = keras.optimizers.Adam(lr=0.01, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=True)  # stage 2: converges at <step = 100000> to <cost = 10>
# SGD updates very frequently here, which causes severe oscillation of the cost function
sgd = SGD(lr=5)                                                                                         # NaN
RMSprop = keras.optimizers.RMSprop(lr=0.001, rho=0.9, epsilon=None, decay=0.0)                          # stage 2: converges at <step = 100000> to <cost = 10000>
Adagrad = keras.optimizers.Adagrad(lr=0.01, epsilon=None, decay=0.0)                                    # stage 2: converges at <step = 100000> to <cost = 50000>
Adadelta = keras.optimizers.Adadelta(lr=1.0, rho=0.95, epsilon=None, decay=0.0)                         # stage 2: converges at <step = 70000>  to <cost = 250>
Adamax = keras.optimizers.Adamax(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0)           # stage 2: converges at <step = 100000> to <cost = 22>
Nadam = keras.optimizers.Nadam(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=None, schedule_decay=0.004)  # stage 2: converges at <step = 110000> to <cost = 30>

# set the optimizer and loss function
model.compile(optimizer=Adam, loss='mse')

'''Training'''
for step in range(50001):
    loss = model.train_on_batch(X_train, Y_train)
    # loss,accuracy = model.evaluate(X_test, Y_test, verbose=0)
    # print(model.evaluate(X_test,Y_test))
    if step % 500 == 0:
        print("The step is ", step , "......................................" + '[loss]:', loss)
              # "............." + '[cost, acc_train]:', train_cost_acc,
              # "............." + '[lost, acc_test]:', loss, accuracy)





'''Plotting with matplotlib'''
# # 2D
# # y_pred = model.predict(X_train)
# # plt.figure(figsize=(16, 9))
# # plt.subplot(1,3,1)
# # plt.scatter(X_train[:, 0], Y_train)
# # plt.plot(X_train[:, 0], y_pred, 'r-', lw=3)

# # plt.subplot(1,3,2)
# # plt.scatter(X_train[:, 1], Y_train)
# # plt.plot(X_train[:, 1], y_pred, 'r-', lw=3)

# # plt.subplot(1,3,3)
# # plt.scatter(X_train[:, 2], Y_train)
# # plt.plot(X_train[:, 2], y_pred, 'r-', lw=3)
# # plt.show()

# 3D
# Z_pred = model.predict(X_train)
# # print(Z_pred.shape)

# fig = figure(figsize=(16, 9))
# # ax = Axes3D(fig)
# ax = Axes3D(fig,auto_add_to_figure=False)
# fig.add_axes(ax)

# X = X_train[:, 0]
# Y = X_train[:, 1]
# Z = csv_train[:, 2]

# '''Slice the data (to reduce computation)'''
# s = slice(0, 1420, 2)
# x = X[s]
# y = Y[s]
# z = Z[s]
# z_ = Z_pred[s]
# # print(x.shape)

# # build the mesh grid
# X, Y = np.meshgrid(X, Y)

# '''shape check'''
# # print("==============================================================================")
# # print(X.shape)
# # print(Y.shape)
# # print(Z.shape)
# # print("==============================================================================")


# '''plot'''
# ax.scatter(x, y, z)
# # ax.plot_surface(x, y, z_, rstride=1, cstride=1, cmap=plt.get_cmap('rainbow'))

# # add axis labels
# ax.set_zlabel('I', fontdict={'size': 15, 'color': 'red'})
# ax.set_ylabel('U', fontdict={'size': 15, 'color': 'blue'})
# ax.set_xlabel('Thickness', fontdict={'size': 15, 'color': 'green'})

# plt.show()

# ax.set_zlabel('I', fontdict={'size': 15, 'color': 'red'})
# ax.set_ylabel('Thickness', fontdict={'size': 15, 'color': 'blue'})
# ax.set_xlabel('layer', fontdict={'size': 15, 'color': 'green'})

# show()


'''Save the model'''
save_path = r'D:\anaconda\project\Spectral_analysis\model_save\3D\U&T&I_1L_weight.h5'
model.save_weights(save_path)
# model.save(save_path)  # creates an HDF5 file 'my_model.h5'
# model = load_model('my_model.h5')






# '''Model visualization (Graphviz)'''
# from keras.utils import plot_model
# plot_model(model, to_file='model.png')

Process

Around step = 100000:

1. Adding Dropout

The loss rose sharply.

2. Replacing LeakyReLU with plain ReLU

No noticeable effect.

3. Adding an RBFLayer

No noticeable effect.

4. Adding more layers

No noticeable effect.

5. Switching optimizers
(1) RMSprop

The loss fluctuated wildly, but the overall trend was still toward convergence; it settled around 30 after 100000 steps.

Having tried essentially every option by brute force, the model had clearly hit a bottleneck, and my guess was that only a change of structure would lead anywhere new. (Speaking as my future self, let me roast the me of two days ago: why didn't you take a proper look at Adagrad... it really is a great fit for this QAQ)
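
For reference, this is roughly how that brute-force comparison can be organized. A minimal sketch only: it assumes a hypothetical build_model() helper that recreates the network defined above, and the 2000-step budget is arbitrary, meant only for a coarse ranking.

import keras

# Candidate optimizers with the same settings as listed in the code above.
candidates = {
    'Adam':     keras.optimizers.Adam(lr=0.01, amsgrad=True),
    'RMSprop':  keras.optimizers.RMSprop(lr=0.001),
    'Adagrad':  keras.optimizers.Adagrad(lr=0.01),
    'Adadelta': keras.optimizers.Adadelta(lr=1.0),
    'Adamax':   keras.optimizers.Adamax(lr=0.002),
    'Nadam':    keras.optimizers.Nadam(lr=0.002),
}

results = {}
for name, opt in candidates.items():
    m = build_model()                      # hypothetical helper: rebuilds the MLP defined above
    m.compile(optimizer=opt, loss='mse')
    for step in range(2000):               # short, coarse run per optimizer
        loss = m.train_on_batch(X_train, Y_train)
    results[name] = loss
    print(name, 'loss after 2000 steps:', loss)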

6. Deepening the network and increasing the parameter count

From:
[Figure: original network structure]
to:
[Figure: deeper network structure]
In practice it did not converge...
[Figure: loss curve]

7. Adding tanh activation layers within the first few layers

[Figure: training curve]
After 130k steps the model had converged to loss = 89; convergence became noticeably slower, and whether it would eventually get stuck in a local optimum, as before, was not yet clear.

After 200k steps it reached loss = 30 and still seemed to have room to improve, so a third round of 100k steps was run.

After 250k steps the model converged to loss = 11.

After two further 500k-step runs it was essentially certain that the loss had converged to around 8.9.
[Figure: final loss curve]

8. After consulting the literature, it turned out that the Adagrad optimizer tends to perform well on unevenly distributed data
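
As a quick recap of why that is plausible (this is just the standard Adagrad update, not something derived from this data set): each parameter keeps an accumulator of its squared gradients, G_t = G_{t-1} + g_t^2, and is updated as theta <- theta - lr * g_t / (sqrt(G_t) + eps). Parameters that receive gradients only rarely, for instance from the sparsely populated regions of an uneven data set, therefore keep a comparatively large effective learning rate, while frequently updated parameters are damped.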

The optimizer was therefore changed, and the network was rebuilt with a simpler structure: a multilayer perceptron with BatchNorm layers and LeakyReLU activations. The complete code is below (the input dimension is now three, i.e. this is the full version of the regression model):

'''The first three columns of the imported CSV file are the number of layers, the TiO2 layer thickness
   and the applied voltage; these are the inputs of the neural network.
   The fourth column is the magnitude of the excited photocurrent; this is the network output (the quantity to predict).'''

import csv
import numpy as np
import matplotlib.pyplot as plt
from keras.layers import Layer
from keras import backend as K
from keras.layers import Dense
from keras.models import Sequential
from keras.optimizers import SGD
from keras.optimizers import adam
from keras.layers import Activation
import keras

from pylab import *
from mpl_toolkits.mplot3d import Axes3D

# path where model files are saved
path = './model_save/4D/'


'''Load the CSV files'''
csv_train = np.loadtxt(open("./data/mess/train1.csv","rb"),delimiter=",",skiprows=0)  # the returned data is an ndarray
print('train.csv loaded successfully!')

csv_test = np.loadtxt(open("./data/mess/test.csv","rb"),delimiter=",",skiprows=0)  # the returned data is an ndarray
print('test.csv loaded successfully!')
# data_shape = csv_data.shape  # dimensions of the data
# data_dim = csv_data.ndim  # rank of the ndarray
# [m, n] = data_shape  # number of rows and columns
# print("csv_data.dim = ", data_dim)
# print("csv_data.shape = ", data_shape)
# print("csv_data m ={0}, csv_data n ={1}".format(m, n))

'''Select the training and test sets'''
# training set
X_train = csv_train[:,0:3]# [:,np.newaxis]
Y_train = csv_train[:,3]
# test set
X_test = csv_test[:,0:3]
Y_test = csv_test[:,3]

'''Check the input shapes'''
# print("==============================================================================")
# print(X_train[:, 0].shape)
# print(X_train)
# print("==============================================================================")
# print(Y_train)
# print(Y_train.shape)

'''Plot the raw data'''
# plt.scatter(X_train,Y_train, c='r', marker='x')
# plt.xlabel('X_train')
# plt.ylabel('Y_train')
# plt.show()

'''Define the network structure'''
# Custom RBF layer (kept for reference, disabled by the docstring below)
'''
class RBFLayer(Layer):
    def __init__(self, units, gamma, **kwargs):
        super(RBFLayer, self).__init__(**kwargs)
        self.units = units
        self.gamma = K.cast_to_floatx(gamma)

    def build(self, input_shape):
        self.mu = self.add_weight(name='mu',
                                  shape=(int(input_shape[1]), self.units),
                                  initializer='uniform',
                                  trainable=True)
        super(RBFLayer, self).build(input_shape)

    def call(self, inputs):
        diff = K.expand_dims(inputs) - self.mu
        l2 = K.sum(K.pow(diff,2), axis=1)
        res = K.exp(-1 * self.gamma * l2)
        return res

    def compute_output_shape(self, input_shape):
        return (input_shape[0], self.units)
'''

model = Sequential()

'''Layer 1'''
model.add(Dense(units=128, input_dim = 3))
model.add(keras.layers.normalization.BatchNormalization())
model.add(Activation(keras.layers.advanced_activations.LeakyReLU(alpha=0.2)))

'''Layer 2'''
model.add(Dense(units=32, input_dim = 128))
model.add(keras.layers.normalization.BatchNormalization())
model.add(Activation(keras.layers.advanced_activations.LeakyReLU(alpha=0.2)))
# model.add(Activation('tanh'))

'''Layer 3'''
# model.add(RBFLayer(20, 0.5))
# model.add(keras.layers.Dropout(0.5))
model.add(Dense(units=32, input_dim=32))
model.add(keras.layers.normalization.BatchNormalization())
model.add(Activation(keras.layers.advanced_activations.LeakyReLU(alpha=0.2)))
# model.add(Activation('tanh'))

'''Layer 4'''
model.add(Dense(units=8, input_dim = 32, kernel_initializer=keras.initializers.random_normal(stddev=0.01)))
model.add(keras.layers.normalization.BatchNormalization())
model.add(Activation(keras.layers.advanced_activations.LeakyReLU(alpha=0.2)))

'''Layer 5'''
model.add(Dense(units=1, input_dim = 8, kernel_initializer=keras.initializers.random_normal(stddev=0.01)))
# model.add(keras.layers.normalization.BatchNormalization())
# model.add(Activation(keras.layers.advanced_activations.LeakyReLU(alpha=0.2)))

'''Output activation'''
# model.add(Activation('elu'))            # stage 1 converges at <cost = 120>
# model.add(Activation('selu'))           # stage 1 converges at <cost = 111>
# model.add(Activation('softplus'))       # stage 1 converges at <cost = 131>
# model.add(Activation('softsign'))       # stage 1 converges at <cost = 284>
# model.add(Activation('relu'))           # stage 1 converges at <cost = 131>
# model.add(Activation('tanh'))           # stage 1 converges at <cost = 281>
# model.add(Activation('sigmoid'))        # stage 1 converges at <cost = 293>
# model.add(Activation('hard_sigmoid'))   # stage 1 converges at <cost = 293>
# model.add(Activation('exponential'))    # stage 1 converges at <cost = 131>
model.add(Activation('linear'))           # stage 1 converges at <cost = 0.24>

'''Print the network structure'''
model.summary()

'''Choose an optimizer'''
Adam = keras.optimizers.Adam(lr=0.01, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=True)  # stage 2: converges at <step = 100000> to <cost = 22>
# SGD updates very frequently here, which causes severe oscillation of the cost function
sgd = SGD(lr=5)                                                                                         # NaN
RMSprop = keras.optimizers.RMSprop(lr=0.001, rho=0.9, epsilon=None, decay=0.0)                          # stage 2: converges at <step = 100000> to <cost = 10000>
Adagrad = keras.optimizers.Adagrad(lr=0.00001, epsilon=None, decay=0.0)                                 # stage 2: converges at <step = 100000> to <cost = 50000>
Adadelta = keras.optimizers.Adadelta(lr=1.0, rho=0.95, epsilon=None, decay=0.0)                         # stage 2: converges at <step = 70000>  to <cost = 250>
Adamax = keras.optimizers.Adamax(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0)           # stage 2: converges at <step = 100000> to <cost = 22>
Nadam = keras.optimizers.Nadam(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=None, schedule_decay=0.004)  # stage 2: converges at <step = 110000> to <cost = 30>

# set the optimizer and loss function
model.compile(optimizer=Adagrad, loss='mse')

# load previously saved weights (resume training)
model.load_weights(path + 'loss=4.202981233596802.h5')

'''Training'''
for step in range(10001):
    loss_train = model.train_on_batch(X_train, Y_train)
    loss_test = model.evaluate(X_test, Y_test, verbose=0)
    # print(model.evaluate(X_test,Y_test))
    if step % 100 == 0:
        print("The step is ", step , "..............." + '[loss_train]:', loss_train)
        print("The step is ", step , "..............................................." + '[loss_test]:', loss_test)
        if loss_test < 3 :
            save_path = path + 'step=' + str(step) + '&loss=' + str(loss_test) + '.h5'
            model.save_weights(save_path)


'''Save the model'''
save_path = path + 'loss=' + str(loss_test) + '.h5'
model.save_weights(save_path)


'''Plotting with matplotlib'''
# # 2D
# # y_pred = model.predict(X_train)
# # plt.figure(figsize=(16, 9))
# # plt.subplot(1,3,1)
# # plt.scatter(X_train[:, 0], Y_train)
# # plt.plot(X_train[:, 0], y_pred, 'r-', lw=3)

# # plt.subplot(1,3,2)
# # plt.scatter(X_train[:, 1], Y_train)
# # plt.plot(X_train[:, 1], y_pred, 'r-', lw=3)

# # plt.subplot(1,3,3)
# # plt.scatter(X_train[:, 2], Y_train)
# # plt.plot(X_train[:, 2], y_pred, 'r-', lw=3)
# # plt.show()

# # 3D
# Z_pred = model.predict(X_train)
# # print(Z_pred.shape)

# fig = figure(figsize=(16, 9))
# ax = Axes3D(fig)

# X = X_train[:, 0]
# Y = X_train[:, 1]
# Z = csv_train[:, 3]
# # X, Y = np.meshgrid(X, Y)

# '''shape check'''
# # print("==============================================================================")
# # print(X.shape)
# # print(Y.shape)
# # print(Z.shape)
# # print("==============================================================================")

# '''Slice the data'''
# s = slice(0, 2200, 8)
# x = X[s]
# y = Y[s]
# z = Z[s]
# z_ = Z_pred[s]
# # print(x.shape)

# '''plot'''
# ax.scatter(x, y, z)
# ax.plot_surface(x, y, z_, rstride=1, cstride=1, cmap='hot')

# # add axis labels
# ax.set_zlabel('I', fontdict={'size': 15, 'color': 'red'})
# ax.set_ylabel('Thickness', fontdict={'size': 15, 'color': 'blue'})
# ax.set_xlabel('layer', fontdict={'size': 15, 'color': 'green'})

# show()








# '''Model visualization (Graphviz)'''
# # from keras.utils import plot_model
# # plot_model(model, to_file='model.png')

At the start, lr was set to 0.1; after 20000 steps the loss dropped from roughly 20k to around 0.1k. From that point on, the learning rate can be adjusted repeatedly to keep the model converging. After roughly several hundred thousand steps and several order-of-magnitude reductions of lr, the results were as follows:
[Figures: training results after the staged lr reductions]
The model does converge, but slowly, which raised the question of whether adding more perceptron layers would speed things up further.
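
As an aside before moving on: the manual "lower lr and restart" cycle could also be automated with Keras callbacks. A minimal sketch, assuming one is willing to switch from train_on_batch to model.fit so that callbacks apply; the factor, patience, epoch count and file name below are placeholder values, not tuned for this task.

# Hedged sketch: let Keras lower the learning rate when the test loss plateaus,
# and keep only the best weights on disk.
from keras.callbacks import ReduceLROnPlateau, ModelCheckpoint

callbacks = [
    ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=50, min_lr=1e-6),
    ModelCheckpoint(path + 'best.h5', monitor='val_loss',
                    save_best_only=True, save_weights_only=True),
]
model.fit(X_train, Y_train,
          validation_data=(X_test, Y_test),
          epochs=2000, batch_size=len(X_train),   # full batch, matching train_on_batch above
          callbacks=callbacks, verbose=0)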

9. Keeping the current layer-block structure while adding more layers

'''This is TF2 code, run on the Aliyun Tianchi notebook environment; it is used only for logic verification and structure analysis'''
import csv
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.layers import Layer
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Activation

from pylab import *
from mpl_toolkits.mplot3d import Axes3D



'''Set the paths'''
data_path = './Data/'
save_path = './Model_save/'


'''Load the CSV files'''
csv_train = np.loadtxt(open(data_path + "train1.csv","rb"),delimiter=",",skiprows=0)  # the returned data is an ndarray
print('train.csv loaded successfully!')

csv_test = np.loadtxt(open(data_path + "test.csv","rb"),delimiter=",",skiprows=0)  # the returned data is an ndarray
print('test.csv loaded successfully!')
# data_shape = csv_data.shape  # dimensions of the data
# data_dim = csv_data.ndim  # rank of the ndarray
# [m, n] = data_shape  # number of rows and columns
# print("csv_data.dim = ", data_dim)
# print("csv_data.shape = ", data_shape)
# print("csv_data m ={0}, csv_data n ={1}".format(m, n))

'''Select the training and test sets'''
# training set
X_train = csv_train[:,0:3]# [:,np.newaxis]
Y_train = csv_train[:,3]
# test set
X_test = csv_test[:,0:3]
Y_test = csv_test[:,3]

'''Shape check'''
print(X_train.shape)
print(Y_train.shape)
print(X_test.shape)
print(Y_test.shape)


'''Define the network structure'''
model = Sequential()


model.add(Dense(units=256, input_dim = 3, use_bias=True, kernel_initializer='glorot_uniform',bias_initializer='zeros'))
model.add(tf.keras.layers.BatchNormalization())
# model.add(Activation(keras.layers.advanced_activations.LeakyReLU(alpha=0.2)))
# adding LeakyReLU significantly lowers the loss
model.add(tf.keras.layers.LeakyReLU(alpha=0.2))

model.add(Dense(units=128, input_dim = 256, use_bias=True, kernel_initializer='glorot_uniform',bias_initializer='zeros'))
model.add(tf.keras.layers.BatchNormalization())
# model.add(Activation(keras.layers.advanced_activations.LeakyReLU(alpha=0.2)))
# adding LeakyReLU significantly lowers the loss
model.add(tf.keras.layers.LeakyReLU(alpha=0.2))


model.add(Dense(units=64, input_dim = 128, use_bias=True, kernel_initializer='glorot_uniform',bias_initializer='zeros'))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.LeakyReLU(alpha=0.2))
# model.add(Activation('tanh'))

model.add(Dense(units=32, input_dim = 64, use_bias=True, kernel_initializer='glorot_uniform',bias_initializer='zeros'))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.LeakyReLU(alpha=0.2))

# model.add(RBFLayer(32, 0.5))
# dropout severely degraded the regression results
# model.add(keras.layers.Dropout(0.5))
model.add(Dense(units=16, input_dim=32, use_bias=True, kernel_initializer='glorot_uniform',bias_initializer='zeros'))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.LeakyReLU(alpha=0.2))
# model.add(Activation('tanh'))


model.add(Dense(units=8, input_dim=16, use_bias=True, kernel_initializer='glorot_uniform',bias_initializer='zeros'))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.LeakyReLU(alpha=0.2))

model.add(Dense(units=4, input_dim=8, use_bias=True, kernel_initializer='glorot_uniform',bias_initializer='zeros'))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.LeakyReLU(alpha=0.2))

model.add(Dense(units=2, input_dim=4, use_bias=True, kernel_initializer='glorot_uniform',bias_initializer='zeros'))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.LeakyReLU(alpha=0.2))

model.add(Dense(units=1, input_dim = 2, use_bias=False, kernel_initializer='glorot_uniform',bias_initializer='zeros'))
# model.add(keras.layers.normalization.BatchNormalization())


'''Output activation'''
# model.add(Activation('elu'))            # stage 1 converges at <cost = 120>
# model.add(Activation('selu'))           # stage 1 converges at <cost = 111>
# model.add(Activation('softplus'))       # stage 1 converges at <cost = 131>
# model.add(Activation('softsign'))       # stage 1 converges at <cost = 284>
# model.add(Activation('relu'))           # stage 1 converges at <cost = 131>
# model.add(Activation('tanh'))           # stage 1 converges at <cost = 281>
# model.add(Activation('sigmoid'))        # stage 1 converges at <cost = 293>
# model.add(Activation('hard_sigmoid'))   # stage 1 converges at <cost = 293>
# model.add(Activation('exponential'))    # stage 1 converges at <cost = 131>
# model.add(Activation('linear'))         # stage 1 converges at <cost = 0.24>

'''Print the network structure'''
model.summary()

'''Choose an optimizer (kept for reference; the actual Adagrad setup is below this docstring)
# SGD updates very frequently here, which causes severe oscillation of the cost function
sgd = SGD(lr=5)                                                                                             # NaN
Adam = tf.keras.optimizers.Adam(lr=0.005, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=True)  # stage 2: converges at <step = 100000> to <cost = 10>
RMSprop = tf.keras.optimizers.RMSprop(lr=0.001, rho=0.9, epsilon=None, decay=0.0)                           # stage 2: converges at <step = 100000> to <cost = 10000>
Adagrad = tf.keras.optimizers.Adagrad(lr=0.001, epsilon=None, decay=0.0)                                    # stage 2: converges at <step = 100000> to <cost = 50000>
Adadelta = tf.keras.optimizers.Adadelta(lr=1.0, rho=0.95, epsilon=None, decay=0.0)                          # stage 2: converges at <step = 70000>  to <cost = 250>
Adamax = tf.keras.optimizers.Adamax(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0)            # stage 2: converges at <step = 100000> to <cost = 22>
Nadam = tf.keras.optimizers.Nadam(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=None, schedule_decay=0.004)   # stage 2: converges at <step = 110000> to <cost = 30>

# set the optimizer and loss function
model.compile(optimizer=Adagrad, loss='mse')
'''


# load previously saved weights (resume training)
# model.load_weights(path + '&loss=11.725552.h5')
Adagrad = tf.keras.optimizers.Adagrad(lr=0.1, epsilon=None, decay=0.0)
model.compile(optimizer=Adagrad, loss='mse')

'''Training'''
for step in range(20001):
    loss_train = model.train_on_batch(X_train, Y_train)
    loss_test = model.evaluate(X_test, Y_test, verbose=0)
    # print(model.evaluate(X_test,Y_test))
    if step % 200 == 0:
        print("The step is ", step , "..............." + '[loss_train]:', loss_train)
        print("The step is ", step , "..............................................." + '[loss_test]:', loss_test)

             # "............." + '[cost, acc_train]:', train_cost_acc,
             # "............." + '[lost, acc_test]:', loss, accuracy)
        if loss_test < 10 :
            break # 这里顺带一提, 最好多遍历几遍再确定是否要减小学习率, 而不是看到了就break,因为对于复杂情况可能会陷入局部最优解这个坑, 博主是知道数据结构(结合上面的三维图和离散的第四维度的明显下降趋势)下才这样break的
            # path = './model_weight' + 'step=' + str(step) + '&loss=' + str(loss_test) + '.h5'
            # model.save_weights(path)

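The side note in the loop above can also be written as a small patience rule instead of a hard break; a minimal sketch under the same setup (the threshold of 10 and the patience of 5 checks are illustrative values, not tuned):

# Hedged sketch: only stop (or decide to lower the lr) after the test loss has stayed
# below the threshold for several consecutive checks, instead of on the first hit.
threshold = 10.0
patience = 5            # number of consecutive sub-threshold checks required
hits = 0
for step in range(20001):
    loss_train = model.train_on_batch(X_train, Y_train)
    if step % 200 == 0:
        loss_test = model.evaluate(X_test, Y_test, verbose=0)
        hits = hits + 1 if loss_test < threshold else 0
        if hits >= patience:
            print('test loss has stayed below', threshold, 'for', patience, 'checks at step', step)
            break
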



The output of model.summary() is as follows:

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
dense (Dense)                (None, 256)               1024      
_________________________________________________________________
batch_normalization (BatchNo (None, 256)               1024      
_________________________________________________________________
leaky_re_lu (LeakyReLU)      (None, 256)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 128)               32896     
_________________________________________________________________
batch_normalization_1 (Batch (None, 128)               512       
_________________________________________________________________
leaky_re_lu_1 (LeakyReLU)    (None, 128)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 64)                8256      
_________________________________________________________________
batch_normalization_2 (Batch (None, 64)                256       
_________________________________________________________________
leaky_re_lu_2 (LeakyReLU)    (None, 64)                0         
_________________________________________________________________
dense_3 (Dense)              (None, 32)                2080      
_________________________________________________________________
batch_normalization_3 (Batch (None, 32)                128       
_________________________________________________________________
leaky_re_lu_3 (LeakyReLU)    (None, 32)                0         
_________________________________________________________________
dense_4 (Dense)              (None, 16)                528       
_________________________________________________________________
batch_normalization_4 (Batch (None, 16)                64        
_________________________________________________________________
leaky_re_lu_4 (LeakyReLU)    (None, 16)                0         
_________________________________________________________________
dense_5 (Dense)              (None, 8)                 136       
_________________________________________________________________
batch_normalization_5 (Batch (None, 8)                 32        
_________________________________________________________________
leaky_re_lu_5 (LeakyReLU)    (None, 8)                 0         
_________________________________________________________________
dense_6 (Dense)              (None, 4)                 36        
_________________________________________________________________
batch_normalization_6 (Batch (None, 4)                 16        
_________________________________________________________________
leaky_re_lu_6 (LeakyReLU)    (None, 4)                 0         
_________________________________________________________________
dense_7 (Dense)              (None, 2)                 10        
_________________________________________________________________
batch_normalization_7 (Batch (None, 2)                 8         
_________________________________________________________________
leaky_re_lu_7 (LeakyReLU)    (None, 2)                 0         
_________________________________________________________________
dense_8 (Dense)              (None, 1)                 2         
=================================================================
Total params: 47,008
Trainable params: 45,988
Non-trainable params: 1,020
_________________________________________________________________
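
As a sanity check on the Param # column: a Dense layer has in*out + out parameters (3*256 + 256 = 1024 for the first layer; the last layer has use_bias=False, hence 2*1 = 2), and each BatchNormalization layer carries 4 parameters per feature, of which only gamma and beta are trainable. The moving mean and variance account for 2 * (256+128+64+32+16+8+4+2) = 1020 non-trainable parameters, matching the total above.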

First, open up with a large learning rate to search coarsely.
The results are as follows:

The step is  0 ...............[loss_train]: 114611.71
The step is  0 ...............................................[loss_test]: 172391.3114601418
The step is  200 ...............[loss_train]: 83820.58
The step is  200 ...............................................[loss_test]: 203178.7780597828
The step is  400 ...............[loss_train]: 56734.734
The step is  400 ...............................................[loss_test]: 107056.16631260625
The step is  600 ...............[loss_train]: 39401.14
The step is  600 ...............................................[loss_test]: 105926.1129466516
The step is  800 ...............[loss_train]: 26855.354
The step is  800 ...............................................[loss_test]: 37229.10588156735
The step is  1000 ...............[loss_train]: 18211.143
The step is  1000 ...............................................[loss_test]: 25702.297907793964
The step is  1200 ...............[loss_train]: 12521.096
The step is  1200 ...............................................[loss_test]: 20885.482175332527
The step is  1400 ...............[loss_train]: 8289.523
The step is  1400 ...............................................[loss_test]: 20825.459784613715
The step is  1600 ...............[loss_train]: 5598.1313
The step is  1600 ...............................................[loss_test]: 7263.409276891638
The step is  1800 ...............[loss_train]: 3779.045
The step is  1800 ...............................................[loss_test]: 11323.699178059896
The step is  2000 ...............[loss_train]: 2734.4216
The step is  2000 ...............................................[loss_test]: 10499.821085611979
The step is  2200 ...............[loss_train]: 2066.2224
The step is  2200 ...............................................[loss_test]: 11231.576365152994
The step is  2400 ...............[loss_train]: 1399.5436
The step is  2400 ...............................................[loss_test]: 4327.667751736111
The step is  2600 ...............[loss_train]: 942.9171
The step is  2600 ...............................................[loss_test]: 999.5631623444734
The step is  2800 ...............[loss_train]: 690.4152
The step is  2800 ...............................................[loss_test]: 958.5449817798756
The step is  3000 ...............[loss_train]: 552.9158
The step is  3000 ...............................................[loss_test]: 1946.2712932162815
The step is  3200 ...............[loss_train]: 438.0123
The step is  3200 ...............................................[loss_test]: 322.64597264042607
The step is  3400 ...............[loss_train]: 381.48965
The step is  3400 ...............................................[loss_test]: 878.8487818682635
The step is  3600 ...............[loss_train]: 327.04526
The step is  3600 ...............................................[loss_test]: 386.65829693829573
The step is  3800 ...............[loss_train]: 286.7559
The step is  3800 ...............................................[loss_test]: 161.48108376397028
The step is  4000 ...............[loss_train]: 271.89587
The step is  4000 ...............................................[loss_test]: 94.23103749310529
The step is  4200 ...............[loss_train]: 280.47284
The step is  4200 ...............................................[loss_test]: 539.7342062702885
The step is  4400 ...............[loss_train]: 228.70996
The step is  4400 ...............................................[loss_test]: 480.16451263427734
The step is  4600 ...............[loss_train]: 210.3707
The step is  4600 ...............................................[loss_test]: 138.39223879354972
The step is  4800 ...............[loss_train]: 211.48886
The step is  4800 ...............................................[loss_test]: 1558.2459713971173
The step is  5000 ...............[loss_train]: 202.70021
The step is  5000 ...............................................[loss_test]: 610.9397009390372
The step is  5200 ...............[loss_train]: 189.43668
The step is  5200 ...............................................[loss_test]: 189.83002500180845
The step is  5400 ...............[loss_train]: 170.43565
The step is  5400 ...............................................[loss_test]: 263.44775955765334
The step is  5600 ...............[loss_train]: 167.36116
The step is  5600 ...............................................[loss_test]: 199.97994754932546
The step is  5800 ...............[loss_train]: 154.43687
The step is  5800 ...............................................[loss_test]: 114.9971363456161
The step is  6000 ...............[loss_train]: 154.35269
The step is  6000 ...............................................[loss_test]: 268.57427427503796
The step is  6200 ...............[loss_train]: 138.18042
The step is  6200 ...............................................[loss_test]: 98.96797434488933
The step is  6400 ...............[loss_train]: 132.2034
The step is  6400 ...............................................[loss_test]: 60.25527487860786
The step is  6600 ...............[loss_train]: 126.0921
The step is  6600 ...............................................[loss_test]: 230.91299975359883
The step is  6800 ...............[loss_train]: 120.14387
The step is  6800 ...............................................[loss_test]: 179.14771440294055
The step is  7000 ...............[loss_train]: 113.09017
The step is  7000 ...............................................[loss_test]: 274.64273615236635
The step is  7200 ...............[loss_train]: 106.633606
The step is  7200 ...............................................[loss_test]: 75.57680504410355
The step is  7400 ...............[loss_train]: 102.05847
The step is  7400 ...............................................[loss_test]: 164.7102032414189
The step is  7600 ...............[loss_train]: 98.3386
The step is  7600 ...............................................[loss_test]: 60.37751501577872
The step is  7800 ...............[loss_train]: 93.270706
The step is  7800 ...............................................[loss_test]: 87.26740183653655
The step is  8000 ...............[loss_train]: 90.41298
The step is  8000 ...............................................[loss_test]: 87.52722210354275
The step is  8200 ...............[loss_train]: 86.37759
The step is  8200 ...............................................[loss_test]: 37.38890177232248

It is now fairly safe to say that this model can converge to loss < 50 within 10000 steps; from here on it is simply a matter of gradually lowering the learning rate and homing in step by step (a minimal sketch of such a staged loop is below). The record continues as follows.

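A minimal sketch of that staged schedule: keep the same compiled model and simply drop the optimizer's learning rate between phases. The phase list below is illustrative; the runs recorded here used 0.1 and then 0.01.

# Hedged sketch: staged learning-rate schedule on the same model and optimizer.
phases = [(0.1, 20000), (0.01, 20000), (0.001, 20000)]   # (lr, steps) per phase, illustrative

for lr, steps in phases:
    K.set_value(model.optimizer.lr, lr)                  # K is tensorflow.keras.backend, imported above;
                                                         # adjusts lr in place without resetting Adagrad's accumulators
    for step in range(steps):
        loss_train = model.train_on_batch(X_train, Y_train)
        if step % 200 == 0:
            loss_test = model.evaluate(X_test, Y_test, verbose=0)
            print('lr =', lr, 'step =', step, '[loss_train]:', loss_train, '[loss_test]:', loss_test)
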
Learning rate lowered to 0.01, step count set to 10000.
The results are as follows:

The step is  0 ...............[loss_train]: 86.42706
The step is  0 ...............................................[loss_test]: 176584.0962818287
The step is  200 ...............[loss_train]: 55.15376
The step is  200 ...............................................[loss_test]: 36.63536894762957
The step is  400 ...............[loss_train]: 43.339535
The step is  400 ...............................................[loss_test]: 25.461906759827226
The step is  600 ...............[loss_train]: 35.98868
The step is  600 ...............................................[loss_test]: 22.652673491725213
The step is  800 ...............[loss_train]: 30.745687
The step is  800 ...............................................[loss_test]: 21.072192280380815
The step is  1000 ...............[loss_train]: 26.83507
The step is  1000 ...............................................[loss_test]: 20.187618140821105
The step is  1200 ...............[loss_train]: 23.76442
The step is  1200 ...............................................[loss_test]: 19.8514552337152
The step is  1400 ...............[loss_train]: 21.318758
The step is  1400 ...............................................[loss_test]: 18.98354634090706
The step is  1600 ...............[loss_train]: 19.318905
The step is  1600 ...............................................[loss_test]: 18.71392482298392
The step is  1800 ...............[loss_train]: 17.67323
The step is  1800 ...............................................[loss_test]: 17.41641167799632
The step is  2000 ...............[loss_train]: 16.340149
The step is  2000 ...............................................[loss_test]: 17.108064192312735
The step is  2200 ...............[loss_train]: 15.204404
The step is  2200 ...............................................[loss_test]: 16.666790767952246
The step is  2400 ...............[loss_train]: 14.216499
The step is  2400 ...............................................[loss_test]: 16.24462084196232
The step is  2600 ...............[loss_train]: 13.379973
The step is  2600 ...............................................[loss_test]: 15.805169149681374
The step is  2800 ...............[loss_train]: 12.676864
The step is  2800 ...............................................[loss_test]: 15.627896874039262
The step is  3000 ...............[loss_train]: 12.05489
The step is  3000 ...............................................[loss_test]: 15.302822227831241
The step is  3200 ...............[loss_train]: 11.531205
The step is  3200 ...............................................[loss_test]: 14.878443188137478
The step is  3400 ...............[loss_train]: 11.06205
The step is  3400 ...............................................[loss_test]: 14.79662839130119
The step is  3600 ...............[loss_train]: 10.656072
The step is  3600 ...............................................[loss_test]: 14.529879874653286
The step is  3800 ...............[loss_train]: 10.294985
The step is  3800 ...............................................[loss_test]: 14.38862990449976
The step is  4000 ...............[loss_train]: 9.962561
The step is  4000 ...............................................[loss_test]: 14.064813642590135
The step is  4200 ...............[loss_train]: 9.668994
The step is  4200 ...............................................[loss_test]: 13.895457245685437
The step is  4400 ...............[loss_train]: 9.412963
The step is  4400 ...............................................[loss_test]: 13.285678810543484
The step is  4600 ...............[loss_train]: 9.184889
The step is  4600 ...............................................[loss_test]: 12.950146304236519
The step is  4800 ...............[loss_train]: 8.988347
The step is  4800 ...............................................[loss_test]: 12.570631839610913
The step is  5000 ...............[loss_train]: 8.812597
The step is  5000 ...............................................[loss_test]: 12.327483592209992
The step is  5200 ...............[loss_train]: 8.647274
The step is  5200 ...............................................[loss_test]: 12.095677896782204
The step is  5400 ...............[loss_train]: 8.49862
The step is  5400 ...............................................[loss_test]: 11.935940117747695
The step is  5600 ...............[loss_train]: 8.360638
The step is  5600 ...............................................[loss_test]: 11.762868885640744
The step is  5800 ...............[loss_train]: 8.252315
The step is  5800 ...............................................[loss_test]: 11.65299234787623
The step is  6000 ...............[loss_train]: 8.142995
The step is  6000 ...............................................[loss_test]: 11.606743737503335
The step is  6200 ...............[loss_train]: 8.045058
The step is  6200 ...............................................[loss_test]: 11.445402070328042
The step is  6400 ...............[loss_train]: 7.9554386
The step is  6400 ...............................................[loss_test]: 11.466319931877983
The step is  6600 ...............[loss_train]: 7.8543243
The step is  6600 ...............................................[loss_test]: 12.178227601227936
The step is  6800 ...............[loss_train]: 7.764409
The step is  6800 ...............................................[loss_test]: 11.409689333703783
The step is  7000 ...............[loss_train]: 7.699924
The step is  7000 ...............................................[loss_test]: 12.132889319349218
The step is  7200 ...............[loss_train]: 7.6150947
The step is  7200 ...............................................[loss_test]: 11.036518141075417
The step is  7400 ...............[loss_train]: 7.551992
The step is  7400 ...............................................[loss_test]: 11.125180884643838
The step is  7600 ...............[loss_train]: 7.474767
The step is  7600 ...............................................[loss_test]: 10.238509796283862
The step is  7800 ...............[loss_train]: 7.432384
The step is  7800 ...............................................[loss_test]: 10.522431135177612
The step is  8000 ...............[loss_train]: 7.380323
The step is  8000 ...............................................[loss_test]: 10.418629246729392
The step is  8200 ...............[loss_train]: 7.3203545
The step is  8200 ...............................................[loss_test]: 9.820853198016131
The step is  8400 ...............[loss_train]: 7.278713
The step is  8400 ...............................................[loss_test]: 9.517102943526375
The step is  8600 ...............[loss_train]: 7.2392435
The step is  8600 ...............................................[loss_test]: 10.838831124482331
The step is  8800 ...............[loss_train]: 7.1980114
The step is  8800 ...............................................[loss_test]: 10.582803063922459
The step is  9000 ...............[loss_train]: 7.160175
The step is  9000 ...............................................[loss_test]: 10.492271679419058
The step is  9200 ...............[loss_train]: 7.1177893
The step is  9200 ...............................................[loss_test]: 9.498841475557398
The step is  9400 ...............[loss_train]: 7.0815673
The step is  9400 ...............................................[loss_test]: 9.490555502750256
The step is  9600 ...............[loss_train]: 7.0470953
The step is  9600 ...............................................[loss_test]: 9.387842151853773
The step is  9800 ...............[loss_train]: 7.004253
The step is  9800 ...............................................[loss_test]: 9.185716176474536
The step is  10000 ...............[loss_train]: 6.983739
The step is  10000 ...............................................[loss_test]: 8.704513786015687

The loss is clearly still decreasing, so keep lr unchanged and run another 10000 steps. The results are as follows:

The step is  0 ...............[loss_train]: 6.9829164
The step is  0 ...............................................[loss_test]: 124498.90631216543
The step is  200 ...............[loss_train]: 8.10008
The step is  200 ...............................................[loss_test]: 25.2269318457003
The step is  400 ...............[loss_train]: 7.5502853
The step is  400 ...............................................[loss_test]: 18.174842097141124
The step is  600 ...............[loss_train]: 7.345558
The step is  600 ...............................................[loss_test]: 12.517702800256234
The step is  800 ...............[loss_train]: 7.216028
The step is  800 ...............................................[loss_test]: 11.535340238500524
The step is  1000 ...............[loss_train]: 7.089348
The step is  1000 ...............................................[loss_test]: 11.44769095932996
The step is  1200 ...............[loss_train]: 7.011389
The step is  1200 ...............................................[loss_test]: 10.270000312063429
The step is  1400 ...............[loss_train]: 6.9484954
The step is  1400 ...............................................[loss_test]: 8.921192584214387
The step is  1600 ...............[loss_train]: 6.8936195
The step is  1600 ...............................................[loss_test]: 8.472521826072976
The step is  1800 ...............[loss_train]: 6.8382845
The step is  1800 ...............................................[loss_test]: 8.96128922480124
The step is  2000 ...............[loss_train]: 6.7917943
The step is  2000 ...............................................[loss_test]: 8.723530018771136
The step is  2200 ...............[loss_train]: 6.748647
The step is  2200 ...............................................[loss_test]: 8.844898210631477
The step is  2400 ...............[loss_train]: 6.711681
The step is  2400 ...............................................[loss_test]: 8.574164946873983
The step is  2600 ...............[loss_train]: 6.6803555
The step is  2600 ...............................................[loss_test]: 8.301508846106353
The step is  2800 ...............[loss_train]: 6.6487784
The step is  2800 ...............................................[loss_test]: 7.853043971238313
The step is  3000 ...............[loss_train]: 6.620329
The step is  3000 ...............................................[loss_test]: 7.830964693316707
The step is  3200 ...............[loss_train]: 6.5954256
The step is  3200 ...............................................[loss_test]: 7.630005408216406
The step is  3400 ...............[loss_train]: 6.570092
The step is  3400 ...............................................[loss_test]: 7.443931003411611
The step is  3600 ...............[loss_train]: 6.5469995
The step is  3600 ...............................................[loss_test]: 7.332441828869007
The step is  3800 ...............[loss_train]: 6.525951
The step is  3800 ...............................................[loss_test]: 7.098393744892544
The step is  4000 ...............[loss_train]: 6.505974
The step is  4000 ...............................................[loss_test]: 6.934731631367295
The step is  4200 ...............[loss_train]: 6.487751
The step is  4200 ...............................................[loss_test]: 6.873793182549654
The step is  4400 ...............[loss_train]: 6.470075
The step is  4400 ...............................................[loss_test]: 6.820729240223214
The step is  4600 ...............[loss_train]: 6.4532843
The step is  4600 ...............................................[loss_test]: 6.749174435933431
The step is  4800 ...............[loss_train]: 6.437518
The step is  4800 ...............................................[loss_test]: 6.657903397524798
The step is  5000 ...............[loss_train]: 6.420953
The step is  5000 ...............................................[loss_test]: 6.6118698252571955
The step is  5200 ...............[loss_train]: 6.4070225
The step is  5200 ...............................................[loss_test]: 6.562360595773767
The step is  5400 ...............[loss_train]: 6.393041
The step is  5400 ...............................................[loss_test]: 6.535672415185858
The step is  5600 ...............[loss_train]: 6.380245
The step is  5600 ...............................................[loss_test]: 6.494779123200311
The step is  5800 ...............[loss_train]: 6.365892
The step is  5800 ...............................................[loss_test]: 6.502580373375504
The step is  6000 ...............[loss_train]: 6.3532867
The step is  6000 ...............................................[loss_test]: 6.425201546262811
The step is  6200 ...............[loss_train]: 6.3409605
The step is  6200 ...............................................[loss_test]: 6.394948252925166
The step is  6400 ...............[loss_train]: 6.3265924
The step is  6400 ...............................................[loss_test]: 6.300970377745451
The step is  6600 ...............[loss_train]: 6.313385
The step is  6600 ...............................................[loss_test]: 6.31483358365518
The step is  6800 ...............[loss_train]: 6.300455
The step is  6800 ...............................................[loss_test]: 6.294367648937084
The step is  7000 ...............[loss_train]: 6.2876782
The step is  7000 ...............................................[loss_test]: 6.195465485254924
The step is  7200 ...............[loss_train]: 6.2750783
The step is  7200 ...............................................[loss_test]: 6.167593545383877
The step is  7400 ...............[loss_train]: 6.2630615
The step is  7400 ...............................................[loss_test]: 6.127459212585732
The step is  7600 ...............[loss_train]: 6.251576
The step is  7600 ...............................................[loss_test]: 6.081679441310741
The step is  7800 ...............[loss_train]: 6.238767
The step is  7800 ...............................................[loss_test]: 6.118286468364574
The step is  8000 ...............[loss_train]: 6.2280426
The step is  8000 ...............................................[loss_test]: 6.024300456047058
The step is  8200 ...............[loss_train]: 6.21642
The step is  8200 ...............................................[loss_test]: 5.953339185979631
The step is  8400 ...............[loss_train]: 6.2062006
The step is  8400 ...............................................[loss_test]: 5.910111387570699
The step is  8600 ...............[loss_train]: 6.1953826
The step is  8600 ...............................................[loss_test]: 5.871508668970178
The step is  8800 ...............[loss_train]: 6.1841655
The step is  8800 ...............................................[loss_test]: 5.8409152472460715
The step is  9000 ...............[loss_train]: 6.171872
The step is  9000 ...............................................[loss_test]: 5.822817193137275
The step is  9200 ...............[loss_train]: 6.160478
The step is  9200 ...............................................[loss_test]: 5.7752892661977695
The step is  9400 ...............[loss_train]: 6.1494985
The step is  9400 ...............................................[loss_test]: 5.740575900784245
The step is  9600 ...............[loss_train]: 6.1369925
The step is  9600 ...............................................[loss_test]: 5.748277935716841
The step is  9800 ...............[loss_train]: 6.125249
The step is  9800 ...............................................[loss_test]: 5.827146790645741
The step is  10000 ...............[loss_train]: 6.11406
The step is  10000 ...............................................[loss_test]: 5.662381264898512

Convergence is still clearly visible, so top the run up once more (step += 10000).
[I slightly regret dropping the learning rate by a whole order of magnitude QAQ; next time, when training locally, lr = 0.02 for 30000 steps might be the better choice.]
The results are as follows:

The step is  0 ...............[loss_train]: 6.113809
The step is  0 ...............................................[loss_test]: 74464.96134867491
The step is  200 ...............[loss_train]: 7.4602675
The step is  200 ...............................................[loss_test]: 29.92432438885724
The step is  400 ...............[loss_train]: 6.777582
The step is  400 ...............................................[loss_test]: 13.023062992978979
The step is  600 ...............[loss_train]: 6.553012
The step is  600 ...............................................[loss_test]: 12.073077700756214
The step is  800 ...............[loss_train]: 6.4018126
The step is  800 ...............................................[loss_test]: 9.314167848339787
The step is  1000 ...............[loss_train]: 6.307288
The step is  1000 ...............................................[loss_test]: 7.409950070910984
The step is  1200 ...............[loss_train]: 6.2511005
The step is  1200 ...............................................[loss_test]: 6.793920146094428
The step is  1400 ...............[loss_train]: 6.194964
The step is  1400 ...............................................[loss_test]: 6.177425419842756
The step is  1600 ...............[loss_train]: 6.159448
The step is  1600 ...............................................[loss_test]: 5.941014484122947
The step is  1800 ...............[loss_train]: 6.114718
The step is  1800 ...............................................[loss_test]: 5.90687581344887
The step is  2000 ...............[loss_train]: 6.086493
The step is  2000 ...............................................[loss_test]: 5.755285660425822
The step is  2200 ...............[loss_train]: 6.066393
The step is  2200 ...............................................[loss_test]: 5.590765710230227
The step is  2400 ...............[loss_train]: 6.0466113
The step is  2400 ...............................................[loss_test]: 5.527593824598524
The step is  2600 ...............[loss_train]: 6.0272145
The step is  2600 ...............................................[loss_test]: 5.481305769196263
The step is  2800 ...............[loss_train]: 6.009251
The step is  2800 ...............................................[loss_test]: 5.39416327741411
The step is  3000 ...............[loss_train]: 5.986756
The step is  3000 ...............................................[loss_test]: 5.324613496109292
The step is  3200 ...............[loss_train]: 5.9663205
The step is  3200 ...............................................[loss_test]: 5.252431878337154
The step is  3400 ...............[loss_train]: 5.944916
The step is  3400 ...............................................[loss_test]: 5.21808111888391
The step is  3600 ...............[loss_train]: 5.925144
The step is  3600 ...............................................[loss_test]: 5.162776152292888
The step is  3800 ...............[loss_train]: 5.9260387
The step is  3800 ...............................................[loss_test]: 5.124254010341786
The step is  4000 ...............[loss_train]: 5.907517
The step is  4000 ...............................................[loss_test]: 5.053566848790204
The step is  4200 ...............[loss_train]: 5.896279
The step is  4200 ...............................................[loss_test]: 5.008880564460048
The step is  4400 ...............[loss_train]: 5.8747034
The step is  4400 ...............................................[loss_test]: 4.971017649880162
The step is  4600 ...............[loss_train]: 5.867584
The step is  4600 ...............................................[loss_test]: 4.97145022727825
The step is  4800 ...............[loss_train]: 5.8515553
The step is  4800 ...............................................[loss_test]: 4.9247304885475724
The step is  5000 ...............[loss_train]: 5.853008
The step is  5000 ...............................................[loss_test]: 4.873373799853855
The step is  5200 ...............[loss_train]: 5.8374043
The step is  5200 ...............................................[loss_test]: 4.84392174968013
The step is  5400 ...............[loss_train]: 5.8179545
The step is  5400 ...............................................[loss_test]: 4.825992456188908
The step is  5600 ...............[loss_train]: 5.816919
The step is  5600 ...............................................[loss_test]: 4.960121079727456
The step is  5800 ...............[loss_train]: 5.815625
The step is  5800 ...............................................[loss_test]: 4.864058117071788
The step is  6000 ...............[loss_train]: 5.794406
The step is  6000 ...............................................[loss_test]: 4.727302915520138
The step is  6200 ...............[loss_train]: 5.7906203
The step is  6200 ...............................................[loss_test]: 4.630874850131847
The step is  6400 ...............[loss_train]: 5.782653
The step is  6400 ...............................................[loss_test]: 4.634813489737334
The step is  6800 ...............[loss_train]: 5.771205
The step is  6800 ...............................................[loss_test]: 4.5297522279951306
The step is  7000 ...............[loss_train]: 5.7540708
The step is  7000 ...............................................[loss_test]: 4.593969795438978
The step is  7200 ...............[loss_train]: 5.7515583
The step is  7200 ...............................................[loss_test]: 4.738888449139065
The step is  7400 ...............[loss_train]: 5.7449126
The step is  7400 ...............................................[loss_test]: 4.690183379031994
The step is  7600 ...............[loss_train]: 5.735485
The step is  7600 ...............................................[loss_test]: 4.6057106256484985
The step is  7800 ...............[loss_train]: 5.7293587
The step is  7800 ...............................................[loss_test]: 4.497223915877165
The step is  8000 ...............[loss_train]: 5.725987
The step is  8000 ...............................................[loss_test]: 4.648918041476497
The step is  8200 ...............[loss_train]: 5.715697
The step is  8200 ...............................................[loss_test]: 4.566120553899695
The step is  8400 ...............[loss_train]: 5.709465
The step is  8400 ...............................................[loss_test]: 4.5503166958137795
The step is  8600 ...............[loss_train]: 5.700506
The step is  8600 ...............................................[loss_test]: 4.492288523250156
The step is  8800 ...............[loss_train]: 5.6951985
The step is  8800 ...............................................[loss_test]: 4.453176644113329
The step is  9000 ...............[loss_train]: 5.6912746
The step is  9000 ...............................................[loss_test]: 4.381203682334335
The step is  9200 ...............[loss_train]: 5.6852994
The step is  9200 ...............................................[loss_test]: 4.302691402258696
The step is  9400 ...............[loss_train]: 5.6860147
The step is  9400 ...............................................[loss_test]: 4.259407895582694
The step is  9600 ...............[loss_train]: 5.6731906
The step is  9600 ...............................................[loss_test]: 4.197927722224483
The step is  9800 ...............[loss_train]: 5.6625967
The step is  9800 ...............................................[loss_test]: 4.210703657733069
The step is  10000 ...............[loss_train]: 5.6571145
The step is  10000 ...............................................[loss_test]: 4.1902374558978615

lr = 0.01, 40000 steps run so far... let's take a look at the results.
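For reference, here is a minimal sketch of the kind of training/logging loop that could produce the printout that follows. This is only a reconstruction, not the original code: it assumes one full-batch train_on_batch step per iteration and an evaluate call on the test split every 200 steps; n_steps and log_every are illustrative names.

n_steps = 10000          # steps per run (illustrative)
log_every = 200          # logging interval matching the printout below

for step in range(n_steps + 1):
    # one gradient step on the whole training set (full-batch GD, assumed)
    loss_train = model.train_on_batch(X_train, Y_train)
    if step % log_every == 0:
        # MSE on the held-out split; verbose=0 suppresses the progress bar
        loss_test = model.evaluate(X_test, Y_test, verbose=0)
        print('The step is ', step, '...............[loss_train]:', loss_train)
        print('The step is ', step, '...............................................[loss_test]:', loss_test)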

The step is  0 ...............[loss_train]: 5.656614
The step is  0 ...............................................[loss_test]: 63664.60655848185
The step is  200 ...............[loss_train]: 7.11886
The step is  200 ...............................................[loss_test]: 52.09049730830722
The step is  400 ...............[loss_train]: 6.4600625
The step is  400 ...............................................[loss_test]: 9.230565380167079
The step is  600 ...............[loss_train]: 6.1862617
The step is  600 ...............................................[loss_test]: 7.453303297360738
The step is  800 ...............[loss_train]: 6.0408626
The step is  800 ...............................................[loss_test]: 5.200981718522531
The step is  1000 ...............[loss_train]: 5.965068
The step is  1000 ...............................................[loss_test]: 4.875279819523847
The step is  1200 ...............[loss_train]: 5.908588
The step is  1200 ...............................................[loss_test]: 4.743478435057181
The step is  1400 ...............[loss_train]: 5.8607306
The step is  1400 ...............................................[loss_test]: 4.672507754078618
The step is  1600 ...............[loss_train]: 5.8314376
The step is  1600 ...............................................[loss_test]: 4.563611352885211
The step is  1800 ...............[loss_train]: 5.8040433
The step is  1800 ...............................................[loss_test]: 4.489517644599632
The step is  2000 ...............[loss_train]: 5.786041
The step is  2000 ...............................................[loss_test]: 4.4442360401153564
The step is  2200 ...............[loss_train]: 5.7753854
The step is  2200 ...............................................[loss_test]: 4.408069910826506
The step is  2400 ...............[loss_train]: 5.754279
The step is  2400 ...............................................[loss_test]: 4.439098349323979
The step is  2600 ...............[loss_train]: 5.741812
The step is  2600 ...............................................[loss_test]: 4.3816952308019
The step is  2800 ...............[loss_train]: 5.7278595
The step is  2800 ...............................................[loss_test]: 4.320185467048928
The step is  3000 ...............[loss_train]: 5.7178035
The step is  3000 ...............................................[loss_test]: 4.267243645809315
The step is  3200 ...............[loss_train]: 5.7073455
The step is  3200 ...............................................[loss_test]: 4.231665818779557
The step is  3400 ...............[loss_train]: 5.6986403
The step is  3400 ...............................................[loss_test]: 4.206608145325272
The step is  3600 ...............[loss_train]: 5.6875467
The step is  3600 ...............................................[loss_test]: 4.169648960784629
The step is  3800 ...............[loss_train]: 5.677865
The step is  3800 ...............................................[loss_test]: 4.150309984330778
The step is  4000 ...............[loss_train]: 5.6691937
The step is  4000 ...............................................[loss_test]: 4.1209083354031595
The step is  4200 ...............[loss_train]: 5.66471
The step is  4200 ...............................................[loss_test]: 4.105338900177567
The step is  4400 ...............[loss_train]: 5.6526175
The step is  4400 ...............................................[loss_test]: 4.073122616167422
The step is  4600 ...............[loss_train]: 5.647765
The step is  4600 ...............................................[loss_test]: 4.0576327862562955
The step is  4800 ...............[loss_train]: 5.6425214
The step is  4800 ...............................................[loss_test]: 4.033924257313764
The step is  5000 ...............[loss_train]: 5.6352935
The step is  5000 ...............................................[loss_test]: 4.021246839452673
The step is  5200 ...............[loss_train]: 5.6252394
The step is  5200 ...............................................[loss_test]: 3.984381507944178
The step is  5400 ...............[loss_train]: 5.6231766
The step is  5400 ...............................................[loss_test]: 3.9664067780530012
The step is  5600 ...............[loss_train]: 5.6154747
The step is  5600 ...............................................[loss_test]: 3.94403839552844
The step is  5800 ...............[loss_train]: 5.60753
The step is  5800 ...............................................[loss_test]: 3.9218364424175687
The step is  6000 ...............[loss_train]: 5.597889
The step is  6000 ...............................................[loss_test]: 3.901428116692437
The step is  6200 ...............[loss_train]: 5.593694
The step is  6200 ...............................................[loss_test]: 3.8766607929159096
The step is  6400 ...............[loss_train]: 5.585779
The step is  6400 ...............................................[loss_test]: 3.861407372686598
The step is  6600 ...............[loss_train]: 5.5833907
The step is  6600 ...............................................[loss_test]: 3.847714212205675
The step is  6800 ...............[loss_train]: 5.5749383
The step is  6800 ...............................................[loss_test]: 3.841452404304787
The step is  7000 ...............[loss_train]: 5.5737963
The step is  7000 ...............................................[loss_test]: 3.8388270183845803
The step is  7200 ...............[loss_train]: 5.566976
The step is  7200 ...............................................[loss_test]: 3.8146474493874445
The step is  7400 ...............[loss_train]: 5.561958
The step is  7400 ...............................................[loss_test]: 3.806270864274767
The step is  7600 ...............[loss_train]: 5.5528736
The step is  7600 ...............................................[loss_test]: 3.7607763166780823
The step is  7800 ...............[loss_train]: 5.551815
The step is  7800 ...............................................[loss_test]: 3.772628837161594
The step is  8000 ...............[loss_train]: 5.545028
The step is  8000 ...............................................[loss_test]: 3.766728259898998
The step is  8200 ...............[loss_train]: 5.5421486
The step is  8200 ...............................................[loss_test]: 3.7382217424887196
The step is  8400 ...............[loss_train]: 5.5393515
The step is  8400 ...............................................[loss_test]: 3.708954506450229
The step is  8600 ...............[loss_train]: 5.53146
The step is  8600 ...............................................[loss_test]: 3.685774811991939
The step is  8800 ...............[loss_train]: 5.5273795
The step is  8800 ...............................................[loss_test]: 3.668190426296658
The step is  9000 ...............[loss_train]: 5.5238485
The step is  9000 ...............................................[loss_test]: 3.6644358811555087
The step is  9200 ...............[loss_train]: 5.519959
The step is  9200 ...............................................[loss_test]: 3.6394993552455195
The step is  9400 ...............[loss_train]: 5.515499
The step is  9400 ...............................................[loss_test]: 3.6224264016857854
The step is  9600 ...............[loss_train]: 5.5084605
The step is  9600 ...............................................[loss_test]: 3.5936923887994556
The step is  9800 ...............[loss_train]: 5.5047555
The step is  9800 ...............................................[loss_test]: 3.5875721088162176
The step is  10000 ...............[loss_train]: 5.5000877
The step is  10000 ...............................................[loss_test]: 3.568454658543622

Another 10000 steps.
... the campus network just went down, but after about 9000 more steps the result was still mediocre (converged to roughly loss = 3.1). The plan is to keep shrinking lr; in theory, continuing this way should bring the loss below 1 or even lower (i.e., into an acceptable error range).
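A minimal sketch of how the learning rate could be shrunk before resuming training is given below; the value 0.001 and the ReduceLROnPlateau settings are illustrative, not taken from the actual runs.

# Option 1: manually lower the learning rate on the already-compiled model,
# then resume the training loop (0.001 is an illustrative value).
from keras import backend as K
K.set_value(model.optimizer.lr, 0.001)

# Option 2: let Keras shrink lr automatically whenever the training loss plateaus
# (this assumes training through model.fit; factor/patience/min_lr are illustrative).
from keras.callbacks import ReduceLROnPlateau
reduce_lr = ReduceLROnPlateau(monitor='loss', factor=0.5, patience=20, min_lr=1e-5, verbose=1)
model.fit(X_train, Y_train, epochs=1000, callbacks=[reduce_lr], verbose=0)

Option 1 keeps the manual step loop intact; Option 2 trades it for model.fit but removes the need to restart runs by hand after each lr change.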
Later I will also write a complete post covering the whole task from data extraction through to the finished material, recording the entire process; this post ends here for now.
