def fit(x_f, t_f, x0, t0, u0, x_lb, t_lb, x_ub, t_ub, col_weights, u_weights, tf_iter, newton_iter):
    # Batch size for the collocation points; here we use the full set, so n_batches == 1.
    batch_sz = N_f
    n_batches = N_f // batch_sz

    start_time = time.time()
    # Create optimizers for the network weights, the collocation-point weights, and the initial/boundary weights.
    tf_optimizer = tf.keras.optimizers.Adam(learning_rate=0.005, beta_1=0.99)
    tf_optimizer_weights = tf.keras.optimizers.Adam(learning_rate=0.005, beta_1=0.99)
    tf_optimizer_u = tf.keras.optimizers.Adam(learning_rate=0.005, beta_1=0.99)  # defined but not used below

    print("starting Adam training")

    # Accumulate the four loss terms; with n_batches == 1 this is one entry per epoch.
    mse0Array = np.array([])
    msebArray = np.array([])
    msefArray = np.array([])
    lossAdam = np.array([])

    # Adam training loop (mini-batching supported via n_batches, if used)
    for epoch in range(tf_iter):
        for i in range(n_batches):
            x0_batch = x0
            t0_batch = t0
            u0_batch = u0
            x_f_batch = x_f[i*batch_sz:(i*batch_sz + batch_sz),]
            t_f_batch = t_f[i*batch_sz:(i*batch_sz + batch_sz),]

            loss_value, mse_0, mse_b, mse_f, grads, grads_col, grads_u, g_u, g_f = grad(u_model, x_f_batch, t_f_batch, x0_batch, t0_batch, u0_batch, x_lb, t_lb, x_ub, t_ub, col_weights, u_weights)
            # Gradient descent on the network weights ...
            tf_optimizer.apply_gradients(zip(grads, u_model.trainable_variables))
            # ... and gradient ascent (note the negated gradients) on the self-adaptive weights.
            tf_optimizer_weights.apply_gradients(zip([-grads_col, -grads_u], [col_weights, u_weights]))

            mse0Array = np.append(mse0Array, mse_0)
            msebArray = np.append(msebArray, mse_b)
            msefArray = np.append(msefArray, mse_f)
            lossAdam = np.append(lossAdam, loss_value)
#print("mse0: ",mse0Array)
#print("mseb: ", msebArray)
#print("msef: ", msefArray)
#print("loss: ", lossAdam)
if epoch % 1 == 0:
elapsed = time.time() - start_time
print('It: %d, Time: %.2f' % (epoch, elapsed))
tf.print(f"mse_0: {mse_0} mse_b {mse_b} mse_f: {mse_f} total loss: {loss_value}")
start_time = time.time()
    # Plot the four loss terms after the Adam loop; with n_batches == 1 there is
    # exactly one array entry per epoch, so the x-axis is the epoch index.
    epochArray = np.arange(1, tf_iter + 1, 1)

    fig = plt.figure()
    fig.suptitle('Adam Loss')  # suptitle, so the figure title does not overlap the subplots
    ax1 = fig.add_subplot(2, 2, 1)
    ax2 = fig.add_subplot(2, 2, 2)
    ax3 = fig.add_subplot(2, 2, 3)
    ax4 = fig.add_subplot(2, 2, 4)
    ax1.plot(epochArray, mse0Array, label='mse0', linewidth=1)
    ax2.plot(epochArray, msebArray, label='mseb', linewidth=1)
    ax3.plot(epochArray, msefArray, label='msef', linewidth=1)
    ax4.plot(epochArray, lossAdam, label='total loss (Adam)', linewidth=1)
    ax1.legend()
    ax1.set_title('mse_0')
    ax2.legend()
    ax2.set_title('mse_b')
    # ax2.set_xlabel('Epoch')
    # ax2.set_ylabel('loss')
    ax3.legend()
    ax3.set_title('mse_f')
    ax4.legend()
    ax4.set_title('loss_value')
    plt.show()
    # The same four curves combined in a single figure.
    plt.figure()
    plt.title('Adam Loss')
    plt.plot(epochArray, mse0Array, label='mse0', linewidth=1)
    plt.plot(epochArray, msebArray, label='mseb', linewidth=1)
    plt.plot(epochArray, msefArray, label='msef', linewidth=1)
    plt.plot(epochArray, lossAdam, label='total loss (Adam)', linewidth=1)
    plt.legend()
    plt.show()
    # L-BFGS optimization of the network weights
    print("Starting L-BFGS training")
    loss_and_flat_grad = get_loss_and_flat_grad(x_f_batch, t_f_batch, x0_batch, t0_batch, u0_batch, x_lb, t_lb, x_ub, t_ub, col_weights, u_weights)
    lbfgs(loss_and_flat_grad,
          get_weights(u_model),
          Struct(), maxIter=newton_iter, learningRate=0.8)
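For context, here is a minimal sketch of how fit might be driven. The sizes, the sampling, and the initial condition below are illustrative assumptions, not the notebook's actual data pipeline; N_f is the global that fit reads, and u_model, grad, get_loss_and_flat_grad, and get_weights are assumed to be defined elsewhere in the notebook:

import numpy as np
import tensorflow as tf

N_f = 10000   # number of collocation points; fit() reads this global
N0  = 100     # number of initial-condition points (assumed)
Nb  = 100     # number of boundary points (assumed)

x_f = np.random.uniform(-1.0, 1.0, (N_f, 1)).astype(np.float32)
t_f = np.random.uniform(0.0, 1.0, (N_f, 1)).astype(np.float32)

x0 = np.linspace(-1.0, 1.0, N0).reshape(-1, 1).astype(np.float32)
t0 = np.zeros_like(x0)
u0 = -np.sin(np.pi * x0)          # e.g. a Burgers-style initial condition (assumed)

t_lb = np.linspace(0.0, 1.0, Nb).reshape(-1, 1).astype(np.float32)
t_ub = t_lb.copy()
x_lb = -np.ones_like(t_lb)
x_ub = np.ones_like(t_ub)

# one self-adaptive weight per collocation point and per initial point
col_weights = tf.Variable(tf.random.uniform([N_f, 1]))
u_weights   = tf.Variable(tf.random.uniform([N0, 1]))

fit(x_f, t_f, x0, t0, u0, x_lb, t_lb, x_ub, t_ub,
    col_weights, u_weights, tf_iter=10000, newton_iter=10000)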
First, initialize a separate array for each of the four loss terms:
mse0Array = np.array([])
msebArray = np.array([])
msefArray = np.array([])
lossAdam = np.array([])
Then, append the loss values produced in each epoch:
mse0Array = np.append(mse0Array, mse_0)
msebArray = np.append(msebArray, mse_b)
msefArray = np.append(msefArray, mse_f)
lossAdam = np.append(lossAdam, loss_value)
Finally, do the plotting outside the epoch for-loop.
To draw the four losses as separate panels, use subplot:
epochArray = np.arange(1, tf_iter + 1, 1)
fig = plt.figure()
fig.suptitle('Adam Loss')  # suptitle, so the figure title does not overlap the subplots
ax1 = fig.add_subplot(2, 2, 1)
ax2 = fig.add_subplot(2, 2, 2)
ax3 = fig.add_subplot(2, 2, 3)
ax4 = fig.add_subplot(2, 2, 4)
ax1.plot(epochArray, mse0Array, label='mse0', linewidth=1)
ax2.plot(epochArray, msebArray, label='mseb', linewidth=1)
ax3.plot(epochArray, msefArray, label='msef', linewidth=1)
ax4.plot(epochArray, lossAdam, label='total loss (Adam)', linewidth=1)
ax1.legend()
ax1.set_title('mse_0')
ax2.legend()
ax2.set_title('mse_b')
# ax2.set_xlabel('Epoch')
# ax2.set_ylabel('loss')
ax3.legend()
ax3.set_title('mse_f')
ax4.legend()
ax4.set_title('loss_value')
plt.show()
The resulting four-panel figure is shown below.
To combine all four curves in a single figure instead:
plt.figure()
plt.title('Adam Loss')
plt.plot(epochArray, mse0Array, label = 'mse0', linewidth=1)
plt.plot(epochArray, msebArray, label='mseb', linewidth=1)
plt.plot(epochArray, msefArray, label='msef', linewidth=1)
plt.plot(epochArray, lossAdam, label='total loss(Adam)', linewidth=1)
plt.legend()
plt.show()
The combined figure is shown below.
The L-BFGS code block, with the loss-tracking lines added, is as follows:
# pulled from https://github.com/yaroslavvb/stuff/blob/master/eager_lbfgs/eager_lbfgs.py
import tensorflow as tf
import numpy as np
import time
import matplotlib.pyplot as plt

def dot(a, b):
    """Dot product function since TensorFlow doesn't have one."""
    return tf.reduce_sum(a*b)

def verbose_func(s):
    print(s)

final_loss = None
times = []
def lbfgs(opfunc, x, state, maxIter=100, learningRate=1, do_verbose=True):
    """Port of lbfgs.lua, using TensorFlow eager mode."""
    lbfgsLoss = np.array([])  # added: accumulate the loss at every iteration for plotting
    global final_loss, times
    maxEval = maxIter*1.25
    tolFun = 1e-5
    tolX = 1e-9
    nCorrection = 50
    isverbose = False

    # verbose function
    if isverbose:
        verbose = verbose_func
    else:
        verbose = lambda x: None

    f, g = opfunc(x)
    f_hist = [f]
    currentFuncEval = 1
    state.funcEval = state.funcEval + 1
    p = g.shape[0]

    # check optimality of initial point
    tmp1 = tf.abs(g)
    if tf.reduce_sum(tmp1) <= tolFun:
        verbose("optimality condition below tolFun")
        return x, f_hist

    # optimize for a max of maxIter iterations
    nIter = 0
    times = []
    while nIter < maxIter:
        start_time = time.time()

        # keep track of nb of iterations
        nIter = nIter + 1
        state.nIter = state.nIter + 1

        ############################################################
        ## compute gradient descent direction
        ############################################################
        if state.nIter == 1:
            d = -g
            old_dirs = []
            old_stps = []
            Hdiag = 1
        else:
            # do lbfgs update (update memory)
            y = g - g_old
            s = d*t
            ys = dot(y, s)
            if ys > 1e-10:
                # updating memory
                if len(old_dirs) == nCorrection:
                    # shift history by one (limited-memory)
                    del old_dirs[0]
                    del old_stps[0]
                # store new direction/step
                old_dirs.append(s)
                old_stps.append(y)
                # update scale of initial Hessian approximation
                Hdiag = ys/dot(y, y)

            # compute the approximate (L-BFGS) inverse Hessian
            # multiplied by the gradient
            k = len(old_dirs)

            # need to be accessed element-by-element, so don't re-type tensor:
            ro = [0]*nCorrection
            for i in range(k):
                ro[i] = 1/dot(old_stps[i], old_dirs[i])

            # iteration in L-BFGS loop collapsed to use just one buffer
            # need to be accessed element-by-element, so don't re-type tensor:
            al = [0]*nCorrection

            q = -g
            for i in range(k-1, -1, -1):
                al[i] = dot(old_dirs[i], q) * ro[i]
                q = q - al[i]*old_stps[i]

            # multiply by initial Hessian
            r = q*Hdiag
            for i in range(k):
                be_i = dot(old_stps[i], r) * ro[i]
                r += (al[i]-be_i)*old_dirs[i]
            d = r
            # final direction is in r/d (same object)

        g_old = g
        f_old = f
        ############################################################
        ## compute step length
        ############################################################
        # directional derivative
        gtd = dot(g, d)

        # check that progress can be made along that direction
        if gtd > -tolX:
            verbose("Can not make progress along direction.")
            break

        # reset initial guess for step size
        if state.nIter == 1:
            tmp1 = tf.abs(g)
            t = min(1, 1/tf.reduce_sum(tmp1))
        else:
            t = learningRate

        x += t*d

        if nIter != maxIter:
            # re-evaluate function only if not in last iteration
            # the reason we do this: in a stochastic setting,
            # no use to re-evaluate that function here
            f, g = opfunc(x)
            lsFuncEval = 1
            f_hist.append(f)

        # update func eval
        currentFuncEval = currentFuncEval + lsFuncEval
        state.funcEval = state.funcEval + lsFuncEval
        ############################################################
        ## check conditions
        ############################################################
        if nIter == maxIter:
            break

        if currentFuncEval >= maxEval:
            # max nb of function evals
            print('max nb of function evals')
            break

        tmp1 = tf.abs(g)
        if tf.reduce_sum(tmp1) <= tolFun:
            # check optimality
            print('optimality condition below tolFun')
            break

        tmp1 = tf.abs(d*t)
        if tf.reduce_sum(tmp1) <= tolX:
            # step size below tolX
            print('step size below tolX')
            break

        if tf.abs(f - f_old) < tolX:   # fixed: was tf.abs(f, f_old)
            # function value changing less than tolX
            print('function value changing less than tolX' + str(tf.abs(f - f_old)))
            break

        if do_verbose:
            if nIter % 1 == 0:
                print("Step %3d loss %6.5f" % (nIter, f.numpy()))
            if nIter == maxIter - 1:
                final_loss = f.numpy()
            lbfgsLoss = np.append(lbfgsLoss, f.numpy())  # added: record this iteration's loss
            # print("lbfgsLoss: ", lbfgsLoss)

    # added: plot the recorded losses once the loop has finished
    # (every break above fires before the append, so lbfgsLoss has nIter-1 entries)
    iterArray = np.arange(1, nIter, 1)
    plt.figure()
    plt.title('LBFGS Loss')
    plt.plot(iterArray, lbfgsLoss)
    plt.show()
    # save state
    state.old_dirs = old_dirs
    state.old_stps = old_stps
    state.Hdiag = Hdiag
    state.g_old = g_old
    state.f_old = f_old
    state.t = t
    state.d = d

    return x, f_hist, currentFuncEval

# dummy/Struct gives Lua-like struct object with 0 defaults
class dummy(object):
    pass

class Struct(dummy):
    def __getattribute__(self, key):
        if key == '__dict__':
            return super(dummy, self).__getattribute__('__dict__')
        return self.__dict__.get(key, 0)
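To make the opfunc contract concrete, here is a small smoke test of my own (not from the original notebook): opfunc takes a flat parameter tensor and returns the pair (loss, flat gradient), while Struct() supplies the zero-defaulted bookkeeping fields (funcEval, nIter) that lbfgs reads before assigning them.

# Hypothetical smoke test, not part of the original code: minimize ||x - 2||^2.
def quad_opfunc(x):
    with tf.GradientTape() as tape:
        tape.watch(x)                      # x is a plain tensor, so watch it explicitly
        f = tf.reduce_sum((x - 2.0) ** 2)
    return f, tape.gradient(f, x)

x_opt, f_hist, n_evals = lbfgs(quad_opfunc, tf.zeros([5]), Struct(),
                               maxIter=50, learningRate=1)
print(x_opt.numpy())                       # should approach [2. 2. 2. 2. 2.]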
As before, first initialize a new array:
lbfgsLoss = np.array([])
Then append and plot. Mind the placement: the append happens inside the loop, while the plotting happens outside it:
lbfgsLoss = np.append(lbfgsLoss, f.numpy())
#print("lbfgsLoss: ",lbfgsLoss)
iterArray = np.arange(1,nIter,1)
plt.figure()
plt.title('LBFGS Loss')
plt.plot(iterArray, lbfgsLoss)
plt.show()
For reference, here are the results with both Adam and L-BFGS run for 10,000 iterations: