def fit(x_f, t_f, x0, t0, u0, x_lb, t_lb, x_ub, t_ub, col_weights, u_weights, tf_iter, newton_iter):
    # Batch size for the collocation points; here we use the full set, so n_batches == 1.
    batch_sz = N_f
    n_batches = N_f // batch_sz

    start_time = time.time()
    # Create optimizers for the network weights, the collocation-point weights, and the initial/boundary weights.
    tf_optimizer = tf.keras.optimizers.Adam(learning_rate=0.005, beta_1=0.99)
    tf_optimizer_weights = tf.keras.optimizers.Adam(learning_rate=0.005, beta_1=0.99)
    tf_optimizer_u = tf.keras.optimizers.Adam(learning_rate=0.005, beta_1=0.99)  # defined but not used below

    print("starting Adam training")

    # Accumulate the four loss terms; with n_batches == 1 this is one entry per epoch.
    mse0Array = np.array([])
    msebArray = np.array([])
    msefArray = np.array([])
    lossAdam = np.array([])

    # Adam training loop (mini-batching supported via n_batches, if used)
    for epoch in range(tf_iter):
        for i in range(n_batches):
            x0_batch = x0
            t0_batch = t0
            u0_batch = u0
            x_f_batch = x_f[i*batch_sz:(i*batch_sz + batch_sz),]
            t_f_batch = t_f[i*batch_sz:(i*batch_sz + batch_sz),]

            loss_value, mse_0, mse_b, mse_f, grads, grads_col, grads_u, g_u, g_f = grad(u_model, x_f_batch, t_f_batch, x0_batch, t0_batch, u0_batch, x_lb, t_lb, x_ub, t_ub, col_weights, u_weights)
            # Gradient descent on the network weights ...
            tf_optimizer.apply_gradients(zip(grads, u_model.trainable_variables))
            # ... and gradient ascent (note the negated gradients) on the self-adaptive weights.
            tf_optimizer_weights.apply_gradients(zip([-grads_col, -grads_u], [col_weights, u_weights]))

            mse0Array = np.append(mse0Array, mse_0)
            msebArray = np.append(msebArray, mse_b)
            msefArray = np.append(msefArray, mse_f)
            lossAdam = np.append(lossAdam, loss_value)
#print("mse0: ",mse0Array)
#print("mseb: ", msebArray)
#print("msef: ", msefArray)
#print("loss: ", lossAdam)
if epoch % 1 == 0:
elapsed = time.time() - start_time
print('It: %d, Time: %.2f' % (epoch, elapsed))
tf.print(f"mse_0: {mse_0} mse_b {mse_b} mse_f: {mse_f} total loss: {loss_value}")
start_time = time.time()
    # Plot the four loss terms after the Adam loop; with n_batches == 1 there is
    # exactly one array entry per epoch, so the x-axis is the epoch index.
    epochArray = np.arange(1, tf_iter + 1, 1)

    fig = plt.figure()
    fig.suptitle('Adam Loss')  # suptitle, so the figure title does not overlap the subplots
    ax1 = fig.add_subplot(2, 2, 1)
    ax2 = fig.add_subplot(2, 2, 2)
    ax3 = fig.add_subplot(2, 2, 3)
    ax4 = fig.add_subplot(2, 2, 4)
    ax1.plot(epochArray, mse0Array, label='mse0', linewidth=1)
    ax2.plot(epochArray, msebArray, label='mseb', linewidth=1)
    ax3.plot(epochArray, msefArray, label='msef', linewidth=1)
    ax4.plot(epochArray, lossAdam, label='total loss (Adam)', linewidth=1)
    ax1.legend()
    ax1.set_title('mse_0')
    ax2.legend()
    ax2.set_title('mse_b')
    # ax2.set_xlabel('Epoch')
    # ax2.set_ylabel('loss')
    ax3.legend()
    ax3.set_title('mse_f')
    ax4.legend()
    ax4.set_title('loss_value')
    plt.show()
    # The same four curves combined in a single figure.
    plt.figure()
    plt.title('Adam Loss')
    plt.plot(epochArray, mse0Array, label='mse0', linewidth=1)
    plt.plot(epochArray, msebArray, label='mseb', linewidth=1)
    plt.plot(epochArray, msefArray, label='msef', linewidth=1)
    plt.plot(epochArray, lossAdam, label='total loss (Adam)', linewidth=1)
    plt.legend()
    plt.show()
    # L-BFGS optimization of the network weights
    print("Starting L-BFGS training")
    loss_and_flat_grad = get_loss_and_flat_grad(x_f_batch, t_f_batch, x0_batch, t0_batch, u0_batch, x_lb, t_lb, x_ub, t_ub, col_weights, u_weights)
    lbfgs(loss_and_flat_grad,
          get_weights(u_model),
          Struct(), maxIter=newton_iter, learningRate=0.8)
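For context, here is a minimal sketch of how fit might be driven. The sizes, the sampling, and the initial condition below are illustrative assumptions, not the notebook's actual data pipeline; N_f is the global that fit reads, and u_model, grad, get_loss_and_flat_grad, and get_weights are assumed to be defined elsewhere in the notebook:

import numpy as np
import tensorflow as tf

N_f = 10000   # number of collocation points; fit() reads this global
N0  = 100     # number of initial-condition points (assumed)
Nb  = 100     # number of boundary points (assumed)

x_f = np.random.uniform(-1.0, 1.0, (N_f, 1)).astype(np.float32)
t_f = np.random.uniform(0.0, 1.0, (N_f, 1)).astype(np.float32)

x0 = np.linspace(-1.0, 1.0, N0).reshape(-1, 1).astype(np.float32)
t0 = np.zeros_like(x0)
u0 = -np.sin(np.pi * x0)          # e.g. a Burgers-style initial condition (assumed)

t_lb = np.linspace(0.0, 1.0, Nb).reshape(-1, 1).astype(np.float32)
t_ub = t_lb.copy()
x_lb = -np.ones_like(t_lb)
x_ub = np.ones_like(t_ub)

# one self-adaptive weight per collocation point and per initial point
col_weights = tf.Variable(tf.random.uniform([N_f, 1]))
u_weights   = tf.Variable(tf.random.uniform([N0, 1]))

fit(x_f, t_f, x0, t0, u0, x_lb, t_lb, x_ub, t_ub,
    col_weights, u_weights, tf_iter=10000, newton_iter=10000)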
First, initialize a separate array for each of the four loss terms:
mse0Array = np.array([])
msebArray = np.array([])
msefArray = np.array([])
lossAdam = np.array([])
Then, append the loss values produced in each epoch:
mse0Array = np.append(mse0Array, mse_0)
msebArray = np.append(msebArray, mse_b)
msefArray = np.append(msefArray, mse_f)
lossAdam = np.append(lossAdam, loss_value)
Finally, do the plotting outside the epoch for-loop.
To draw the four losses as separate panels, use subplot:
epochArray = np.arange(1, tf_iter + 1, 1)
fig = plt.figure()
fig.suptitle('Adam Loss')  # suptitle, so the figure title does not overlap the subplots
ax1 = fig.add_subplot(2, 2, 1)
ax2 = fig.add_subplot(2, 2, 2)
ax3 = fig.add_subplot(2, 2, 3)
ax4 = fig.add_subplot(2, 2, 4)
ax1.plot(epochArray, mse0Array, label='mse0', linewidth=1)
ax2.plot(epochArray, msebArray, label='mseb', linewidth=1)
ax3.plot(epochArray, msefArray, label='msef', linewidth=1)
ax4.plot(epochArray, lossAdam, label='total loss (Adam)', linewidth=1)
ax1.legend()
ax1.set_title('mse_0')
ax2.legend()
ax2.set_title('mse_b')
# ax2.set_xlabel('Epoch')
# ax2.set_ylabel('loss')
ax3.legend()
ax3.set_title('mse_f')
ax4.legend()
ax4.set_title('loss_value')
plt.show()
The resulting four-panel figure is shown below.
To combine all four curves in a single figure instead:
plt.figure()
plt.title('Adam Loss')
plt.plot(epochArray, mse0Array, label = 'mse0', linewidth=1)
plt.plot(epochArray, msebArray, label='mseb', linewidth=1)
plt.plot(epochArray, msefArray, label='msef', linewidth=1)
plt.plot(epochArray, lossAdam, label='total loss(Adam)', linewidth=1)
plt.legend()
plt.show()
The combined figure is shown below.
The L-BFGS code block, with the loss-tracking lines added, is as follows:
# pulled from https://github.com/yaroslavvb/stuff/blob/master/eager_lbfgs/eager_lbfgs.py
import tensorflow as tf
import numpy as np
import time
import matplotlib.pyplot as plt

def dot(a, b):
    """Dot product function since TensorFlow doesn't have one."""
    return tf.reduce_sum(a*b)

def verbose_func(s):
    print(s)

final_loss = None
times = []
def lbfgs(opfunc, x, state, maxIter=100, learningRate=1, do_verbose=True):
    """Port of lbfgs.lua, using TensorFlow eager mode."""
    lbfgsLoss = np.array([])  # added: accumulate the loss at every iteration for plotting
    global final_loss, times
    maxEval = maxIter*1.25
    tolFun = 1e-5
    tolX = 1e-9
    nCorrection = 50
    isverbose = False

    # verbose function
    if isverbose:
        verbose = verbose_func
    else:
        verbose = lambda x: None

    f, g = opfunc(x)
    f_hist = [f]
    currentFuncEval = 1
    state.funcEval = state.funcEval + 1
    p = g.shape[0]

    # check optimality of initial point
    tmp1 = tf.abs(g)
    if tf.reduce_sum(tmp1) <= tolFun:
        verbose("optimality condition below tolFun")
        return x, f_hist

    # optimize for a max of maxIter iterations
    nIter = 0
    times = []
    while nIter < maxIter:
        start_time = time.time()

        # keep track of nb of iterations
        nIter = nIter + 1
        state.nIter = state.nIter + 1

        ############################################################
        ## compute gradient descent direction
        ############################################################
        if state.nIter == 1:
            d = -g
            old_dirs = []
            old_stps = []
            Hdiag = 1
        else:
            # do lbfgs update (update memory)
            y = g - g_old
            s = d*t
            ys = dot(y, s)
            if ys > 1e-10:
                # updating memory
                if len(old_dirs) == nCorrection:
                    # shift history by one (limited-memory)
                    del old_dirs[0]
                    del old_stps[0]
                # store new direction/step
                old_dirs.append(s)
                old_stps.append(y)
                # update scale of initial Hessian approximation
                Hdiag = ys/dot(y, y)

            # compute the approximate (L-BFGS) inverse Hessian
            # multiplied by the gradient
            k = len(old_dirs)

            # need to be accessed element-by-element, so don't re-type tensor:
            ro = [0]*nCorrection
            for i in range(k):
                ro[i] = 1/dot(old_stps[i], old_dirs[i])

            # iteration in L-BFGS loop collapsed to use just one buffer
            # need to be accessed element-by-element, so don't re-type tensor:
            al = [0]*nCorrection

            q = -g
            for i in range(k-1, -1, -1):
                al[i] = dot(old_dirs[i], q) * ro[i]
                q = q - al[i]*old_stps[i]

            # multiply by initial Hessian
            r = q*Hdiag
            for i in range(k):
                be_i = dot(old_stps[i], r) * ro[i]
                r += (al[i]-be_i)*old_dirs[i]
            d = r
            # final direction is in r/d (same object)

        g_old = g
        f_old = f
        ############################################################
        ## compute step length
        ############################################################
        # directional derivative
        gtd = dot(g, d)

        # check that progress can be made along that direction
        if gtd > -tolX:
            verbose("Can not make progress along direction.")
            break

        # reset initial guess for step size
        if state.nIter == 1:
            tmp1 = tf.abs(g)
            t = min(1, 1/tf.reduce_sum(tmp1))
        else:
            t = learningRate

        x += t*d

        if nIter != maxIter:
            # re-evaluate function only if not in last iteration
            # the reason we do this: in a stochastic setting,
            # no use to re-evaluate that function here
            f, g = opfunc(x)
            lsFuncEval = 1
            f_hist.append(f)

        # update func eval
        currentFuncEval = currentFuncEval + lsFuncEval
        state.funcEval = state.funcEval + lsFuncEval
        ############################################################
        ## check conditions
        ############################################################
        if nIter == maxIter:
            break

        if currentFuncEval >= maxEval:
            # max nb of function evals
            print('max nb of function evals')
            break

        tmp1 = tf.abs(g)
        if tf.reduce_sum(tmp1) <= tolFun:
            # check optimality
            print('optimality condition below tolFun')
            break

        tmp1 = tf.abs(d*t)
        if tf.reduce_sum(tmp1) <= tolX:
            # step size below tolX
            print('step size below tolX')
            break

        if tf.abs(f - f_old) < tolX:   # fixed: was tf.abs(f, f_old)
            # function value changing less than tolX
            print('function value changing less than tolX' + str(tf.abs(f - f_old)))
            break

        if do_verbose:
            if nIter % 1 == 0:
                print("Step %3d loss %6.5f" % (nIter, f.numpy()))
            if nIter == maxIter - 1:
                final_loss = f.numpy()
            lbfgsLoss = np.append(lbfgsLoss, f.numpy())  # added: record this iteration's loss
            # print("lbfgsLoss: ", lbfgsLoss)

    # added: plot the recorded losses once the loop has finished
    # (every break above fires before the append, so lbfgsLoss has nIter-1 entries)
    iterArray = np.arange(1, nIter, 1)
    plt.figure()
    plt.title('LBFGS Loss')
    plt.plot(iterArray, lbfgsLoss)
    plt.show()
    # save state
    state.old_dirs = old_dirs
    state.old_stps = old_stps
    state.Hdiag = Hdiag
    state.g_old = g_old
    state.f_old = f_old
    state.t = t
    state.d = d

    return x, f_hist, currentFuncEval

# dummy/Struct gives Lua-like struct object with 0 defaults
class dummy(object):
    pass

class Struct(dummy):
    def __getattribute__(self, key):
        if key == '__dict__':
            return super(dummy, self).__getattribute__('__dict__')
        return self.__dict__.get(key, 0)
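To make the opfunc contract concrete, here is a small smoke test of my own (not from the original notebook): opfunc takes a flat parameter tensor and returns the pair (loss, flat gradient), while Struct() supplies the zero-defaulted bookkeeping fields (funcEval, nIter) that lbfgs reads before assigning them.

# Hypothetical smoke test, not part of the original code: minimize ||x - 2||^2.
def quad_opfunc(x):
    with tf.GradientTape() as tape:
        tape.watch(x)                      # x is a plain tensor, so watch it explicitly
        f = tf.reduce_sum((x - 2.0) ** 2)
    return f, tape.gradient(f, x)

x_opt, f_hist, n_evals = lbfgs(quad_opfunc, tf.zeros([5]), Struct(),
                               maxIter=50, learningRate=1)
print(x_opt.numpy())                       # should approach [2. 2. 2. 2. 2.]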
As before, first initialize a new array:
lbfgsLoss = np.array([])
Then append and plot. Mind the placement: the append happens inside the loop, while the plotting happens outside it:
lbfgsLoss = np.append(lbfgsLoss, f.numpy())
#print("lbfgsLoss: ",lbfgsLoss)
iterArray = np.arange(1,nIter,1)
plt.figure()
plt.title('LBFGS Loss')
plt.plot(iterArray, lbfgsLoss)
plt.show()
For reference, here are the results with both Adam and L-BFGS run for 10,000 iterations: