tensorflow2.x学习笔记四：自动微分机制(tf.GradientTape)

最新推荐文章于 2023-01-13 16:55:36 发布

耐心的小黑

最新推荐文章于 2023-01-13 16:55:36 发布

阅读量2.1k

点赞数

分类专栏： # TensorFlow2.x学习笔记 文章标签： tensorflow 深度学习 神经网络

原文链接：https://github.com/lyhue1991/eat_tensorflow2_in_30_days/blob/master/2-3%2C%E8%87%AA%E5%8A%A8%E5%BE%AE%E5%88%86%E6%9C%BA%E5%88%B6.md

版权

TensorFlow2.x学习笔记专栏收录该内容

31 篇文章 39 订阅 ¥19.90 ¥99.00

订阅专栏

超级会员免费看

本文介绍了Tensorflow2.x中利用tf.GradientTape进行自动微分的方法，包括如何求导数、二阶导数，以及结合优化器求解最小值。通过示例展示了在正向传播过程中记录操作，反向传播自动计算梯度的便利性。

摘要由CSDN通过智能技术生成

神经网络通常依赖反向传播求梯度来更新网络参数，求梯度过程通常是一件非常复杂而容易出错的事情。而深度学习框架可以帮助我们自动地完成这种求梯度运算。TensorFlow一般使用梯度磁带tf.GradientTape来记录正向运算过程，然后反向播放磁带自动得到梯度值。这种利用tf.GradientTape求微分的方法叫做TensorFlow的自动微分机制。

一、利用梯度磁带求导数

对变量求导数

import tensorflow as tf
import numpy as np 
# Derivative of f(x) = a*x**2 + b*x + c with respect to the variable x.
x = tf.Variable(0.0, dtype=tf.float32, name="x")
a, b, c = tf.constant(1.0), tf.constant(-2.0), tf.constant(1.0)

# Operations executed inside the tape context are recorded for differentiation.
with tf.GradientTape() as tape:
    y = a * tf.pow(x, 2) + b * x + c

# x is a tf.Variable, so no explicit tape.watch() call is made here.
dy_dx = tape.gradient(y, x)
print(dy_dx)

输出：

tf.Tensor(-2.0, shape=(), dtype=float32)

对常量求导数

# Constant tensors can be differentiated as well, but they have to be
# registered with the tape through watch() first.
with tf.GradientTape() as tape:
    tape.watch([a, b, c])
    y = a * tf.pow(x, 2) + b * x + c

# One gradient is returned per source, in the order the sources are listed.
grads = tape.gradient(y, [x, a, b, c])
dy_dx, dy_da, dy_db, dy_dc = grads
print(dy_da)
print(dy_dc)

输出：

tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(1.0, shape=(), dtype=float32)

求二阶导数

# Second derivative: nest two tapes so that the outer tape (tape2) records
# the first-derivative computation carried out with the inner tape (tape1).
with tf.GradientTape() as tape2:
    with tf.GradientTape() as tape1:
        y = a * tf.pow(x, 2) + b * x + c
    # Evaluated inside tape2's context so tape2 can differentiate it again.
    dy_dx = tape1.gradient(y, x)
dy2_dx2 = tape2.gradient(dy_dx, x)

print(dy2_dx2)

输出：

tf.Tensor(2.0, shape=(), dtype=float32)

在autograph中进行求导：这里的自变量x是作为普通张量（tf.constant）传入的，而不是tf.Variable，因此一定要对x加上watch，否则求得的梯度为None

@tf.function
def f(x):
    """Return ``(dy/dx, y)`` for ``y = x**2 - 2*x + 1`` evaluated at ``x``.

    ``x`` is watched explicitly on the tape before the forward computation
    so that the gradient with respect to it can be requested afterwards.
    """
    a, b, c = tf.constant(1.0), tf.constant(-2.0), tf.constant(1.0)

    with tf.GradientTape() as tape:
        tape.watch(x)
        y = a * tf.pow(x, 2) + b * x + c

    return tape.gradient(y, x), y

# Print (dy/dx, y) at x = 0 and then at x = 1.
for point in (0.0, 1.0):
    tf.print(f(tf.constant(point)))

输出：

  (-2, 1)
  (0, 0)

二、利用梯度磁带和优化器求最小值

使用optimizer.apply_gradients

# Minimise y = a*x**2 + b*x + c with respect to x using SGD.
x = tf.Variable(0.0, dtype=tf.float32, name="x")
a, b, c = tf.constant(1.0), tf.constant(-2.0), tf.constant(1.0)

optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)
for _ in range(1000):
    # Forward pass under the tape, then a single optimizer update of x.
    with tf.GradientTape() as tape:
        y = a * tf.pow(x, 2) + b * x + c
    dy_dx = tape.gradient(y, x)
    optimizer.apply_gradients(grads_and_vars=[(dy_dx, x)])

tf.print("y =", y, "; x =", x)

# Output: y = 0 ; x = 0.999998569

在autograph中使用optimizer.apply_gradients

# Module-level state used by minimizef(): the variable being optimised and
# the optimizer that updates it.
x = tf.Variable(0.0, dtype=tf.float32, name="x")
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)

@tf.function
def minimizef():
    """Run 1000 SGD steps on the module-level ``x`` and return ``y``.

    Minimises ``y = x**2 - 2*x + 1`` with the module-level ``optimizer``.
    The returned ``y`` is the value computed in the last loop iteration,
    i.e. before that iteration's update is applied to ``x``.
    """
    a, b, c = tf.constant(1.0), tf.constant(-2.0), tf.constant(1.0)
    # y is given a value before the loop and is reassigned on every pass.
    y = a * tf.pow(x, 2) + b * x + c
    # NOTE: under autograph iterate with tf.range(1000), not range(1000).
    for _ in tf.range(1000):
        with tf.GradientTape() as tape:
            y = a * tf.pow(x, 2) + b * x + c
        grad = tape.gradient(y, x)
        optimizer.apply_gradients(grads_and_vars=[(grad, x)])

    return y

# Run the optimisation, then show the returned loss and the final x.
final_y = minimizef()
tf.print(final_y)
tf.print(x)
# Output: 0 0.999998569

使用optimizer.minimize，相当于先用tape求gradient，再调用apply_gradients

# Fresh variable, starting again from 0.0.
x = tf.Variable(0.0, dtype=tf.float32, name="x")

# NOTE: f() takes no parameters.
def f():
    """Return ``x**2 - 2*x + 1`` evaluated at the module-level variable ``x``."""
    a, b, c = tf.constant(1.0), tf.constant(-2.0), tf.constant(1.0)
    return a * tf.pow(x, 2) + b * x + c

optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)
for _ in range(1000):
    # The loss is passed as the callable f; x is the variable to update.
    optimizer.minimize(f, [x])

tf.print("y =", f(), "; x =", x)
# Output: y = 0 ; x = 0.999998569

在autograph中使用optimizer.minimize，相当于先用tape求gradient，再调用apply_gradients

# Module-level state used by f() and train(): the variable being optimised
# and the optimizer that updates it.
x = tf.Variable(0.0, dtype=tf.float32, name="x")
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)

@tf.function
def f():
    """Return ``x**2 - 2*x + 1`` evaluated at the module-level variable ``x``."""
    a, b, c = tf.constant(1.0), tf.constant(-2.0), tf.constant(1.0)
    return a * tf.pow(x, 2) + b * x + c

@tf.function
def train(epoch):
    """Run ``epoch`` minimize steps on the module-level ``x``; return ``f()``.

    Each step calls ``optimizer.minimize`` with the loss callable ``f`` and
    the variable list ``[x]``. The loss is evaluated once more after the
    loop and that value is returned.
    """
    for _ in tf.range(epoch):
        optimizer.minimize(f, [x])
    final_loss = f()
    return final_loss


# Train for 1000 steps, then show the returned loss and the final x.
final_y = train(1000)
tf.print(final_y)
tf.print(x)
# Output: 0 0.999998569