TensorFlow (12): Tensor Clipping
1. Main Methods
1. clip_by_value
2. relu
3. clip_by_norm
4. gradient clipping
2. The clip_by_value Method
# ************ clip_by_value
a = tf.range(10)
print("a:",a.numpy())
"""
把值限制在2-8之间
"""
b1 = tf.maximum(a,2)
b2 = tf.minimum(b1,8)
print("b2:",b2.numpy())
c = tf.clip_by_value(a,2,8)
"""
2代表最小值
8代表最大值
"""
print("c:",c.numpy())
3. The relu Function
# ************ relu
"""
f(x) = max(x,0)
"""
a = tf.range(0,11)-5
print("a:",a.numpy())
b = tf.nn.relu(a)
print("b:",b.numpy())
c = tf.maximum(a,0)
print("c:",c.numpy())
# clip from below at 0; use the int32 maximum as an effectively unbounded upper limit
d = tf.clip_by_value(a, 0, tf.int32.max)
print("d:",d.numpy())
4. The clip_by_norm Method
"""
根据范数裁剪
gradient clipping
方向不变,等比例缩放
(x1,x2,...)/||(x1,x2,...)|| * 15
方向不变 值缩小
只改变向量的模,不改变向量的方向
"""
a = tf.random.normal([2,2],mean = 10)
print("a:",a.numpy())
print("a_norm:",tf.norm(a).numpy())
b = tf.clip_by_norm(a,15)
print("b:",b.numpy())
print("b_norm:",tf.norm(b).numpy())
5. The Gradient Clipping Method
"""
1、gradient exploding 梯度爆炸
2、gradient vanishing 梯度消失
"""
"""
grds = [g_w1,g_w2,....]
对norm等比缩放 [2 4 8] => [1 2 4]
new_grads,total_norm = tf.clip_by_global_norm(grads,25)
new_grads为新的梯度
total_norm为没有做clip之前的整体norm
25 = ||g_w1||+||g_w2||+...
"""
"""
梯度的范数在[0,20]可以接受
0.0001 为梯度消失
"""
6. Gradient Clipping in Practice (without gradient clipping)
import tensorflow as tf
import os
from tensorflow.keras import datasets
from tensorflow import keras
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)
# suppress irrelevant TensorFlow log output
os.environ['TF_CPP_MIN_LOG_LEVEL'] = "2"
"""
2: only error messages are printed
0: all messages are printed
1: INFO messages are filtered out
"""
# load the data and convert the types
(x,y),_ = datasets.mnist.load_data()
x = tf.convert_to_tensor(x,dtype = tf.float32)
y = tf.convert_to_tensor(y,dtype = tf.int32)
# split the data into batches:
# first build a dataset of (x, y) slices, then batch it
train_db = tf.data.Dataset.from_tensor_slices((x,y)).batch(128) # batch size 128
# inspect the dataset
train_iter = iter(train_db)
sample = next(train_iter)
# define the weights and biases as trainable Variables
"""
data flow through the layers: [b,784] => [b,256] => [b,128] => [b,10]
"""
w1 = tf.Variable(tf.random.truncated_normal([784,256],stddev=0.1))
b1 = tf.Variable(tf.zeros([256]))
w2 = tf.Variable(tf.random.truncated_normal([256,128],stddev=0.1))
b2 = tf.Variable(tf.zeros([128]))
w3 = tf.Variable(tf.random.truncated_normal([128,10],stddev=0.1))
b3 = tf.Variable(tf.zeros([10]))
# learning rate
lr = tf.constant(1e-3)
# loop over the epochs
for epoch in range(10):
    # loop over the batches
    for step,(x,y) in enumerate(train_db):
        # flatten each 28x28 image into a 784-dim vector
        x = tf.reshape(x,[-1,28*28])
        # record the operations for automatic differentiation
        with tf.GradientTape() as tape:
            # first layer output h1
            h1 = tf.nn.relu(x@w1 + b1)
            # second layer output h2
            h2 = tf.nn.relu(h1@w2 + b2)
            # third (output) layer
            out = h2@w3 + b3
            # one-hot encode y
            y_onehot = tf.one_hot(y,depth = 10)
            # mean squared error loss
            loss = tf.reduce_mean(tf.square(out - y_onehot))
        # compute the gradients
        gradients = tape.gradient(loss,[w1,b1,w2,b2,w3,b3])
        # print the norm of each gradient to watch for explosion
        for g in gradients:
            print(tf.norm(g).numpy())
        # update the weights
        w1.assign_sub(lr*gradients[0])
        b1.assign_sub(lr*gradients[1])
        w2.assign_sub(lr*gradients[2])
        b2.assign_sub(lr*gradients[3])
        w3.assign_sub(lr*gradients[4])
        b3.assign_sub(lr*gradients[5])
        if step % 100 == 0:
            print(epoch,step,"loss:",float(loss))
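Printing every per-tensor norm is verbose; a more compact way to watch for explosion (a sketch, not part of the original code) is to print a single global norm of the gradient list inside the training loop:
# one number summarizing all gradients: sqrt of the sum of squared per-tensor norms
print("global_norm:", tf.linalg.global_norm(gradients).numpy())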
7. Gradient Clipping in Practice (with gradient clipping)
# ******************* Gradient Clipping in practice (with gradient clipping)
import tensorflow as tf
import os
from tensorflow.keras import datasets
from tensorflow import keras
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)
# suppress irrelevant TensorFlow log output
os.environ['TF_CPP_MIN_LOG_LEVEL'] = "2"
"""
2: only error messages are printed
0: all messages are printed
1: INFO messages are filtered out
"""
# load the data and convert the types
(x,y),_ = datasets.mnist.load_data()
x = tf.convert_to_tensor(x,dtype = tf.float32)
y = tf.convert_to_tensor(y,dtype = tf.int32)
# split the data into batches:
# first build a dataset of (x, y) slices, then batch it
train_db = tf.data.Dataset.from_tensor_slices((x,y)).batch(128) # batch size 128
# inspect the dataset
train_iter = iter(train_db)
sample = next(train_iter)
# define the weights and biases as trainable Variables
"""
data flow through the layers: [b,784] => [b,256] => [b,128] => [b,10]
"""
w1 = tf.Variable(tf.random.truncated_normal([784,256],stddev=0.1))
b1 = tf.Variable(tf.zeros([256]))
w2 = tf.Variable(tf.random.truncated_normal([256,128],stddev=0.1))
b2 = tf.Variable(tf.zeros([128]))
w3 = tf.Variable(tf.random.truncated_normal([128,10],stddev=0.1))
b3 = tf.Variable(tf.zeros([10]))
# learning rate
lr = tf.constant(1e-3)
# loop over the epochs
for epoch in range(10):
    # loop over the batches
    for step,(x,y) in enumerate(train_db):
        # flatten each 28x28 image into a 784-dim vector
        x = tf.reshape(x,[-1,28*28])
        # record the operations for automatic differentiation
        with tf.GradientTape() as tape:
            # first layer output h1
            h1 = tf.nn.relu(x@w1 + b1)
            # second layer output h2
            h2 = tf.nn.relu(h1@w2 + b2)
            # third (output) layer
            out = h2@w3 + b3
            # one-hot encode y
            y_onehot = tf.one_hot(y,depth = 10)
            # mean squared error loss
            loss = tf.reduce_mean(tf.square(out - y_onehot))
        # compute the gradients
        gradients = tape.gradient(loss,[w1,b1,w2,b2,w3,b3])
        # clip the gradients so that their global norm does not exceed 15
        gradients,_ = tf.clip_by_global_norm(gradients,15)
        # print the norm of each clipped gradient
        for g in gradients:
            print(tf.norm(g).numpy())
        # update the weights
        w1.assign_sub(lr*gradients[0])
        b1.assign_sub(lr*gradients[1])
        w2.assign_sub(lr*gradients[2])
        b2.assign_sub(lr*gradients[3])
        w3.assign_sub(lr*gradients[4])
        b3.assign_sub(lr*gradients[5])
        if step % 100 == 0:
            print(epoch,step,"loss:",float(loss))
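As a side note (not part of the original notes), the built-in Keras optimizers can apply the same kind of clipping automatically through their clipnorm, clipvalue, or global_clipnorm arguments; a sketch assuming a recent TF 2.x release where global_clipnorm is available:
# clip the global gradient norm to 15 inside the optimizer
optimizer = tf.keras.optimizers.SGD(learning_rate=1e-3, global_clipnorm=15)
# optimizer.apply_gradients(zip(gradients, [w1, b1, w2, b2, w3, b3]))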
These are study notes written while following 龙龙老师's course "深度学习与TensorFlow 2入门实战".
by CyrusMay 2022 04 16