参数管理
tensorflow 版
我们首先看一下具有单隐藏层的多层感知机
import tensorflow as tf

# A multilayer perceptron with one hidden layer:
# Flatten -> Dense(4, ReLU) -> Dense(1).
net = tf.keras.Sequential()
net.add(tf.keras.layers.Flatten())
net.add(tf.keras.layers.Dense(4, activation=tf.nn.relu))
net.add(tf.keras.layers.Dense(1))

# One forward pass on a (2, 4) batch; this is what actually builds
# (initializes) the layer parameters in Keras.
X = tf.random.uniform((2, 4))
net(X)
<tf.Tensor: shape=(2, 1), dtype=float32, numpy=
array([[-1.1537211],
[-0.9759821]], dtype=float32)>
参数访问
# Parameter access: inspect the kernel and bias of the output layer.
output_layer = net.layers[2]
print(output_layer.weights)
[<tf.Variable 'dense_1/kernel:0' shape=(4, 1) dtype=float32, numpy=
array([[ 0.23318756],
[-1.0404987 ],
[-0.48204058],
[ 1.0638199 ]], dtype=float32)>, <tf.Variable 'dense_1/bias:0' shape=(1,) dtype=float32, numpy=array([0.], dtype=float32)>]
目标参数
# Targeted access: the output layer's bias is a tf.Variable;
# convert_to_tensor strips the Variable wrapper and yields a plain tensor.
bias = net.layers[2].weights[1]
print(type(bias))
print(bias)
print(tf.convert_to_tensor(bias))
<class 'tensorflow.python.ops.resource_variable_ops.ResourceVariable'>
<tf.Variable 'dense_1/bias:0' shape=(1,) dtype=float32, numpy=array([0.], dtype=float32)>
tf.Tensor([0.], shape=(1,), dtype=float32)
一次性访问所有参数
# All parameters at once: first the hidden layer only, then the whole network
# via get_weights(), which returns plain NumPy arrays.
print(net.layers[1].weights)
print(net.get_weights())
net.get_weights()[1]  # hidden layer's bias as a NumPy array
[<tf.Variable 'dense/kernel:0' shape=(4, 4) dtype=float32, numpy=
array([[ 0.22265357, 0.69465953, 0.08816117, 0.5380345 ],
[-0.24957252, 0.506622 , -0.31157494, 0.04067379],
[-0.10570258, -0.00699437, -0.7334142 , -0.3944154 ],
[ 0.727436 , 0.53911453, -0.8468247 , -0.64365757]],
dtype=float32)>, <tf.Variable 'dense/bias:0' shape=(4,) dtype=float32, numpy=array([0., 0., 0., 0.], dtype=float32)>]
[array([[ 0.22265357, 0.69465953, 0.08816117, 0.5380345 ],
[-0.24957252, 0.506622 , -0.31157494, 0.04067379],
[-0.10570258, -0.00699437, -0.7334142 , -0.3944154 ],
[ 0.727436 , 0.53911453, -0.8468247 , -0.64365757]],
dtype=float32), array([0., 0., 0., 0.], dtype=float32), array([[ 0.23318756],
[-1.0404987 ],
[-0.48204058],
[ 1.0638199 ]], dtype=float32), array([0.], dtype=float32)]
从嵌套块收集参数
def block1(name):
    """Return a named Flatten -> Dense(4, ReLU) sub-network."""
    block = tf.keras.Sequential(name=name)
    block.add(tf.keras.layers.Flatten())
    block.add(tf.keras.layers.Dense(4, activation=tf.nn.relu))
    return block
def block2():
    """Nest four block1 instances (block-0 .. block-3) in one Sequential."""
    return tf.keras.Sequential(
        [block1(name=f'block-{i}') for i in range(4)])
# Collect the nested blocks under an outer Sequential and append a
# scalar output layer, then run a forward pass to build the parameters.
rgnet = tf.keras.Sequential([
    block2(),
    tf.keras.layers.Dense(1),
])
rgnet(X)
<tf.Tensor: shape=(2, 1), dtype=float32, numpy=
array([[0.01737203],
[0.01305231]], dtype=float32)>
打印报表
# Model.summary() prints the layer report itself and returns None, so
# wrapping it in print() only appends a spurious "None" line to the output.
rgnet.summary()
因为层是分层嵌套的,所以我们也可以像通过嵌套列表索引一样访问它们。 下面,我们访问第一个主要的块中、第二个子块的第一层的偏置项。
# Index into the nesting like a nested list: first major block,
# then its second sub-block, then that sub-block's Dense layer's bias.
first_block = rgnet.layers[0]
second_sub_block = first_block.layers[1]
second_sub_block.layers[1].weights[1]
参数初始化
内置初始化
# Built-in initialization: hidden-layer kernel ~ N(0, 0.01^2), bias = 0.
normal_init = tf.random_normal_initializer(mean=0, stddev=0.01)
zero_init = tf.zeros_initializer()
net = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(4, activation=tf.nn.relu,
                          kernel_initializer=normal_init,
                          bias_initializer=zero_init),
    tf.keras.layers.Dense(1),
])
net(X)  # forward pass builds the parameters
net.weights[0], net.weights[1]
#########################################################
# Initialize every hidden-layer kernel entry to the constant 1, bias to 0.
ones_init = tf.keras.initializers.Constant(1)
net = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(4, activation=tf.nn.relu,
                          kernel_initializer=ones_init,
                          bias_initializer=tf.zeros_initializer()),
    tf.keras.layers.Dense(1),
])
net(X)
net.weights[0], net.weights[1]
##########################################################
# Different initializers per layer: Xavier (Glorot uniform) for the hidden
# layer's kernel, constant 1 for the output layer's kernel.
xavier = tf.keras.initializers.GlorotUniform()
ones = tf.keras.initializers.Constant(1)
net = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(4, activation=tf.nn.relu,
                          kernel_initializer=xavier),
    tf.keras.layers.Dense(1, kernel_initializer=ones),
])
net(X)
print(net.layers[1].weights[0])
print(net.layers[2].weights[0])
###############################################################
自定义初始化
class MyInit(tf.keras.initializers.Initializer):
    """Custom initializer: draw U(-10, 10), then zero out entries with |w| < 5.

    Surviving weights are distributed over U(5, 10) ∪ U(-10, -5); roughly
    half of all entries end up exactly zero.
    """

    def __call__(self, shape, dtype=None):
        data = tf.random.uniform(shape, -10, 10, dtype=dtype)
        # Cast the keep-mask to data.dtype, not a hard-coded tf.float32:
        # the original cast broke the requested dtype contract (e.g. a
        # float64/float16 request would fail on the dtype-mismatched multiply).
        mask = tf.cast(tf.abs(data) >= 5, data.dtype)
        return data * mask
# Apply the custom initializer to the hidden layer's kernel, then inspect it.
net = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(4, activation=tf.nn.relu,
                          kernel_initializer=MyInit()),
    tf.keras.layers.Dense(1),
])
net(X)
print(net.layers[1].weights[0])
<tf.Variable 'dense_13/kernel:0' shape=(4, 4) dtype=float32, numpy=
array([[ 6.8615303, -0. , 7.944557 , -0. ],
[-0. , 7.2525196, 9.074877 , -0. ],
[ 9.185171 , -5.4909086, 5.9858913, -0. ],
[ 7.4548893, 0. , -0. , 9.655563 ]], dtype=float32)>
注意,我们始终可以直接设置参数。
# Parameters can also be set directly: add 1 to every kernel entry,
# then overwrite the top-left entry with 42.
kernel = net.layers[1].weights[0]
kernel[:].assign(kernel + 1)
kernel[0, 0].assign(42)
kernel
参数绑定
# Parameter tying: the same Dense instance is listed twice, so both positions
# would share one set of weights.
# tf.keras behaves a bit differently here: it automatically removes the
# duplicate layer from the model's layer list.
shared = tf.keras.layers.Dense(4, activation=tf.nn.relu)
net = tf.keras.models.Sequential([
tf.keras.layers.Flatten(),
shared,
shared,
tf.keras.layers.Dense(1),
])
net(X)
# Verify the deduplication: Flatten + shared + Dense(1) -> 3 layers, not 4.
print(len(net.layers) == 3)
小结
- 我们有几种方法可以访问、初始化和绑定模型参数。
- 我们可以使用自定义初始化方法。