tensorflow参数访问

tensorflow 版

我们首先看一下具有单隐藏层的多层感知机

import tensorflow as tf

# MLP with one hidden layer: Flatten -> Dense(4, ReLU) -> Dense(1).
net = tf.keras.models.Sequential()
net.add(tf.keras.layers.Flatten())
net.add(tf.keras.layers.Dense(4, activation=tf.nn.relu))
net.add(tf.keras.layers.Dense(1))

# Two random samples of dimension 4; the first call builds the weights.
X = tf.random.uniform((2, 4))
net(X)

<tf.Tensor: shape=(2, 1), dtype=float32, numpy=
array([[-1.1537211],
[-0.9759821]], dtype=float32)>

参数访问

print(net.layers[2].weights)

[<tf.Variable 'dense_1/kernel:0' shape=(4, 1) dtype=float32, numpy=
array([[ 0.23318756],
       [-1.0404987 ],
       [-0.48204058],
       [ 1.0638199 ]], dtype=float32)>, <tf.Variable 'dense_1/bias:0' shape=(1,) dtype=float32, numpy=array([0.], dtype=float32)>]

目标参数

# The bias is a tf.Variable; convert_to_tensor strips the Variable wrapper
# and yields the underlying tensor value.
bias = net.layers[2].weights[1]
print(type(bias))
print(bias)
print(tf.convert_to_tensor(bias))

<class 'tensorflow.python.ops.resource_variable_ops.ResourceVariable'>
<tf.Variable 'dense_1/bias:0' shape=(1,) dtype=float32, numpy=array([0.], dtype=float32)>
tf.Tensor([0.], shape=(1,), dtype=float32)

一次性访问所有参数

# Hidden-layer parameters first, then every parameter in the network
# as plain NumPy arrays via get_weights().
print(net.layers[1].weights)
all_params = net.get_weights()
print(all_params)
all_params[1]

[<tf.Variable 'dense/kernel:0' shape=(4, 4) dtype=float32, numpy=
array([[ 0.22265357,  0.69465953,  0.08816117,  0.5380345 ],
       [-0.24957252,  0.506622  , -0.31157494,  0.04067379],
       [-0.10570258, -0.00699437, -0.7334142 , -0.3944154 ],
       [ 0.727436  ,  0.53911453, -0.8468247 , -0.64365757]],
      dtype=float32)>, <tf.Variable 'dense/bias:0' shape=(4,) dtype=float32, numpy=array([0., 0., 0., 0.], dtype=float32)>]

[array([[ 0.22265357, 0.69465953, 0.08816117, 0.5380345 ],
[-0.24957252, 0.506622 , -0.31157494, 0.04067379],
[-0.10570258, -0.00699437, -0.7334142 , -0.3944154 ],
[ 0.727436 , 0.53911453, -0.8468247 , -0.64365757]],
dtype=float32), array([0., 0., 0., 0.], dtype=float32), array([[ 0.23318756],
[-1.0404987 ],
[-0.48204058],
[ 1.0638199 ]], dtype=float32), array([0.], dtype=float32)]

从嵌套块收集参数

def block1(name):
    """Return a named Sequential block: Flatten followed by Dense(4, ReLU)."""
    block = tf.keras.Sequential(name=name)
    block.add(tf.keras.layers.Flatten())
    block.add(tf.keras.layers.Dense(4, activation=tf.nn.relu))
    return block

def block2():
    """Nest four block1 instances inside a single Sequential container."""
    return tf.keras.Sequential(
        [block1(name=f'block-{i}') for i in range(4)])

# Nest block2's four sub-blocks under a top-level model with a final
# scalar output layer; the forward pass builds all nested weights.
rgnet = tf.keras.Sequential([
    block2(),
    tf.keras.layers.Dense(1),
])
rgnet(X)

<tf.Tensor: shape=(2, 1), dtype=float32, numpy=
array([[0.01737203],
[0.01305231]], dtype=float32)>

打印报表

print(rgnet.summary())

因为层是分层嵌套的,所以我们也可以像通过嵌套列表索引一样访问它们。 下面,我们访问第一个主要的块中、第二个子块的第一层的偏置项。

rgnet.layers[0].layers[1].layers[1].weights[1]

参数初始化

内置初始化

# Built-in initialization: hidden kernel ~ N(0, 0.01), hidden bias = 0.
net = tf.keras.models.Sequential()
net.add(tf.keras.layers.Flatten())
net.add(tf.keras.layers.Dense(
    4, activation=tf.nn.relu,
    kernel_initializer=tf.random_normal_initializer(mean=0, stddev=0.01),
    bias_initializer=tf.zeros_initializer()))
net.add(tf.keras.layers.Dense(1))

net(X)
net.weights[0], net.weights[1]
#########################################################
# Constant initialization: every hidden-kernel entry is 1, bias is 0.
net = tf.keras.models.Sequential()
net.add(tf.keras.layers.Flatten())
net.add(tf.keras.layers.Dense(
    4, activation=tf.nn.relu,
    kernel_initializer=tf.keras.initializers.Constant(1),
    bias_initializer=tf.zeros_initializer()))
net.add(tf.keras.layers.Dense(1))

net(X)
net.weights[0], net.weights[1]
##########################################################
# Different initializers per layer: Xavier/Glorot uniform for the hidden
# kernel, constant 1 for the output kernel.
net = tf.keras.models.Sequential()
net.add(tf.keras.layers.Flatten())
net.add(tf.keras.layers.Dense(
    4, activation=tf.nn.relu,
    kernel_initializer=tf.keras.initializers.GlorotUniform()))
net.add(tf.keras.layers.Dense(
    1, kernel_initializer=tf.keras.initializers.Constant(1)))

net(X)
print(net.layers[1].weights[0])
print(net.layers[2].weights[0])
###############################################################

自定义初始化

class MyInit(tf.keras.initializers.Initializer):
    """Custom initializer: U(-10, 10), zeroing entries with magnitude < 5.

    Draws weights uniformly from [-10, 10) and keeps only those whose
    absolute value is at least 5; the rest become (signed) zero.
    """

    def __call__(self, shape, dtype=None):
        data = tf.random.uniform(shape, -10, 10, dtype=dtype)
        # Cast the boolean keep-mask to the tensor's own dtype instead of
        # hard-coding float32 (the original broke for non-float32 dtypes,
        # since data * factor then mixed dtypes).
        factor = tf.cast(tf.abs(data) >= 5, data.dtype)
        return data * factor

# Apply the custom initializer to the hidden layer's kernel.
net = tf.keras.models.Sequential()
net.add(tf.keras.layers.Flatten())
net.add(tf.keras.layers.Dense(4, activation=tf.nn.relu,
                              kernel_initializer=MyInit()))
net.add(tf.keras.layers.Dense(1))

net(X)
print(net.layers[1].weights[0])

<tf.Variable 'dense_13/kernel:0' shape=(4, 4) dtype=float32, numpy=
array([[ 6.8615303, -0.       ,  7.944557 , -0.       ],
       [-0.       ,  7.2525196,  9.074877 , -0.       ],
       [ 9.185171 , -5.4909086,  5.9858913, -0.       ],
       [ 7.4548893,  0.       , -0.       ,  9.655563 ]], dtype=float32)>

注意,我们始终可以直接设置参数。

# Parameters can always be set directly via Variable.assign().
kernel = net.layers[1].weights[0]
kernel[:].assign(kernel + 1)   # shift every entry by 1
kernel[0, 0].assign(42)        # overwrite a single entry
kernel

参数绑定

# tf.keras behaves a bit differently here: Sequential automatically
# deduplicates a layer instance that is passed in more than once.
shared = tf.keras.layers.Dense(4, activation=tf.nn.relu)
net = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(),
    shared,
    shared,
    tf.keras.layers.Dense(1),
])

net(X)
# Check whether the duplicate was removed: Flatten + shared (counted
# once) + output Dense == 3 layers.
print(len(net.layers) == 3)

小结

  • 我们有几种方法可以访问、初始化和绑定模型参数。
  • 我们可以使用自定义初始化方法。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值