TensorFlow Tutorial Notes --- Custom layers

import tensorflow as tf

tf.enable_eager_execution()  # TF 1.x API; eager execution is on by default in TF 2.x

Layers: common sets of useful operations

# In the tf.keras.layers package, layers are objects. To construct a layer,
# simply construct the object. Most layers take as a first argument the number
# of output dimensions / channels.
layer1 = tf.keras.layers.Dense(100)
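
Dense is only one of many prebuilt layers in tf.keras.layers; a few other common
ones are constructed the same way (an illustrative sketch, not an exhaustive list):

conv = tf.keras.layers.Conv2D(32, (3, 3))   # 32 output channels, 3x3 kernel
lstm = tf.keras.layers.LSTM(64)             # 64 output units
drop = tf.keras.layers.Dropout(0.5)         # drop rate, not an output size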


# The number of input dimensions is often unnecessary, as it can be inferred
# the first time the layer is used, but it can be provided if you want to 
# specify it manually, which is useful in some complex models.
layer2 = tf.keras.layers.Dense(10, input_shape=(None, 5))
layer2
<tensorflow.python.keras.layers.core.Dense at 0x1d403ade320>
layer2.variables
[]
layer2.kernel
---------------------------------------------------------------------------

AttributeError                            Traceback (most recent call last)

<ipython-input-18-db3d8823a28a> in <module>
----> 1 layer2.kernel


AttributeError: 'Dense' object has no attribute 'kernel'
layer2.bias
---------------------------------------------------------------------------

AttributeError                            Traceback (most recent call last)

<ipython-input-19-febf25214da9> in <module>
----> 1 layer2.bias


AttributeError: 'Dense' object has no attribute 'bias'

The kernel and bias do not exist yet: Dense creates its variables lazily in build(), which runs on the layer's first call. Calling the layer triggers the build:

layer2(tf.zeros([10, 5]))
<tf.Tensor: id=63, shape=(10, 10), dtype=float32, numpy=
array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]], dtype=float32)>
layer2
<tensorflow.python.keras.layers.core.Dense at 0x1d403ade320>
layer2.variables
[<tf.Variable 'dense_4/kernel:0' shape=(5, 10) dtype=float32, numpy=
 array([[ 0.3380643 , -0.41412264,  0.24270028, -0.37291318,  0.03570825,
         -0.4322303 ,  0.06443989, -0.5971137 ,  0.5325952 , -0.00772375],
        [ 0.4133622 ,  0.59908754, -0.09755713, -0.26567465,  0.08671927,
         -0.13092297, -0.25983834,  0.5665671 ,  0.19225448, -0.09533703],
        [ 0.00357974, -0.41751042, -0.22635683,  0.4641264 , -0.09682369,
         -0.40676865, -0.54707146,  0.28351438, -0.49605015,  0.12302268],
        [-0.5200846 ,  0.4993586 ,  0.24658775,  0.09775537,  0.38734215,
          0.37952524,  0.3262958 ,  0.06654131, -0.58187467,  0.5794472 ],
        [-0.01694435, -0.19808507, -0.62873423,  0.5921106 , -0.5697904 ,
          0.1135602 , -0.00901598, -0.01096767, -0.3696053 , -0.36232582]],
       dtype=float32)>,
 <tf.Variable 'dense_4/bias:0' shape=(10,) dtype=float32, numpy=array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float32)>]
layer2.kernel
<tf.Variable 'dense_4/kernel:0' shape=(5, 10) dtype=float32, numpy=
array([[ 0.3380643 , -0.41412264,  0.24270028, -0.37291318,  0.03570825,
        -0.4322303 ,  0.06443989, -0.5971137 ,  0.5325952 , -0.00772375],
       [ 0.4133622 ,  0.59908754, -0.09755713, -0.26567465,  0.08671927,
        -0.13092297, -0.25983834,  0.5665671 ,  0.19225448, -0.09533703],
       [ 0.00357974, -0.41751042, -0.22635683,  0.4641264 , -0.09682369,
        -0.40676865, -0.54707146,  0.28351438, -0.49605015,  0.12302268],
       [-0.5200846 ,  0.4993586 ,  0.24658775,  0.09775537,  0.38734215,
         0.37952524,  0.3262958 ,  0.06654131, -0.58187467,  0.5794472 ],
       [-0.01694435, -0.19808507, -0.62873423,  0.5921106 , -0.5697904 ,
         0.1135602 , -0.00901598, -0.01096767, -0.3696053 , -0.36232582]],
      dtype=float32)>
layer2.bias
<tf.Variable 'dense_4/bias:0' shape=(10,) dtype=float32, numpy=array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float32)>
layer2.trainable_variables
[<tf.Variable 'dense_4/kernel:0' shape=(5, 10) dtype=float32, numpy=
 array([[ 0.3380643 , -0.41412264,  0.24270028, -0.37291318,  0.03570825,
         -0.4322303 ,  0.06443989, -0.5971137 ,  0.5325952 , -0.00772375],
        [ 0.4133622 ,  0.59908754, -0.09755713, -0.26567465,  0.08671927,
         -0.13092297, -0.25983834,  0.5665671 ,  0.19225448, -0.09533703],
        [ 0.00357974, -0.41751042, -0.22635683,  0.4641264 , -0.09682369,
         -0.40676865, -0.54707146,  0.28351438, -0.49605015,  0.12302268],
        [-0.5200846 ,  0.4993586 ,  0.24658775,  0.09775537,  0.38734215,
          0.37952524,  0.3262958 ,  0.06654131, -0.58187467,  0.5794472 ],
        [-0.01694435, -0.19808507, -0.62873423,  0.5921106 , -0.5697904 ,
          0.1135602 , -0.00901598, -0.01096767, -0.3696053 , -0.36232582]],
       dtype=float32)>,
 <tf.Variable 'dense_4/bias:0' shape=(10,) dtype=float32, numpy=array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float32)>]

Implementing custom layers

A custom layer is a class that inherits from tf.keras.layers.Layer and implements __init__(), build(), and call().
build() runs once, the first time the layer is called; call() runs on every call.

class MyDenseLayer(tf.keras.layers.Layer):
  def __init__(self, num_outputs):
    print("############__init__############")
    super(MyDenseLayer, self).__init__()
    self.num_outputs = num_outputs
    
  def build(self, input_shape):
    print("############build############")
    self.kernel = self.add_weight("kernel",
                                  shape=[int(input_shape[-1]),
                                         self.num_outputs])
    
  def call(self, inputs):
    print("############call############")
    return tf.matmul(inputs, self.kernel)

print("111111111111111111111")
layer = MyDenseLayer(10)
#print(layer.kernel)  # fails here: the kernel does not exist until build() runs
print("22222222222222222222")
print(layer(tf.zeros([10, 5])))
print("33333333333333333333333")
print(layer.kernel)
print("44444444444444444444444")
print(layer.trainable_variables)
111111111111111111111
############__init__############
22222222222222222222
############build############
############call############
tf.Tensor(
[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]], shape=(10, 10), dtype=float32)
33333333333333333333333
<tf.Variable 'my_dense_layer_5/kernel:0' shape=(5, 10) dtype=float32, numpy=
array([[ 0.4840123 ,  0.578486  ,  0.33115482,  0.05877882, -0.3149869 ,
        -0.15885803, -0.2778893 ,  0.481198  , -0.28650752, -0.3946336 ],
       [ 0.219378  ,  0.05505645,  0.33497918,  0.60524505,  0.48022622,
        -0.47166032,  0.15060335, -0.00927895,  0.4026441 ,  0.55857354],
       [ 0.49057072,  0.33128828, -0.4022272 ,  0.39897317,  0.29955244,
         0.4085986 ,  0.30902743, -0.03370214, -0.55020535, -0.24665636],
       [ 0.06957155, -0.3660396 , -0.47823787, -0.07370228, -0.04078197,
        -0.6093094 ,  0.5738266 ,  0.32338107, -0.48439446,  0.59740096],
       [ 0.5742478 , -0.15213162,  0.23062384, -0.21270904,  0.0367111 ,
        -0.01983857, -0.1665029 ,  0.13589978, -0.13847286,  0.28859895]],
      dtype=float32)>
44444444444444444444444
[<tf.Variable 'my_dense_layer_5/kernel:0' shape=(5, 10) dtype=float32, numpy=
array([[ 0.4840123 ,  0.578486  ,  0.33115482,  0.05877882, -0.3149869 ,
        -0.15885803, -0.2778893 ,  0.481198  , -0.28650752, -0.3946336 ],
       [ 0.219378  ,  0.05505645,  0.33497918,  0.60524505,  0.48022622,
        -0.47166032,  0.15060335, -0.00927895,  0.4026441 ,  0.55857354],
       [ 0.49057072,  0.33128828, -0.4022272 ,  0.39897317,  0.29955244,
         0.4085986 ,  0.30902743, -0.03370214, -0.55020535, -0.24665636],
       [ 0.06957155, -0.3660396 , -0.47823787, -0.07370228, -0.04078197,
        -0.6093094 ,  0.5738266 ,  0.32338107, -0.48439446,  0.59740096],
       [ 0.5742478 , -0.15213162,  0.23062384, -0.21270904,  0.0367111 ,
        -0.01983857, -0.1665029 ,  0.13589978, -0.13847286,  0.28859895]],
      dtype=float32)>]

Models: composing layers

class ResnetIdentityBlock(tf.keras.Model):
  def __init__(self, kernel_size, filters):
    super(ResnetIdentityBlock, self).__init__(name='')
    filters1, filters2, filters3 = filters

    self.conv2a = tf.keras.layers.Conv2D(filters1, (1, 1))  # output channels, 1x1 kernel
    self.bn2a = tf.keras.layers.BatchNormalization()

    self.conv2b = tf.keras.layers.Conv2D(filters2, kernel_size, padding='same')
    # The default padding is 'valid' (no padding); 'same' pads the input so
    # that, with stride 1, the output keeps the input's spatial size.
    self.bn2b = tf.keras.layers.BatchNormalization()

    self.conv2c = tf.keras.layers.Conv2D(filters3, (1, 1))
    self.bn2c = tf.keras.layers.BatchNormalization()

  def call(self, input_tensor, training=False):
    x = self.conv2a(input_tensor)
    x = self.bn2a(x, training=training)
    x = tf.nn.relu(x)

    x = self.conv2b(x)
    x = self.bn2b(x, training=training)
    x = tf.nn.relu(x)

    x = self.conv2c(x)
    x = self.bn2c(x, training=training)

    x += input_tensor
    return tf.nn.relu(x)

    
block = ResnetIdentityBlock(1, [1, 2, 3])
print(block(tf.zeros([1, 2, 3, 3])))
print([x.name for x in block.trainable_variables])
tf.Tensor(
[[[[0. 0. 0.]
   [0. 0. 0.]
   [0. 0. 0.]]

  [[0. 0. 0.]
   [0. 0. 0.]
   [0. 0. 0.]]]], shape=(1, 2, 3, 3), dtype=float32)
['resnet_identity_block/conv2d/kernel:0', 'resnet_identity_block/conv2d/bias:0', 'resnet_identity_block/batch_normalization/gamma:0', 'resnet_identity_block/batch_normalization/beta:0', 'resnet_identity_block/conv2d_1/kernel:0', 'resnet_identity_block/conv2d_1/bias:0', 'resnet_identity_block/batch_normalization_1/gamma:0', 'resnet_identity_block/batch_normalization_1/beta:0', 'resnet_identity_block/conv2d_2/kernel:0', 'resnet_identity_block/conv2d_2/bias:0', 'resnet_identity_block/batch_normalization_2/gamma:0', 'resnet_identity_block/batch_normalization_2/beta:0']
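
Note the training argument threaded through call(): BatchNormalization uses the
current batch's statistics (and updates its moving averages) when training=True,
and uses the accumulated moving averages when training=False, so a subclassed
model has to forward the flag explicitly. For example:

block(tf.zeros([1, 2, 3, 3]), training=True)  # batch statistics; moving averages get updated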

Convolutional inputs and outputs are laid out as (batch, height, width, number_of_channels).

This differs from Caffe and PyTorch, which use (batch, number_of_channels, height, width).
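
Keras convolution layers take a data_format argument if you need the
channels-first layout instead; 'channels_last' is the default. A minimal sketch:

conv_nchw = tf.keras.layers.Conv2D(4, (2, 2), data_format='channels_first')
# expects input shaped (batch, channels, height, width); note that
# channels-first Conv2D is typically only supported on GPU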

class MyBlock(tf.keras.Model):
  def __init__(self, kernel_size, filters):
    super(MyBlock, self).__init__(name='')
    filters1, filters2 = filters
    print(filters1,filters2)

    self.conv2a = tf.keras.layers.Conv2D(filters1, (2, 2))  # output channels, 2x2 kernel
    # strides defaults to (1, 1)
    self.bn2a = tf.keras.layers.BatchNormalization()

    self.conv2b = tf.keras.layers.Conv2D(filters2, (2, 2), padding='same')
    # padding='valid' (the default) applies no padding; padding='same' pads the
    # input so that, with stride 1, the output keeps the input's spatial size.
    self.bn2b = tf.keras.layers.BatchNormalization()

  

  def call(self, input_tensor, training=False):
    print(input_tensor.shape)
    out1 = self.conv2a(input_tensor)
    #out1 = self.bn2a(out1, training=training)
    #print(out1)

    print(input_tensor.shape)

    out2 = self.conv2b(input_tensor)
    #out2 = self.bn2b(out2, training=training)
    #print(out2)

    return out1, out2

    
block = MyBlock(1, [4,5])
out1,out2=block(tf.zeros([2, 4, 4, 3]))
print("out1.shape=",out1.shape)
print("out2.shape=",out2.shape)


4 5
(2, 4, 4, 3)
(2, 4, 4, 3)
out1.shape= (2, 3, 3, 4)
out2.shape= (2, 4, 4, 5)
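
These shapes follow the standard convolution size formulas: with padding='valid',
output_size = floor((input_size - kernel_size) / stride) + 1 = (4 - 2) / 1 + 1 = 3,
giving out1 its (2, 3, 3, 4) shape; with padding='same', output_size =
ceil(input_size / stride) = 4, so out2 keeps the 4x4 spatial size and only the
channel count changes to 5.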

tf.keras.Sequential

my_seq = tf.keras.Sequential([tf.keras.layers.Conv2D(1, (1, 1)),
                               tf.keras.layers.BatchNormalization(),
                               tf.keras.layers.Conv2D(2, 1, 
                                                      padding='same'),
                               tf.keras.layers.BatchNormalization(),
                               tf.keras.layers.Conv2D(3, (1, 1)),
                               tf.keras.layers.BatchNormalization()])
my_seq(tf.zeros([1, 2, 3, 3]))
<tf.Tensor: id=1701, shape=(1, 2, 3, 3), dtype=float32, numpy=
array([[[[0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.]],

        [[0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.]]]], dtype=float32)>
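
Sequential composes layers in a single linear chain, so it cannot express the
skip connection in ResnetIdentityBlock above; that needs subclassing (as shown
earlier) or the functional API. A minimal functional-API sketch of the same
residual pattern, with illustrative layer sizes:

inputs = tf.keras.Input(shape=(2, 3, 3))
x = tf.keras.layers.Conv2D(3, (1, 1))(inputs)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Add()([x, inputs])           # the skip connection
outputs = tf.keras.layers.Activation('relu')(x)
model = tf.keras.Model(inputs, outputs)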

Appendix: implementing a Cross-Attention module

To implement a Cross-Attention module in TensorFlow, you can proceed as follows:

1. Import the required libraries and modules:

```python
import tensorflow as tf
from tensorflow.keras.layers import Layer
```

2. Create a custom CrossAttention layer:

```python
class CrossAttention(Layer):
    def __init__(self, units):
        super(CrossAttention, self).__init__()
        self.units = units

    def build(self, input_shape):
        # input_shape is a list of two shapes: [query_shape, value_shape]
        self.W1 = self.add_weight(shape=(input_shape[0][-1], self.units),
                                  initializer='random_normal', trainable=True)
        self.W2 = self.add_weight(shape=(input_shape[1][-1], self.units),
                                  initializer='random_normal', trainable=True)
        self.b = self.add_weight(shape=(input_shape[1][-1],),
                                 initializer='zeros', trainable=True)
        super(CrossAttention, self).build(input_shape)

    def call(self, inputs):
        query, value = inputs
        q = tf.matmul(query, self.W1)                      # linear projection of the query
        k = tf.matmul(value, self.W2)                      # linear projection of the value (keys)
        scores = tf.matmul(q, tf.transpose(k, [0, 2, 1]))  # attention scores
        attention_weights = tf.nn.softmax(scores)          # softmax over the value axis
        output = tf.matmul(attention_weights, value) + self.b  # weighted sum of the values
        return output
```

3. Use the CrossAttention layer:

```python
# query_len, value_len, input_dim and hidden_dim stand in for your own sizes
input_query = tf.keras.Input(shape=(query_len, input_dim))
input_value = tf.keras.Input(shape=(value_len, input_dim))

cross_attention = CrossAttention(units=hidden_dim)
output = cross_attention([input_query, input_value])

model = tf.keras.Model(inputs=[input_query, input_value], outputs=output)
```

As with MyDenseLayer above, the weights are created in build(). call() then follows the cross-attention recipe: linearly project the query and the value, compute the attention scores, normalize them with softmax, and return the attention-weighted sum of the values. The layer is finally wrapped into a model that maps the two inputs to the attended output. This code is only an example; adapt it to your own needs.