Implementing weight norm in TensorFlow

This post walks through applying weight normalization to a 1-D convolution (conv1d) and a fully connected layer (dense) for text models, following the implementation in simple-effective-text-matching.
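
Weight normalization (Salimans & Kingma, 2016) reparameterizes each weight vector as a learned scale times a direction:

w = g * v / ||v||

where v has the same shape as the original weights, the norm is taken over every axis except the output axis, and g holds one scalar per output unit. The get_weight function below builds exactly this reparameterization.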

 

import numpy as np
import tensorflow as tf

def gelu(x):
    # tanh approximation of GELU: 0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x^3))),
    # where sqrt(2/pi) ≈ 0.7978845608.
    return 0.5 * x * (1 + tf.nn.tanh(x * 0.7978845608 * (1 + 0.044715 * x * x)))


def get_weight(shape, gain=np.sqrt(2), weight_norm=True, fan_in=None, name="weight"):
    if fan_in is None:
        fan_in = np.prod(shape[:-1])

    std = gain / np.sqrt(fan_in)  # He init

    w = tf.get_variable(name, shape=shape, initializer=tf.initializers.random_normal(0, std),
                        dtype=tf.float32)

    if weight_norm:
        # g is initialized to ones, so at initialization multiplying by g below is a no-op
        # and the result is unchanged (return the ww tensor below to verify this).
        g = tf.get_variable("{}_g".format(name), shape=(1, ) * (len(shape) - 1) + (shape[-1], ),
                            initializer=tf.ones_initializer)
        # L2 norm of each output unit's weight vector: reduce over all axes except the last.
        w_norm = tf.sqrt(tf.reduce_sum(tf.square(w), axis=list(range(len(shape) - 1)), keepdims=True))

        # ww = w / tf.maximum(w_norm, 1e-7)
        w = w / tf.maximum(w_norm, 1e-7) * g
    return w
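
As a quick sanity check (a minimal sketch, assuming TensorFlow 1.x; the scope name wn_check is arbitrary), the per-output-unit norm of the returned w should equal g, i.e. 1.0 right after initialization:

with tf.variable_scope("wn_check"):
    w = get_weight([3, 4, 8], weight_norm=True)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    w_val = sess.run(w)
    # Norm over every axis except the last (the 8 output units).
    print(np.sqrt((w_val ** 2).sum(axis=(0, 1))))  # ~[1. 1. ... 1.]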

def apply_bias(x, name='bias'):
    b = tf.get_variable(name, shape=[x.get_shape()[-1]], initializer=tf.zeros_initializer)
    b = tf.cast(b, x.dtype)
    # Reshape to [1, ..., 1, channels] so the bias broadcasts over all leading dimensions.
    b = tf.reshape(b, [1] * len(x.get_shape()[:-1]) + [x.get_shape().as_list()[-1]])
    return x + b


def dense(x, units, activation=None, name="dense"):
    """

    :param x: float tensor of shape [..., origin_units]
    :param units: int, 全连接神经单元数
    :param activation: tensorflow的激活函数,比如tf.nn.relu
    :param name: str, 全连接变量域的名称
    :return: float tensor of shape [..., units]
    """

    with tf.variable_scope(name):
        fan_in = x.shape[-1].value
        # Remember the (possibly dynamic) output shape before flattening the input to 2-D.
        new_shape = tf.concat([tf.shape(x)[:-1], tf.constant([units])], axis=0)
        x = tf.reshape(x, (-1, fan_in))

        gain = np.sqrt(2) if activation is tf.nn.relu else 1.0
        w = get_weight([fan_in, units], gain=gain)
        out = apply_bias(tf.matmul(x, w))
        out = tf.reshape(out, new_shape)
        if activation:
            # relu is swapped for the smoother gelu defined above
            # (the He gain was still chosen assuming relu).
            if activation is tf.nn.relu:
                activation = gelu
            out = activation(out)
        return out
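
A hypothetical usage sketch: because dense flattens all leading dimensions before the matmul and restores them via new_shape, it accepts inputs of any rank (the shapes and the scope name proj are illustrative):

x = tf.placeholder(tf.float32, [None, None, 128])            # [batch, seq_len, 128]
h = dense(x, units=256, activation=tf.nn.relu, name="proj")  # [batch, seq_len, 256]
# Note: the relu request is applied as gelu inside dense, as shown above.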

def conv1d(x, filters, kernel_size, activation=None, name='conv1d'):
    """

    :param x: float tensor of shape [batch, seq_x, embedding_size], 代表着文本进行embedding之后的tensor
    :param filters:  int, 代表着卷积的数量
    :param kernel_size:  int, 代表着卷积核的大小(因为是一维卷积,所以代表着height)
    :param activation: tf内置的激活函数
    :param name: str, 整个卷积操作的变量域名称
    :return:  float tensor of shape [batch, seq_x, filters]
    """
    with tf.variable_scope(name):
        gain = np.sqrt(2) if activation is tf.nn.relu else 1
        # Run the 1-D convolution as a 2-D convolution with height 1:
        # x becomes [batch, 1, seq_x, embedding_size] and
        # w becomes [1, kernel_size, embedding_size, filters].
        x = tf.expand_dims(x, 1)
        w = get_weight([kernel_size, x.shape[-1].value, filters], gain=gain)
        w = tf.expand_dims(w, 0)
        out = tf.nn.conv2d(x, w, strides=[1, 1, 1, 1], padding='SAME')
        out = tf.squeeze(out, [1])
        out = apply_bias(out)
        if activation:
            out = activation(out)
        return out
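
An end-to-end sketch under the same TensorFlow 1.x assumption; all sizes and the scope name enc are illustrative:

x = tf.placeholder(tf.float32, [None, None, 64])  # [batch, seq_x, embedding_size]
h = conv1d(x, filters=32, kernel_size=3, activation=tf.nn.relu, name="enc")

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    out = sess.run(h, {x: np.random.randn(2, 10, 64).astype(np.float32)})
    print(out.shape)  # (2, 10, 32): 'SAME' padding preserves the sequence length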

 

 
