This post walks through weight normalization as applied to text convolutions (conv1d) and fully connected (dense) layers, following the implementation in simple-effective-text-matching. Weight normalization (Salimans & Kingma, 2016) reparameterizes each weight vector as w = g * v / ||v||, decoupling the vector's direction v / ||v|| from its learned magnitude g.
import numpy as np
import tensorflow as tf
def gelu(x):
    # Tanh approximation of GELU: 0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x^3))),
    # where 0.7978845608 ≈ sqrt(2/pi).
    return 0.5 * x * (1 + tf.nn.tanh(x * 0.7978845608 * (1 + 0.044715 * x * x)))
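As a sanity check, here is a minimal sketch (assuming TF 1.x; the name gelu_exact is mine, not from the original code) comparing the tanh approximation above against the exact GELU computed from the Gaussian CDF:

def gelu_exact(x):
    # Exact GELU: x * Phi(x), with Phi the standard normal CDF via tf.erf.
    return 0.5 * x * (1.0 + tf.erf(x / np.sqrt(2.0)))

xs = tf.constant(np.linspace(-3.0, 3.0, 61), dtype=tf.float32)
max_err = tf.reduce_max(tf.abs(gelu(xs) - gelu_exact(xs)))
with tf.Session() as sess:
    print(sess.run(max_err))  # small; roughly on the order of 1e-3 or below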
def get_weight(shape, gain=np.sqrt(2), weight_norm=True, fan_in=None, name="weight"):
    if fan_in is None:
        fan_in = np.prod(shape[:-1])
    std = gain / np.sqrt(fan_in)  # He initialization
    w = tf.get_variable(name, shape=shape, initializer=tf.initializers.random_normal(0, std),
                        dtype=tf.float32)
    if weight_norm:
        # g is initialized to ones, so right after initialization the multiplication by g
        # below is a no-op and the result is just the unit-norm direction (return the ww
        # tensor to verify). During training g learns each output column's magnitude.
        g = tf.get_variable("{}_g".format(name), shape=(1, ) * (len(shape) - 1) + (shape[-1], ),
                            initializer=tf.ones_initializer)
        # L2 norm of each output column, reduced over all input dimensions.
        w_norm = tf.sqrt(tf.reduce_sum(tf.square(w), axis=list(range(len(shape) - 1)), keepdims=True))
        # ww = w / tf.maximum(w_norm, 1e-7)
        w = w / tf.maximum(w_norm, 1e-7) * g
    return w
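Since g starts at ones, the returned w should have unit-norm output columns right after initialization. A minimal verification sketch (assuming TF 1.x; the scope name "check" and the [3, 4] shape are just illustrative):

with tf.Graph().as_default():
    with tf.variable_scope("check"):
        w = get_weight([3, 4], weight_norm=True)
    # Norm of each of the 4 output columns, reduced over the input dimension.
    col_norms = tf.sqrt(tf.reduce_sum(tf.square(w), axis=0))
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        print(sess.run(col_norms))  # approximately [1. 1. 1. 1.]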
def apply_bias(x, name='bias'):
    b = tf.get_variable(name, shape=[x.get_shape()[-1]], initializer=tf.zeros_initializer)
    b = tf.cast(b, x.dtype)
    # Reshape to [1, ..., 1, channels] so the bias broadcasts over all leading dimensions.
    b = tf.reshape(b, [1] * len(x.get_shape()[:-1]) + [x.get_shape().as_list()[-1]])
    return x + b
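A tiny sketch (assuming TF 1.x; the scope name "bias_demo" is illustrative) of how apply_bias broadcasts, the bias being added to every position of every batch element:

with tf.Graph().as_default(), tf.variable_scope("bias_demo"):
    x = tf.zeros([2, 3, 4])
    y = apply_bias(x)  # bias reshaped to [1, 1, 4], broadcast over the first two dims
    print(y.shape)  # (2, 3, 4)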
def dense(x, units, activation=None, name="dense"):
    """
    :param x: float tensor of shape [..., origin_units]
    :param units: int, number of output units
    :param activation: a TensorFlow activation function, e.g. tf.nn.relu
    :param name: str, name of the variable scope for this layer
    :return: float tensor of shape [..., units]
    """
    with tf.variable_scope(name):
        fan_in = x.shape[-1].value
        # Output shape: the same leading dims as x, with the last dim replaced by units.
        new_shape = tf.concat([tf.shape(x)[:-1], tf.constant([units])], axis=0)
        # Flatten all leading dims so the matmul is a plain 2-D product.
        x = tf.reshape(x, (-1, fan_in))
        gain = np.sqrt(2) if activation is tf.nn.relu else 1.0
        w = get_weight([fan_in, units], gain=gain)
        out = apply_bias(tf.matmul(x, w))
        out = tf.reshape(out, new_shape)
        if activation:
            # Following simple-effective-text-matching, relu is silently replaced by gelu
            # (the He gain above is still computed as if relu were used).
            if activation is tf.nn.relu:
                activation = gelu
            out = activation(out)
        return out
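A minimal usage sketch for dense (assuming TF 1.x; the shapes and the scope name "proj" are illustrative). The layer accepts any number of leading dimensions and only changes the last one:

with tf.Graph().as_default():
    x = tf.placeholder(tf.float32, [None, None, 128])  # [batch, seq, emb]
    h = dense(x, units=64, activation=tf.nn.relu, name="proj")  # relu is swapped for gelu internally
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        out = sess.run(h, {x: np.random.randn(2, 5, 128).astype(np.float32)})
        print(out.shape)  # (2, 5, 64)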
def conv1d(x, filters, kernel_size, activation=None, name='conv1d'):
    """
    :param x: float tensor of shape [batch, seq_x, embedding_size], i.e. the embedded text
    :param filters: int, number of convolution filters (output channels)
    :param kernel_size: int, kernel size along the sequence axis (this is a 1-D convolution)
    :param activation: a built-in TensorFlow activation function
    :param name: str, name of the variable scope for this convolution
    :return: float tensor of shape [batch, seq_x, filters]
    """
    with tf.variable_scope(name):
        gain = np.sqrt(2) if activation is tf.nn.relu else 1
        # Implement the 1-D convolution as a 2-D convolution with height 1:
        # x becomes [batch, 1, seq_x, embedding_size] and the kernel
        # w becomes [1, kernel_size, embedding_size, filters].
        x = tf.expand_dims(x, 1)
        w = get_weight([kernel_size, x.shape[-1].value, filters], gain=gain)
        w = tf.expand_dims(w, 0)
        out = tf.nn.conv2d(x, w, strides=[1, 1, 1, 1], padding='SAME')
        # Drop the dummy height dimension: back to [batch, seq_x, filters].
        out = tf.squeeze(out, [1])
        out = apply_bias(out)
        if activation:
            out = activation(out)
        return out
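Finally, a minimal end-to-end sketch of conv1d (assuming TF 1.x; the shapes and the scope name "enc" are illustrative). With SAME padding the sequence length is preserved and only the channel dimension changes:

with tf.Graph().as_default():
    x = tf.placeholder(tf.float32, [None, None, 300])  # [batch, seq_x, embedding_size]
    y = conv1d(x, filters=150, kernel_size=3, activation=tf.nn.relu, name="enc")
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        out = sess.run(y, {x: np.random.randn(4, 20, 300).astype(np.float32)})
        print(out.shape)  # (4, 20, 150)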