Huabei (花呗) Q&A Matching (Part 2: Preparing the Helper Functions)

As of 2021/6/16, MatchZoo, a very powerful text-matching library, has not yet been updated to TF 2.4 or later, so it cannot use my local RTX 3090 for acceleration. To work around this, I worked backwards from its source code and re-implemented these models directly in TF 2.4.

"""
作者英俊
QQ 2227495940
所有权:西安建筑科技大学草堂校区 信控楼704实验室
"""
"暂定只能扣13个模型出来"
'暂定只能扣13个模型出来'

Model construction

# Import the models and layers used below
from tensorflow import keras                     # the extracted MatchZoo code references keras.layers directly
from tensorflow.keras import Model, Input
from tensorflow.keras.layers import concatenate, Concatenate, Dense, Embedding
from tensorflow.keras.layers import Conv1D, Dropout, GlobalMaxPool1D
from tensorflow.keras.layers import Dot, Lambda
from tensorflow.keras.layers import Flatten, MaxPool1D, MaxPooling1D
from tensorflow.keras.layers import Activation, Layer
from tensorflow.keras.layers import Reshape
from tensorflow.keras.layers import Bidirectional
from tensorflow.keras.layers import Softmax
from time import time                            # for timing runs
import typing

GPU acceleration

import tensorflow as tf
print(tf.__version__)

gpus = tf.config.list_physical_devices("GPU")
print(gpus)

if gpus:
    gpu0 = gpus[0]  # if there are several GPUs, use only GPU 0
    tf.config.experimental.set_memory_growth(gpu0, True)  # let GPU memory grow on demand
    # Alternatively, cap GPU memory at a fixed amount (e.g. 4 GB):
    # tf.config.experimental.set_virtual_device_configuration(gpu0,
    #     [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=4096)])
    tf.config.set_visible_devices([gpu0], "GPU")
2.4.0
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

Functions needed for evaluation and monitoring

import datetime  # for timestamps
# Print a timestamped separator line
def printbar():
    nowtime = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    print("\n" + "==========" * 8 + "%s" % nowtime)
# On macOS, running PyTorch and matplotlib together in Jupyter needs this environment variable:
# os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"

import os
stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
logdir = os.path.join('model', 'autograph', stamp)
# Inspect the training results and visualize them
%matplotlib inline
%config InlineBackend.figure_format = 'svg'
 
import matplotlib.pyplot as plt
 
def plot_metric(history, metric):
    train_metrics = history.history[metric]
    val_metrics = history.history['val_'+metric]
    epochs = range(1, len(train_metrics) + 1)
    plt.plot(epochs, train_metrics, 'bo--')
    plt.plot(epochs, val_metrics, 'ro-')
    plt.title('Training and validation '+ metric)
    plt.xlabel("Epochs")
    plt.ylabel(metric)
    plt.legend(["train_"+metric, 'val_'+metric])
    plt.show()
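
A quick sketch of how printbar and plot_metric are typically used once a model has been trained; the model and datasets below are placeholders that only appear in a later part.

# Hypothetical usage (model / train_ds / val_ds are not defined in this part):
# printbar()                                        # timestamped separator before training
# history = model.fit(train_ds, validation_data=val_ds, epochs=10)
# plot_metric(history, "loss")                      # training vs. validation loss
# plot_metric(history, "accuracy")                  # training vs. validation accuracy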
 

Code extracted from MatchZoo

def _make_multi_layer_perceptron_layer() -> keras.layers.Layer:
    # TODO: do not create new layers for a second call
    # In MatchZoo this guarded on the `with_multi_layer_perceptron` parameter;
    # the check is stubbed out here because the MLP is always used.
    def _wrapper(x):
        activation = 'relu'
        # three hidden Dense(128) layers followed by a Dense(64) output layer
        for _ in range(3):
            x = keras.layers.Dense(128, activation=activation)(x)
        return keras.layers.Dense(64, activation=activation)(x)

    return _wrapper
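
A small usage sketch for the wrapper; the 256-dimensional input is an illustrative placeholder, not a value from this post.

demo_in = Input(shape=(256,))                  # hypothetical flattened feature vector
mlp = _make_multi_layer_perceptron_layer()     # returns a callable that stacks the Dense layers
demo_out = mlp(demo_in)                        # three Dense(128) layers + Dense(64): (batch, 64)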
def _kernel_layer(mu: float, sigma: float) ->keras.layers.Layer:
    """
    Gaussian kernel layer in KNRM.

    :param mu: Float, mean of the kernel.
    :param sigma: Float, sigma of the kernel.
    :return: `keras.layers.Layer`.
    """

    def kernel(x):
        return tf.math.exp(-0.5 * (x - mu) * (x - mu) / sigma / sigma)

    return Activation(kernel)
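
In KNRM these Gaussian kernels are evaluated on the query-document similarity matrix. The sketch below builds an illustrative kernel bank; the kernel count and the mu/sigma schedule are assumptions for demonstration, not values taken from this post.

kernel_num = 11                                 # hypothetical number of kernels
kernels = []
for i in range(kernel_num):
    mu = 1.0 / (kernel_num - 1) + (2.0 * i) / (kernel_num - 1) - 1.0   # evenly spaced centers
    sigma = 0.1
    if mu > 1.0:                                # the last kernel collapses onto exact matches
        sigma = 0.001
        mu = 1.0
    kernels.append(_kernel_layer(mu, sigma))    # each entry is an Activation layer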
def _xor_match(x):
    """Binary word-level exact-match matrix between two padded id sequences."""
    t1 = x[0]
    t2 = x[1]
    t1_shape = t1.get_shape()
    t2_shape = t2.get_shape()
    t1_expand = tf.stack([t1] * t2_shape[1], 2)   # (B, L1) -> (B, L1, L2)
    t2_expand = tf.stack([t2] * t1_shape[1], 1)   # (B, L2) -> (B, L1, L2)
    out_bool = tf.equal(t1_expand, t2_expand)
    out = tf.cast(out_bool, tf.float32)
    return out
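
A usage sketch for _xor_match, wrapped in a Lambda layer to produce the match matrix; the sequence lengths are illustrative.

left_ids = Input(shape=(20,), dtype='int32')               # query token ids, length 20 assumed
right_ids = Input(shape=(40,), dtype='int32')              # candidate token ids, length 40 assumed
match_matrix = Lambda(_xor_match)([left_ids, right_ids])   # (batch, 20, 40) of 0.0 / 1.0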
def _hadamard_dot(x):
    """Element-wise (Hadamard) product of two tensors with the same shape."""
    x1 = x[0]
    x2 = x[1]
    out = x1 * x2
    return out
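
A similar sketch for _hadamard_dot on two equal-shape representations (shapes are illustrative).

rep_a = Input(shape=(20, 128))                          # hypothetical left representation
rep_b = Input(shape=(20, 128))                          # hypothetical right representation
interaction = Lambda(_hadamard_dot)([rep_a, rep_b])     # element-wise product, (batch, 20, 128)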
def attention_layer(attention_input: typing.Any,
                    attention_mask: typing.Any = None
                    ) -> keras.layers.Layer:
    """Single-head attention weights over the timestep axis."""
    # shape = [B, L, 1]
    dense_input = Dense(1, use_bias=False)(attention_input)

    if attention_mask is not None:
        # attention_mask is 1.0 for positions we want to attend to and 0.0 for
        # masked (PAD) positions, so this adds 0.0 to attended positions and
        # -10000.0 to masked ones before the softmax.
        #
        # The original MatchZoo code inlined the expression in the Lambda:
        #     dense_input = keras.layers.Lambda(
        #         lambda x: x + (1.0 - attention_mask) * -10000.0,
        #         name="attention_mask"
        #     )(dense_input)
        # The key here is to pull that expression out into a named function and
        # pass the function to the Lambda in its place.
        def _mask_logits(x):
            return x + (1.0 - attention_mask) * -10000.0

        # shape = [B, L, 1]
        dense_input = Lambda(_mask_logits, name="attention_mask")(dense_input)

    # shape = [B, L, 1]
    attention_probs = Lambda(
        lambda x: tf.nn.softmax(x, axis=1),
        output_shape=lambda s: (s[0], s[1], s[2]),
        name="attention_probs"
    )(dense_input)
    return attention_probs
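
A sketch of attention pooling over an embedded text with attention_layer; the vocabulary size, embedding dimension and sequence length are placeholders.

seq_in = Input(shape=(20,), dtype='int32')
seq_emb = Embedding(input_dim=30000, output_dim=128)(seq_in)      # (batch, 20, 128), sizes assumed
probs = attention_layer(seq_emb)                                  # (batch, 20, 1), softmax over timesteps
pooled = Lambda(lambda t: tf.reduce_sum(t[0] * t[1], axis=1))([seq_emb, probs])   # (batch, 128)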
def _sentence_encoder(
        input_: typing.Any,
        lstm_num_units: int,
        drop_rate: float
) -> typing.Any:
    """
    Stack three BiLSTM-MaxPooling blocks as a hierarchical structure.
    Concatenate the output of the three blocks as the input sentence embedding.
    Each BiLSTM layer reads the input sentence as its input.
    Each BiLSTM layer except the first one is initialized (the initial
    hidden state and the cell state) with the final state of the previous
    layer.
    """
    emb1 = keras.layers.Bidirectional(
        keras.layers.LSTM(
            units=lstm_num_units,
            return_sequences=True,
            return_state=True,
            dropout=drop_rate,
            recurrent_dropout=drop_rate),
        merge_mode='concat')(input_)
    emb1_maxpooling = keras.layers.GlobalMaxPooling1D()(emb1[0])

    emb2 = keras.layers.Bidirectional(
        keras.layers.LSTM(
            units=lstm_num_units,
            return_sequences=True,
            return_state=True,
            dropout=drop_rate,
            recurrent_dropout=drop_rate),
        merge_mode='concat')(input_, initial_state=emb1[1:5])
    emb2_maxpooling = keras.layers.GlobalMaxPooling1D()(emb2[0])

    emb3 = keras.layers.Bidirectional(
        keras.layers.LSTM(
            units=lstm_num_units,
            return_sequences=True,
            return_state=True,
            dropout=drop_rate,
            recurrent_dropout=drop_rate),
        merge_mode='concat')(input_, initial_state=emb2[1:5])
    emb3_maxpooling = keras.layers.GlobalMaxPooling1D()(emb3[0])

    emb = keras.layers.Concatenate(axis=1)(
        [emb1_maxpooling, emb2_maxpooling, emb3_maxpooling])

    return emb
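
A usage sketch for the stacked encoder; vocabulary size, embedding dimension and LSTM units below are assumptions for illustration.

text_in = Input(shape=(20,), dtype='int32')
text_emb = Embedding(input_dim=30000, output_dim=128)(text_in)              # (batch, 20, 128)
text_vec = _sentence_encoder(text_emb, lstm_num_units=64, drop_rate=0.1)    # (batch, 3 * 2 * 64)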
def _classifier(
        input_: typing.Any,
        mlp_num_layers: int,
        mlp_num_units: list,
        drop_rate: float,
        leaky_relu_alpha: float
) -> typing.Any:
    """Stack Dropout + Dense + LeakyReLU blocks on top of the pooled features."""
    for i in range(mlp_num_layers - 1):
        input_ = keras.layers.Dropout(rate=drop_rate)(input_)
        input_ = keras.layers.Dense(mlp_num_units[i])(input_)
        input_ = keras.layers.LeakyReLU(alpha=leaky_relu_alpha)(input_)

    return input_
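
A sketch of the classifier head on top of a pooled vector; layer counts and sizes are illustrative (arguments are passed positionally: input, number of layers, unit list, dropout rate, LeakyReLU alpha).

clf_in = Input(shape=(384,))                                  # e.g. the concatenated sentence vector above
clf_hidden = _classifier(clf_in, 3, [256, 128], 0.1, 0.05)    # two Dropout + Dense + LeakyReLU blocks
score = Dense(1, activation='sigmoid')(clf_hidden)            # matching probability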
def _expand_dim(inp: tf.Tensor, axis: int) -> keras.layers.Layer:
    """
    Wrap tf.expand_dims into a Lambda layer.

    :param inp: input tensor to expand the dimension
    :param axis: the axis of the new dimension
    """
    return keras.layers.Lambda(lambda x: tf.expand_dims(x, axis=axis))(inp)
def _make_atten_mask_layer() -> keras.layers.Layer:
    """
    Make mask layer for attention weight matrix so that
    each word won't pay attention to <PAD> timestep.
    """
    return keras.layers.Lambda(
        lambda weight_mask: weight_mask[0] + (1.0 - weight_mask[1]) * -1e7,
        name="atten_mask")
def _avg(texts: tf.Tensor, mask: tf.Tensor) -> tf.Tensor:
    """
    Compute the mean of each text according to its real length.

    :param texts: tensor with shape [B, T, H]
    :param mask: tensor with shape [B, T],
        where 1 means a valid token and 0 means <PAD>
    """
    mask = _expand_dim(mask, axis=2)
    new_texts = keras.layers.Multiply()([texts, mask])

    # timestep-wise division; exclude the PAD positions when computing the average
    text_avg = keras.layers.Lambda(
        lambda text_mask:
            tf.reduce_sum(text_mask[0], axis=1) / tf.reduce_sum(text_mask[1], axis=1),
    )([new_texts, mask])

    return text_avg
def _max(texts: tf.Tensor, mask: tf.Tensor) -> tf.Tensor:
    """
    Compute the max of each text according to its real length.

    :param texts: tensor with shape [B, T, H]
    :param mask: tensor with shape [B, T],
        where 1 means a valid token and 0 means <PAD>
    """
    mask = _expand_dim(mask, axis=2)
    new_texts = keras.layers.Multiply()([texts, mask])   # zero out <PAD> positions

    text_max = keras.layers.Lambda(
        lambda x: tf.reduce_max(x, axis=1),
    )(new_texts)

    return text_max
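
A pooling sketch combining both helpers, in the masked average + max style used by ESIM; the shapes are illustrative.

enc_seq = Input(shape=(20, 128))       # hypothetical encoded sequence
pad_mask = Input(shape=(20,))          # 1.0 at real tokens, 0.0 at <PAD>
avg_vec = _avg(enc_seq, pad_mask)      # length-aware mean, (batch, 128)
max_vec = _max(enc_seq, pad_mask)      # max after zeroing <PAD> positions, (batch, 128)
pooled = Concatenate(axis=-1)([avg_vec, max_vec])    # (batch, 256)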