tensorflow keras crf实现

最新推荐文章于 2022-03-19 18:32:36 发布

唐僧爱吃唐僧肉

最新推荐文章于 2022-03-19 18:32:36 发布

阅读量1.8k

点赞数 1

分类专栏：深度学习函数学习

本文链接：https://blog.csdn.net/znevegiveup1/article/details/118102998

版权

深度学习函数学习专栏收录该内容

52 篇文章 1 订阅

订阅专栏

参照了大佬对应网站的写法
CRF对应的定义

import tensorflow as tf
import tensorflow.keras.backend as K
import tensorflow_addons as tfa


class CRF(tf.keras.layers.Layer):
    """
    Conditional Random Field layer (tf.keras).

    `CRF` can be used as the last layer in a network (as a classifier). The
    size of the input's last axis is taken as the number of classes the CRF
    can predict (a linear layer in front of it is recommended).

    Args:
        sparse_target (bool): stored on the layer as `self.sparse_target`.
            It is not consulted by any method of this class; `loss` and
            `accuracy` decide between sparse and one-hot targets from the
            rank of `y_true`.
    Input shape:
        3D tensor with shape `(batch_size, sentence length, num_classes)`.
    Output shape:
        When called with a truthy `training` flag the inputs are returned
        unchanged: `(batch_size, sentence length, num_classes)`.
        Otherwise the Viterbi-decoded tag sequence is returned, cast to the
        input dtype.
    Masking
        This layer supports keras masking for input data with a variable number
        of timesteps. To introduce masks to your data,
        use an embedding layer with the `mask_zero` parameter
        set to `True` or add a Masking Layer before this Layer
    """

    def __init__(self, sparse_target=True, **kwargs):
        self.transitions = None
        super(CRF, self).__init__(**kwargs)
        self.sparse_target = sparse_target
        # Filled in by `call`; `loss` and `accuracy` read them afterwards.
        self.sequence_lengths = None
        self.mask = None
        # Number of classes; set in `build` from the input's last axis.
        self.output_dim = None

    def get_config(self):
        """Return the constructor arguments needed to re-create the layer.

        Only arguments that `__init__` accepts are emitted, so that
        `CRF.from_config(layer.get_config())` works. `output_dim` is derived
        in `build`, and the transition matrix is a layer weight created in
        `build`, so neither belongs in the config.
        """
        config = super(CRF, self).get_config()
        config["sparse_target"] = self.sparse_target
        return config

    def build(self, input_shape):
        """Create the square `(num_classes, num_classes)` transition weight."""
        self.output_dim = input_shape[-1]
        # assert len(input_shape) == 3
        self.transitions = self.add_weight(
            name="transitions",
            shape=[self.output_dim, self.output_dim],
            initializer="glorot_uniform",
            trainable=True
        )

    def call(self, inputs, mask=None, training=None):
        """Record sequence lengths, then pass through or decode.

        Args:
            inputs: tensor indexed as `inputs[:, :, 0]`, i.e. rank 3.
            mask: optional keras mask; its last axis is summed to obtain the
                per-sequence lengths.
            training: when truthy the inputs are returned untouched;
                otherwise the sequence is decoded with `tfa.text.crf_decode`.
        """
        if mask is not None:
            self.sequence_lengths = K.sum(K.cast(mask, 'int32'), axis=-1)
            self.mask = mask
        else:
            # No mask: every sequence spans all timesteps, so sum a tensor
            # of ones shaped (batch, time) along the time axis.
            self.sequence_lengths = K.sum(K.ones_like(inputs[:, :, 0], dtype='int32'), axis=-1)
        if training:
            return inputs
        viterbi_sequence, _ = tfa.text.crf_decode(
            inputs, self.transitions, self.sequence_lengths
        )
        # tensorflow requires TRUE and FALSE branch has the same dtype
        return K.cast(viterbi_sequence, inputs.dtype)

    def loss(self, y_true, y_pred):
        """Mean negative CRF log-likelihood over the batch.

        Rank-3 `y_true` is reduced to tag indices with argmax. Rank-2
        `y_pred` (a decoded tag sequence) is expanded to one-hot scores
        before being passed to `tfa.text.crf_log_likelihood`.
        """
        if len(K.int_shape(y_true)) == 3:
            y_true = K.argmax(y_true, axis=-1)
        if len(y_pred.shape) == 2:
            y_pred = K.one_hot(K.cast(y_pred, 'int32'), self.output_dim)
        log_likelihood, _ = tfa.text.crf_log_likelihood(
            y_pred,
            y_true,
            self.sequence_lengths,
            transition_params=self.transitions,
        )
        return tf.reduce_mean(-log_likelihood)

    def compute_output_shape(self, input_shape):
        """Return the first two input dims followed by the class count."""
        # The attribute set in `__init__`/`build` is `output_dim`; reading
        # `out_dim` raised AttributeError.
        return input_shape[:2] + (self.output_dim,)

    def compute_mask(self, inputs, mask=None):
        """Propagate the incoming mask unchanged."""
        return mask

    # use crf decode to estimate accuracy
    def accuracy(self, y_true, y_pred):
        """Fraction of positions whose decoded tag equals the true tag.

        With a mask recorded by `call`, only masked-in positions count;
        otherwise matches are divided by the summed sequence lengths.
        """
        mask = self.mask
        if len(K.int_shape(y_true)) == 3:
            y_true = K.argmax(y_true, axis=-1)
        if len(y_pred.shape) == 3:
            # Rank-3 predictions are unary scores: decode them to tags first.
            y_pred, _ = tfa.text.crf_decode(
                y_pred, self.transitions, self.sequence_lengths
            )
        y_true = K.cast(y_true, y_pred.dtype)
        is_equal = K.equal(y_true, y_pred)
        is_equal = K.cast(is_equal, y_pred.dtype)
        if mask is None:
            return K.sum(is_equal) / K.sum(self.sequence_lengths)
        else:
            mask = K.cast(mask, y_pred.dtype)
            return K.sum(is_equal * mask) / K.sum(mask)

测试样例如下(不使用混合精度)：

from tensorflow.keras.layers import Input, Embedding, Bidirectional, GRU, Dense
from tensorflow.keras.models import Model
import tensorflow as tf
from tf2crf import CRF

# Set TensorFlow's global random seed to a fixed value (200) for this demo.
tf.random.set_seed(200)


def test():
    """Fit a small Embedding -> BiGRU -> Dense -> CRF tagger on toy data.

    The embedding does not mask zeros here. After 10 epochs the model is
    saved under the path 'model'.
    """
    token_ids = Input(shape=(None,), dtype='int32')
    embedded = Embedding(100, 40, trainable=True, mask_zero=False)(token_ids)
    encoded = Bidirectional(GRU(64, return_sequences=True))(embedded)
    # Linear projection to 9 scores per timestep, fed to the CRF.
    scores = Dense(9, activation=None)(encoded)
    crf = CRF(dtype='float32')
    tagged = crf(scores)

    model = Model(token_ids, tagged)
    model.compile(loss=crf.loss, optimizer='adam', metrics=[crf.accuracy])

    # Ten identical sequences of length 9, with matching tag sequences.
    features = [[5, 2, 3] * 3] * 10
    labels = [[1, 2, 3] * 3] * 10

    model.fit(x=features, y=labels, epochs=10, batch_size=4)
    model.save('model')


# Run the demo only when this file is executed as a script.
if __name__ == '__main__':
    test()

使用混合精度之后

from tensorflow.keras.layers import Input, Embedding, Bidirectional, GRU, Dense
from tensorflow.keras.models import Model
from tf2crf import CRF
from tensorflow.keras.mixed_precision import experimental as mixed_precision
# Install 'mixed_float16' as the global Keras dtype policy; this runs at
# import time, before test() constructs any layers.
# NOTE(review): this uses the `experimental` mixed-precision API - confirm it
# still exists in the TensorFlow version you target.
policy = mixed_precision.Policy('mixed_float16')
mixed_precision.set_policy(policy)

def test():
    """Fit the Embedding -> BiGRU -> Dense -> CRF tagger with zero-masking.

    The embedding masks zeros here, and the CRF layer is created with
    dtype 'float32'. After 2 epochs the model is saved under the path
    'model'.
    """
    token_ids = Input(shape=(None,), dtype='int32')
    embedded = Embedding(100, 40, trainable=True, mask_zero=True)(token_ids)
    encoded = Bidirectional(GRU(64, return_sequences=True))(embedded)
    # Linear projection to 9 scores per timestep, fed to the CRF.
    scores = Dense(9, activation=None)(encoded)
    crf = CRF(dtype='float32')
    tagged = crf(scores)

    model = Model(token_ids, tagged)
    model.compile(loss=crf.loss, optimizer='adam', metrics=[crf.accuracy])

    # Ten identical sequences of length 9, with matching tag sequences.
    features = [[5, 2, 3] * 3] * 10
    labels = [[1, 2, 3] * 3] * 10

    model.fit(x=features, y=labels, epochs=2, batch_size=2)
    model.save('model')


# Run the demo only when this file is executed as a script.
if __name__ == '__main__':
    test()

下面解读一下这段对应的crf代码
首先我认为非常精彩的部分在于sequence_lengths的计算过程

# Excerpt from CRF.call: derive one length per sequence in the batch.
if mask is not None:
    # Cast the mask to int32 and sum its last axis.
    self.sequence_lengths = K.sum(K.cast(mask, 'int32'), axis=-1)
    self.mask = mask
else:
    # No mask: sum a ones tensor shaped like inputs[:, :, 0] over its last
    # axis, which yields the number of timesteps for every sequence.
    self.sequence_lengths = K.sum(K.ones_like(inputs[:, :, 0], dtype='int32'), axis=-1)

比如输入inputs的形状为(None,None,768)，则inputs[:,:,0]的形状为(None,None)；K.ones_like(inputs[:,:,0])得到一个与inputs[:,:,0]形状一致的全1 tensor，接着沿最后一个维度(也就是inputs的第二个维度)求和，得到一个形状为(batch_size,)的长度tensor。需要注意：在没有mask的分支里，同一个batch内每个句子的长度都等于时间步数，比如batch_size = 5、时间步数为9时得到的是[9,9,9,9,9]；只有在传入mask的分支里(对mask求和)，才可能得到[3,6,7,8,9]这样各不相同的长度，代表第一个句子长度为3，第二个句子长度为6，第三个句子长度为7，第四个句子长度为8，第五个句子长度为9。
这里的巧妙之处在于：先用K.ones_like按原先的形状填满1，再用K.sum将最后一维的1全部相加，这样即使输入的静态形状未知(为None)，也能在运行时动态得到每个样本的序列长度。

唐僧爱吃唐僧肉

关注

1
点赞
踩
12

收藏

觉得还不错? 一键收藏
4
评论
tensorflow keras crf实现

import tensorflow_addons as tfaimport tensorflow as tfimport numpy as npinputs=tf.random.truncated_normal([2,10,5])target=tf.convert_to_tensor(np.random.randint(5,size=(2,10)),dtype=tf.int32)out=tf.keras.layers.Softmax(inputs)lens=tf.convert_to_ten
复制链接

扫一扫

专栏目录