tensorflow keras crf实现

参照了大佬在对应网站上的写法
CRF对应的定义

import tensorflow as tf
import tensorflow.keras.backend as K
import tensorflow_addons as tfa


class CRF(tf.keras.layers.Layer):
    """Conditional Random Field layer for ``tf.keras``.

    ``CRF`` is meant to be the last layer of a sequence tagger. The size of
    the input's last axis is taken as the number of tags, so a linear
    (activation-free) projection to ``num_classes`` is recommended in front
    of it.

    Args:
        sparse_target (bool): stored on the layer and reported by
            ``get_config``; the methods of this class do not otherwise
            read it.
        **kwargs: forwarded to ``tf.keras.layers.Layer``. The keys
            ``output_dim`` and ``transitions`` (as produced by
            ``get_config``) are accepted and discarded.

    Input shape:
        3D tensor with shape ``(batch_size, sentence length, num_classes)``.

    Output shape:
        When ``training`` is truthy the inputs are returned unchanged:
        ``(batch_size, sentence length, num_classes)``.
        Otherwise the Viterbi-decoded tag ids are returned, cast to the
        input dtype: ``(batch_size, sentence length)``.

    Masking:
        This layer supports keras masking for input data with a variable
        number of timesteps. To introduce masks to your data, use an
        embedding layer with the ``mask_zero`` parameter set to ``True`` or
        add a Masking layer before this layer.

    Note:
        ``loss`` and ``accuracy`` read ``sequence_lengths`` / ``mask`` as
        recorded by the most recent ``call``; they are intended to be used
        together with the model this layer is part of.
    """

    def __init__(self, sparse_target=True, **kwargs):
        # get_config() reports these two keys for inspection. The base Layer
        # constructor rejects unknown keyword arguments, so they must be
        # removed here for CRF.from_config(layer.get_config()) to work.
        # The transition weights themselves are restored by regular weight
        # loading, not from the config.
        kwargs.pop("output_dim", None)
        kwargs.pop("transitions", None)
        super().__init__(**kwargs)
        self.sparse_target = sparse_target
        self.transitions = None
        self.sequence_lengths = None
        self.mask = None
        self.output_dim = None

    def get_config(self):
        """Return the layer configuration.

        ``transitions`` holds the evaluated transition matrix once the
        layer is built and ``None`` before that.
        """
        config = super().get_config()
        config.update(
            {
                "sparse_target": self.sparse_target,
                "output_dim": self.output_dim,
                # The weight does not exist before build(); evaluating
                # None would raise.
                "transitions": (
                    None
                    if self.transitions is None
                    else K.eval(self.transitions)
                ),
            }
        )
        return config

    def build(self, input_shape):
        """Create the square tag-to-tag transition matrix."""
        self.output_dim = input_shape[-1]
        self.transitions = self.add_weight(
            name="transitions",
            shape=[self.output_dim, self.output_dim],
            initializer="glorot_uniform",
            trainable=True,
        )

    def call(self, inputs, mask=None, training=None):
        """Record sequence lengths, then pass through or decode.

        Args:
            inputs: 3D tensor of per-tag scores.
            mask: optional keras mask; its truthy entries per sample are
                counted to obtain the sequence lengths.
            training: when truthy the inputs are returned untouched so the
                loss can be computed on the raw scores.

        Returns:
            ``inputs`` when ``training`` is truthy, otherwise the decoded
            tag ids cast to ``inputs.dtype``.
        """
        # Always overwrite the stored mask so a call without a mask does
        # not leave a stale one behind for accuracy().
        self.mask = mask
        if mask is not None:
            self.sequence_lengths = K.sum(K.cast(mask, "int32"), axis=-1)
        else:
            # No mask: every timestep counts, so each sample's length is
            # the full size of the time axis.
            all_steps = K.ones_like(inputs[:, :, 0], dtype="int32")
            self.sequence_lengths = K.sum(all_steps, axis=-1)
        if training:
            return inputs
        viterbi_sequence, _ = tfa.text.crf_decode(
            inputs, self.transitions, self.sequence_lengths
        )
        # tensorflow requires TRUE and FALSE branch has the same dtype
        return K.cast(viterbi_sequence, inputs.dtype)

    def loss(self, y_true, y_pred):
        """Mean negative CRF log-likelihood.

        Args:
            y_true: tag ids ``(batch, time)`` or one-hot tags
                ``(batch, time, num_classes)``.
            y_pred: per-tag scores ``(batch, time, num_classes)``, or
                decoded tag ids ``(batch, time)`` which are one-hot encoded
                first.
        """
        if len(K.int_shape(y_true)) == 3:
            y_true = K.argmax(y_true, axis=-1)
        # Tag ids are used as indices; make sure they are integers even if
        # the labels were fed as floats.
        y_true = K.cast(y_true, "int32")
        if len(y_pred.shape) == 2:
            y_pred = K.one_hot(K.cast(y_pred, "int32"), self.output_dim)
        log_likelihood, _ = tfa.text.crf_log_likelihood(
            y_pred,
            y_true,
            self.sequence_lengths,
            transition_params=self.transitions,
        )
        return tf.reduce_mean(-log_likelihood)

    def compute_output_shape(self, input_shape):
        """Return ``(batch, time, num_classes)`` as a tuple."""
        # Before build() output_dim is still None; the last input axis is
        # what build() would store.
        num_tags = self.output_dim
        if num_tags is None:
            num_tags = input_shape[-1]
        return tuple(input_shape[:2]) + (num_tags,)

    def compute_mask(self, inputs, mask=None):
        """Propagate the incoming mask unchanged."""
        return mask

    # use crf decode to estimate accuracy
    def accuracy(self, y_true, y_pred):
        """Fraction of (unmasked) timesteps whose tag matches ``y_true``.

        Args:
            y_true: tag ids ``(batch, time)`` or one-hot tags
                ``(batch, time, num_classes)``.
            y_pred: per-tag scores ``(batch, time, num_classes)`` (decoded
                here) or already decoded tag ids ``(batch, time)``.
        """
        if len(K.int_shape(y_true)) == 3:
            y_true = K.argmax(y_true, axis=-1)
        if len(y_pred.shape) == 3:
            y_pred, _ = tfa.text.crf_decode(
                y_pred, self.transitions, self.sequence_lengths
            )
        y_true = K.cast(y_true, y_pred.dtype)
        # Do the arithmetic in the default float type: y_pred may be int32
        # (decoded here) or float (decoded in call), and dividing a float
        # tensor by the int32 length sum is a dtype error.
        matches = K.cast(K.equal(y_true, y_pred), K.floatx())
        if self.mask is None:
            total_steps = K.cast(K.sum(self.sequence_lengths), K.floatx())
            return K.sum(matches) / total_steps
        weights = K.cast(self.mask, K.floatx())
        return K.sum(matches * weights) / K.sum(weights)

测试样例如下(不使用混合精度):

from tensorflow.keras.layers import Input, Embedding, Bidirectional, GRU, Dense
from tensorflow.keras.models import Model
import tensorflow as tf
from tf2crf import CRF

tf.random.set_seed(200)


def test():
    """Train a small Embedding -> BiGRU -> Dense -> CRF tagger and save it.

    The embedding does not emit a mask here (``mask_zero=False``). The model
    is fitted for 10 epochs with batch size 4 on ten identical 9-token
    samples and then written to ``'model'``.
    """
    token_ids = Input(shape=(None,), dtype='int32')
    features = Embedding(100, 40, trainable=True, mask_zero=False)(token_ids)
    features = Bidirectional(GRU(64, return_sequences=True))(features)
    # Linear projection to 9 tag scores per timestep.
    tag_scores = Dense(9, activation=None)(features)

    crf = CRF(dtype='float32')
    model = Model(token_ids, crf(tag_scores))
    model.compile(optimizer='adam', loss=crf.loss, metrics=[crf.accuracy])

    # Ten copies of the same 9-step sequence and its tag sequence.
    x = [[5, 2, 3, 5, 2, 3, 5, 2, 3] for _ in range(10)]
    y = [[1, 2, 3, 1, 2, 3, 1, 2, 3] for _ in range(10)]

    model.fit(x=x, y=y, epochs=10, batch_size=4)
    model.save('model')


if __name__ == '__main__':
    test()

使用混合精度之后

from tensorflow.keras.layers import Input, Embedding, Bidirectional, GRU, Dense
from tensorflow.keras.models import Model
from tf2crf import CRF
from tensorflow.keras.mixed_precision import experimental as mixed_precision
# Create a 'mixed_float16' policy and install it through set_policy before
# any layer is constructed below. The CRF layer in test() is created with an
# explicit dtype='float32'.
# NOTE(review): the `experimental` mixed-precision namespace imported above
# is, as far as I know, deprecated in later TensorFlow releases in favour of
# tf.keras.mixed_precision.set_global_policy -- confirm against the
# installed TensorFlow version.
policy = mixed_precision.Policy('mixed_float16')
mixed_precision.set_policy(policy)

def test():
    """Train the same small CRF tagger with masking enabled and save it.

    The embedding emits a mask (``mask_zero=True``). The model is fitted for
    2 epochs with batch size 2 on ten identical 9-token samples and then
    written to ``'model'``.
    """
    tokens = Input(shape=(None,), dtype='int32')
    hidden = Embedding(100, 40, trainable=True, mask_zero=True)(tokens)
    hidden = Bidirectional(GRU(64, return_sequences=True))(hidden)
    # Linear projection to 9 tag scores per timestep.
    scores = Dense(9, activation=None)(hidden)

    # The CRF layer is given an explicit float32 dtype.
    crf = CRF(dtype='float32')
    decoded = crf(scores)
    model = Model(tokens, decoded)
    model.compile(optimizer='adam', loss=crf.loss, metrics=[crf.accuracy])

    repeats, n_samples = 3, 10
    x = [[5, 2, 3] * repeats for _ in range(n_samples)]
    y = [[1, 2, 3] * repeats for _ in range(n_samples)]

    model.fit(x=x, y=y, epochs=2, batch_size=2)
    model.save('model')


if __name__ == '__main__':
    test()

下面解读一下这段对应的crf代码
首先我认为非常精彩的部分在于sequence_lengths的计算过程

# Excerpt from CRF.call: derive one sequence length per sample.
if mask is not None:
    # Cast the mask to int32 and sum over the last axis, i.e. count the
    # truthy entries of each sample (assumes mask is (batch, time) --
    # TODO confirm).
    self.sequence_lengths = K.sum(K.cast(mask, 'int32'), axis=-1)
    self.mask = mask
else:
    # No mask: inputs[:, :, 0] drops the last axis, ones_like fills that
    # 2-D slice with 1s, and summing over the last axis gives the size of
    # the time axis -- the same value for every sample in the batch.
    self.sequence_lengths = K.sum(K.ones_like(inputs[:, :, 0], dtype='int32'), axis=-1)

比如输入inputs的形状为(None, None, 768),那么inputs[:, :, 0]的形状为(None, None),K.ones_like(inputs[:, :, 0])得到一个与inputs[:, :, 0]形状一致的全1张量;接着沿最后一个维度(也就是inputs的第二个维度,即时间步维度)求和,得到一个形状为(batch_size,)的长度张量。需要注意的是:在没有mask的分支中,同一个batch内每个句子得到的长度都等于填充后的序列长度,比如batch_size = 5、序列长度为9时得到的是[9, 9, 9, 9, 9];只有在提供mask的分支中,对mask求和才会得到各个句子的真实长度,比如[3, 6, 7, 8, 9],代表第一个句子长度为3,第二个句子长度为6,第三个句子长度为7,第四个句子长度为8,第五个句子长度为9。
这里的巧妙之处就在于使用K.ones_like生成一个与原张量形状相同的全1张量,然后使用K.sum将最后一维的1全部相加,这样不需要显式读取动态形状就能得到序列长度。

  • 1
    点赞
  • 12
    收藏
    觉得还不错? 一键收藏
  • 4
    评论
下面是基于BERT+CRF的命名实体识别代码,其中使用了Hugging Face的Transformers库和TensorFlow 2.0。

首先,需要安装Transformers库:

```
pip install transformers
```

然后,可以使用以下代码实现基于BERT+CRF的命名实体识别:

```
import tensorflow as tf
from transformers import BertTokenizer, TFBertModel
from tensorflow.keras.layers import Input, Dense, TimeDistributed, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow_addons.layers import CRF

# 加载BERT模型和tokenizer
bert_model = TFBertModel.from_pretrained('bert-base-chinese')
tokenizer = BertTokenizer.from_pretrained('bert-base-chinese')

# 加载数据,将数据转换为BERT输入格式
def load_data(path):
    sentences = []
    labels = []
    with open(path, 'r', encoding='utf-8') as f:
        words = []
        tags = []
        for line in f:
            line = line.strip()
            if not line:
                if words:
                    sentences.append(' '.join(words))
                    labels.append(tags)
                    words = []
                    tags = []
            else:
                word, tag = line.split()
                words.append(word)
                tags.append(tag)
    return sentences, labels

train_sentences, train_labels = load_data('train.txt')
test_sentences, test_labels = load_data('test.txt')
train_encodings = tokenizer(train_sentences, truncation=True, padding=True)
test_encodings = tokenizer(test_sentences, truncation=True, padding=True)
train_labels = [[tag2id.get(tag, 0) for tag in tags] for tags in train_labels]
test_labels = [[tag2id.get(tag, 0) for tag in tags] for tags in test_labels]

# 构建模型
input_ids = Input(shape=(None,), dtype=tf.int32, name='input_ids')
attention_mask = Input(shape=(None,), dtype=tf.int32, name='attention_mask')
token_type_ids = Input(shape=(None,), dtype=tf.int32, name='token_type_ids')
output = bert_model(input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids)[0]
output = Dropout(0.1)(output)
output = TimeDistributed(Dense(num_tags, activation='relu'))(output)
crf = CRF(num_tags)
output = crf(output)
model = Model(inputs=[input_ids, attention_mask, token_type_ids], outputs=[output])
model.compile(optimizer=Adam(lr=1e-5), loss=crf.loss_function, metrics=[crf.accuracy])

# 训练模型
history = model.fit(
    x=[train_encodings['input_ids'], train_encodings['attention_mask'], train_encodings['token_type_ids']],
    y=train_labels,
    validation_data=(
        [test_encodings['input_ids'], test_encodings['attention_mask'], test_encodings['token_type_ids']],
        test_labels
    ),
    batch_size=32,
    epochs=10
)

# 对测试集进行预测
test_pred = model.predict([test_encodings['input_ids'], test_encodings['attention_mask'], test_encodings['token_type_ids']])
test_pred = np.argmax(test_pred, axis=-1)
test_pred_labels = [[id2tag.get(id, 'O') for id in pred] for pred in test_pred]

# 输出测试集上的结果
for i in range(len(test_sentences)):
    print(test_sentences[i])
    print('True:', test_labels[i])
    print('Pred:', test_pred_labels[i])
```

在这个代码中,我们使用了Hugging Face的Transformers库加载了BERT模型和tokenizer,并将数据转换为BERT输入格式。我们使用了TensorFlow 2.0的Keras API构建了基于BERT+CRF的命名实体识别模型,并使用了CRF层作为最后一层。最后,我们使用了Keras API训练了模型,并使用模型对测试集进行了预测,并输出了预测结果。

需要注意的是,实际应用中,需要根据具体情况进行修改。例如,需要根据数据集的标签数量修改`num_tags`的值,根据具体的数据集修改`load_data`函数,等等。此外,上面的代码中`tag2id`、`id2tag`、`num_tags`均未定义,`np`(numpy)也没有导入,运行前需要自行补充。

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 4
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值