LSTM-CNNs-CRF for NER and other NLP tasks

Original paper: https://arxiv.org/abs/1603.01354

After reading through the paper and a few related implementations, the idea is roughly as follows:

 

The model uses both word-level and char-level representations as input:

Word level, one sequence per sentence: input_word = tf.placeholder(tf.int32, [None, seqlen]); this is just the sentence after word segmentation, e.g. "我 在 吃饭" ("I am eating").

Char level: input_char = tf.placeholder(tf.int32, [None, seqlen, maxchar_perword]). The extra dimension records which characters each word in the word-level sequence is made of. English words often run to seven or eight characters; after Chinese word segmentation most words have at most 4 characters, occasionally 5. The corresponding input looks roughly like

[[我], [在], [吃, 饭]], except that for training it is best to pad every example in a batch to the same length.
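As a minimal sketch (the concrete sizes seqlen = 80 and maxchar_perword = 5 are made-up examples), the two placeholders and a padded char-level example could look like this in TensorFlow 1.x:

import tensorflow as tf

# hypothetical padded sizes, for illustration only
seqlen = 80            # words per sentence after padding
maxchar_perword = 5    # characters per word after padding

# word-level input: one id per word
input_word = tf.placeholder(tf.int32, [None, seqlen], name="input_word")
# char-level input: one id per character of each word
input_char = tf.placeholder(tf.int32, [None, seqlen, maxchar_perword], name="input_char")

# For "我 在 吃饭" the (un-padded) char-level structure is [[我], [在], [吃, 饭]];
# after padding every word to maxchar_perword (0 = PAD) one example becomes:
# [[我, 0, 0, 0, 0], [在, 0, 0, 0, 0], [吃, 饭, 0, 0, 0], ...]  padded out to seqlen words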

 

Both the word-level and char-level inputs then go through embedding layers. After embedding, input_char becomes a 4-D tensor, which is passed through a 2-D convolution, a ReLU, and max pooling; the result is concatenated with the word-level embedding, fed into the LSTM, and finally into a CRF layer. That is basically the whole idea, and it is fairly straightforward to implement in TensorFlow.
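The paper's own code isn't reproduced here, but a rough sketch of that char-CNN branch in TensorFlow 1.x could look like the following (all sizes and names such as char_vocab, char_dim and num_filters are invented for illustration):

import tensorflow as tf

# hypothetical sizes, for illustration only
seqlen, maxchar_perword = 80, 5
word_vocab, char_vocab = 20000, 5000
word_dim, char_dim, num_filters = 200, 30, 30

input_word = tf.placeholder(tf.int32, [None, seqlen])
input_char = tf.placeholder(tf.int32, [None, seqlen, maxchar_perword])

# word-level embedding: [batch, seqlen, word_dim]
w_word = tf.Variable(tf.random_uniform([word_vocab, word_dim], -1.0, 1.0))
word_emb = tf.nn.embedding_lookup(w_word, input_word)

# char-level embedding: a 4-D tensor [batch, seqlen, maxchar_perword, char_dim]
w_char = tf.Variable(tf.random_uniform([char_vocab, char_dim], -1.0, 1.0))
char_emb = tf.nn.embedding_lookup(w_char, input_char)

# fold seqlen into the batch so every word is convolved independently:
# [batch * seqlen, maxchar_perword, char_dim, 1]
conv_in = tf.reshape(char_emb, [-1, maxchar_perword, char_dim, 1])

# 2-D convolution over a 3-char window, then ReLU
conv_filter = tf.get_variable("char_filter", [3, char_dim, 1, num_filters])
conv = tf.nn.relu(tf.nn.conv2d(conv_in, conv_filter,
                               strides=[1, 1, 1, 1], padding="VALID"))

# max pooling over the char positions -> one feature vector per word
char_feat = tf.reshape(tf.reduce_max(conv, axis=[1, 2]), [-1, seqlen, num_filters])

# concat with the word embedding; this is what goes into the BiLSTM and then the CRF
lstm_input = tf.concat([word_emb, char_feat], axis=-1)  # [batch, seqlen, word_dim + num_filters]

From here lstm_input would go through a bidirectional LSTM and finally the CRF layer.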

 

 

I also came across some other approaches online that likewise combine a CNN, an LSTM, and a CRF. The idea is roughly as follows:

Only word-level input is used, i.e. input_word = tf.placeholder(tf.int32, [None, seqlen]). It is fed through a BiLSTM and a CNN layer in parallel, the two outputs are concatenated, and the result goes into a CRF layer. I put together a quick implementation of this variant; here is the code:

import tensorflow as tf
import numpy as np
from tensorflow.contrib import rnn

class BiLstmCnnCRF(object):
    def __init__(
            self, input_x,input_y,batch_size, num_tags, word_vocab_size,
            word_embedd_dim, grad_clip,dropout,regularization,seq_len,
            n_hidden_LSTM=200):
        self.word_vocab_size=word_vocab_size
        self.word_embedd_dim=word_embedd_dim
        self.input_x = input_x
        self.input_y = input_y
        self.batch_size=batch_size
        self.regularization=regularization
        self.dropout_keep_prob = dropout
        self.seq_len=seq_len
        # every sequence in the batch is assumed to be padded to seq_len
        self.max_sequence_in_batch = tf.constant(value=self.seq_len, dtype=tf.int32)
        self.sequence_lengths = tf.convert_to_tensor(self.batch_size * [self.max_sequence_in_batch], dtype=tf.int32)


        with tf.name_scope("word_embedding"):
            self.w_word = tf.Variable(tf.random_uniform([self.word_vocab_size, self.word_embedd_dim], -1, 1), trainable=True,
                                      name="w_word")
            self.embedded_words = tf.nn.embedding_lookup(self.w_word, self.input_x, name="embedded_words")
        with  tf.name_scope("cnn"):
            #batchsize*80*200*1

            cnn_input = tf.reshape(self.embedded_words,[-1,self.seq_len, self.word_embedd_dim,1])


            # filter: [height=1, width=2, in_channels=1, out_channels=30]
            cnn_filter = tf.get_variable(name='filter',
                                         shape=[1, 2, 1, 30],
                                         initializer=tf.random_uniform_initializer(-0.01, 0.01),
                                         dtype=tf.float32)

            cnn_bias = tf.get_variable(name='cnn_bias',
                                       shape=[30],
                                       initializer=tf.random_uniform_initializer(-0.01, 0.01),
                                       dtype=tf.float32)

            # conv over the embedding dimension: batch_size*80*100*30 (with seq_len=80, word_embedd_dim=200)
            cnn_network = tf.add(tf.nn.conv2d(cnn_input,
                                              cnn_filter,
                                              strides=[1, 1, 2, 1],
                                              padding="VALID",
                                              name="conv"),
                                 cnn_bias)

            relu_applied = tf.nn.relu(cnn_network)

            max_pool = tf.nn.max_pool(relu_applied,
                                      ksize=[1, 1, 100, 1],
                                      strides=[1, 1, 1, 1],
                                      padding='VALID')

            self.cnn_output = tf.reshape(max_pool, [-1, self.seq_len, 30])


        with tf.name_scope("biLSTM"):
            # forward LSTM cell
            lstm_fw_cell = rnn.BasicLSTMCell(n_hidden_LSTM, state_is_tuple=True)
            lstm_bw_cell = rnn.BasicLSTMCell(n_hidden_LSTM, state_is_tuple=True)
            (output_fw, output_bw), _ = tf.nn.bidirectional_dynamic_rnn(lstm_fw_cell,
                                                                        lstm_bw_cell, self.embedded_words,
                                                                        dtype=tf.float32)  # output : [batch_size, timesteps, cell_fw.output_size]
            self.biLstm = tf.concat([output_fw, output_bw], axis=-1, name="biLstm")
            self.biLstm_clip = tf.clip_by_value(self.biLstm, -grad_clip, grad_clip)
            self.biLstm_dropout = tf.nn.dropout(self.biLstm_clip, self.dropout_keep_prob)


        with tf.name_scope("concat"):
            self.output_concat = tf.concat([self.cnn_output, self.biLstm_dropout], axis=-1)

        with tf.name_scope("output"):
            W_out = tf.get_variable("W_out", shape=[2 * n_hidden_LSTM+30, num_tags],
                                    initializer=tf.contrib.layers.xavier_initializer())
            b_out = tf.Variable(tf.constant(0.0, shape=[num_tags]), name="b_out")

            # [batch_size * timesteps, 2*n_hidden_LSTM + 30]
            self.concat_reshaped = tf.reshape(self.output_concat, [-1, 2 * n_hidden_LSTM + 30])

            # [batch_size * timesteps, 2*n_hidden_LSTM + 30] x [2*n_hidden_LSTM + 30, num_tags]
            # = [batch_size * timesteps, num_tags]
            self.predictions = tf.nn.xw_plus_b(self.concat_reshaped, W_out, b_out,
                                               name="predictions")
            # [batch_size, max_seq_len, num_tags]
            self.logits = tf.reshape(self.predictions, [self.batch_size, -1, num_tags],
                                     name="logits")

            labels_softmax_argmax = tf.argmax(self.logits, axis=-1)
            self.pred = tf.cast(labels_softmax_argmax, tf.int32,name="pred")
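            # Note: self.pred is a plain per-token argmax over the logits; the proper
            # CRF-decoded prediction is self.viterbi_sequence in the "crf_pred" scope below.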


        with tf.name_scope("l2loss"):
            self.tv = tf.trainable_variables()
            self.regularization_cost = self.regularization * tf.reduce_sum([tf.nn.l2_loss(v) for v in self.tv])

        with tf.name_scope("loss"):
            log_likelihood, self.transition_params = tf.contrib.crf.crf_log_likelihood(
                self.logits, self.input_y,self.sequence_lengths)

            self.loss = tf.reduce_mean(-log_likelihood, name="loss") + self.regularization_cost
            self.train_op = tf.train.AdamOptimizer().minimize(self.loss)

        with  tf.name_scope("crf_pred"):
            self.viterbi_sequence, viterbi_score=tf.contrib.crf.crf_decode(self.logits, self.transition_params, self.sequence_lengths)

 

 

This is just a quick sketch of one way to implement it.
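For completeness, here is a rough sketch of how the class above might be wired up and trained; the placeholder setup, hyper-parameters, and the random dummy batch are all invented for illustration:

import numpy as np
import tensorflow as tf

batch_size, seq_len, num_tags = 32, 80, 9
word_vocab_size, word_embedd_dim = 20000, 200

input_x = tf.placeholder(tf.int32, [batch_size, seq_len], name="input_x")
input_y = tf.placeholder(tf.int32, [batch_size, seq_len], name="input_y")

model = BiLstmCnnCRF(input_x=input_x, input_y=input_y, batch_size=batch_size,
                     num_tags=num_tags, word_vocab_size=word_vocab_size,
                     word_embedd_dim=word_embedd_dim, grad_clip=5.0,
                     dropout=0.5, regularization=1e-4, seq_len=seq_len)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # dummy batch: random word ids and tag ids, already padded to seq_len
    x_batch = np.random.randint(0, word_vocab_size, size=(batch_size, seq_len))
    y_batch = np.random.randint(0, num_tags, size=(batch_size, seq_len))
    _, loss, tags = sess.run([model.train_op, model.loss, model.viterbi_sequence],
                             feed_dict={input_x: x_batch, input_y: y_batch})
    print(loss, tags.shape)  # tags: [batch_size, seq_len] CRF-decoded label ids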
