Attaching TextCNN and LSTM heads to a BERT model

We build the models with keras_bert.
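All of the snippets below are assumed to share the following imports and paths. The paths are placeholders; point them at a downloaded pretrained BERT checkpoint (for Chinese text, e.g. Google's chinese_L-12_H-768_A-12):

from keras_bert import load_trained_model_from_checkpoint
from keras.layers import (Input, Lambda, Dense, Conv1D,
                          GlobalMaxPooling1D, Concatenate, LSTM, Layer)
from keras.models import Model
from keras.optimizers import Adam

# placeholder paths -- replace with the files from a pretrained BERT checkpoint
config_path = 'bert/bert_config.json'
checkpoint_path = 'bert/bert_model.ckpt'
vocab_path = 'bert/vocab.txt'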

# BERT baseline: classify directly from the [CLS] vector
def get_model():
    bert_model = load_trained_model_from_checkpoint(config_path, checkpoint_path)
    # fine-tune the whole encoder rather than freezing it
    for layer in bert_model.layers:
        layer.trainable = True
    T1 = Input(shape=(None,))  # token indices
    T2 = Input(shape=(None,))  # segment indices
    T = bert_model([T1, T2])   # (batch, seq_len, hidden) sequence output
    T = Lambda(lambda x: x[:, 0])(T)  # keep only the [CLS] position
    output = Dense(4, activation='softmax')(T)  # four-way classifier
    model = Model([T1, T2], output)
    model.compile(
        loss='categorical_crossentropy',
        optimizer=Adam(1e-5),  # small learning rate for fine-tuning
        metrics=['accuracy']
    )
    model.summary()
    return model
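As a quick usage sketch (the sample sentence is hypothetical; vocab_path comes from the setup above), the two inputs are the token-index and segment-index sequences produced by keras_bert's Tokenizer:

from keras_bert import load_vocabulary, Tokenizer
import numpy as np

token_dict = load_vocabulary(vocab_path)
tokenizer = Tokenizer(token_dict)

# encode() returns (token indices, segment indices) padded to max_len
indices, segments = tokenizer.encode('a sample sentence to classify', max_len=128)

model = get_model()
probs = model.predict([np.array([indices]), np.array([segments])])
print(probs.shape)  # (1, 4): one probability per class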

The code above is the plain BERT classifier for a four-class task. Attaching a TextCNN directly to the sequence output T, however, raises an error:

def get_model():
    bert_model = load_trained_model_from_checkpoint(config_path, checkpoint_path)
    for layer in bert_model.layers:
        layer.trainable = True
    T1 = Input(shape=(None,))
    T2 = Input(shape=(None,))
    T = bert_model([T1, T2])  # output still carries BERT's mask
    # TextCNN head: parallel Conv1D branches with kernel sizes 3/4/5
    convs = []
    for kernel_size in [3, 4, 5]:
        c = Conv1D(128, kernel_size, activation='relu')(T)  # fails: Conv1D receives the mask
        c = GlobalMaxPooling1D()(c)
        convs.append(c)
    x = Concatenate()(convs)
    output = Dense(4, activation='softmax')(x)
    model = Model([T1, T2], output)
    model.compile(
        loss='categorical_crossentropy',
        optimizer=Adam(1e-5),
        metrics=['accuracy']
    )
    model.summary()
    return model

The code above is the BERT + TextCNN model; it fails with:

TypeError: Layer conv1d_1 does not support masking, but was passed an input_mask: Tensor("model_2/Encoder-12-FeedForward-Add/All:0", shape=(?, ?), dtype=bool)

The cause is that CNN layers do not support masked input, while the BERT encoder propagates a mask along with its output. The fix is to define a NonMasking layer that swallows the mask and insert it before the CNN layers:

class NonMasking(Layer):
    """Identity layer that accepts a mask but refuses to propagate it."""
    def __init__(self, **kwargs):
        self.supports_masking = True  # accept masked input without raising
        super(NonMasking, self).__init__(**kwargs)

    def compute_mask(self, inputs, input_mask=None):
        return None  # do not pass the mask on to the next layers

    def call(self, x, mask=None):
        return x  # pass the data through unchanged

    def compute_output_shape(self, input_shape):  # Keras 2 name (get_output_shape_for in Keras 1)
        return input_shape
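As a minimal self-contained check (the toy vocabulary size and dimensions are made up for illustration), an Embedding layer with mask_zero=True produces a mask just as the BERT encoder does, and NonMasking lets a Conv1D follow it:

from keras.layers import Embedding

inp = Input(shape=(None,))
h = Embedding(100, 8, mask_zero=True)(inp)  # emits a mask, like bert_model
h = NonMasking()(h)                         # the mask stops here
h = Conv1D(16, 3, activation='relu')(h)     # no TypeError, since no mask arrives
Model(inp, h).summary()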

def get_model():
    bert_model = load_trained_model_from_checkpoint(config_path, checkpoint_path)
    for layer in bert_model.layers:
        layer.trainable = True
    T1 = Input(shape=(None,))
    T2 = Input(shape=(None,))
    T = bert_model([T1, T2])
    T = NonMasking()(T)  # strip BERT's mask before the convolutions
    convs = []
    for kernel_size in [3, 4, 5]:
        c = Conv1D(128, kernel_size, activation='relu')(T)
        c = GlobalMaxPooling1D()(c)
        convs.append(c)
    x = Concatenate()(convs)  # concatenate the three pooled feature vectors
    output = Dense(4, activation='softmax')(x)
    model = Model([T1, T2], output)
    model.compile(
        loss='categorical_crossentropy',
        optimizer=Adam(1e-5),
        metrics=['accuracy']
    )
    model.summary()
    return model

With NonMasking in place, the BERT + TextCNN model builds without errors.
Other heads, such as an LSTM, can be attached the same way; note that Keras recurrent layers support masking natively, so the LSTM can consume BERT's output directly:

def get_model():
    bert_model = load_trained_model_from_checkpoint(config_path, checkpoint_path)
    for layer in bert_model.layers:
        layer.trainable = True
    T1 = Input(shape=(None,))
    T2 = Input(shape=(None,))
    T = bert_model([T1, T2])
    # LSTM supports masking, so no NonMasking layer is required here
    x = LSTM(128, return_sequences=False)(T)  # keep only the final hidden state
    output = Dense(4, activation='softmax')(x)
    model = Model([T1, T2], output)
    model.compile(
        loss='categorical_crossentropy',
        optimizer=Adam(1e-5),
        metrics=['accuracy']
    )
    model.summary()
    return model
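Finally, a training sketch under the same assumptions (train_texts and train_labels are hypothetical toy data; real data would replace them). Since the loss is categorical_crossentropy, the four-class integer labels must be one-hot encoded:

import numpy as np
from keras.utils import to_categorical

train_texts = ['first example', 'second example']  # hypothetical toy data
train_labels = [0, 3]                              # class ids in {0, 1, 2, 3}

pairs = [tokenizer.encode(t, max_len=128) for t in train_texts]
X1 = np.array([p[0] for p in pairs])  # token indices
X2 = np.array([p[1] for p in pairs])  # segment indices
y = to_categorical(train_labels, num_classes=4)  # one-hot labels

model = get_model()
model.fit([X1, X2], y, batch_size=16, epochs=3)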