1. BertCrfForNer
from transformers import BertPreTrainedModel,BertConfig,BertModel
from torchcrf import CRF  # pytorch-crf package; its CRF takes num_tags and batch_first
import torch
import torch.nn as nn
class BertCrfForNer(BertPreTrainedModel):
    def __init__(self,config):
        super(BertCrfForNer,self).__init__(config)
        self.bert = BertModel(config)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.classifier = nn.Linear(config.hidden_size,config.num_labels)
        self.crf = CRF(num_tags = config.num_labels,batch_first = True)
        self.init_weights()
    def forward(self,input_ids,attention_mask,token_type_ids,labels = None):
        # input_ids:[batch_size,max_len]
        # attention_mask:[batch_size,max_len]
        # token_type_ids:[batch_size,max_len]
        # outputs:([batch_size,max_len,bert_dim],[batch_size,bert_dim])
        outputs = self.bert(input_ids = input_ids,attention_mask = attention_mask,token_type_ids = token_type_ids)
        # sequence_output:[batch_size,max_len,bert_dim]
        sequence_output = outputs[0]
        # sequence_output:[batch_size,max_len,bert_dim]
        sequence_output = self.dropout(sequence_output)
        # logits:[batch_size,max_len,num_tags]
        logits = self.classifier(sequence_output)
        # outputs:([batch_size,max_len,num_tags],)
        outputs = (logits,)
        if labels is not None:
            # the CRF forward pass returns the sequence log-likelihood, so the loss is its negation
            loss = self.crf(emissions = logits,tags = labels,mask = attention_mask.bool())
            # outputs:(loss,[batch_size,max_len,num_tags])
            outputs = (-1 * loss,) + outputs
        # outputs:(loss,[batch_size,max_len,num_tags]) or ([batch_size,max_len,num_tags],)
        return outputs
pretrain_model_path = 'bert-base-chinese'
input_ids = torch.randint(0,100,[4,10])
token_type_ids = torch.zeros([4,10]).long()
attention_mask = torch.ones([4,10]).long()
config = BertConfig.from_pretrained(pretrain_model_path,num_labels = 20)
model = BertCrfForNer(config)
outputs = model(input_ids = input_ids,
                attention_mask = attention_mask,
                token_type_ids = token_type_ids)
print('output.shape:',outputs[0].shape)
Output:
output.shape: torch.Size([4, 10, 20])
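At inference time the CRF head is normally used to decode the best tag path (Viterbi decoding) rather than taking an argmax over the logits, and with gold labels the first element of outputs becomes the training loss. A minimal sketch under the same toy inputs, assuming the pytorch-crf CRF used above (its decode method returns one list of tag ids per sequence):
# Viterbi decoding of the most likely tag sequence for each sentence
logits = outputs[0]                                          # [4,10,20]
pred_tags = model.crf.decode(logits,mask = attention_mask.bool())
print('num sentences:',len(pred_tags),'tags per sentence:',len(pred_tags[0]))
# with gold labels the first element of outputs is the negated CRF log-likelihood
labels = torch.randint(0,20,[4,10]).long()
outputs = model(input_ids = input_ids,
                attention_mask = attention_mask,
                token_type_ids = token_type_ids,
                labels = labels)
print('loss:',outputs[0].item())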
2. LEBertSoftmaxForNer
import torch
import torch.nn as nn
from torch.nn import CrossEntropyLoss
from transformers import BertPreTrainedModel,BertModel,BertConfig
from lebert import LEBertModel
class LEBertSoftmaxForNer(BertPreTrainedModel):
    def __init__(self,config):
        super(LEBertSoftmaxForNer,self).__init__(config)
        self.word_embeddings = nn.Embedding(config.word_vocab_size,config.word_embed_dim)
        self.num_labels = config.num_labels
        self.bert = LEBertModel(config)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.classifier = nn.Linear(config.hidden_size,config.num_labels)
        self.loss_type = config.loss_type
        self.init_weights()
    def forward(self,input_ids,attention_mask,token_type_ids,word_ids,word_mask,ignore_index,labels = None):
        # input_ids:[batch_size,max_len]
        # attention_mask:[batch_size,max_len]
        # token_type_ids:[batch_size,max_len]
        # word_ids:[batch_size,max_len,num_words]
        # word_mask:[batch_size,max_len,num_words]
        # ignore_index:0
        # word_embeddings:[batch_size,max_len,num_words,word_dim]
        word_embeddings = self.word_embeddings(word_ids)
        # outputs:([batch_size,max_len,bert_dim],[batch_size,bert_dim])
        outputs = self.bert(
            input_ids = input_ids,
            attention_mask = attention_mask,
            token_type_ids = token_type_ids,
            word_embeddings = word_embeddings,
            word_mask = word_mask
        )
        # sequence_output:[batch_size,max_len,bert_dim]
        sequence_output = outputs[0]
        # sequence_output:[batch_size,max_len,bert_dim]
        sequence_output = self.dropout(sequence_output)
        # logits:[batch_size,max_len,num_tags]
        logits = self.classifier(sequence_output)
        # outputs:([batch_size,max_len,num_tags],)
        outputs = (logits,) + outputs[2:]
        if labels is not None:
            # LabelSmoothingCrossEntropy and FocalLoss are custom losses from the original project (not shown here)
            assert self.loss_type in ['lsr','focal','ce']
            if self.loss_type == 'lsr':
                loss_fct = LabelSmoothingCrossEntropy(ignore_index = ignore_index)
            elif self.loss_type == 'focal':
                loss_fct = FocalLoss(ignore_index = ignore_index)
            else:
                loss_fct = CrossEntropyLoss(ignore_index = ignore_index)
            if attention_mask is not None:
                # active_loss:[batch_size*max_len]
                active_loss = attention_mask.contiguous().view(-1) == 1
                # active_logits:[total number of valid tokens in the batch,num_tags]
                active_logits = logits.contiguous().view(-1,self.num_labels)[active_loss]
                # active_labels:[total number of valid tokens in the batch]
                active_labels = labels.contiguous().view(-1)[active_loss]
                loss = loss_fct(active_logits,active_labels)
            else:
                loss = loss_fct(logits.view(-1,self.num_labels),labels.view(-1))
            # outputs:(loss,[batch_size,max_len,num_tags])
            outputs = (loss,) + outputs
        return outputs
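LabelSmoothingCrossEntropy and FocalLoss above come from the original project and are not defined in this snippet. For reference, a minimal focal-loss sketch that matches how loss_fct is called here (flattened logits and targets plus an ignore_index); this is an assumed standard implementation, not the project's exact one:
class FocalLoss(nn.Module):
    # focal loss: down-weights easy examples by (1 - p_t) ** gamma before the cross-entropy term
    def __init__(self,gamma = 2.0,ignore_index = -100):
        super(FocalLoss,self).__init__()
        self.gamma = gamma
        self.ignore_index = ignore_index
    def forward(self,logits,targets):
        # logits:[N,num_tags]  targets:[N]
        ce = nn.functional.cross_entropy(logits,targets,
                                         ignore_index = self.ignore_index,
                                         reduction = 'none')
        pt = torch.exp(-ce)                       # probability assigned to the true class
        focal = ((1 - pt) ** self.gamma) * ce
        valid = targets != self.ignore_index      # average only over non-ignored positions
        return focal[valid].mean()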
pretrain_model_path = 'bert-base-chinese'
input_ids = torch.randint(0,100,[4,10])
token_type_ids = torch.zeros([4,10]).long()
attention_mask = torch.ones([4,10]).long()
config = BertConfig.from_pretrained(pretrain_model_path,num_labels = 20)
config.add_layer = 0
config.word_embed_dim = 200
config.loss_type = 'ce'
config.word_vocab_size = 2162
word_ids = torch.ones([4,10,3]).long()
word_mask = torch.ones([4,10,3]).long()
model = LEBertSoftmaxForNer(config)
outputs = model(input_ids = input_ids,
                attention_mask = attention_mask,
                token_type_ids = token_type_ids,
                word_ids = word_ids,
                word_mask = word_mask,
                ignore_index = 0)
print('output.shape:',outputs[0].shape)
Output:
output.shape: torch.Size([4, 10, 20])
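When labels are passed in, the active_loss mask keeps only the positions where attention_mask is 1 and the chosen loss (CrossEntropyLoss here, since config.loss_type = 'ce') is applied to the flattened logits; predictions for the softmax model are a simple per-token argmax. A minimal sketch with the toy inputs above; the random labels use ids 1..19 so none are dropped by ignore_index = 0:
labels = torch.randint(1,20,[4,10]).long()
outputs = model(input_ids = input_ids,
                attention_mask = attention_mask,
                token_type_ids = token_type_ids,
                word_ids = word_ids,
                word_mask = word_mask,
                ignore_index = 0,
                labels = labels)
# outputs:(loss,[batch_size,max_len,num_tags])
loss,logits = outputs[0],outputs[1]
print('loss:',loss.item())
# per-token predictions:[batch_size,max_len]
pred_tags = torch.argmax(logits,dim = -1)
print('pred_tags.shape:',pred_tags.shape)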
3. LEBertCrfForNer
import torch
import torch.nn as nn
from transformers import BertConfig,BertPreTrainedModel,BertModel
from lebert import LEBertModel
from torchcrf import CRF  # pytorch-crf package; its CRF takes num_tags and batch_first
class LEBertCrfForNer(BertPreTrainedModel):
    def __init__(self,config):
        super(LEBertCrfForNer,self).__init__(config)
        self.word_embeddings = nn.Embedding(config.word_vocab_size,config.word_embed_dim)
        self.bert = LEBertModel(config)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.classifier = nn.Linear(config.hidden_size,config.num_labels)
        self.crf = CRF(num_tags = config.num_labels,batch_first = True)
        self.init_weights()
    def forward(self,input_ids,attention_mask,token_type_ids,word_ids,word_mask,labels = None):
        # input_ids:[batch_size,max_len]
        # attention_mask:[batch_size,max_len]
        # token_type_ids:[batch_size,max_len]
        # word_ids:[batch_size,max_len,num_words]
        # word_mask:[batch_size,max_len,num_words]
        # word_embeddings:[batch_size,max_len,num_words,word_dim]
        word_embeddings = self.word_embeddings(word_ids)
        # outputs:([batch_size,max_len,bert_dim],[batch_size,bert_dim])
        outputs = self.bert(
            input_ids = input_ids,
            attention_mask = attention_mask,
            token_type_ids = token_type_ids,
            word_embeddings = word_embeddings,
            word_mask = word_mask
        )
        # sequence_output:[batch_size,max_len,bert_dim]
        sequence_output = outputs[0]
        sequence_output = self.dropout(sequence_output)
        # logits:[batch_size,max_len,num_tags]
        logits = self.classifier(sequence_output)
        # outputs:([batch_size,max_len,num_tags],)
        outputs = (logits,)
        if labels is not None:
            # the CRF forward pass returns the sequence log-likelihood, so the loss is its negation
            loss = self.crf(emissions = logits,tags = labels,mask = attention_mask.bool())
            # outputs:(loss,[batch_size,max_len,num_tags])
            outputs = (-1 * loss,) + outputs
        return outputs
pretrain_model_path = 'bert-base-chinese'
input_ids = torch.randint(0,100,[4,10])
token_type_ids = torch.zeros([4,10]).long()
attention_mask = torch.ones([4,10]).long()
config = BertConfig.from_pretrained(pretrain_model_path,num_labels = 20)
config.add_layer = 0
config.word_embed_dim = 200
config.loss_type = 'ce'
config.word_vocab_size = 2162
word_ids = torch.ones([4,10,3]).long()
word_mask = torch.ones([4,10,3]).long()
model = LEBertCrfForNer(config)
outputs = model(input_ids = input_ids,
                attention_mask = attention_mask,
                token_type_ids = token_type_ids,
                word_ids = word_ids,
                word_mask = word_mask)
print('output.shape:',outputs[0].shape)
Output:
output.shape: torch.Size([4, 10, 20])
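As with BertCrfForNer, the CRF head of LEBertCrfForNer provides both the training loss (the negated sequence log-likelihood) and Viterbi decoding at inference time. A minimal sketch under the toy inputs above:
labels = torch.randint(0,20,[4,10]).long()
outputs = model(input_ids = input_ids,
                attention_mask = attention_mask,
                token_type_ids = token_type_ids,
                word_ids = word_ids,
                word_mask = word_mask,
                labels = labels)
# outputs:(loss,[batch_size,max_len,num_tags])
print('loss:',outputs[0].item())
# Viterbi decoding of the best tag path for each sentence
pred_tags = model.crf.decode(outputs[1],mask = attention_mask.bool())
print('num sentences:',len(pred_tags),'tags per sentence:',len(pred_tags[0]))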