1. BertCrfForNer
from transformers import BertPreTrainedModel,BertConfig,BertModel
from torchcrf import CRF  # pytorch-crf package; its CRF takes num_tags and batch_first
import torch
import torch.nn as nn
class BertCrfForNer(BertPreTrainedModel):
    def __init__(self,config):
        super(BertCrfForNer,self).__init__(config)
        self.bert = BertModel(config)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.classifier = nn.Linear(config.hidden_size,config.num_labels)
        self.crf = CRF(num_tags = config.num_labels,batch_first = True)
        self.init_weights()
    def forward(self,input_ids,attention_mask,token_type_ids,labels = None):
        # input_ids:[batch_size,max_len]
        # attention_mask:[batch_size,max_len]
        # token_type_ids:[batch_size,max_len]
        # outputs:([batch_size,max_len,bert_dim],[batch_size,bert_dim])
        outputs = self.bert(input_ids = input_ids,attention_mask = attention_mask,token_type_ids = token_type_ids)
        # sequence_output:[batch_size,max_len,bert_dim]
        sequence_output = outputs[0]
        # sequence_output:[batch_size,max_len,bert_dim]
        sequence_output = self.dropout(sequence_output)
        # logits:[batch_size,max_len,num_tags]
        logits = self.classifier(sequence_output)
        # outputs:([batch_size,max_len,num_tags],)
        outputs = (logits,)
        if labels is not None:
            # the CRF forward pass returns the sequence log-likelihood, so the loss is its negation
            loss = self.crf(emissions = logits,tags = labels,mask = attention_mask.bool())
            # outputs:(loss,[batch_size,max_len,num_tags])
            outputs = (-1 * loss,) + outputs
        # outputs:(loss,[batch_size,max_len,num_tags]) or ([batch_size,max_len,num_tags],)
        return outputs
pretrain_model_path = 'bert-base-chinese'
input_ids = torch.randint(0,100,[4,10])
token_type_ids = torch.zeros([4,10]).long()
attention_mask = torch.ones([4,10]).long()
config = BertConfig.from_pretrained(pretrain_model_path,num_labels = 20)
model = BertCrfForNer(config)
outputs = model(input_ids = input_ids,
                attention_mask = attention_mask,
                token_type_ids = token_type_ids)
print('output.shape:',outputs[0].shape)
Output:
output.shape: torch.Size([4, 10, 20])
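At inference time the CRF head is normally used to decode the best tag path (Viterbi decoding) rather than taking an argmax over the logits, and with gold labels the first element of outputs becomes the training loss. A minimal sketch under the same toy inputs, assuming the pytorch-crf CRF used above (its decode method returns one list of tag ids per sequence):
# Viterbi decoding of the most likely tag sequence for each sentence
logits = outputs[0]                                          # [4,10,20]
pred_tags = model.crf.decode(logits,mask = attention_mask.bool())
print('num sentences:',len(pred_tags),'tags per sentence:',len(pred_tags[0]))
# with gold labels the first element of outputs is the negated CRF log-likelihood
labels = torch.randint(0,20,[4,10]).long()
outputs = model(input_ids = input_ids,
                attention_mask = attention_mask,
                token_type_ids = token_type_ids,
                labels = labels)
print('loss:',outputs[0].item())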
2. LEBertSoftmaxForNer
import torch
import torch.nn as nn
from torch.nn import CrossEntropyLoss
from transformers import BertPreTrainedModel,BertModel,BertConfig
from lebert import LEBertModel
class LEBertSoftmaxForNer(BertPreTrainedModel):
    def __init__(self,config):
        super(LEBertSoftmaxForNer,self).__init__(config)
        self.word_embeddings = nn.Embedding(config.word_vocab_size,config.word_embed_dim)
        self.num_labels = config.num_labels
        self.bert = LEBertModel(config)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.classifier = nn.Linear(config.hidden_size,config.num_labels)
        self.loss_type = config.loss_type
        self.init_weights()
    def forward(self,input_ids,attention_mask,token_type_ids,word_ids,word_mask,ignore_index,labels = None):
        # input_ids:[batch_size,max_len]
        # attention_mask:[batch_size,max_len]
        # token_type_ids:[batch_size,max_len]
        # word_ids:[batch_size,max_len,num_words]
        # word_mask:[batch_size,max_len,num_words]
        # ignore_index:0
        # word_embeddings:[batch_size,max_len,num_words,word_dim]
        word_embeddings = self.word_embeddings(word_ids)
        # outputs:([batch_size,max_len,bert_dim],[batch_size,bert_dim])
        outputs = self.bert(
            input_ids = input_ids,
            attention_mask = attention_mask,
            token_type_ids = token_type_ids,
            word_embeddings = word_embeddings,
            word_mask = word_mask
        )
        # sequence_output:[batch_size,max_len,bert_dim]
        sequence_output = outputs[0]
        # sequence_output:[batch_size,max_len,bert_dim]
        sequence_output = self.dropout(sequence_output)
        # logits:[batch_size,max_len,num_tags]
        logits = self.classifier(sequence_output)
        # outputs:([batch_size,max_len,num_tags],)
        outputs = (logits,) + outputs[2:]
        if labels is not None:
            # LabelSmoothingCrossEntropy and FocalLoss are custom losses from the original project (not shown here)
            assert self.loss_type in ['lsr','focal','ce']
            if self.loss_type == 'lsr':
                loss_fct = LabelSmoothingCrossEntropy(ignore_index = ignore_index)
            elif self.loss_type == 'focal':
                loss_fct = FocalLoss(ignore_index = ignore_index)
            else:
                loss_fct = CrossEntropyLoss(ignore_index = ignore_index)
            if attention_mask is not None:
                # active_loss:[batch_size*max_len]
                active_loss = attention_mask.contiguous().view(-1) == 1
                # active_logits:[total number of valid tokens in the batch,num_tags]
                active_logits = logits.contiguous().view(-1,self.num_labels)[active_loss]
                # active_labels:[total number of valid tokens in the batch]
                active_labels = labels.contiguous().view(-1)[active_loss]
                loss = loss_fct(active_logits,active_labels)
            else:
                loss = loss_fct(logits.view(-1,self.num_labels),labels.view(-1))
            # outputs:(loss,[batch_size,max_len,num_tags])
            outputs = (loss,) + outputs
        return outputs
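LabelSmoothingCrossEntropy and FocalLoss above come from the original project and are not defined in this snippet. For reference, a minimal focal-loss sketch that matches how loss_fct is called here (flattened logits and targets plus an ignore_index); this is an assumed standard implementation, not the project's exact one:
class FocalLoss(nn.Module):
    # focal loss: down-weights easy examples by (1 - p_t) ** gamma before the cross-entropy term
    def __init__(self,gamma = 2.0,ignore_index = -100):
        super(FocalLoss,self).__init__()
        self.gamma = gamma
        self.ignore_index = ignore_index
    def forward(self,logits,targets):
        # logits:[N,num_tags]  targets:[N]
        ce = nn.functional.cross_entropy(logits,targets,
                                         ignore_index = self.ignore_index,
                                         reduction = 'none')
        pt = torch.exp(-ce)                       # probability assigned to the true class
        focal = ((1 - pt) ** self.gamma) * ce
        valid = targets != self.ignore_index      # average only over non-ignored positions
        return focal[valid].mean()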
pretrain_model_path = 'bert-base-chinese'
input_ids = torch.randint(0,100,[4,10])
token_type_ids = torch.zeros([4,10]).long()
attention_mask = torch.ones([4,10]).long()
config = BertConfig.from_pretrained(pretrain_model_path,num_labels = 20)
config.add_layer = 0
config.word_embed_dim = 200
config.loss_type = 'ce'
config.word_vocab_size = 2162
word_ids = torch.ones([4,10,3]).long()
word_mask = torch.ones([4,10,3]).long()
model = LEBertSoftmaxForNer(config)
outputs = model(input_ids = input_ids,
                attention_mask = attention_mask,
                token_type_ids = token_type_ids,
                word_ids = word_ids,
                word_mask = word_mask,
                ignore_index = 0)
print('output.shape:',outputs[0].shape)
Output:
output.shape: torch.Size([4, 10, 20])
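When labels are passed in, the active_loss mask keeps only the positions where attention_mask is 1 and the chosen loss (CrossEntropyLoss here, since config.loss_type = 'ce') is applied to the flattened logits; predictions for the softmax model are a simple per-token argmax. A minimal sketch with the toy inputs above; the random labels use ids 1..19 so none are dropped by ignore_index = 0:
labels = torch.randint(1,20,[4,10]).long()
outputs = model(input_ids = input_ids,
                attention_mask = attention_mask,
                token_type_ids = token_type_ids,
                word_ids = word_ids,
                word_mask = word_mask,
                ignore_index = 0,
                labels = labels)
# outputs:(loss,[batch_size,max_len,num_tags])
loss,logits = outputs[0],outputs[1]
print('loss:',loss.item())
# per-token predictions:[batch_size,max_len]
pred_tags = torch.argmax(logits,dim = -1)
print('pred_tags.shape:',pred_tags.shape)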
3. LEBertCrfForNer
import torch
import torch.nn as nn
from transformers import BertConfig,BertPreTrainedModel,BertModel
from lebert import LEBertModel
from torchcrf import CRF  # pytorch-crf package; its CRF takes num_tags and batch_first
class LEBertCrfForNer(BertPreTrainedModel):
    def __init__(self,config):
        super(LEBertCrfForNer,self).__init__(config)
        self.word_embeddings = nn.Embedding(config.word_vocab_size,config.word_embed_dim)
        self.bert = LEBertModel(config)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.classifier = nn.Linear(config.hidden_size,config.num_labels)
        self.crf = CRF(num_tags = config.num_labels,batch_first = True)
        self.init_weights()
    def forward(self,input_ids,attention_mask,token_type_ids,word_ids,word_mask,labels = None):
        # input_ids:[batch_size,max_len]
        # attention_mask:[batch_size,max_len]
        # token_type_ids:[batch_size,max_len]
        # word_ids:[batch_size,max_len,num_words]
        # word_mask:[batch_size,max_len,num_words]
        # word_embeddings:[batch_size,max_len,num_words,word_dim]
        word_embeddings = self.word_embeddings(word_ids)
        # outputs:([batch_size,max_len,bert_dim],[batch_size,bert_dim])
        outputs = self.bert(
            input_ids = input_ids,
            attention_mask = attention_mask,
            token_type_ids = token_type_ids,
            word_embeddings = word_embeddings,
            word_mask = word_mask
        )
        # sequence_output:[batch_size,max_len,bert_dim]
        sequence_output = outputs[0]
        sequence_output = self.dropout(sequence_output)
        # logits:[batch_size,max_len,num_tags]
        logits = self.classifier(sequence_output)
        # outputs:([batch_size,max_len,num_tags],)
        outputs = (logits,)
        if labels is not None:
            # the CRF forward pass returns the sequence log-likelihood, so the loss is its negation
            loss = self.crf(emissions = logits,tags = labels,mask = attention_mask.bool())
            # outputs:(loss,[batch_size,max_len,num_tags])
            outputs = (-1 * loss,) + outputs
        return outputs
pretrain_model_path = 'bert-base-chinese'
input_ids = torch.randint(0,100,[4,10])
token_type_ids = torch.zeros([4,10]).long()
attention_mask = torch.ones([4,10]).long()
config = BertConfig.from_pretrained(pretrain_model_path,num_labels = 20)
config.add_layer = 0
config.word_embed_dim = 200
config.loss_type = 'ce'
config.word_vocab_size = 2162
word_ids = torch.ones([4,10,3]).long()
word_mask = torch.ones([4,10,3]).long()
model = LEBertCrfForNer(config)
outputs = model(input_ids = input_ids,
                attention_mask = attention_mask,
                token_type_ids = token_type_ids,
                word_ids = word_ids,
                word_mask = word_mask)
print('output.shape:',outputs[0].shape)
Output:
output.shape: torch.Size([4, 10, 20])
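As with BertCrfForNer, the CRF head of LEBertCrfForNer provides both the training loss (the negated sequence log-likelihood) and Viterbi decoding at inference time. A minimal sketch under the toy inputs above:
labels = torch.randint(0,20,[4,10]).long()
outputs = model(input_ids = input_ids,
                attention_mask = attention_mask,
                token_type_ids = token_type_ids,
                word_ids = word_ids,
                word_mask = word_mask,
                labels = labels)
# outputs:(loss,[batch_size,max_len,num_tags])
print('loss:',outputs[0].item())
# Viterbi decoding of the best tag path for each sentence
pred_tags = model.crf.decode(outputs[1],mask = attention_mask.bool())
print('num sentences:',len(pred_tags),'tags per sentence:',len(pred_tags[0]))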