The data part is covered in a previous post: 陈丹琦 关系抽取 数据部分 https://blog.csdn.net/holasyb/article/details/121549733?spm=1001.2014.3001.5501. This post walks through the network model of the entity extraction part.
def run_batch(self, samples_list, try_cuda=True, training=True):
    # convert samples to input tensors
    tokens_tensor, attention_mask_tensor, bert_spans_tensor, spans_mask_tensor, spans_ner_label_tensor, sentence_length = self._get_input_tensors_batch(samples_list, training)

    output_dict = {
        'ner_loss': 0,
    }

    if training:
        self.bert_model.train()
        ner_loss, ner_logits, spans_embedding = self.bert_model(
            input_ids = tokens_tensor.to(self._model_device),
            spans = bert_spans_tensor.to(self._model_device),
            spans_mask = spans_mask_tensor.to(self._model_device),
            spans_ner_label = spans_ner_label_tensor.to(self._model_device),
            attention_mask = attention_mask_tensor.to(self._model_device),
        )
        output_dict['ner_loss'] = ner_loss.sum()
        output_dict['ner_llh'] = F.log_softmax(ner_logits, dim=-1)
    else:
        self.bert_model.eval()
        with torch.no_grad():
            ner_logits, spans_embedding, last_hidden = self.bert_model(
                input_ids = tokens_tensor.to(self._model_device),
                spans = bert_spans_tensor.to(self._model_device),
                spans_mask = spans_mask_tensor.to(self._model_device),
                spans_ner_label = None,
                attention_mask = attention_mask_tensor.to(self._model_device),
            )
        _, predicted_label = ner_logits.max(2)   # argmax over the label dimension
        predicted_label = predicted_label.cpu().numpy()
        last_hidden = last_hidden.cpu().numpy()

        predicted = []
        pred_prob = []
        hidden = []
        for i, sample in enumerate(samples_list):
            ner = []
            prob = []
            lh = []
            # iterate only over the sample's real spans, skipping batch padding
            for j in range(len(sample['spans'])):
                ner.append(predicted_label[i][j])
                # prob.append(F.softmax(ner_logits[i][j], dim=-1).cpu().numpy())
                prob.append(ner_logits[i][j].cpu().numpy())
                lh.append(last_hidden[i][j])
            predicted.append(ner)
            pred_prob.append(prob)
            hidden.append(lh)
        output_dict['pred_ner'] = predicted
        output_dict['ner_probs'] = pred_prob
        output_dict['ner_last_hidden'] = hidden
    return output_dict
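For orientation, this is roughly how an evaluation loop consumes run_batch (a sketch, not code from the repo: model stands for the entity-model wrapper that owns bert_model, samples_batch for a list of sample dicts with a 'spans' field):

output_dict = model.run_batch(samples_batch, training=False)
for sample, preds in zip(samples_batch, output_dict['pred_ner']):
    for span, label_id in zip(sample['spans'], preds):
        if label_id != 0:                  # assuming label id 0 is the non-entity class
            print(span, '->', label_id)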
_get_input_tensors_batch: further processes the input samples; every output below is a tensor.
tokens_tensor: vocabulary ids of the tokens, shape [batch, max_len]
attention_mask_tensor: attention mask that distinguishes real tokens from padding, shape [batch, max_len]
bert_spans_tensor: start index, end index, and width of each span, shape [batch, span_num, 3]
spans_mask_tensor: mask that distinguishes real spans from padded ones, shape [batch, span_num]
spans_ner_label_tensor: label of each span, shape [batch, span_num]
sentence_length: length of each input sentence
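To make the span layout concrete, here is a hand-built toy batch illustrating the [start, end, width] convention, where width is the number of tokens in the span (this only illustrates the format; the real _get_input_tensors_batch also maps words to BERT subwords and pads across the batch):

import torch

max_span_length = 4
tokens = ['[CLS]', 'Bar', '##ack', 'Obama', 'visited', 'Paris', '[SEP]']
# enumerate every span of up to max_span_length tokens over positions 1..5
spans = []
for start in range(1, 6):
    for width in range(1, max_span_length + 1):
        end = start + width - 1
        if end <= 5:
            spans.append((start, end, width))
bert_spans_tensor = torch.tensor([spans])        # [1, span_num, 3]
spans_mask_tensor = torch.ones(1, len(spans))    # all spans real, none padded
print(bert_spans_tensor.shape)                   # torch.Size([1, 14, 3])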
#####################################################################
# Imports as in the PURE repo (entity/models.py); FeedForward and
# batched_index_select come from AllenNLP.
import torch
from torch import nn
import torch.nn.functional as F
from torch.nn import CrossEntropyLoss
from transformers import BertModel, BertPreTrainedModel
from allennlp.modules import FeedForward
from allennlp.nn.util import batched_index_select

class BertForEntity(BertPreTrainedModel):
    def __init__(self, config, num_ner_labels, head_hidden_dim=150, width_embedding_dim=150, max_span_length=8):
        super().__init__(config)
        self.bert = BertModel(config)
        self.hidden_dropout = nn.Dropout(config.hidden_dropout_prob)
        # one embedding per possible span width (0..max_span_length)
        self.width_embedding = nn.Embedding(max_span_length + 1, width_embedding_dim)
        self.ner_classifier = nn.Sequential(
            FeedForward(input_dim=config.hidden_size * 2 + width_embedding_dim,
                        num_layers=2,
                        hidden_dims=head_hidden_dim,
                        activations=F.relu,
                        dropout=0.2),
            nn.Linear(head_hidden_dim, num_ner_labels)
        )
        self.init_weights()

    def _get_span_embeddings(self, input_ids, spans, token_type_ids=None, attention_mask=None):
        """
        spans: [batch_size, num_spans, 3]; 0: left_end, 1: right_end, 2: width
        spans_mask: (batch_size, num_spans)
        """
        # index [0] works for both the old tuple return and the newer ModelOutput
        sequence_output = self.bert(input_ids=input_ids, token_type_ids=token_type_ids, attention_mask=attention_mask)[0]
        sequence_output = self.hidden_dropout(sequence_output)

        spans_start = spans[:, :, 0].view(spans.size(0), -1)
        spans_start_embedding = batched_index_select(sequence_output, spans_start)
        spans_end = spans[:, :, 1].view(spans.size(0), -1)
        spans_end_embedding = batched_index_select(sequence_output, spans_end)
        spans_width = spans[:, :, 2].view(spans.size(0), -1)
        spans_width_embedding = self.width_embedding(spans_width)

        # concatenate embeddings of the left/right endpoints and the width embedding
        spans_embedding = torch.cat((spans_start_embedding, spans_end_embedding, spans_width_embedding), dim=-1)
        # spans_embedding: (batch_size, num_spans, hidden_size*2 + width_embedding_dim)
        return spans_embedding

    def forward(self, input_ids, spans, spans_mask, spans_ner_label=None, token_type_ids=None, attention_mask=None):
        spans_embedding = self._get_span_embeddings(input_ids, spans, token_type_ids=token_type_ids, attention_mask=attention_mask)
        ffnn_hidden = []
        hidden = spans_embedding
        for layer in self.ner_classifier:
            hidden = layer(hidden)
            ffnn_hidden.append(hidden)
        logits = ffnn_hidden[-1]

        if spans_ner_label is not None:
            loss_fct = CrossEntropyLoss(reduction='sum')
            if attention_mask is not None:
                # padded spans get ignore_index so they do not contribute to the loss
                active_loss = spans_mask.view(-1) == 1
                active_logits = logits.view(-1, logits.shape[-1])
                active_labels = torch.where(
                    active_loss, spans_ner_label.view(-1), torch.tensor(loss_fct.ignore_index).type_as(spans_ner_label)
                )
                loss = loss_fct(active_logits, active_labels)
            else:
                loss = loss_fct(logits.view(-1, logits.shape[-1]), spans_ner_label.view(-1))
            return loss, logits, spans_embedding
        else:
            # no labels: the span embedding doubles as the returned "last hidden" state
            return logits, spans_embedding, spans_embedding
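As a sanity check, the model can be instantiated with a tiny random config and fed dummy tensors (the sizes here are toy values of my own choosing, not the paper's settings, and AllenNLP must be installed for FeedForward and batched_index_select):

from transformers import BertConfig

config = BertConfig(hidden_size=64, num_hidden_layers=2, num_attention_heads=2,
                    intermediate_size=128, vocab_size=1000)
model = BertForEntity(config, num_ner_labels=7, max_span_length=8)

batch, max_len, span_num = 2, 16, 10
input_ids = torch.randint(0, 1000, (batch, max_len))
attention_mask = torch.ones(batch, max_len, dtype=torch.long)
starts = torch.randint(0, max_len, (batch, span_num))
widths = torch.randint(0, 8, (batch, span_num))
ends = (starts + widths).clamp(max=max_len - 1)
spans = torch.stack([starts, ends, widths], dim=-1)      # [batch, span_num, 3]
spans_mask = torch.ones(batch, span_num, dtype=torch.long)

logits, spans_embedding, _ = model(input_ids, spans, spans_mask,
                                   attention_mask=attention_mask)
print(logits.shape)            # torch.Size([2, 10, 7])
print(spans_embedding.shape)   # torch.Size([2, 10, 278])  i.e. 64*2 + 150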
init: besides the usual BERT backbone, it adds an embedding table for the span width.
Forward pass: the sentence is first encoded with BERT; then, for each span in bert_spans_tensor, the hidden vectors of its first and last tokens are picked out (via batched_index_select, sketched below), a width embedding is looked up for the span length, and the three are concatenated. This yields a tensor of shape batch * span_num * (hidden_size * 2 + width_embedding_dim), which the final classification head maps to num_ner_labels scores per span, i.e. the entity types plus one non-entity class.
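To make the gather step concrete, here is a minimal standalone sketch of what batched_index_select does (a simplified torch.gather equivalent with a hypothetical name, not AllenNLP's actual implementation):

import torch

def simple_batched_index_select(target, indices):
    # target: [batch, seq_len, hidden]; indices: [batch, num_spans]
    # returns [batch, num_spans, hidden], placing target[b, indices[b, i]] at [b, i]
    hidden = target.size(-1)
    idx = indices.unsqueeze(-1).expand(-1, -1, hidden)   # [batch, num_spans, hidden]
    return torch.gather(target, 1, idx)

sequence_output = torch.randn(2, 7, 16)        # toy BERT output
spans_start = torch.tensor([[1, 3], [2, 5]])   # start index of each span
start_embeddings = simple_batched_index_select(sequence_output, spans_start)
print(start_embeddings.shape)                  # torch.Size([2, 2, 16])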
loss: CrossEntropyLoss, summed over the real (non-padded) spans, as illustrated below.
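A minimal sketch of the masked span loss as implemented in forward: padded spans have their labels replaced by ignore_index, so CrossEntropyLoss skips them (the numbers here are toy values):

import torch
from torch.nn import CrossEntropyLoss

loss_fct = CrossEntropyLoss(reduction='sum')        # ignore_index defaults to -100
logits = torch.randn(2, 3, 5)                       # [batch, span_num, num_labels]
labels = torch.tensor([[1, 2, 0], [4, 0, 0]])
spans_mask = torch.tensor([[1, 1, 0], [1, 0, 0]])   # trailing spans are padding

active = spans_mask.view(-1) == 1
active_labels = torch.where(active, labels.view(-1),
                            torch.tensor(loss_fct.ignore_index))
loss = loss_fct(logits.view(-1, 5), active_labels)  # padded spans contribute nothing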
Relation classification to follow in a later post.