Focal loss
import torch
import torch.nn as nn
import torch.nn.functional as F

class Focal_loss(nn.Module):
    def __init__(self, alpha=0.25, gamma=2, num_classes=3, size_average=True):
        """
        Focal loss: FL(p_t) = -alpha_t * (1 - p_t)**gamma * log(p_t),
        i.e. cross-entropy modulated by how confident the prediction already is.
        A step-by-step implementation of the focal loss.
        :param alpha: class weight. If a list, it gives per-class weights directly;
                      if a scalar, the weights become [alpha, 1-alpha, 1-alpha, ...],
                      which is commonly used in object detection to suppress the
                      background class (RetinaNet uses 0.25).
        :param gamma: focusing parameter that down-weights easy samples (RetinaNet uses 2).
        :param num_classes: number of classes.
        :param size_average: if True, return the mean loss; otherwise the sum.
        """
        super(Focal_loss, self).__init__()
        self.size_average = size_average
        if isinstance(alpha, list):
            # alpha given as a list of size [num_classes]: fine-grained per-class weights
            assert len(alpha) == num_classes
            print(" --- Focal_loss alpha = {}, assigning a separate weight to each class --- ".format(alpha))
            self.alpha = torch.Tensor(alpha)
        else:
            # scalar alpha: down-weight the first class (the background class in detection)
            assert alpha < 1
            print(" --- Focal_loss alpha = {}, decaying the background class; intended for detection tasks --- ".format(alpha))
            self.alpha = torch.zeros(num_classes)
            self.alpha[0] += alpha
            self.alpha[1:] += (1 - alpha)  # alpha becomes [alpha, 1-alpha, 1-alpha, ...], size [num_classes]
        self.gamma = gamma

    def forward(self, preds, labels):
        """
        Compute the focal loss.
        :param preds: predicted logits, size [B, N, C] (detection) or [B, C] (classification);
                      B = batch size, N = number of boxes, C = number of classes.
        :param labels: ground-truth classes, size [B, N] or [B].
        :return: scalar loss.
        """
        preds = preds.view(-1, preds.size(-1))
        # use a local copy so self.alpha stays intact across forward calls
        # (the original reassigned self.alpha here, which broke the second call)
        alpha = self.alpha.to(preds.device)
        preds_logsoft = F.log_softmax(preds, dim=1)  # log_softmax
        preds_softmax = torch.exp(preds_logsoft)     # softmax
        # gather the probability / log-probability of the target class
        # (cross_entropy = log_softmax + nll_loss)
        preds_softmax = preds_softmax.gather(1, labels.view(-1, 1))
        preds_logsoft = preds_logsoft.gather(1, labels.view(-1, 1))
        alpha = alpha.gather(0, labels.view(-1))
        # torch.pow((1 - preds_softmax), self.gamma) is the focal factor (1 - p_t)**gamma
        loss = -torch.mul(torch.pow((1 - preds_softmax), self.gamma), preds_logsoft)
        loss = torch.mul(alpha, loss.view(-1))
        if self.size_average:
            loss = loss.mean()
        else:
            loss = loss.sum()
        return loss
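A quick sanity check of Focal_loss on random logits; the shapes and the per-class alpha list below are illustrative, not from the original.

# Illustrative smoke test (assumed shapes: 4 samples, 3 classes)
fl = Focal_loss(alpha=[0.2, 0.3, 0.5], gamma=2, num_classes=3)
logits = torch.randn(4, 3)            # [B, C] raw scores
targets = torch.tensor([0, 2, 1, 2])  # [B] class indices
print(fl(logits, targets))            # scalar loss (mean over the batch)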
R-dropout
import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers import RobertaModel, RobertaPreTrainedModel, RobertaConfig
class RobertaClassificationHead(nn.Module):
    """Head for sentence-level classification tasks."""

    def __init__(self, config):
        super().__init__()
        self.dense = nn.Linear(config.hidden_size, config.hidden_size)
        classifier_dropout = (
            config.classifier_dropout if config.classifier_dropout is not None else config.hidden_dropout_prob
        )
        self.dropout = nn.Dropout(classifier_dropout)
        self.out_proj = nn.Linear(config.hidden_size, config.num_labels)

    def forward(self, features, **kwargs):
        x = features[:, 0, :]  # take <s> token (equiv. to [CLS])
        x = self.dropout(x)
        x = self.dense(x)
        x = torch.tanh(x)
        x = self.dropout(x)
        x = self.out_proj(x)
        return x
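For reference, the head takes the full hidden-state tensor and classifies from the <s> position; a shape check with assumed dimensions (hidden_size, sequence length, and batch size here are hypothetical).

# Shape check with assumed dimensions (hidden_size=768, seq_len=128, num_labels=2)
cfg = RobertaConfig(hidden_size=768, num_labels=2)
head = RobertaClassificationHead(cfg)
hidden = torch.randn(8, 128, 768)  # [B, seq_len, hidden_size]
print(head(hidden).shape)          # torch.Size([8, 2])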
# hyperparameters still to be tuned
class SimCSELoss(torch.nn.Module):
    def __init__(self, temperature=0.05):
        super().__init__()
        self.temperature = temperature

    def forward(self, z1, z2):
        # L2-normalize z1 and z2
        z1 = F.normalize(z1, dim=1)
        z2 = F.normalize(z2, dim=1)
        # cosine-similarity matrix between z1 and z2, scaled by the temperature
        sim_matrix = torch.matmul(z1, z2.t()) / self.temperature
        # in-batch labels: each sample's positive is its own counterpart,
        # i.e. the diagonal of the [batch_size, batch_size] similarity matrix
        labels = torch.arange(sim_matrix.size(0), dtype=torch.long, device=sim_matrix.device)
        # cross-entropy over the similarity rows
        loss = F.cross_entropy(sim_matrix, labels)
        return loss
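In unsupervised SimCSE the two inputs are embeddings of the same sentences from two stochastic encoder passes (different dropout masks); here random vectors stand in for the encoder output (a sketch, not the original training code).

# Sketch: two noisy views of the same 16 embeddings (dimension 768 assumed)
simcse = SimCSELoss(temperature=0.05)
z1 = torch.randn(16, 768)
z2 = z1 + 0.01 * torch.randn(16, 768)  # second "view" of the same batch
print(simcse(z1, z2))  # small loss: each row's best match is its own counterpart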
# improvement: focal loss (reusing the Focal_loss class defined above)
class Model(RobertaPreTrainedModel):
    config_class = RobertaConfig

    def __init__(self, config):
        super().__init__(config)
        self.pretrained = RobertaModel.from_pretrained('roberta-base', num_labels=2, output_hidden_states=True)
        self.fc = RobertaClassificationHead(config)
        self.criterion = nn.CrossEntropyLoss()
        # R-dropout (KL divergence; default reduction='mean')
        self.drop = nn.Dropout(0.1)
        self.kl_loss = nn.KLDivLoss()
        # contrastive learning
        self.simcse = SimCSELoss()
        # focal loss
        self.focal_loss = Focal_loss(alpha=0.15, gamma=2, num_classes=2, size_average=True)  # alpha=0.15 worked well

    def forward(self, input_ids, attention_mask, labels=None):
        encode_output = self.pretrained(input_ids, attention_mask)
        sequence_output = encode_output.hidden_states[-1]
        logits = self.fc(sequence_output)
        # Run the encoder twice: its internal dropout is stochastic, so the two
        # passes give two different views of the same batch (data augmentation for R-Drop).
        sequence_output2 = self.pretrained(input_ids, attention_mask).hidden_states[-1]
        logits2 = self.fc(sequence_output2)
        # apply dropout to the positive embeddings
        # z1 = self.drop(encode_output.hidden_states[-2][:, 0, :])
        # # negative sampling for the negative embeddings
        # z2_indices = torch.randint(0, sequence_output.shape[0], size=(sequence_output.shape[0],))
        # z2 = sequence_output[:, 0, :][z2_indices]
        loss = None
        if labels is not None:
            # baseline
            # b = 0.5
            # loss = self.criterion(logits, labels)
            # loss = self.focal_loss(logits, labels)
            # R-Drop: average CE over both passes plus a symmetric KL term
            # (here the KL is taken over the <s> hidden states rather than the logits)
            ce_loss = (self.criterion(logits, labels) + self.criterion(logits2, labels)) / 2
            kl_loss1 = self.kl_loss(F.log_softmax(sequence_output[:, 0, :], dim=-1), F.softmax(sequence_output2[:, 0, :], dim=-1)).sum(-1)
            kl_loss2 = self.kl_loss(F.log_softmax(sequence_output2[:, 0, :], dim=-1), F.softmax(sequence_output[:, 0, :], dim=-1)).sum(-1)
            kl_loss = (kl_loss1 + kl_loss2) / 2
            loss = ce_loss + 3 * kl_loss
            # contrastive learning
            # sim_cse = self.simcse(z1, z2)
            # print(sim_cse)
            # loss = loss + 0.01 * sim_cse
        return loss, logits
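A minimal training-step sketch for Model; the tokenizer, example batch, learning rate, and optimizer below are assumptions for illustration, not part of the original code.

# Hypothetical training step (all settings are assumptions, not from the source)
from transformers import RobertaTokenizer

config = RobertaConfig.from_pretrained('roberta-base')
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
model = Model(config)
optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)

batch = tokenizer(["an example sentence", "another example"],
                  padding=True, truncation=True, return_tensors="pt")
labels = torch.tensor([0, 1])

loss, logits = model(batch["input_ids"], batch["attention_mask"], labels=labels)
loss.backward()
optimizer.step()
optimizer.zero_grad()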