文章目录
正文
代码参照某博主大佬写的。使用时有报错就自己改了改。
logits = logits[..., None]
labels = labels[..., None]
因为我的网络输出是二维的(batch_size × 类别数),label 是一维的(batch_size),而 forward 需要的是三维的 logits 和二维的 labels,所以加了上面两行,给它们各补一个末尾维度。
device=logits.device
因为训练时报错说参与 loss 计算的张量不在同一个设备(device)上,所以在创建 one-hot 张量时加上了上面的 device=logits.device,让它和 logits 位于同一设备。全部代码如下:
import torch
from torch import nn
import torch.nn.functional as F
class FocalLoss(nn.Module):
    """Multi-class focal loss computed from raw (unnormalised) logits.

    For every labelled position the loss is
    ``-alpha * (1 - p_t) ** gamma * log(p_t)``, where ``p_t`` is the softmax
    probability assigned to the target class.  With ``gamma=0`` and
    ``alpha=1`` this is ordinary cross-entropy.

    Args:
        gamma: Focusing exponent; larger values down-weight positions that
            are already classified confidently.
        alpha: Scalar weight applied to every loss term.
        size_average: If true, ``forward`` returns the mean over all labelled
            positions; otherwise it returns their sum.
    """

    def __init__(self, gamma=2, alpha=1, size_average=True):
        super(FocalLoss, self).__init__()
        self.gamma = gamma
        self.alpha = alpha
        self.size_average = size_average
        # Not used by forward(); kept so code reading this attribute still works.
        self.elipson = 0.000001

    def forward(self, logits, labels):
        """Calculate the focal loss.

        Args:
            logits: Raw scores with the class axis at dim 1, e.g.
                ``(batch_size, labels_length)`` or
                ``(batch_size, labels_length, seq_length)``.
            labels: Integer class indices shaped like ``logits`` without the
                class axis, e.g. ``(batch_size,)`` or
                ``(batch_size, seq_length)``.

        Returns:
            A scalar tensor: the mean (``size_average=True``) or the sum of
            the per-position focal loss.

        Raises:
            ValueError: If ``labels`` does not hold exactly one entry per
                logits position.
        """
        # Append a trailing axis so plain (batch, classes) logits with (batch,)
        # labels fit the (batch, classes, positions) layout used below.
        logits = logits[..., None]
        labels = labels[..., None]

        batch_size = logits.size(0)
        labels_length = logits.size(1)

        # Collapse everything after the class axis into a single position axis.
        # No dim-less squeeze() here: it would also drop a batch axis of size 1.
        logits = logits.reshape(batch_size, labels_length, -1)
        labels = labels.reshape(labels.size(0), -1).long()  # scatter_ needs int64

        if labels.size(0) != batch_size or labels.size(1) != logits.size(2):
            raise ValueError(
                "labels shape does not match logits: expected one label per "
                "logits position (logits without its class dimension)"
            )

        # Normalise over the class axis.  The dim must be explicit: for a 3-D
        # input the deprecated implicit choice is dim 0, i.e. the batch axis.
        log_p = F.log_softmax(logits, dim=1)

        # One-hot mask of the target class, created on the logits' device.
        label_onehot = torch.zeros(
            log_p.shape, dtype=log_p.dtype, device=logits.device
        ).scatter_(1, labels.unsqueeze(1), 1.0)

        # Keep only the target-class log-probability at each position.
        log_pt = (label_onehot * log_p).sum(dim=1)
        pt = log_pt.exp()

        fl = -self.alpha * (1 - pt) ** self.gamma * log_pt

        if self.size_average:
            return fl.mean()
        return fl.sum()