NLLLoss & CrossEntropyLoss
CrossEntropyLoss = softmax + log + NLLLoss
$$L=-\frac{1}{N}\sum_i{p_i\lg q_i}$$
- $p_i$: the ground-truth label
- $q_i$: the predicted probability of the label
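Since the labels here are hard (one-hot), $p_i$ is 1 only at the target class $y_n$ and 0 elsewhere, so the sum collapses to the negative log-probability of the target class. This is exactly what NLLLoss computes from log-probabilities:

$$L=-\frac{1}{N}\sum_{n=1}^{N}{\lg q_{n,y_n}}$$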
import torch
import torch.nn as nn
import torch.nn.functional as F

def test_NLLLoss():
    m = nn.LogSoftmax(dim=-1)
    loss = nn.NLLLoss()  # NLLLoss only gathers and negates the input; you must apply log_softmax to the input yourself
    loss1 = nn.CrossEntropyLoss()  # softmax + log + NLLLoss
    loss2 = nn.NLLLoss(reduction='none')
    # input is of size N x C = 3 x 5
    input = torch.randn(3, 5, requires_grad=True)
    # each element in target has to have 0 <= value < C
    # target = torch.empty(3, dtype=torch.long).random_(5)
    target = torch.tensor([1, 0, 4])
    output = loss(m(input), target)
    # manual check: pick the log-probability at the target index, negate, and average over the batch
    output_mine = -(torch.gather(m(input), index=target.unsqueeze(-1), dim=1)).sum() / target.size(0)
    print("NLLLoss")
    print("input:", input)
    print("log_softmax: ", m(input))
    print("target:", target)
    print("loss:", output)
    print("loss1:", loss1(input, target))
    print("none loss:", loss2(m(input), target))
    print("mine loss:", output_mine)
"""
NLLLoss
input: tensor([[-0.3630, -2.0179, -1.9629, -0.1293, 0.7163],
[ 0.0691, -0.6838, 1.1787, -0.4356, -1.6239],
[-1.1982, 0.4505, -0.8784, 1.1047, 0.9539]], requires_grad=True)
log_softmax: tensor([[-1.7226, -3.3775, -3.3225, -1.4889, -0.6433],
[-1.6661, -2.4191, -0.5566, -2.1709, -3.3592],
[-3.2651, -1.6164, -2.9453, -0.9622, -1.1130]],
grad_fn=<LogSoftmaxBackward>)
target: tensor([1, 0, 4])
loss: tensor(2.0522, grad_fn=<NllLossBackward>)
loss1: tensor(2.0522, grad_fn=<NllLossBackward>)
none loss: tensor([3.3775, 1.6661, 1.1130], grad_fn=<NllLossBackward>)
mine loss: tensor(2.0522, grad_fn=<DivBackward0>)
"""
def test_CrossEntropyLoss():
    loss = nn.CrossEntropyLoss(ignore_index=1)  # CrossEntropyLoss applies softmax and log to the input internally
    loss1 = nn.CrossEntropyLoss(reduction='none', ignore_index=1)
    # input is of size N x C = 4 x 5
    input = torch.randn(4, 5, requires_grad=True)
    # each element in target has to have 0 <= value < C
    # target = torch.empty(4, dtype=torch.long).random_(5)
    target = torch.tensor([4, 2, 1, 1])
    output = loss(input, target)
    # mask out positions whose target equals ignore_index, then average over the remaining ones
    tar_mask = target.ne(1).float().detach()
    output_mine = (-torch.gather(F.log_softmax(input, dim=-1), index=target.unsqueeze(-1), dim=1) * tar_mask.unsqueeze(1)).sum() / tar_mask.sum()
    print("CrossEntropyLoss")
    print("input:", input)
    print("log_softmax: ", F.log_softmax(input, dim=-1))
    print("target:", target)
    print("loss:", output)
    print("none loss:", loss1(input, target))
    print("mine loss:", output_mine)
"""
CrossEntropyLoss
input: tensor([[ 0.5172, 0.4194, -1.2796, 1.0469, -1.2624],
[-0.9485, -1.3813, -0.2962, -0.2169, -0.6744],
[-1.9062, 0.0981, -2.3465, 0.1645, 0.5468],
[ 0.6164, -0.7764, -0.3201, -0.6006, 0.9538]], requires_grad=True)
log_softmax: tensor([[-1.3711, -1.4689, -3.1680, -0.8414, -3.1508],
[-1.9405, -2.3733, -1.2882, -1.2089, -1.6664],
[-3.3541, -1.3497, -3.7944, -1.2834, -0.9010],
[-1.2053, -2.5981, -2.1418, -2.4223, -0.8679]],
grad_fn=<LogSoftmaxBackward>)
target: tensor([4, 2, 1, 1])
loss: tensor(2.2195, grad_fn=<NllLossBackward>)
none loss: tensor([3.1508, 1.2882, 0.0000, 0.0000], grad_fn=<NllLossBackward>)
mine loss: tensor(2.2195, grad_fn=<DivBackward0>)
"""
BCELoss
$$L=-\frac{1}{N}\sum_i{\left( p_i\lg q_i+\left( 1-p_i \right) \lg \left( 1-q_i \right)\right)}$$
def test_BCELoss():
    m = nn.Sigmoid()
    loss = nn.BCELoss()  # BCELoss expects probabilities and only takes the log internally; apply sigmoid to the input yourself
    loss1 = nn.BCEWithLogitsLoss()  # sigmoid + BCE
    loss2 = nn.BCELoss(reduction='none')
    input = torch.randn(5, requires_grad=True)
    # target = torch.empty(5).random_(2)
    target = torch.tensor([1, 0, 1, 0, 1]).float()
    output = loss(m(input), target)
    # manual check: binary cross entropy averaged over the batch
    output_mine = -(target * m(input).log() + (1 - target) * (1 - m(input)).log()).sum() / input.size(0)
    print("\nBCELoss")
    print("input:", input)
    print("sigmoid input:", m(input))
    print("target:", target)
    print("loss:", output)
    print("loss1:", loss1(input, target))
    print("loss2:", loss2(m(input), target))
    print("mine loss:", output_mine)
"""
BCELoss
input: tensor([-1.5962, -0.2057, -0.2376, -0.1843, 0.3428], requires_grad=True)
sigmoid input: tensor([0.1685, 0.4487, 0.4409, 0.4540, 0.5849], grad_fn=<SigmoidBackward>)
target: tensor([1., 0., 1., 0., 1.])
loss: tensor(0.8674, grad_fn=<BinaryCrossEntropyBackward>)
loss1: tensor(0.8674, grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
loss2: tensor([1.7807, 0.5956, 0.8190, 0.6052, 0.5364],
grad_fn=<BinaryCrossEntropyBackward>)
mine loss: tensor(0.8674, grad_fn=<DivBackward0>)
"""
KLDivLoss
$$L=-\sum_i{\left( p_i\lg q_i-p_i\lg p_i\right)}$$
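This is just the usual KL divergence rearranged as the cross entropy of $q$ under $p$ minus the entropy of $p$:

$$L=\sum_i{p_i\lg \frac{p_i}{q_i}}=-\sum_i{p_i\lg q_i}-\left( -\sum_i{p_i\lg p_i} \right)$$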
def test_KLDivLoss():
    loss = nn.KLDivLoss(reduction='batchmean')  # divides the summed loss by the batch size only
    loss1 = nn.KLDivLoss(reduction='none')  # KLDivLoss does not apply softmax/log to the input; it expects log-probabilities
    input = torch.log_softmax(torch.tensor([[-2., -6., -8.], [-7., -1., -2.]], requires_grad=True), dim=-1)
    target = torch.tensor([[0.8, 0.1, 0.1], [0.1, 0.7, 0.2]])
    output = loss(input, target)
    # manual check: sum of p*log(p) - p*log(q), divided by the batch size
    output_mine = torch.sum(torch.log(target) * target - target * input) / input.size(0)
    print("\nKLDivLoss")
    print("input:", input)
    print("target:", target)
    print("loss:", output)
    print("none loss:", loss1(input, target))
    print("mine loss:", output_mine)
"""
KLDivLoss
input: tensor([[-0.0206, -4.0206, -6.0206],
[-6.3151, -0.3151, -1.3151]], grad_fn=<LogSoftmaxBackward>)
target: tensor([[0.8000, 0.1000, 0.1000],
[0.1000, 0.7000, 0.2000]])
loss: tensor(0.3474, grad_fn=<DivBackward0>)
none loss: tensor([[-0.1620, 0.1718, 0.3718],
[ 0.4012, -0.0291, -0.0589]], grad_fn=<KlDivBackward>)
mine loss: tensor(0.3474, grad_fn=<DivBackward0>)
"""