Contents
0. Data preparation
1. By the formula
2. softmax + log + nll(one-hot)
3. cross_entropy directly
4. Computing the loss for 4-D predictions
0. Data preparation
Compute the loss for 2-D predictions.
First create pred with pred.shape == (4, 5) and a label with label.shape == (4,).
import torch
import torch.nn.functional as F
# softmax + log: softmax outputs lie in (0, 1), so their log lies in (-inf, 0)
pred = torch.Tensor(4, 5) # (4,5); torch.Tensor allocates uninitialized memory, so the values below vary between runs
softmax_log_pred = F.log_softmax(pred, dim=1) # (4,5). softmax followed by log
# one-hot
target = torch.Tensor([1, 4, 3, 2]) # (4,)
one_hot_target = F.one_hot(target.long()) # (4,5). each distinct label value becomes a distinct 0/1 row
pred = torch.Tensor(4,5)
pred
tensor([[0., 0., 0., 0., 0.],
        [0., 0., 1., 1., 0.],
        [1., 3., 2., 3., 0.],
        [0., 0., 0., 0., 0.]])
softmax_log_pred = F.log_softmax(pred, dim=1) # softmax first, then log
softmax_log_pred
tensor([[-1.6094, -1.6094, -1.6094, -1.6094, -1.6094],
        [-2.1326, -2.1326, -1.1326, -1.1326, -2.1326],
        [-2.9373, -0.9373, -1.9373, -0.9373, -3.9373],
        [-1.6094, -1.6094, -1.6094, -1.6094, -1.6094]])
target = torch.Tensor([1, 4, 3, 2])
target
tensor([1., 4., 3., 2.]) # (4,)
one_hot_target = F.one_hot(target.long()) # (4,5); adds a class dimension
one_hot_target
tensor([[0, 1, 0, 0, 0],   # corresponds to label 1
        [0, 0, 0, 0, 1],   # corresponds to label 4
        [0, 0, 0, 1, 0],   # label 3
        [0, 0, 1, 0, 0]])  # label 2
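One detail worth noting: F.one_hot infers the number of classes from the largest label it sees, so the (4, 5) width above depends on label 4 being present in the batch. A small sketch with num_classes made explicit (the value 5 is our assumption, matching the 5 columns of pred):
# Passing num_classes pins the width of the encoding regardless of which labels appear.
F.one_hot(target.long(), num_classes=5) # (4, 5)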
1. By the formula
CELoss is equivalent to first computing x = log_softmax(pred) and y = one_hot(target), then applying avg_loss = -sum(x*y)/n, where n is the batch size.
# batch_size = 4, so divide by 4 to average the loss over all samples
# softmax_log_pred: (4,5); one_hot_target: (4,5)
res = -torch.sum(softmax_log_pred * one_hot_target) / target.shape[0] # tensor(1.6094)
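The one-hot multiplication just picks out, for each sample, the log-probability at its label index. A minimal sketch of the same computation done by indexing instead of the one-hot product (same softmax_log_pred and target as above):
# Select the log-probability of the correct class for each of the 4 samples, then average.
picked = softmax_log_pred[torch.arange(4), target.long()] # (4,)
res_indexed = -picked.mean() # tensor(1.6094), same value as res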
2. softmax + log + nll(one-hot)
CELoss is also equivalent to log_softmax + nll. Compared with the approach above, nll_loss performs the one-hot selection internally, so the target is passed as class indices rather than a one-hot matrix.
# Second approach: softmax + log + nll (the one-hot step is built into nll_loss)
# softmax_log_pred: (4,5); target: (4,)
F.nll_loss(softmax_log_pred, target.long()) # tensor(1.6094), the negative log-likelihood loss
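By default F.nll_loss averages over the batch (reduction='mean'). A short sketch of the other reduction modes, in case per-sample losses are needed:
F.nll_loss(softmax_log_pred, target.long(), reduction='none') # shape (4,), one loss per sample
F.nll_loss(softmax_log_pred, target.long(), reduction='sum') # sum of the 4 per-sample losses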
3. cross_entropy directly
# pred (4,5); target (4,)
import torch.nn.functional as F
F.cross_entropy(pred, target.long()) # tensor(1.6094)
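The same loss is also available as a module, torch.nn.CrossEntropyLoss, which is the usual choice inside a training loop; a minimal sketch:
import torch.nn as nn
criterion = nn.CrossEntropyLoss() # module form of F.cross_entropy
criterion(pred, target.long()) # tensor(1.6094), same value as above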
4. Computing the loss for 4-D predictions
pred = torch.Tensor(4,3,256,256) # [batch=4, classes=3, H=256, W=256]; uninitialized values
softmax_log_pred= F.log_softmax(pred, dim=1) # shape: [4, 3, 256, 256]
target = torch.randint(0, 3, (4, 256, 256))
target.shape
torch.Size([4, 256, 256])
# one-hot encode the labels
one_hot_target = F.one_hot(target.long()) # [4, 256, 256, 3]; the class dimension is appended last
one_hot_target = one_hot_target.permute(0, 3, 1, 2) # [4, 3, 256, 256]; permute moves classes to dim 1 (transpose(1, 3) would also swap H and W)
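A quick sanity check that the permuted one-hot tensor lines up with target:
# argmax over the class dimension should recover the original label map
assert (one_hot_target.argmax(dim=1) == target).all()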
# 1. By the formula
# Each pixel has 3 class scores and there are 4*256*256 pixels, so average over all of them.
# softmax_log_pred: [4, 3, 256, 256]; one_hot_target: [4, 3, 256, 256]
res = -torch.sum(softmax_log_pred * one_hot_target) / target.numel() # divide by 4*256*256
# tensor(1.0986) in this run; pred is uninitialized, so the value varies between runs
# 2. log_softmax + nll
# softmax_log_pred: [4, 3, 256, 256], target: [4, 256, 256]
F.nll_loss(softmax_log_pred, target)
tensor(1.0965)
# 3. cross_entropy directly
# pred.shape: [4, 3, 256, 256]; target.shape: [4, 256, 256]
F.cross_entropy(pred, target.long())
tensor(1.0965)
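Because torch.Tensor allocates uninitialized memory, the exact values printed above vary between runs. A self-contained sketch using torch.randn for reproducible logits, confirming that the three approaches compute the same number:
import torch
import torch.nn.functional as F

torch.manual_seed(0)
pred = torch.randn(4, 3, 256, 256) # reproducible logits instead of uninitialized memory
target = torch.randint(0, 3, (4, 256, 256)) # integer class label per pixel

log_p = F.log_softmax(pred, dim=1) # [4, 3, 256, 256]
one_hot = F.one_hot(target, num_classes=3).permute(0, 3, 1, 2).float() # [4, 3, 256, 256]

by_formula = -torch.sum(log_p * one_hot) / target.numel() # average over 4*256*256 pixels
by_nll = F.nll_loss(log_p, target)
by_ce = F.cross_entropy(pred, target)

# All three give the same mean cross-entropy per pixel.
assert torch.allclose(by_formula, by_nll) and torch.allclose(by_nll, by_ce)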