def compute_loss(p, targets, model):
    """Compute the combined box / objectness / classification training loss.

    Args:
        p: Sequence of prediction tensors, one per output layer. Each is
            indexed below as ``pi[image, anchor, grid_y, grid_x, channel]``
            where channels ``0:2`` are xy, ``2:4`` are wh, ``4`` is the
            objectness logit and ``5:`` are the class logits. The original
            notes give an example shape of ``[batch, 3, 80, 80, classes + 5]``.
        targets: Tensor holding every ground-truth box of the batch. Only
            ``targets.device`` is read here; the tensor itself is handed to
            ``build_targets``. The original notes describe each row as
            ``(image index, class, center x, center y, width, height)``
            -- TODO confirm against ``build_targets``.
        model: Object exposing ``hyp`` (dict with keys ``cls_pw``, ``obj_pw``,
            ``fl_gamma``, ``giou``, ``obj``, ``cls``), ``gr`` and ``nc``.

    Returns:
        A tuple ``(loss * batch_size, parts)`` where ``parts`` is the detached
        concatenation ``(lbox, lobj, lcls, loss)``.
    """
    device = targets.device  # e.g. "cpu" or "cuda"
    lcls = torch.zeros(1, device=device)
    lbox = torch.zeros(1, device=device)
    lobj = torch.zeros(1, device=device)

    # build_targets gathers the ground-truth information for this batch.
    # tcls, tbox, indices and anchors are each indexed per output layer below
    # (element i belongs to p[i]).
    #   tcls    -- class index of every matched target
    #   tbox    -- target boxes as consumed by bbox_iou; presumably xy offsets
    #              within the grid cell plus width/height in grid units
    #              (TODO confirm against build_targets)
    #   indices -- per layer (image idx, anchor idx, grid y, grid x), used to
    #              pick the matching prediction out of p[i]
    #   anchors -- the anchor matched to every target
    tcls, tbox, indices, anchors = build_targets(p, targets, model)
    h = model.hyp  # hyperparameters (original notes: data/hyp.scratch.yaml by default)

    # Define criteria. h['cls_pw'] / h['obj_pw'] are passed as the positive
    # weight of the class and objectness BCE losses respectively.
    BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.Tensor([h['cls_pw']])).to(device)
    BCEobj = nn.BCEWithLogitsLoss(pos_weight=torch.Tensor([h['obj_pw']])).to(device)

    # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3
    # cp / cn are the values written for positive / negative class labels.
    cp, cn = smooth_BCE(eps=0.0)

    # Focal loss: only wraps the criteria when gamma > 0.
    g = h['fl_gamma']  # focal loss gamma
    if g > 0:
        BCEcls, BCEobj = FocalLoss(BCEcls, g), FocalLoss(BCEobj, g)

    # Losses
    n_out = len(p)  # number of output layers
    balance = [4.0, 1.0, 0.4] if n_out == 3 else [4.0, 1.0, 0.4, 0.1]  # P3-5 or P3-6
    for i, pi in enumerate(p):  # layer index, layer predictions
        # All targets assigned to output layer i.
        b, a, gj, gi = indices[i]  # image, anchor, gridy, gridx
        # Objectness target: stays 0 everywhere except at matched positions.
        tobj = torch.zeros_like(pi[..., 0], device=device)

        n = b.shape[0]  # number of targets on this layer
        if n:
            ps = pi[b, a, gj, gi]  # prediction subset corresponding to targets

            # Regression
            # sigmoid * 2 - 0.5 maps the xy offset into (-0.5, 1.5).
            pxy = ps[:, :2].sigmoid() * 2. - 0.5
            # (sigmoid * 2) ** 2 lies in (0, 4): the predicted size is at most
            # four times the matched anchor and can shrink towards zero.
            pwh = (ps[:, 2:4].sigmoid() * 2) ** 2 * anchors[i]
            pbox = torch.cat((pxy, pwh), 1).to(device)  # predicted box
            # Overlap between prediction and target; higher means a better
            # match. bbox_iou is called with CIoU=True, so despite the name
            # this is presumably the CIoU value.
            giou = bbox_iou(pbox.T, tbox[i], x1y1x2y2=False, CIoU=True)
            lbox += (1.0 - giou).mean()  # box loss

            # Objectness
            # At matched positions store a blend of 1 and the (detached,
            # clamped at 0) overlap, controlled by model.gr; with gr == 1 the
            # target is the overlap itself. Unmatched positions remain 0.
            tobj[b, a, gj, gi] = (1.0 - model.gr) + model.gr * giou.detach().clamp(0).type(tobj.dtype)

            # Classification
            if model.nc > 1:  # cls loss (only if multiple classes)
                t = torch.full_like(ps[:, 5:], cn, device=device)  # targets
                t[range(n), tcls[i]] = cp
                lcls += BCEcls(ps[:, 5:], t)  # BCE

        # Objectness loss over every position of the layer, weighted per layer.
        lobj += BCEobj(pi[..., 4], tobj) * balance[i]

    s = 3 / n_out  # output count scaling (1 when there are 3 output layers)
    lbox *= h['giou'] * s
    lobj *= h['obj'] * s * (1.4 if n_out == 4 else 1.)
    lcls *= h['cls'] * s
    bs = tobj.shape[0]  # batch size, read from the last layer's objectness target

    loss = lbox + lobj + lcls
    return loss * bs, torch.cat((lbox, lobj, lcls, loss)).detach()