ComputeLoss() is called from train.py.
def __call__(self, p, targets):  # predictions, targets, model
    device = targets.device
    # initialize the three loss components
    lcls, lbox, lobj = torch.zeros(1, device=device), torch.zeros(1, device=device), torch.zeros(1, device=device)
    # build the training targets; examined in detail below
    tcls, tbox, indices, anchors = self.build_targets(p, targets)  # targets
    # tcls    [[num]]                         class index of each gt
    # tbox    [[x_offset, y_offset, w, h]]    gt box for each assigned grid cell; note that
    #                                         x and y are offsets relative to the cell
    # indices [[image, anchor, grid indices]] assignment info for each gt: which image of the
    #                                         batch, which anchor at this scale, and which grid cell
    # anchors [[num, 2]]                      width/height of the anchor matched to each gt
This block initializes the losses and calls self.build_targets, which deserves its own walkthrough: its main job is to find, for each gt box, the best-matching anchors (priors) and the grid cells responsible for predicting it.
lcls=lbox=lobj=tensor([0.], device='cuda:0')
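build_targets itself is not reproduced here, but its core matching rule can be sketched standalone, using values from the printouts further below. This is a hedged reconstruction: the full anchor triple is assumed to be the YOLOv5s stride-16 (P4) defaults, and anchor_t its default 4.0. YOLOv5 matches anchors by width/height ratio, not by IoU:
import torch

gt_wh = torch.tensor([[13.5233, 3.6464]])     # gt size in grid units (see tbox[1] below)
anchors_p4 = torch.tensor([[1.8750, 3.8125],  # assumed: the 3 default anchors at this scale
                           [3.8750, 2.8125],
                           [3.6875, 7.4375]])
r = gt_wh[:, None] / anchors_p4[None]         # (1, 3, 2) wh ratios
keep = torch.max(r, 1. / r).max(2)[0] < 4.0   # within 4x in both dimensions
print(keep)  # tensor([[False,  True,  True]]) -- only anchors 1 and 2 match,
             # which is exactly why `a` below contains only the values 1 and 2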
    for i, pi in enumerate(p):  # layer index, layer predictions
        b, a, gj, gi = indices[i]  # image, anchor, gridy, gridx
        # initialize the objectness targets
        tobj = torch.zeros_like(pi[..., 0], device=device)  # target obj
        # number of targets assigned to this layer
        n = b.shape[0]  # number of targets
        if n:  # if any targets exist at this scale
            # gather the predictions at the grid cells / anchors responsible for the targets
            ps = pi[b, a, gj, gi]  # prediction subset corresponding to targets
i=1
pi=tensor([ [ [ [ [-7.09473e-01, ..., -4.93359e+00, -1.12402e+00, -7.99805e-01],
...,
[-1.49048e-01, ..., -5.14062e+00, 1.04492e-01, -6.35254e-01]],
...,
[ [ 2.03369e-01, ..., -5.81250e+00, -9.32129e-01, -1.99805e+00],
...,
[-3.16895e-01, ..., -5.98047e+00, -6.71875e-01, -1.01562e+00]]]]], device='cuda:0', dtype=torch.float16, grad_fn=<CopyBackwards>)
b=tensor([0, 0, 0, 0, 0, 0], device='cuda:0')
a=tensor([1, 2, 1, 2, 1, 2], device='cuda:0')
gj=tensor([19, 19, 19, 19, 18, 18], device='cuda:0')
gi=tensor([22, 22, 21, 21, 22, 22], device='cuda:0')
tobj=tensor([ [ [ [0., 0., 0., ..., 0., 0., 0.],
...,
[0., 0., 0., ..., 0., 0., 0.]],
...,
[ [0., 0., 0., ..., 0., 0., 0.],
...,
[0., 0., 0., ..., 0., 0., 0.]]]], device='cuda:0', dtype=torch.float16)
n=6
ps=tensor([ [ 0.36084, 0.59570, 1.12500, 0.74854, -5.87500, -1.75000, 0.08350],
[ 0.66553, 0.33521, 0.64502, -0.27612, -5.83984, -1.17676, -1.39844],
[ 0.45337, 0.48633, 0.90967, 1.37305, -6.00000, -0.81738, -0.42285],
[ 0.91406, 0.01762, 1.10742, -0.56934, -6.32422, -1.28125, -2.12500],
[ 1.43652, 1.31152, 1.15234, 0.68750, -5.87500, -1.68945, -0.14526],
[ 0.80420, -0.13550, 0.23792, -1.01660, -5.81250, -1.42676, -1.19727]], device='cuda:0', dtype=torch.float16, grad_fn=<IndexBackward>)
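The line ps = pi[b, a, gj, gi] uses PyTorch advanced indexing to gather one prediction vector per target in a single step, each with 5 + nc outputs (nc = 2 here, hence 7 columns in ps). A minimal standalone illustration with toy shapes (the sizes are made up; only the indexing pattern matters):
import torch

pi = torch.randn(1, 3, 40, 40, 7)  # (batch, anchors, grid_y, grid_x, 5 + nc)
b  = torch.tensor([0, 0, 0])       # image index per target
a  = torch.tensor([1, 2, 1])       # anchor index per target
gj = torch.tensor([19, 19, 18])    # grid row per target
gi = torch.tensor([22, 21, 22])    # grid column per target
ps = pi[b, a, gj, gi]
print(ps.shape)                    # torch.Size([3, 7]): one 7-vector per target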
            pxy = ps[:, :2].sigmoid() * 2. - 0.5
            pwh = (ps[:, 2:4].sigmoid() * 2) ** 2 * anchors[i]
            pbox = torch.cat((pxy, pwh), 1)  # predicted box
            # compute CIoU between predicted and target boxes
            iou = bbox_iou(pbox.T, tbox[i], x1y1x2y2=False, CIoU=True)  # iou(prediction, target)
            # box regression loss: 1 - CIoU
            lbox += (1.0 - iou).mean()  # iou loss
To better understand pxy = ps[:, :2].sigmoid() * 2. - 0.5, the expression is broken down and printed below:
ps[:, :2]=tensor([ [ 0.3608, 0.5957],
[ 0.6655, 0.3352],
[ 0.4534, 0.4863],
[ 0.9141, 0.0176],
[ 1.4365, 1.3115],
[ 0.8042, -0.1355]], device='cuda:0', dtype=torch.float16)
ps[:, :2].sigmoid()=tensor([ [0.5894, 0.6445],
[0.6606, 0.5830],
[0.6113, 0.6191],
[0.7139, 0.5044],
[0.8081, 0.7876],
[0.6909, 0.4661]], device='cuda:0', dtype=torch.float16)
pxy=tensor([ [0.67871, 0.78906],
[0.82129, 0.66602],
[0.72266, 0.73828],
[0.92773, 0.50879],
[1.11621, 1.07520],
[0.88184, 0.43213]], device='cuda:0', dtype=torch.float16, grad_fn=<SubBackward0>)
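As a standalone sanity check, the first row can be reproduced by hand. Note that sigmoid(x) * 2 - 0.5 maps the raw output into (-0.5, 1.5), which is why the predicted center may fall slightly outside its own cell:
import torch

t = torch.tensor([0.3608, 0.5957])  # ps[0, :2] from the printout above
pxy = t.sigmoid() * 2. - 0.5
print(pxy)                          # ~ tensor([0.6787, 0.7891]), matching pxy[0] above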
Likewise, pwh = (ps[:, 2:4].sigmoid() * 2) ** 2 * anchors[i] is broken down below. This is where the anchors returned by build_targets() come into play.
At this point, anchors[1]=tensor([ [3.8750, 2.8125],
[3.6875, 7.4375],
[3.8750, 2.8125],
[3.6875, 7.4375],
[3.8750, 2.8125],
[3.6875, 7.4375]], device='cuda:0')
ps[:, 2:4]=tensor([ [ 1.1250, 0.7485],
[ 0.6450, -0.2761],
[ 0.9097, 1.3730],
[ 1.1074, -0.5693],
[ 1.1523, 0.6875],
[ 0.2379, -1.0166]], device='cuda:0', dtype=torch.float16)
ps[:, 2:4].sigmoid()=tensor([ [0.7549, 0.6787],
[0.6558, 0.4314],
[0.7129, 0.7979],
[0.7515, 0.3613],
[0.7598, 0.6655],
[0.5591, 0.2656]], device='cuda:0', dtype=torch.float16)
pwh=tensor([ [8.83265, 5.18230],
[6.34285, 5.53656],
[7.87730, 7.16138],
[8.32932, 3.88410],
[8.94728, 4.98292],
[4.61045, 2.09906]], device='cuda:0', grad_fn=<MulBackward0>)
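The same hand check works for the width/height of the first row. Since (sigmoid(x) * 2) ** 2 lies in (0, 4), a prediction can be at most 4x its anchor, which mirrors the anchor_t = 4 matching threshold sketched earlier:
import torch

t = torch.tensor([1.1250, 0.7485])       # ps[0, 2:4] from the printout above
anchor = torch.tensor([3.8750, 2.8125])  # anchors[1][0]
pwh = (t.sigmoid() * 2) ** 2 * anchor
print(pwh)                               # ~ tensor([8.8327, 5.1823]), matching pwh[0] above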
pbox=tensor([ [0.67871, 0.78906, 8.83265, 5.18230],
[0.82129, 0.66602, 6.34285, 5.53656],
[0.72266, 0.73828, 7.87730, 7.16138],
[0.92773, 0.50879, 8.32932, 3.88410],
[1.11621, 1.07520, 8.94728, 4.98292],
[0.88184, 0.43213, 4.61045, 2.09906]], device='cuda:0', grad_fn=<CatBackward>)
At this point, tbox[1]=tensor([ [ 0.2041, 0.0555, 13.5233, 3.6464],
[ 0.2041, 0.0555, 13.5233, 3.6464],
[ 1.2041, 0.0555, 13.5233, 3.6464],
[ 1.2041, 0.0555, 13.5233, 3.6464],
[ 0.2041, 1.0555, 13.5233, 3.6464],
[ 0.2041, 1.0555, 13.5233, 3.6464]], device='cuda:0')
iou=tensor([0.50696, 0.36387, 0.35847, 0.50845, 0.52728, 0.19179], device='cuda:0', grad_fn=<SubBackward0>)
The IoU computed here is the CIoU between each predicted box and the gt replicated across its three responsible grid cells (the width/height columns of tbox are simply the gt's width/height).
lbox=tensor([0.59053], device='cuda:0', grad_fn=<AddBackward0>)
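bbox_iou(..., CIoU=True) implements Complete-IoU. For reference, a minimal standalone sketch for (x, y, w, h) boxes follows; this is a simplified stand-in, not the repo's exact code (which also detaches alpha and supports other IoU variants), but it reproduces iou[0] from the printout above:
import math
import torch

def ciou_xywh(box1, box2, eps=1e-7):
    # convert centers/sizes to corners
    b1x1, b1y1 = box1[:, 0] - box1[:, 2] / 2, box1[:, 1] - box1[:, 3] / 2
    b1x2, b1y2 = box1[:, 0] + box1[:, 2] / 2, box1[:, 1] + box1[:, 3] / 2
    b2x1, b2y1 = box2[:, 0] - box2[:, 2] / 2, box2[:, 1] - box2[:, 3] / 2
    b2x2, b2y2 = box2[:, 0] + box2[:, 2] / 2, box2[:, 1] + box2[:, 3] / 2
    # intersection and union
    inter = (torch.min(b1x2, b2x2) - torch.max(b1x1, b2x1)).clamp(0) * \
            (torch.min(b1y2, b2y2) - torch.max(b1y1, b2y1)).clamp(0)
    union = box1[:, 2] * box1[:, 3] + box2[:, 2] * box2[:, 3] - inter + eps
    iou = inter / union
    # penalty 1: squared center distance over squared enclosing-box diagonal
    cw = torch.max(b1x2, b2x2) - torch.min(b1x1, b2x1)
    ch = torch.max(b1y2, b2y2) - torch.min(b1y1, b2y1)
    c2 = cw ** 2 + ch ** 2 + eps
    rho2 = (box1[:, 0] - box2[:, 0]) ** 2 + (box1[:, 1] - box2[:, 1]) ** 2
    # penalty 2: aspect-ratio consistency
    v = (4 / math.pi ** 2) * (torch.atan(box2[:, 2] / box2[:, 3])
                              - torch.atan(box1[:, 2] / box1[:, 3])) ** 2
    alpha = v / (1 - iou + v + eps)
    return iou - rho2 / c2 - alpha * v

pred = torch.tensor([[0.67871, 0.78906, 8.83265, 5.18230]])  # pbox[0] above
gt   = torch.tensor([[0.2041, 0.0555, 13.5233, 3.6464]])     # tbox[1][0] above
print(ciou_xywh(pred, gt))                                   # ~ 0.5070, matching iou[0]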
            # build the objectness targets: cells that contain a gt object are labeled
            # with the (detached, clamped) IoU between prediction and gt
            score_iou = iou.detach().clamp(0).type(tobj.dtype)
            if self.sort_obj_iou:
                sort_id = torch.argsort(score_iou)
                b, a, gj, gi, score_iou = b[sort_id], a[sort_id], gj[sort_id], gi[sort_id], score_iou[sort_id]
            tobj[b, a, gj, gi] = (1.0 - self.gr) + self.gr * score_iou  # iou ratio
tobj.dtype=torch.float16
iou.detach()=tensor([0.50696, 0.36387, 0.35847, 0.50845, 0.52728, 0.19179], device='cuda:0', dtype=torch.float16)
iou.detach().clamp(0)=tensor([0.50696, 0.36387, 0.35847, 0.50845, 0.52728, 0.19179], device='cuda:0', dtype=torch.float16)
score_iou=tensor([0.50696, 0.36387, 0.35847, 0.50845, 0.52728, 0.19179], device='cuda:0', dtype=torch.float16)
self.sort_obj_iou = False, so the if branch is not taken
self.gr=1.0
tobj[b, a, gj, gi]=tensor([0.50696, 0.36387, 0.35847, 0.50845, 0.52728, 0.19179], device='cuda:0', dtype=torch.float16)
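With self.gr = 1.0 the objectness target is exactly the clamped IoU; a smaller gr would blend it toward a hard label of 1.0. A quick illustration (toy values):
import torch

score_iou = torch.tensor([0.5070, 0.3639, 0.1918])
for gr in (1.0, 0.5, 0.0):
    print(gr, (1.0 - gr) + gr * score_iou)
# gr = 1.0 -> the IoU itself; gr = 0.0 -> all ones (classic hard objectness labels)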
            if self.nc > 1:  # cls loss (only if multiple classes)
                # one-hot class targets for the cells containing gt objects
                t = torch.full_like(ps[:, 5:], self.cn, device=device)  # targets
                t[range(n), tcls[i]] = self.cp
                # classification loss
                lcls += self.BCEcls(ps[:, 5:], t)  # BCE
ps[:, 5:]=tensor([ [-1.7500, 0.0835],
[-1.1768, -1.3984],
[-0.8174, -0.4229],
[-1.2812, -2.1250],
[-1.6895, -0.1453],
[-1.4268, -1.1973]], device='cuda:0', dtype=torch.float16)
self.cn=0.0
t=tensor([ [1., 0.],
[1., 0.],
[1., 0.],
[1., 0.],
[1., 0.],
[1., 0.]], device='cuda:0', dtype=torch.float16)
self.cp=1.0
lcls=tensor([1.00219], device='cuda:0', grad_fn=<AddBackward0>)
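This step can be reproduced standalone from the printed logits (assuming BCEcls has no pos_weight, i.e. cls_pw = 1.0; with label smoothing enabled, cn and cp would move off 0 and 1):
import torch
import torch.nn as nn

ps_cls = torch.tensor([[-1.7500,  0.0835],
                       [-1.1768, -1.3984],
                       [-0.8174, -0.4229],
                       [-1.2812, -2.1250],
                       [-1.6895, -0.1453],
                       [-1.4268, -1.1973]])
cn, cp = 0.0, 1.0
t = torch.full_like(ps_cls, cn)
t[range(6), torch.zeros(6, dtype=torch.long)] = cp  # all six targets are class 0 here
print(nn.BCEWithLogitsLoss()(ps_cls, t))            # ~ tensor(1.0022), matching lcls above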
        obji = self.BCEobj(pi[..., 4], tobj)
        lobj += obji * self.balance[i]  # obj loss
        if self.autobalance:
            self.balance[i] = self.balance[i] * 0.9999 + 0.0001 / obji.detach().item()
obji=tensor(0.00761, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
self.balance[1]=1.0
lobj=tensor([0.01367], device='cuda:0', grad_fn=<AddBackward0>)
self.autobalance=False
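Unlike the box and cls terms, the objectness loss covers every cell at this scale, not just the matched ones. A standalone sketch with toy tensors (for a 3-scale head, YOLOv5 defaults self.balance to [4.0, 1.0, 0.4] for P3/P4/P5, consistent with self.balance[1] = 1.0 above):
import torch
import torch.nn as nn

pi_obj = torch.randn(1, 3, 40, 40)  # objectness logits, i.e. pi[..., 4]
tobj = torch.zeros_like(pi_obj)     # mostly zeros ...
tobj[0, 1, 19, 22] = 0.5070         # ... except matched cells, which hold their IoU
obji = nn.BCEWithLogitsLoss()(pi_obj, tobj)
lobj = obji * 1.0                   # weighted by self.balance[i]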
The i=2 iteration of the loop proceeds in the same way; the full values are not repeated here, only the key ones:
iou=tensor([0.46995, 0.07967, 0.39397, 0.83563, 0.33876, 0.14102, 0.41098, 0.65456, 0.16342, 0.29438, 0.47743, 0.64110], device='cuda:0', grad_fn=<SubBackward0>)
lbox=tensor([1.18213], device='cuda:0', grad_fn=<AddBackward0>)
score_iou=tensor([0.46997, 0.07965, 0.39404, 0.83545, 0.33887, 0.14099, 0.41089, 0.65479, 0.16345, 0.29443, 0.47754, 0.64111], device='cuda:0', dtype=torch.float16)
lcls=tensor([1.76345], device='cuda:0', grad_fn=<AddBackward0>)
obji=tensor(0.03073, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
lobj=tensor([0.02596], device='cuda:0', grad_fn=<AddBackward0>)
    if self.autobalance:
        self.balance = [x / self.balance[self.ssi] for x in self.balance]
    # weight each loss component; the weights can be set in the hyperparameter file
    lbox *= self.hyp['box']
    lobj *= self.hyp['obj']
    lcls *= self.hyp['cls']
    bs = tobj.shape[0]  # batch size
The loss weights are defined in the config file data/hyps/hyp.scratch.yaml:
self.hyp['box']=0.05
self.hyp['obj']=1.0
self.hyp['cls']=0.5
lbox=tensor([0.05911], device='cuda:0', grad_fn=<MulBackward0>)
lobj=tensor([0.02596], device='cuda:0', grad_fn=<MulBackward0>)
lcls=tensor([0.02204], device='cuda:0', grad_fn=<MulBackward0>)
bs=2
    return (lbox + lobj + lcls) * bs, torch.cat((lbox, lobj, lcls)).detach()
(lbox + lobj + lcls) * bs=tensor([0.2142], device='cuda:0')
torch.cat((lbox, lobj, lcls)).detach()=tensor([0.0591, 0.0260, 0.0220], device='cuda:0')
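A last arithmetic check with the printed values:
import torch

lbox = torch.tensor([0.05911])
lobj = torch.tensor([0.02596])
lcls = torch.tensor([0.02204])
bs = 2
print((lbox + lobj + lcls) * bs)      # tensor([0.2142]): the scalar used for backprop
print(torch.cat((lbox, lobj, lcls)))  # tensor([0.0591, 0.0260, 0.0220]): for logging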
Finally, these two values are returned to the point in train.py where ComputeLoss() was called.