YOLOV5源代码学习之ComputeLoss中的__call__函数

ComputeLoss()函数在train.py中调用。

    def __call__(self, p, targets):  # predictions, targets, model
        device = targets.device
        # 初始化loss
        lcls, lbox, lobj = torch.zeros(1, device=device), torch.zeros\
            (1, device=device), torch.zeros(1, device=device)
        # 建立targets目标,会在下面仔细讲解
        tcls, tbox, indices, anchors = self.build_targets(p, targets)  # targets
        # tcls [[num]] 该gt所属类别
        # tbox [[x_offset,y_offset,w,h]] 存放了gt框所对应的网格的box,注意此处的
        # x和y是相对于网格的偏移量
        # indices [[image, anchor, grid indices]] 存放了gt对应的grid的信息,
        # 包括:image(对应batch中的哪张图片)、anchor(对应哪个尺度的anchor),以及所在的网格
        # anch [[num,2]]#anch(与gt匹配的先验框的宽高)。

这一段代码首先是初始化loss,其中调用的self.build_targets函数需要单独学习,其主要作用是找出与该gtbox最匹配的先验框(anchor)

lcls=lbox=lobj=tensor([0.], device='cuda:0')

for i, pi in enumerate(p):  # layer index, layer predictions
    b, a, gj, gi = indices[i]  # image, anchor, gridy, gridx
    # 初始化target obj
    tobj = torch.zeros_like(pi[..., 0], device=device)  # target obj
    # 计算有多少个target
    n = b.shape[0]  # number of targets
    if n:#如果存在target的话
        # 首先获取target所在的网格模型预测的pred 信息
        ps = pi[b, a, gj, gi]  # prediction subset corresponding to targets

i=1

pi=tensor([  [  [  [  [-7.09473e-01,   ..., -4.93359e+00, -1.12402e+00, -7.99805e-01],
                            ...,
                            [-1.49048e-01,    ..., -5.14062e+00,  1.04492e-01, -6.35254e-01]],
                         ...,
                         [  [ 2.03369e-01,    ..., -5.81250e+00, -9.32129e-01, -1.99805e+00],
                            ...,
                            [-3.16895e-01,    ..., -5.98047e+00, -6.71875e-01, -1.01562e+00]]]]], device='cuda:0', dtype=torch.float16, grad_fn=<CopyBackwards>)

b=tensor([0, 0, 0, 0, 0, 0], device='cuda:0')

a=tensor([1, 2, 1, 2, 1, 2], device='cuda:0')

gj=tensor([19, 19, 19, 19, 18, 18], device='cuda:0')

gi=tensor([22, 22, 21, 21, 22, 22], device='cuda:0')

tobj=tensor([  [  [  [0., 0., 0.,  ..., 0., 0., 0.],
                            ...,
                            [0., 0., 0.,  ..., 0., 0., 0.]], 
                         ...,

                         [  [0., 0., 0.,  ..., 0., 0., 0.],
                            ...,
                            [0., 0., 0.,  ..., 0., 0., 0.]]]], device='cuda:0', dtype=torch.float16)

n=6

ps=tensor([ [ 0.36084,  0.59570,  1.12500,  0.74854, -5.87500, -1.75000,  0.08350],
                   [ 0.66553,  0.33521,  0.64502, -0.27612, -5.83984, -1.17676, -1.39844],
                   [ 0.45337,  0.48633,  0.90967,  1.37305, -6.00000, -0.81738, -0.42285],
                   [ 0.91406,  0.01762,  1.10742, -0.56934, -6.32422, -1.28125, -2.12500],
                   [ 1.43652,  1.31152,  1.15234,  0.68750, -5.87500, -1.68945, -0.14526],
                   [ 0.80420, -0.13550,  0.23792, -1.01660, -5.81250, -1.42676, -1.19727]], device='cuda:0', dtype=torch.float16, grad_fn=<IndexBackward>)

pxy = ps[:, :2].sigmoid() * 2. - 0.5
pwh = (ps[:, 2:4].sigmoid() * 2) ** 2 * anchors[i]
pbox = torch.cat((pxy, pwh), 1)  # predicted box
# 计算ciou
iou = bbox_iou(pbox.T, tbox[i], x1y1x2y2=False, CIoU=True)  # iou(prediction, target)
# 计算box的ciouloss
lbox += (1.0 - iou).mean()  # iou loss

为了更好地理解pxy = ps[:, :2].sigmoid() * 2. - 0.5,将其进行了拆分输出

ps[:, :2]=tensor([  [ 0.3608,  0.5957],
                            [ 0.6655,  0.3352],
                            [ 0.4534,  0.4863],
                            [ 0.9141,  0.0176],
                            [ 1.4365,  1.3115],
                            [ 0.8042, -0.1355]], device='cuda:0', dtype=torch.float16)

ps[:, :2].sigmoid()=tensor([  [0.5894, 0.6445],
                                            [0.6606, 0.5830],
                                            [0.6113, 0.6191],
                                            [0.7139, 0.5044],
                                            [0.8081, 0.7876],
                                            [0.6909, 0.4661]], device='cuda:0', dtype=torch.float16)

pxy=tensor([  [0.67871, 0.78906],
                      [0.82129, 0.66602],
                      [0.72266, 0.73828],
                      [0.92773, 0.50879],
                      [1.11621, 1.07520],
                      [0.88184, 0.43213]], device='cuda:0', dtype=torch.float16, grad_fn=<SubBackward0>)

为了更好地理解pwh = (ps[:, 2:4].sigmoid() * 2) ** 2 * anchors[i],也对其进行了拆分输出

build_targets()函数返回的anchors作用于这里

此时的anchors[1]=tensor([  [3.8750, 2.8125],
                                            [3.6875, 7.4375],
                                            [3.8750, 2.8125],
                                            [3.6875, 7.4375],
                                            [3.8750, 2.8125],
                                            [3.6875, 7.4375]], device='cuda:0')

ps[:, 2:4]=tensor([  [ 1.1250,  0.7485],
                              [ 0.6450, -0.2761],
                              [ 0.9097,  1.3730],
                              [ 1.1074, -0.5693],
                              [ 1.1523,  0.6875],
                              [ 0.2379, -1.0166]], device='cuda:0', dtype=torch.float16)

ps[:, 2:4].sigmoid()=tensor([ [0.7549, 0.6787],
                                             [0.6558, 0.4314],
                                             [0.7129, 0.7979],
                                             [0.7515, 0.3613],
                                             [0.7598, 0.6655],
                                             [0.5591, 0.2656]], device='cuda:0', dtype=torch.float16)

pwh=tensor([  [8.83265, 5.18230],
                       [6.34285, 5.53656],
                       [7.87730, 7.16138],
                       [8.32932, 3.88410],
                       [8.94728, 4.98292],
                       [4.61045, 2.09906]], device='cuda:0', grad_fn=<MulBackward0>)

pbox=tensor([  [0.67871, 0.78906, 8.83265, 5.18230],
                        [0.82129, 0.66602, 6.34285, 5.53656],
                        [0.72266, 0.73828, 7.87730, 7.16138],
                        [0.92773, 0.50879, 8.32932, 3.88410],
                        [1.11621, 1.07520, 8.94728, 4.98292],
                        [0.88184, 0.43213, 4.61045, 2.09906]], device='cuda:0', grad_fn=<CatBackward>)

此时的tbox[1]=tensor([  [ 0.2041,  0.0555, 13.5233,  3.6464],
                                      [ 0.2041,  0.0555, 13.5233,  3.6464],
                                      [ 1.2041,  0.0555, 13.5233,  3.6464],
                                      [ 1.2041,  0.0555, 13.5233,  3.6464],
                                      [ 0.2041,  1.0555, 13.5233,  3.6464],
                                      [ 0.2041,  1.0555, 13.5233,  3.6464]], device='cuda:0')

iou=tensor([0.50696, 0.36387, 0.35847, 0.50845, 0.52728, 0.19179], device='cuda:0', grad_fn=<SubBackward0>)

此处计算的iou为:预测框与三个负责预测的网格(其宽高为gtbox的宽高)的CIoU

lbox=tensor([0.59053], device='cuda:0', grad_fn=<AddBackward0>)

# 获取target所对应的obj,存在gt目标的网格会被标记为预测框与gt的交并比(iou)
score_iou = iou.detach().clamp(0).type(tobj.dtype)
if self.sort_obj_iou:
    sort_id = torch.argsort(score_iou)
    b, a, gj, gi, score_iou = b[sort_id], a[sort_id], gj[sort_id], \
                              gi[sort_id], score_iou[sort_id]
tobj[b, a, gj, gi] = (1.0 - self.gr) + self.gr * score_iou  # iou ratio

tobj.dtype=torch.float16

iou.detach()=tensor([0.50696, 0.36387, 0.35847, 0.50845, 0.52728, 0.19179], device='cuda:0', dtype=torch.float16)

iou.detach().clamp(0)=tensor([0.50696, 0.36387, 0.35847, 0.50845, 0.52728, 0.19179], device='cuda:0', dtype=torch.float16)

score_iou=tensor([0.50696, 0.36387, 0.35847, 0.50845, 0.52728, 0.19179], device='cuda:0', dtype=torch.float16)

self.sort_obj_iou = False,不进入if判断语句

self.gr=1.0

tobj[b, a, gj, gi]=tensor([0.50696, 0.36387, 0.35847, 0.50845, 0.52728, 0.19179], device='cuda:0', dtype=torch.float16)

if self.nc > 1:  # cls loss (only if multiple classes)
    # target所在的grid对应的cls的one hot格式
    t = torch.full_like(ps[:, 5:], self.cn, device=device)  # targets
    t[range(n), tcls[i]] = self.cp
    # 计算loss
    lcls += self.BCEcls(ps[:, 5:], t)  # BCE

ps[:, 5:]=tensor([  [-1.7500,  0.0835],
                            [-1.1768, -1.3984],
                            [-0.8174, -0.4229],
                            [-1.2812, -2.1250],
                            [-1.6895, -0.1453],
                            [-1.4268, -1.1973]], device='cuda:0', dtype=torch.float16)

self.cn=0.0

t=tensor([  [1., 0.],
                 [1., 0.],
                 [1., 0.],
                 [1., 0.],
                 [1., 0.],
                 [1., 0.]], device='cuda:0', dtype=torch.float16)

self.cp=1.0

lcls=tensor([1.00219], device='cuda:0', grad_fn=<AddBackward0>)

obji = self.BCEobj(pi[..., 4], tobj)
lobj += obji * self.balance[i]  # obj loss
if self.autobalance:
    self.balance[i] = self.balance[i] * 0.9999 + 0.0001 / obji.detach().item()

obji=tensor(0.00761, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)

self.balance[1]=1.0

lobj=tensor([0.01367], device='cuda:0', grad_fn=<AddBackward0>)

self.autobalance=False


i=2时的for循环同理,这里不再给出全部数值,其中

iou=tensor([0.46995, 0.07967, 0.39397, 0.83563, 0.33876, 0.14102, 0.41098, 0.65456, 0.16342, 0.29438, 0.47743, 0.64110], device='cuda:0', grad_fn=<SubBackward0>)

lbox=tensor([1.18213], device='cuda:0', grad_fn=<AddBackward0>)

score_iou=tensor([0.46997, 0.07965, 0.39404, 0.83545, 0.33887, 0.14099, 0.41089, 0.65479, 0.16345, 0.29443, 0.47754, 0.64111], device='cuda:0', dtype=torch.float16)

lcls=tensor([1.76345], device='cuda:0', grad_fn=<AddBackward0>)

obji=tensor(0.03073, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)

lobj=tensor([0.02596], device='cuda:0', grad_fn=<AddBackward0>)


if self.autobalance:
    self.balance = [x / self.balance[self.ssi] for x in self.balance]
# 提高loss各自的权重,可以在配置文件中设置
lbox *= self.hyp['box']
lobj *= self.hyp['obj']
lcls *= self.hyp['cls']
bs = tobj.shape[0]  # batch size

loss各自的权重存在于配置文件data\hyps\hyp.scratch.yaml中

self.hyp['box']=0.05

self.hyp['obj']=1.0

self.hyp['cls']=0.5

lbox=tensor([0.05911], device='cuda:0', grad_fn=<MulBackward0>)

lobj=tensor([0.02596], device='cuda:0', grad_fn=<MulBackward0>)

lcls=tensor([0.02204], device='cuda:0', grad_fn=<MulBackward0>)

bs=2

return (lbox + lobj + lcls) * bs, torch.cat((lbox, lobj, lcls)).detach()

(lbox + lobj + lcls) * bs=tensor([0.2142], device='cuda:0')

torch.cat((lbox, lobj, lcls)).detach()=tensor([0.0591, 0.0260, 0.0220], device='cuda:0')

最后将这两个值返回到train.py中调用ComputeLoss()的地方

  • 5
    点赞
  • 8
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值