yolov5 build_targets()

最新推荐文章于 2023-12-29 17:05:13 发布

打团小能手

最新推荐文章于 2023-12-29 17:05:13 发布

阅读量1.5k

点赞数 2

文章标签： pytorch 深度学习 python

原文链接：https://blog.csdn.net/qq_42362891/article/details/114489112?utm_medium=distribute.pc_relevant.none-task-blog-2~default~baidujs_title~default-1.no_search_link&spm=1001.2101.3001.4242

版权

#这里na为锚框种类数 nt为目标数 这里的na为3，nt也为3
na, nt = self.na, targets.shape[0]  # number of anchors, targets
#类别 边界盒 索引 锚框
tcls, tbox, indices, anch = [], [], [], []
#利用gain来计算目标在某一个特征图上的位置信息，初始化为1
gain = torch.ones(7, device=targets.device)  # normalized to gridspace gain
# ai.shape = (na, nt)，锚框的索引，三个目标，三种锚框，所以共9个元素
ai = torch.arange(na, device=targets.device).float().view(na, 1).repeat(1, nt)  # same as .repeat_interleave(nt)
Out[3]: 
tensor([[0., 0., 0.],
        [1., 1., 1.],
        [2., 2., 2.]], device='cuda:0')
# targets.shape = (na, nt, 7)（3，3，7）给每个目标加上锚框索引
#targets[i,c,x,y,w,h,锚框索引]
targets = torch.cat((targets.repeat(na, 1, 1), ai[:, :, None]), 2)  
Out[4]: 
tensor([[[0.00000, 0.00000, 0.58192, 0.16796, 0.26108, 0.08724, 0.00000],
         [1.00000, 0.00000, 0.54517, 0.33744, 0.06395, 0.02632, 0.00000],
         [1.00000, 0.00000, 0.96964, 0.42483, 0.06071, 0.05264, 0.00000]],
        [[0.00000, 0.00000, 0.58192, 0.16796, 0.26108, 0.08724, 1.00000],
         [1.00000, 0.00000, 0.54517, 0.33744, 0.06395, 0.02632, 1.00000],
         [1.00000, 0.00000, 0.96964, 0.42483, 0.06071, 0.05264, 1.00000]],
        [[0.00000, 0.00000, 0.58192, 0.16796, 0.26108, 0.08724, 2.00000],
         [1.00000, 0.00000, 0.54517, 0.33744, 0.06395, 0.02632, 2.00000],
         [1.00000, 0.00000, 0.96964, 0.42483, 0.06071, 0.05264, 2.00000]]], device='cuda:0')
g = 0.5  # bias
#off偏移量（不知道这么称合适吗）
off = torch.tensor([[0, 0],
                            [1, 0], [0, 1], [-1, 0], [0, -1],  # j,k,l,m
                            # [1, 1], [1, -1], [-1, 1], [-1, -1],  # jk,jm,lk,lm
                            ], device=targets.device).float() * g  # offsets
#off的取值如下（形状为(5, 2)），为什么是这些值下文解释
Out[5]: 
tensor([[ 0.00000,  0.00000],
        [ 0.50000,  0.00000],
        [ 0.00000,  0.50000],
        [-0.50000,  0.00000],
        [ 0.00000, -0.50000]], device='cuda:0')
for i in range(self.nl):
    
"""
p[i].shape = (b, 3, h, w, nc+5)
gain = [1, 1, w, h, w, h, 1]                   
"""#获取当前的锚框尺寸
    anchors = self.anchors[i]
Out[7]: 
tensor([[1.25000, 1.62500],
        [2.00000, 3.75000],
        [4.12500, 2.87500]], device='cuda:0')
    gain[2:6] = torch.tensor(p[i].shape)[[3, 2, 3, 2]]  # xyxy gain
Out[8]: tensor([ 1.,  1., 64., 64., 64., 64.,  1.], device='cuda:0')
    # 将xywh映射到当前特征图，即乘以对应的特征图尺寸
    # Match targets to anchors
    t = targets * gain #
Out[9]: 
tensor([[[ 0.00000,  0.00000, 37.24281, 10.74930, 16.70916,  5.58366,  0.00000],
         [ 1.00000,  0.00000, 34.89063, 21.59622,  4.09269,  1.68436,  0.00000],
         [ 1.00000,  0.00000, 62.05726, 27.18916,  3.88548,  3.36872,  0.00000]],
        [[ 0.00000,  0.00000, 37.24281, 10.74930, 16.70916,  5.58366,  1.00000],
         [ 1.00000,  0.00000, 34.89063, 21.59622,  4.09269,  1.68436,  1.00000],
         [ 1.00000,  0.00000, 62.05726, 27.18916,  3.88548,  3.36872,  1.00000]],
        [[ 0.00000,  0.00000, 37.24281, 10.74930, 16.70916,  5.58366,  2.00000],
         [ 1.00000,  0.00000, 34.89063, 21.59622,  4.09269,  1.68436,  2.00000],
         [ 1.00000,  0.00000, 62.05726, 27.18916,  3.88548,  3.36872,  2.00000]]], device='cuda:0')#t(3,3,7)
    if nt:
        #r为目标wh和锚框wh的比值；宽比和高比都在1/anchor_t到anchor_t之间（anchor_t为4时即0.25到4）才采用该种锚框预测目标
        r = t[:, :, 4:6] / anchors[:, None]
Out[10]: 
tensor([[[13.36733,  3.43610],
         [ 3.27415,  1.03653],
         [ 3.10838,  2.07306]],
        [[ 8.35458,  1.48897],
         [ 2.04635,  0.44916],
         [ 1.94274,  0.89833]],
        [[ 4.05071,  1.94214],
         [ 0.99217,  0.58587],
         [ 0.94193,  1.17173]]], device='cuda:0')
        #将比值和预先设置的比例anchor_t对比，符合条件为True，反之False
        j = torch.max(r, 1. / r).max(2)[0] < self.hyp['anchor_t'] 
        #首先选出宽比和高比最大的那一个，若最大小于阈值，则另一个比一定小于阈值。
Out[12]: 
tensor([[False,  True,  True],
        [False,  True,  True],
        [False,  True,  True]], device='cuda:0')
         #根据j筛选符合条件的情况
         t = t[j]
Out[14]: 
tensor([[ 1.00000,  0.00000, 34.89063, 21.59622,  4.09269,  1.68436,  0.00000],
        [ 1.00000,  0.00000, 62.05726, 27.18916,  3.88548,  3.36872,  0.00000],
        [ 1.00000,  0.00000, 34.89063, 21.59622,  4.09269,  1.68436,  1.00000],
        [ 1.00000,  0.00000, 62.05726, 27.18916,  3.88548,  3.36872,  1.00000],
        [ 1.00000,  0.00000, 34.89063, 21.59622,  4.09269,  1.68436,  2.00000],
        [ 1.00000,  0.00000, 62.05726, 27.18916,  3.88548,  3.36872,  2.00000]], device='cuda:0') #t的形状变为（6，7）：第一个目标在三种锚框下都不满足条件，被筛掉
        #得到相对于左上角的目标
        gxy = t[:, 2:4]  # grid xy
        #得到相对于右下角的目标坐标（即特征图宽高减去gxy）
        gxi = gain[[2, 3]] - gxy 
        #这里是重点，也是比较难理解的部分，jk是判断gxy更偏向哪里，左？上？
        j, k = ((gxy % 1. < g) & (gxy > 1.)).T
        #lm是判断gxi更偏向哪里，右？下？
        l, m = ((gxi % 1. < g) & (gxi > 1.)).T
        j = torch.stack((torch.ones_like(j), j, k, l, m))
        #yolov5不仅用目标中心点所在的网格预测该目标，还采用了距目标中心点的最近两个网格
        #所以有五种情况，网格本身，上下左右，这就是repeat函数第一个参数为5的原因
        #用图来表示下吧
        #对t复制5份，即网格本身外加上下左右四个候选区共五个区域，再从中至多选出三份。具体选出哪几份由torch.stack后的j决定：第一项是torch.ones_like，即全1矩阵，说明网格本身必定被选中；剩下的4项中，由于gxi是gxy关于特征图尺寸的取反（inverse）操作，所以j和l、k和m两两互斥，这样就确保了至多选出三项。到现在为止还没有产生偏移；偏移量offsets是用同样的j从off中选出与t逐行对应的项得到的。

        t = t.repeat((5, 1, 1))[j]

在这里插入图片描述
原文

打团小能手

关注

2
点赞
踩
11

收藏

觉得还不错? 一键收藏
0
评论
yolov5 build_targets()

#这里na为锚框种类数 nt为目标数这里的na为3，nt也为3na, nt = self.na, targets.shape[0] # number of anchors, targets#类别边界盒索引锚框tcls, tbox, indices, anch = [], [], [], []#利用gain来计算目标在某一个特征图上的位置信息，初始化为1gain = torch.ones(7, device=targets.device) # normalized to gridspac
复制链接

扫一扫