#这里na为锚框种类数 nt为目标数 这里的na为3,nt也为3
na, nt = self.na, targets.shape[0] # number of anchors, targets
#类别 边界盒 索引 锚框
tcls, tbox, indices, anch = [], [], [], []
#利用gain来计算目标在某一个特征图上的位置信息,初始化为1
gain = torch.ones(7, device=targets.device) # normalized to gridspace gain
# ai.shape = (na, nt),锚框的索引,三个目标,三种锚框,所以共9个元素
ai = torch.arange(na, device=targets.device).float().view(na, 1).repeat(1, nt) # same as .repeat_interleave(nt)
Out[3]:
tensor([[0., 0., 0.],
[1., 1., 1.],
[2., 2., 2.]], device='cuda:0')
# targets.shape = (na, nt, 7)(3,3,7)给每个目标加上锚框索引
#targets[i,c,x,y,w,h,锚框索引]
targets = torch.cat((targets.repeat(na, 1, 1), ai[:, :, None]), 2)
Out[4]:
tensor([[[0.00000, 0.00000, 0.58192, 0.16796, 0.26108, 0.08724, 0.00000],
[1.00000, 0.00000, 0.54517, 0.33744, 0.06395, 0.02632, 0.00000],
[1.00000, 0.00000, 0.96964, 0.42483, 0.06071, 0.05264, 0.00000]],
[[0.00000, 0.00000, 0.58192, 0.16796, 0.26108, 0.08724, 1.00000],
[1.00000, 0.00000, 0.54517, 0.33744, 0.06395, 0.02632, 1.00000],
[1.00000, 0.00000, 0.96964, 0.42483, 0.06071, 0.05264, 1.00000]],
[[0.00000, 0.00000, 0.58192, 0.16796, 0.26108, 0.08724, 2.00000],
[1.00000, 0.00000, 0.54517, 0.33744, 0.06395, 0.02632, 2.00000],
[1.00000, 0.00000, 0.96964, 0.42483, 0.06071, 0.05264, 2.00000]]], device='cuda:0')
g = 0.5 # bias
#off偏移量(不知道这么称合适吗)
off = torch.tensor([[0, 0],
[1, 0], [0, 1], [-1, 0], [0, -1], # j,k,l,m
# [1, 1], [1, -1], [-1, 1], [-1, -1], # jk,jm,lk,lm
], device=targets.device).float() * g # offsets
#off的形状如下,为什么是这个形状下文解释
Out[5]:
tensor([[ 0.00000, 0.00000],
[ 0.50000, 0.00000],
[ 0.00000, 0.50000],
[-0.50000, 0.00000],
[ 0.00000, -0.50000]], device='cuda:0')
for i in range(self.nl):
"""
p[i].shape = (b, 3, h, w,nc+5)
gain = [1, 1, w, h, w, h, 1]
"""#获取当前的锚框尺寸
anchors = self.anchors[i]
Out[7]:
tensor([[1.25000, 1.62500],
[2.00000, 3.75000],
[4.12500, 2.87500]], device='cuda:0')
gain[2:6] = torch.tensor(p[i].shape)[[3, 2, 3, 2]] # xyxy gain
Out[8]: tensor([ 1., 1., 64., 64., 64., 64., 1.], device='cuda:0')
# 将xywh映射到当前特征图,即乘以对应的特征图尺寸
# Match targets to anchors
t = targets * gain #
Out[9]:
tensor([[[ 0.00000, 0.00000, 37.24281, 10.74930, 16.70916, 5.58366, 0.00000],
[ 1.00000, 0.00000, 34.89063, 21.59622, 4.09269, 1.68436, 0.00000],
[ 1.00000, 0.00000, 62.05726, 27.18916, 3.88548, 3.36872, 0.00000]],
[[ 0.00000, 0.00000, 37.24281, 10.74930, 16.70916, 5.58366, 1.00000],
[ 1.00000, 0.00000, 34.89063, 21.59622, 4.09269, 1.68436, 1.00000],
[ 1.00000, 0.00000, 62.05726, 27.18916, 3.88548, 3.36872, 1.00000]],
[[ 0.00000, 0.00000, 37.24281, 10.74930, 16.70916, 5.58366, 2.00000],
[ 1.00000, 0.00000, 34.89063, 21.59622, 4.09269, 1.68436, 2.00000],
[ 1.00000, 0.00000, 62.05726, 27.18916, 3.88548, 3.36872, 2.00000]]], device='cuda:0')#t(3,3,7)
if nt:
#r为目标wh和锚框wh的比值,宽、高两个比值都在1/anchor_t到anchor_t之间(anchor_t取4时即0.25到4)才采用该种锚框预测目标
r = t[:, :, 4:6] / anchors[:, None]
Out[10]:
tensor([[[13.36733, 3.43610],
[ 3.27415, 1.03653],
[ 3.10838, 2.07306]],
[[ 8.35458, 1.48897],
[ 2.04635, 0.44916],
[ 1.94274, 0.89833]],
[[ 4.05071, 1.94214],
[ 0.99217, 0.58587],
[ 0.94193, 1.17173]]], device='cuda:0')
#将比值和预先设置的比例anchor_t对比,符合条件为True,反之False
j = torch.max(r, 1. / r).max(2)[0] < self.hyp['anchor_t']
#首先选出宽比和高比最大的那一个,若最大小于阈值,则另一个比一定小于阈值。
Out[12]:
tensor([[False, True, True],
[False, True, True],
[False, True, True]], device='cuda:0')
#根据j筛选符合条件的情况
t = t[j]
Out[14]:
tensor([[ 1.00000, 0.00000, 34.89063, 21.59622, 4.09269, 1.68436, 0.00000],
[ 1.00000, 0.00000, 62.05726, 27.18916, 3.88548, 3.36872, 0.00000],
[ 1.00000, 0.00000, 34.89063, 21.59622, 4.09269, 1.68436, 1.00000],
[ 1.00000, 0.00000, 62.05726, 27.18916, 3.88548, 3.36872, 1.00000],
[ 1.00000, 0.00000, 34.89063, 21.59622, 4.09269, 1.68436, 2.00000],
[ 1.00000, 0.00000, 62.05726, 27.18916, 3.88548, 3.36872, 2.00000]], device='cuda:0') #t(6,7):9个(锚框,目标)组合中筛掉了3个,即第0个目标与三种锚框的组合都不满足条件
#得到目标中心点相对于特征图左上角的坐标(网格坐标)
gxy = t[:, 2:4] # grid xy
#得到目标中心点相对于特征图右下角的坐标(用特征图的宽高减去gxy)
gxi = gain[[2, 3]] - gxy
#这里是重点,也是比较难理解的部分,j、k是判断gxy在所在网格内更偏向哪里,左?上?(小数部分小于0.5即偏左/偏上)
j, k = ((gxy % 1. < g) & (gxy > 1.)).T
#l、m是判断gxi更偏向哪里,右?下?(小数部分小于0.5即偏右/偏下)
l, m = ((gxi % 1. < g) & (gxi > 1.)).T
j = torch.stack((torch.ones_like(j), j, k, l, m))
#yolov5不仅用目标中心点所在的网格预测该目标,还采用了距目标中心点的最近两个网格
#所以有五种情况,网格本身,上下左右,这就是repeat函数第一个参数为5的原因
#用图来表示下吧
#对t复制5份,即本身点外加上下左右四个候选区共五个区域,再从中选出三份,具体选出哪三份?由torch.stack后的j决定:第一项是torch.ones_like,即全1矩阵,说明本身是必选中状态的。剩下的4项中,由于gxi是用特征图尺寸减去gxy得到的(inverse操作),所以j和l、k和m一般是两两互斥的,每个方向上只会选中一个,这样通常每个目标选出三项(中心点恰好落在网格线上、网格正中或靠近特征图边界时,选出的数量会有出入)。到现在为止,还并没有产生偏移,后面的offset是从off中取出与t中每一项相对应的偏移量。
t = t.repeat((5, 1, 1))[j]
yolov5 build_targets()
最新推荐文章于 2023-12-29 17:05:13 发布