AlphaPose源代码学习之DetectionLoader中的update()函数

最新推荐文章于 2024-08-06 11:12:22 发布

想成为书呆子

最新推荐文章于 2024-08-06 11:12:22 发布

阅读量2.9k

点赞数 1

本文链接：https://blog.csdn.net/Ji_HON/article/details/121677846

版权

关键词由CSDN通过智能技术生成

该函数调用YOLO检测模型检测出人员目标

def update(self):
    """Run person detection batch by batch and enqueue results on ``self.Q``.

    Every item put on the queue is a 7-tuple. When the loader returns no
    image, a tuple of seven ``None`` values is enqueued and the method
    returns.
    """
    for i in range(self.num_batches):
        # Fetch the next batch: network input tensor, original image(s),
        # image name(s) and the original image dimensions.
        img, orig_img, im_name, im_dim_list = self.dataloder.getitem()
        if img is None:
            # No image came back (presumably the loader is exhausted):
            # enqueue an all-None sentinel and stop.
            self.Q.put((None, None, None, None, None, None, None))
            return

进入函数后，依次输出调试中各变量的值

self.num_batches=403
img.shape=torch.Size([1, 3, 608, 608])

orig_img为一个list，len(orig_img)=1，list中的元素为一幅图像，查看该图像的维度得到orig_img[0].shape=(1280, 720, 3)

im_name=['0.jpg']
im_dim_list=tensor([ [ 720., 1280., 720., 1280.] ])

img不为None，因此不进入if语句

    # Inference only: gradient tracking is disabled inside this block.
    with torch.no_grad():
        # Human detection: move the batch to the GPU and run the detector.
        img = img.cuda()
        prediction = self.det_model(img, CUDA=True)
        # NMS step. dynamic_write_results is defined elsewhere; it is given
        # the confidence threshold, the class count and the NMS threshold.
        dets = dynamic_write_results(prediction, opt.confidence, opt.num_classes,
                                             nms=True, nms_conf=opt.nms_thesh)
        # An int result or a tensor with zero rows is treated as "nothing
        # detected": every image of the batch is enqueued with None in place
        # of boxes, scores, inps, pt1 and pt2, then the batch is skipped.
        if isinstance(dets, int) or dets.shape[0] == 0:
            for k in range(len(orig_img)):
                if self.Q.full():
                    # Queue is full: pause 2 seconds before calling put().
                    time.sleep(2)
                self.Q.put((orig_img[k], im_name[k], None, None, None, None, None))
            continue

使用YOLO检测模型检测图片中的人员

该det_model的定义位于DetectionLoader中的__init__初始化函数中

模型的预测输出prediction shape= torch.Size([1, 22743, 85])

这里调用dynamic_write_results()函数

opt.confidence=0.05
opt.num_classes=80
nms_conf=opt.nms_thesh=0.6

调用函数后dets=tensor([ [ 0.0000, 308.6383, 108.1259, 389.7433, 282.7530, 0.8985, 0.9994,
0.0000],
[ 0.0000, 415.9088, 119.1262, 473.4511, 285.9348, 0.8481, 0.9996,
0.0000]], device='cuda:0')

dets是包含2个检测结果的张量（既不是int，dets.shape[0]也不为0），因此不进入if判断语句

# Continue the post-processing on the CPU.
dets = dets.cpu()
# Column 0 of dets is used as the index of the image within the batch:
# gather one row of image dimensions per detection.
im_dim_list = torch.index_select(im_dim_list,0, dets[:, 0].long())
# Per-detection scale factor: the smallest ratio of the detector input size
# to the image dimensions, reshaped to a column vector.
scaling_factor = torch.min(self.det_inp_dim / im_dim_list, 1)[0].view(-1, 1)

dets=tensor([ [ 0.0000, 308.6383, 108.1259, 389.7433, 282.7530, 0.8985, 0.9994, 0.0000],
[ 0.0000, 415.9088, 119.1262, 473.4511, 285.9348, 0.8481, 0.9996, 0.0000] ])

dets[:, 0].long()=tensor([0, 0])

torch.index_select(input, dim, index, out=None) 函数沿输入张量input的指定维度dim，按索引张量index给出的索引号依次选取对应的条目，并返回由这些条目组成的新张量；out为可选的输出张量。这里index=tensor([0, 0])，即把im_dim_list的第0行取了两次，使每个检测框都对应一行图像尺寸

im_dim_list=tensor([ [ 720., 1280., 720., 1280.],
[ 720., 1280., 720., 1280.] ])

self.det_inp_dim=608

self.det_inp_dim / im_dim_list=tensor([ [0.8444, 0.4750, 0.8444, 0.4750],
[0.8444, 0.4750, 0.8444, 0.4750] ])

torch.min(self.det_inp_dim / im_dim_list, 1)=torch.return_types.min(
values=tensor([0.4750, 0.4750]),
indices=tensor([1, 1]))

torch.min(self.det_inp_dim / im_dim_list, 1)[0]=tensor([0.4750, 0.4750])

torch.min(self.det_inp_dim / im_dim_list, 1)[0].view(-1, 1)=tensor([ [0.4750],
[0.4750] ])

scaling_factor=tensor([ [0.4750],
[0.4750] ])

# Columns 1 and 3: subtract half of the detector input size that is left
# over after scaling im_dim_list[:, 0] (720 in the article's example,
# presumably the image width).
dets[:, [1, 3]] -= (self.det_inp_dim - 
                    scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2
# Columns 2 and 4: same correction using im_dim_list[:, 1] (1280 in the
# article's example, presumably the image height).
dets[:, [2, 4]] -= (self.det_inp_dim - 
                    scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2
# Divide the four box coordinates by the scale factor to express them in
# original-image pixels.
dets[:, 1:5] /= scaling_factor

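这里对中间过程进行了拆解，可以看到中间的变量值。注意：im_dim_list[:, 0]的形状为(2,)，直接与形状为(2, 1)的scaling_factor相乘会被广播成(2, 2)的结果（见下面第一个输出），因此代码中先用.view(-1, 1)把它变成形状为(2, 1)的列向量再相乘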

scaling_factor * im_dim_list[:, 0]=tensor([ [342., 342.],
[342., 342.] ])
scaling_factor * im_dim_list[:, 0].view(-1, 1)=tensor([ [342.],
[342.] ])
self.det_inp_dim - scaling_factor * im_dim_list[:, 0].view(-1, 1)=tensor([ [266.],
[266.] ])
(self.det_inp_dim - scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2=tensor([ [133.],
[133.] ])
dets[:, [1, 3]]=tensor([ [175.6383, 256.7433],
[282.9088, 340.4511] ])

得dets=tensor([ [ 0.0000, 175.6383, 108.1259, 256.7433, 282.7530, 0.8985, 0.9994, 0.0000],
[ 0.0000, 282.9088, 119.1262, 340.4511, 285.9348, 0.8481, 0.9996, 0.0000] ])

(self.det_inp_dim - scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2=tensor([ [0.],
[0.] ])
dets[:, [2, 4]]=tensor([ [108.1259, 282.7530],
[119.1262, 285.9348]])

得dets=tensor([ [ 0.0000, 175.6383, 108.1259, 256.7433, 282.7530, 0.8985, 0.9994, 0.0000],
[ 0.0000, 282.9088, 119.1262, 340.4511, 285.9348, 0.8481, 0.9996, 0.0000] ])

dets[:, 1:5]/scaling_factor=tensor([ [369.7649, 227.6335, 540.5122, 595.2695],
[595.5975, 250.7920, 716.7392, 601.9680] ])
dets=tensor([ [ 0.0000, 369.7649, 227.6336, 540.5123, 595.2694, 0.8985, 0.9994, 0.0000],
[ 0.0000, 595.5975, 250.7920, 716.7391, 601.9680, 0.8481, 0.9996, 0.0000] ])

# Clamp each detection to the bounds of its own image.
for j in range(dets.shape[0]):
    # Columns 1 and 3 are limited to [0, im_dim_list[j, 0]].
    dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0, im_dim_list[j, 0])
    # Columns 2 and 4 are limited to [0, im_dim_list[j, 1]].
    dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0, im_dim_list[j, 1])
# Columns 1-4 hold the box coordinates.
boxes = dets[:, 1:5]
# Column 5 is taken as the box score; the 5:6 slice keeps it two-dimensional.
scores = dets[:, 5:6]

进入for循环，dets.shape[0]=2，j=0时

im_dim_list[j, 0]=tensor(720.)
dets[j, [1, 3]]=tensor([369.7649, 540.5123])

torch.clamp(input, min, max, out=None)将输入张量input每个元素的范围限制到区间 [min,max]，返回结果到一个新张量。

torch.clamp(dets[j, [1, 3]], 0.0, im_dim_list[j, 0])=tensor([369.7649, 540.5123])
torch.clamp(dets[j, [2, 4]], 0.0, im_dim_list[j, 1])=tensor([227.6336, 595.2694])

循环结束后

dets=tensor([ [ 0.0000, 369.7649, 227.6336, 540.5123, 595.2694, 0.8985, 0.9994, 0.0000],
[ 0.0000, 595.5975, 250.7920, 716.7391, 601.9680, 0.8481, 0.9996, 0.0000]])
boxes=tensor([ [369.7649, 227.6336, 540.5123, 595.2694],
[595.5975, 250.7920, 716.7391, 601.9680] ])
scores=tensor([ [0.8985],
[0.8481] ])

# Emit one queue item per image in the batch.
for k in range(len(orig_img)):
    # Boxes whose image index (column 0 of dets) equals k.
    boxes_k = boxes[dets[:,0]==k]
    if isinstance(boxes_k, int) or boxes_k.shape[0] == 0:
        # No box for this image: enqueue it with None placeholders.
        if self.Q.full():
            time.sleep(2)
        self.Q.put((orig_img[k], im_name[k], None, None, None, None, None))
        continue
    # Zero-filled buffers with one entry per box. According to the article
    # they are filled later with the cropped box images (inps) and with the
    # top-left (pt1) and bottom-right (pt2) box corners in the original image.
    inps = torch.zeros(boxes_k.size(0), 3, opt.inputResH, opt.inputResW)
    pt1 = torch.zeros(boxes_k.size(0), 2)
    pt2 = torch.zeros(boxes_k.size(0), 2)
    if self.Q.full():
        # Queue is full: pause 2 seconds before calling put().
        time.sleep(2)
    # Queue item: (image, name, boxes, scores, inps, pt1, pt2).
    self.Q.put((orig_img[k],im_name[k],boxes_k,scores[dets[:,0]==k],inps,pt1,pt2))

进入for循环，len(orig_img)=1

boxes_k=tensor([ [369.7649, 227.6336, 540.5123, 595.2694],
[595.5975, 250.7920, 716.7391, 601.9680] ])

boxes_k中有2个目标框（boxes_k.shape[0]=2），因此不进入if语句

opt.inputResH=320, opt.inputResW=256
inps = torch.zeros(2, 3, 320, 256)
pt1=tensor([ [0., 0.],
[0., 0.] ])
pt2=tensor([ [0., 0.],
[0., 0.] ])

最后将（原图像，图像名，图像中的人员目标框，目标框的置信度，inps，pt1，pt2）放入队列Q中；inps，pt1，pt2在后续处理中分别用于存放（人员目标框区域图像，人员目标框在原图像中的左上角坐标，人员目标框在原图像中的右下角坐标）