This function extracts the region of each person detection box from the image, scales the box, and finally maps the cropped region onto a (320, 256, 3) image.
When stepping through this code to inspect variables, DetectionLoader is running concurrently as a separate thread. Add a time.sleep(200000) at the end of DetectionLoader's update() so that thread pauses for 200000 seconds; otherwise DetectionLoader keeps executing in its own thread, and the video frames, person detection boxes, and other values read in DetectionProcessor keep changing under the debugger.
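A minimal sketch of that debugging hack (the real DetectionLoader.update() lives in AlphaPose's dataloader module; the loop body here is just a stand-in):

import threading
import time

def loader_update(datalen):
    # stand-in for DetectionLoader.update(): detect persons in each frame
    for i in range(datalen):
        pass  # YOLO detection and Q.put(...) would happen here
    # park the thread so values inspected in the debugger stay frozen
    time.sleep(200000)

threading.Thread(target=loader_update, args=(403,), daemon=True).start()

With the loader parked, the DetectionProcessor.update() below can be stepped through with stable values: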
def update(self):
    # keep looping the whole dataset
    for i in range(self.datalen):
        with torch.no_grad():
            (orig_img, im_name, boxes, scores, inps, pt1, pt2) = \
                self.detectionLoader.read()
            if orig_img is None:
                self.Q.put((None, None, None, None, None, None, None))
                return
            if boxes is None or boxes.nelement() == 0:
                while self.Q.full():
                    time.sleep(0.2)
                self.Q.put((None, orig_img, im_name, boxes, scores, None, None))
                continue
Here self.datalen = 403. Once inside the for loop, the code first reads the video frame, the person boxes detected by YOLO, and the other values passed over from detectionLoader:
orig_img.shape = (1280, 720, 3), the original video frame
im_name = '0.jpg'
boxes = tensor([ [369.7649, 227.6336, 540.5123, 595.2694],
                 [595.5975, 250.7920, 716.7391, 601.9680] ])
scores = tensor([ [0.8985],
                  [0.8481] ])
inps = torch.zeros(2, 3, 320, 256)
pt1 = tensor([ [0., 0.],
               [0., 0.] ])
pt2 = tensor([ [0., 0.],
               [0., 0.] ])
orig_img is not None, so the first if branch is skipped; boxes is not empty either, so the second if branch is skipped as well.
inp = im_to_torch(cv2.cvtColor(orig_img, cv2.COLOR_BGR2RGB))
# cv2.cvtColor(p1, p2) is the color-space conversion function: p1 is the image to convert, p2 the target format.
# cv2.COLOR_BGR2RGB converts BGR to RGB.
cv2.cvtColor() converts orig_img from BGR to RGB, and the result is passed to im_to_torch().
Stepping into im_to_torch():
def im_to_torch(img):
    img = np.array(img)
    img = np.transpose(img, (2, 0, 1))  # reorder the channels from HWC to CHW
    img = to_torch(img).float()         # convert the image to a tensor and cast its data to float
    if img.max() > 1:                   # if pixel values exceed 1, normalize the image
        img /= 255
    return img
This function reorders img's channels, converts it to a tensor, casts its data to float, and finally normalizes the pixel values to the range [0, 1].
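A quick self-contained check of that behavior; to_torch is assumed here to be a thin wrapper around torch.from_numpy, as in the AlphaPose utilities:

import numpy as np
import torch

def to_torch(ndarray):
    # assumed helper: AlphaPose's to_torch essentially wraps torch.from_numpy
    return torch.from_numpy(ndarray)

def im_to_torch(img):
    img = np.array(img)
    img = np.transpose(img, (2, 0, 1))  # HWC -> CHW
    img = to_torch(img).float()
    if img.max() > 1:
        img /= 255
    return img

frame = np.random.randint(0, 256, (1280, 720, 3), dtype=np.uint8)
inp = im_to_torch(frame)
print(inp.shape)              # torch.Size([3, 1280, 720])
print(float(inp.max()) <= 1)  # True: pixel values normalized to [0, 1]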
inps, pt1, pt2 = crop_from_dets(inp, boxes, inps, pt1, pt2)
Next, crop_from_dets() is called. Stepping into crop_from_dets():
def crop_from_dets(img, boxes, inps, pt1, pt2):
    # Crop humans from the original image according to the detection results
    imght = img.size(1)
    imgwidth = img.size(2)
    tmp_img = img
    tmp_img[0].add_(-0.406)
    tmp_img[1].add_(-0.457)
    tmp_img[2].add_(-0.480)
imght = 1280
imgwidth = 720
tmp_img.shape = torch.Size([3, 1280, 720]); this is the image after the channel means have been subtracted
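One subtlety worth noting: tmp_img = img binds a second name to the same tensor rather than copying it, so the in-place add_() calls also modify the inp tensor built above. A minimal demonstration:

import torch

img = torch.rand(3, 1280, 720)
before = img[0].mean().item()
tmp_img = img            # alias, not a copy
tmp_img[0].add_(-0.406)  # in-place: shifts img's first channel as well
print(img[0].mean().item() - before)  # ≈ -0.406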
    for i, box in enumerate(boxes):
        upLeft = torch.Tensor((float(box[0]), float(box[1])))
        bottomRight = torch.Tensor((float(box[2]), float(box[3])))
        ht = bottomRight[1] - upLeft[1]
        width = bottomRight[0] - upLeft[0]
Entering the loop with i = 0 and box = tensor([369.7649, 227.6336, 540.5123, 595.2694]):
upLeft = tensor([369.7649, 227.6336])
bottomRight = tensor([540.5123, 595.2694])
ht = tensor(367.6359), the height of the person box
width = tensor(170.7475), the width of the person box
        scaleRate = 0.3
        upLeft[0] = max(0, upLeft[0] - width * scaleRate / 2)
        upLeft[1] = max(0, upLeft[1] - ht * scaleRate / 2)
upLeft[0] = max(0, 369.7649 - 170.7475 * 0.3 / 2) = 344.152775
upLeft[1] = max(0, 227.6336 - 367.6359 * 0.3 / 2) = 172.488215
so now upLeft = tensor([344.1527, 172.4882])
        bottomRight[0] = max(
            min(imgwidth - 1, bottomRight[0] + width * scaleRate / 2), upLeft[0] + 5)
        bottomRight[1] = max(
            min(imght - 1, bottomRight[1] + ht * scaleRate / 2), upLeft[1] + 5)
bottomRight[0] = max(min(720 - 1, 540.5123 + 170.7475 * 0.3 / 2), 344.152775 + 5) = 566.124425
bottomRight[1] = max(min(1280 - 1, 595.2694 + 367.6359 * 0.3 / 2), 172.488215 + 5) = 650.414785
so now bottomRight = tensor([566.1245, 650.4148])
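The expansion can be replayed in isolation to double-check these numbers (a standalone sketch using this frame's first box):

import torch

box = torch.tensor([369.7649, 227.6336, 540.5123, 595.2694])
imght, imgwidth, scaleRate = 1280, 720, 0.3

upLeft = box[:2].clone()
bottomRight = box[2:].clone()
width = bottomRight[0] - upLeft[0]
ht = bottomRight[1] - upLeft[1]

# grow the box by scaleRate, clamped to the image and to a 5-pixel minimum size
upLeft[0] = max(0, upLeft[0] - width * scaleRate / 2)
upLeft[1] = max(0, upLeft[1] - ht * scaleRate / 2)
bottomRight[0] = max(min(imgwidth - 1, bottomRight[0] + width * scaleRate / 2),
                     upLeft[0] + 5)
bottomRight[1] = max(min(imght - 1, bottomRight[1] + ht * scaleRate / 2),
                     upLeft[1] + 5)

print(upLeft)       # tensor([344.1528, 172.4882])
print(bottomRight)  # tensor([566.1244, 650.4148])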
        try:
            inps[i] = cropBox(tmp_img.clone(), upLeft, bottomRight,
                              opt.inputResH, opt.inputResW)
        except IndexError:
            print(tmp_img.shape)
            print(upLeft)
            print(bottomRight)
            print('===')
        pt1[i] = upLeft        # top-left corner of the box
        pt2[i] = bottomRight   # bottom-right corner of the box
cropBox() is called here; it extracts the person-box region from the image and maps it onto a (320, 256, 3) image.
Stepping into cropBox():
def cropBox(img, ul, br, resH, resW):
    ul = ul.int()
    br = (br - 1).int()
    # br = br.int()
    lenH = max((br[1] - ul[1]).item(), (br[0] - ul[0]).item() * resH / resW)
    lenW = lenH * resW / resH
    if img.dim() == 2:
        img = img[np.newaxis, :]
img.shape = torch.Size([3, 1280, 720])
ul = tensor([344.1527, 172.4882]), the top-left corner of the expanded person box
br = tensor([566.1245, 650.4148]), the bottom-right corner of the expanded person box
resH = 320
resW = 256
After ul = ul.int(): ul = tensor([344, 172], dtype=torch.int32)
After br = (br - 1).int(): br = tensor([565, 649], dtype=torch.int32)
lenH = max(649 - 172, (565 - 344) * 320 / 256) = max(477, 276.25) = 477
lenW = lenH * resW / resH = 477 * 256 / 320 = 381.6
If img were 2-dimensional, the if branch would add a channel dimension; here img has 3 dimensions, so the branch is skipped.
    box_shape = [(br[1] - ul[1]).item(), (br[0] - ul[0]).item()]
    pad_size = [(lenH - box_shape[0]) // 2, (lenW - box_shape[1]) // 2]
box_shape = [649 - 172, 565 - 344] = [477, 221]
pad_size = [(477 - 477) // 2, (381.6 - 221) // 2] = [0, 80.0]
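Replaying this arithmetic in plain Python confirms the values:

ul = [344, 172]
br = [565, 649]
resH, resW = 320, 256

# the crop window keeps the box's aspect ratio at resH:resW, padding the short side
lenH = max(br[1] - ul[1], (br[0] - ul[0]) * resH / resW)
lenW = lenH * resW / resH
box_shape = [br[1] - ul[1], br[0] - ul[0]]
pad_size = [(lenH - box_shape[0]) // 2, (lenW - box_shape[1]) // 2]

print(lenH, lenW)  # 477 381.6
print(box_shape)   # [477, 221]
print(pad_size)    # [0, 80.0]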
    if ul[1] > 0:
        img[:, :ul[1], :] = 0
    if ul[0] > 0:
        img[:, :, :ul[0]] = 0
    if br[1] < img.shape[1] - 1:
        img[:, br[1] + 1:, :] = 0
    if br[0] < img.shape[2] - 1:
        img[:, :, br[0] + 1:] = 0
These four if statements zero out every pixel of img outside the person box:
ul[1] = 172 > 0, so img[:, :172, :] = 0
ul[0] = 344 > 0, so img[:, :, :344] = 0
br[1] = 649 < 1280 - 1, so img[:, 649 + 1:, :] = 0
br[0] = 565 < 720 - 1, so img[:, :, 565 + 1:] = 0
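The effect of the four slices can be checked on a dummy tensor; only the 478 x 222 pixel window of the box survives:

import torch

img = torch.ones(3, 1280, 720)
ul, br = (344, 172), (565, 649)

img[:, :ul[1], :] = 0      # rows above the box
img[:, :, :ul[0]] = 0      # columns left of the box
img[:, br[1] + 1:, :] = 0  # rows below the box
img[:, :, br[0] + 1:] = 0  # columns right of the box

print(int(img.sum()))  # 3 * 478 * 222 = 318348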
    src = np.zeros((3, 2), dtype=np.float32)
    dst = np.zeros((3, 2), dtype=np.float32)
Initially src = [[0. 0.], [0. 0.], [0. 0.]] and dst = [[0. 0.], [0. 0.], [0. 0.]].
    src[0, :] = np.array([ul[0] - pad_size[1], ul[1] - pad_size[0]], np.float32)
    src[1, :] = np.array([br[0] + pad_size[1], br[1] + pad_size[0]], np.float32)
    dst[0, :] = 0
    dst[1, :] = np.array([resW - 1, resH - 1], np.float32)
src[0, :] = np.array([344 - 80, 172 - 0], np.float32)
src[1, :] = np.array([565 + 80, 649 + 0], np.float32)
so now src = [[264. 172.], [645. 649.], [0. 0.]]
dst[0, :] = 0
dst[1, :] = np.array([256 - 1, 320 - 1], np.float32)
so now dst = [[0. 0.], [255. 319.], [0. 0.]]
    src[2:, :] = get_3rd_point(src[0, :], src[1, :])
    dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :])
This calls get_3rd_point():
def get_3rd_point(a, b):
    direct = a - b
    return b + np.array([-direct[1], direct[0]], dtype=np.float32)
For src[2:, :] = get_3rd_point([264. 172.], [645. 649.]), stepping into get_3rd_point():
direct = a - b = [264. 172.] - [645. 649.] = [-381. -477.]
return [645. 649.] + np.array([477., -381.], dtype=np.float32) = [1122. 268.]
After both get_3rd_point() calls:
src = [ [ 264. 172.], [ 645. 649.], [1122. 268.] ]
dst = [ [ 0. 0.], [255. 319.], [574. 64.] ]
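get_3rd_point() completes the triangle needed by getAffineTransform by rotating the vector a - b by 90 degrees about b; both results above can be reproduced directly:

import numpy as np

def get_3rd_point(a, b):
    direct = a - b
    return b + np.array([-direct[1], direct[0]], dtype=np.float32)

src_a = np.array([264., 172.], dtype=np.float32)
src_b = np.array([645., 649.], dtype=np.float32)
print(get_3rd_point(src_a, src_b))  # [1122.  268.]

dst_a = np.array([0., 0.], dtype=np.float32)
dst_b = np.array([255., 319.], dtype=np.float32)
print(get_3rd_point(dst_a, dst_b))  # [574.  64.]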
    trans = cv2.getAffineTransform(np.float32(src), np.float32(dst))  # build the affine transformation matrix
    dst_img = cv2.warpAffine(torch_to_im(img), trans, (resW, resH),   # apply the affine transformation
                             flags=cv2.INTER_LINEAR)
    return im_to_torch(torch.Tensor(dst_img))
An affine transformation (also called an affine map) is, in geometry, a linear transformation followed by a translation, mapping one vector space into another. It is described by a matrix M, which is usually hard to write down directly; OpenCV therefore provides M = cv2.getAffineTransform(src, dst), which solves for M automatically from the correspondence of three points before and after the transformation.
trans = [[ 6.68968848e-01  2.57586734e-04 -1.76652081e+02], [-2.57586734e-04  6.68968848e-01 -1.14994639e+02]], the affine transformation matrix
dst_img.shape = (320, 256, 3)
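The matrix and the output size can both be reproduced from the three point pairs computed above (a random float image stands in for the masked frame):

import cv2
import numpy as np

src = np.array([[264., 172.], [645., 649.], [1122., 268.]], dtype=np.float32)
dst = np.array([[0., 0.], [255., 319.], [574., 64.]], dtype=np.float32)

trans = cv2.getAffineTransform(src, dst)
print(trans)  # ≈ [[ 6.6897e-01  2.5759e-04 -1.7665e+02]
              #    [-2.5759e-04  6.6897e-01 -1.1499e+02]]

frame = np.random.rand(1280, 720, 3).astype(np.float32)  # stand-in HWC image
dst_img = cv2.warpAffine(frame, trans, (256, 320), flags=cv2.INTER_LINEAR)
print(dst_img.shape)  # (320, 256, 3): warpAffine takes dsize as (width, height)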
After cropBox() returns (i = 0):
pt1 = tensor([ [344.1527, 172.4882],
               [  0.0000,   0.0000] ])
pt2 = tensor([ [566.1245, 650.4148],
               [  0.0000,   0.0000] ])
The i = 1 iteration processes the second box of boxes in the same way. After the for loop completes:
pt1 = tensor([ [344.1527, 172.4882],
               [577.4262, 198.1156] ])
pt2 = tensor([ [566.1245, 650.4148],
               [719.0000, 654.6444] ])
    return inps, pt1, pt2
crop_from_dets() returns the (320, 256, 3) crops of the detected persons, each box's top-left coordinates in the original image, and each box's bottom-right coordinates in the original image.
Back in update(), after crop_from_dets() has finished:
            while self.Q.full():
                time.sleep(0.2)
            self.Q.put((inps, orig_img, im_name, boxes, scores, pt1, pt2))
While the queue is full, the thread sleeps 0.2 seconds at a time; once there is room, it puts the tuple ((320, 256, 3) person crops, original frame, image name, person detection boxes, confidence scores, top-left corners of the boxes in the original frame, bottom-right corners of the boxes in the original frame) into the queue.