MTCNN对R网络的测试

简易版:

# NOTE(review): "simple version" fragment — the enclosing `def` header is
# missing from this excerpt (the indented lines and the trailing `return`
# imply a function body). Presumably takes `img` (PIL image), `boxs`
# (P-net candidate boxes) and `net` (R network) — confirm in the full file.
cpimg = img
    imgdraw = ImageDraw.Draw(img)
    # Crop each candidate box, resize to the R-net input size (24x24),
    # and convert to CHW layout for torch.
    imglist = []
    for box in boxs:
        cropimg = cpimg.crop((box[0],box[1],box[2],box[3]))
        cropimg = cropimg.resize((24,24),Image.ANTIALIAS)
        cropimg = np.array(cropimg)
        # HWC -> CHW
        cropimg = np.transpose(cropimg,[2,0,1])
        imglist.append(cropimg)
    imglist = np.array(imglist)
    imglist = torch.FloatTensor(imglist)
    if torch.cuda.is_available():
        imglist = imglist.cuda()
    # Forward pass: per-box face confidence and box-regression offsets.
    confidence,offset = net(imglist)
    confidence = confidence.cpu().data.numpy()
    offset = offset.cpu().data.numpy()
    # Squeeze trailing 1x1 spatial dims: confidence -> (N,), offset -> (N,4).
    confidence = confidence[:,0,0,0]
    offset = offset[:,:,0,0]
    # Keep only boxes whose confidence clears the 0.7 threshold.
    indexs = np.where(confidence > 0.7)
    conf = confidence[indexs]
    # NOTE(review): `off` (and `conf`) are computed but never applied below —
    # this "simple" version returns the un-regressed P-net boxes after NMS.
    off = offset[indexs]
    boxs = np.array(boxs)
    bboxs = boxs[indexs]
    # Non-maximum suppression ("UNIU" mode — see NmsDo for its semantics).
    bboxs = NmsDo(bboxs,"UNIU")
    print(len(bboxs))
    for box in bboxs:
        imgdraw.rectangle((box[0],box[1],box[2],box[3]))
    img.show()
    return bboxs
def R(net, img, box):
    """Run the MTCNN R-net refinement stage over P-net candidate boxes.

    Args:
        net: trained R network; called as ``net(batch)`` and expected to
            return ``(confidence, offset)`` tensors.
        img: original PIL image the candidate boxes refer to.
        box: iterable of candidate boxes ``[x1, y1, x2, y2, ...]`` from P-net.

    Returns:
        List of refined boxes ``[x1, y1, x2, y2, conf]`` surviving NMS.
        Side effect: draws the surviving boxes on ``img`` and shows it.
    """
    # Boxes kept by the R network.
    rboxs = []
    boxs = np.array(box)
    # Nothing to refine — bail out early.
    if boxs.shape[0] == 0:
        return []
    # Corner coordinates of all candidate boxes.
    x1 = boxs[:, 0]
    y1 = boxs[:, 1]
    x2 = boxs[:, 2]
    y2 = boxs[:, 3]

    # R-net takes 24x24 squares; resizing a non-square crop directly would
    # distort the face, so expand each box to a square around its center.
    w = x2 - x1
    h = y2 - y1
    cx = x1 + w / 2
    cy = y1 + h / 2
    # Use the longer side so the square still contains the original box.
    side = np.maximum(w, h)
    _x1 = cx - side / 2
    _y1 = cy - side / 2
    _x2 = _x1 + side
    _y2 = _y1 + side
    # One (N, 4) array of square boxes.
    new_boxs = np.stack([_x1, _y1, _x2, _y2], axis=1)

    # Crop, resize and normalize each square; collect CHW float arrays.
    imglist = []
    for b in new_boxs:
        cropimg = img.crop((b[0], b[1], b[2], b[3]))
        imgdata = cropimg.resize((24, 24), Image.ANTIALIAS)
        imgdata = np.array(imgdata, dtype=np.float32) / 255
        imgdata = np.transpose(imgdata, [2, 0, 1])  # HWC -> CHW
        imglist.append(imgdata)

    # BUG FIX: the whole batch must go to the net — the original converted
    # ``imgdata`` (only the last crop, with no batch dimension) here.
    imgdata = np.array(imglist)
    imgdata = torch.FloatTensor(imgdata)
    if torch.cuda.is_available():
        imgdata = imgdata.cuda()
    confidence, offset = net(imgdata)

    # Flatten to (N,) confidences and (N, 4) offsets, tolerating trailing
    # 1x1 spatial dims.  BUG FIX: also use ``.data`` before ``.numpy()`` for
    # the offsets, consistent with the rest of the file.
    confidence = confidence.cpu().data.numpy().reshape(-1)
    offset = offset.cpu().data.numpy().reshape(-1, 4)

    # BUG FIX: ``np.where`` returns a tuple; the original indexed it with
    # ``indexs[0, :]`` which raises TypeError.  Take the index array itself.
    keep = np.where(confidence > 0.7)[0]

    # Square (base) boxes that passed the threshold.
    _x1 = new_boxs[keep, 0]
    _y1 = new_boxs[keep, 1]
    _x2 = new_boxs[keep, 2]
    _y2 = new_boxs[keep, 3]
    # Per-corner regression offsets, expressed relative to the base side.
    offset_x1 = offset[keep, 0]
    offset_y1 = offset[keep, 1]
    offset_x2 = offset[keep, 2]
    offset_y2 = offset[keep, 3]
    # Decode: real corner = base corner + offset * base side length.
    w = _x2 - _x1
    h = _y2 - _y1
    x1 = _x1 + offset_x1 * w
    y1 = _y1 + offset_y1 * h
    x2 = _x2 + offset_x2 * w
    y2 = _y2 + offset_y2 * h
    # BUG FIX: confidence was indexed as ``confidence[0, indexs[0,:]]``;
    # after flattening, a plain gather is correct.
    conf = confidence[keep]
    rboxs.extend(np.stack([x1, y1, x2, y2, conf], axis=1))
    # NMS over the decoded boxes ("UNIUM" mode — see NmsDo).
    boxlist = NmsDo(rboxs, "UNIUM")
    imgdraw = ImageDraw.Draw(img)
    for b in boxlist:
        imgdraw.rectangle((b[0], b[1], b[2], b[3]), outline='red')
    img.show()
    return boxlist
def RnetDetect(net, img, boxs, imgshow=False, show_conf=False):
    """Refine P-net candidate boxes with the R network.

    Each candidate is squared up around its center, cropped and resized to
    24x24, batched through ``net``; boxes whose confidence exceeds 0.7 have
    their regression offsets decoded and are then passed through NMS.

    Args:
        net: trained R network returning ``(confidence, offset)``.
        img: original PIL image the boxes refer to.
        boxs: candidate boxes ``[x1, y1, x2, y2, ...]`` from the P network.
        imgshow: when True, draw the surviving boxes on ``img`` and show it.
        show_conf: when True, call ``BoxsAndConfidence`` on the raw outputs.

    Returns:
        List of ``[x1, y1, x2, y2, conf]`` boxes surviving NMS.
    """
    imglist = []
    rboxslist = []
    boxss = np.array(boxs)
    if boxss.shape[0] == 0:
        return []

    # Square each box around its center so the 24x24 resize does not
    # distort the aspect ratio; the square covers the original box.
    x1, y1 = boxss[:, 0], boxss[:, 1]
    x2, y2 = boxss[:, 2], boxss[:, 3]
    w, h = x2 - x1, y2 - y1
    sidelen = np.maximum(w, h)
    cx, cy = x1 + w / 2, y1 + h / 2
    half = sidelen / 2
    _boxs = np.stack([cx - half, cy - half, cx + half, cy + half], axis=1)

    # Crop/resize every square and normalize into CHW float arrays.
    for sq in _boxs:
        patch = img.crop((sq[0], sq[1], sq[2], sq[3]))
        patch = patch.resize((24, 24), Image.ANTIALIAS)
        arr = np.array(patch, dtype=np.float32) / 255
        imglist.append(arr.transpose([2, 0, 1]))  # HWC -> CHW

    # Batch through the R network.
    batch = torch.FloatTensor(np.array(imglist))
    if torch.cuda.is_available():
        batch = batch.cuda()
    confidence, offset = net(batch)
    confidence = confidence.view(-1, 1).cpu().data.numpy()  # N x 1
    offset = offset.view(-1, 4).cpu().data.numpy()          # N x 4
    if show_conf:
        BoxsAndConfidence(img, _boxs, confidence, offset)

    # Unlike P-net's per-pixel map, R-net scores whole boxes, so the
    # threshold selects box indices directly.
    indexs = np.stack(np.where(confidence > 0.7), axis=1)
    if indexs.shape[0] > 0:
        rows = indexs[:, 0]
        base = _boxs[rows]                      # surviving square boxes
        bw = base[:, 2] - base[:, 0]
        bh = base[:, 3] - base[:, 1]
        off = offset[rows]
        conf = confidence[rows, indexs[:, 1]]   # column index is always 0
        # Decode the regression offsets against the square base boxes.
        rx1 = base[:, 0] + off[:, 0] * bw
        ry1 = base[:, 1] + off[:, 1] * bh
        rx2 = base[:, 2] + off[:, 2] * bw
        ry2 = base[:, 3] + off[:, 3] * bh
        rboxslist.extend(np.stack([rx1, ry1, rx2, ry2, conf], axis=1))

    # Non-maximum suppression ("UNIUM" mode — see NmsDo).
    oklist = NmsDo(rboxslist, "UNIUM")
    if imgshow:
        drawer = ImageDraw.Draw(img)
        for b in oklist:
            drawer.rectangle((b[0], b[1], b[2], b[3]), outline="red")
            drawer.text((b[0], b[1]), str(b[4]), "black")
        img.show()
    return oklist

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值