使用 TensorFlow 实现 MTCNN 时遇到 NMS 效果不理想的问题

使用 WIDER FACE 数据集分别训练了 PNet、RNet、ONet,训练阶段没有问题;但在 FDDB 数据集上测试时发现 NMS 没有起到应有的效果,如下图所示:

一开始效果很差;后来连续做了两次 NMS,效果依然不好。





代码如下:
 def detect_pnet(self, im):
        """Get face candidates through pnet.

        The image is resized over a pyramid of scales (each level multiplied
        by ``self.scale_factor``) until its shorter side is no longer larger
        than the 12-pixel pnet input. Depending on ``self.slide_window`` the
        network is run on explicit 12x12 crops or on each whole level.

        Parameters:
        ----------
        im: numpy array
            input image array; its shape is unpacked as (height, width, channels)
        Returns:
        -------
        boxes: numpy array
            detected boxes before calibration, columns [x1, y1, x2, y2, score]
        boxes_c: numpy array
            boxes after calibration
        Both values are None when no candidate is found.
        """
        h, w, _ = im.shape
        net_size = 12
        # Initial scale; presumably chosen so that a face of
        # self.min_face_size pixels spans net_size pixels after resizing.
        current_scale = float(net_size) / self.min_face_size
        im_resized = self.resize_image(im, current_scale)
        current_height, current_width, _ = im_resized.shape

        if self.slide_window:
            # Sliding window: collect every net_size x net_size crop from
            # every pyramid level, then score all crops in one batch.
            temp_rectangles = []    # [x1, y1, x2, y2, score] in original-image pixels
            all_cropped_ims = []
            while min(current_height, current_width) > net_size:
                # Window origins along each axis. When the stride does not
                # divide the span evenly, add one last window flush with the
                # far border so the edge of the image is still covered.
                last_y = current_height - net_size
                current_y_list = list(range(0, last_y + 1, self.stride))
                if last_y % self.stride != 0:
                    current_y_list.append(last_y)

                last_x = current_width - net_size
                current_x_list = list(range(0, last_x + 1, self.stride))
                if last_x % self.stride != 0:
                    current_x_list.append(last_x)

                # Window side in original-image pixels. The horizontal ratio
                # is used for both sides, so every rectangle is square.
                side = int(w * float(net_size) / current_width)
                for current_y in current_y_list:
                    y1 = int(h * float(current_y) / current_height)
                    for current_x in current_x_list:
                        x1 = int(w * float(current_x) / current_width)
                        temp_rectangles.append([x1, y1, x1 + side, y1 + side, 0.0])
                        all_cropped_ims.append(
                            im_resized[current_y:current_y + net_size,
                                       current_x:current_x + net_size, :])

                current_scale *= self.scale_factor
                im_resized = self.resize_image(im, current_scale)
                current_height, current_width, _ = im_resized.shape

            if not all_cropped_ims:
                # The image never exceeded one window, so there is nothing to
                # score; mirror the "no detection" result of the FCN branch.
                return None, None

            all_cropped_ims = np.reshape(
                all_cropped_ims, (len(all_cropped_ims), net_size, net_size, 3))
            cls_scores, reg = self.pnet_detector.predict(all_cropped_ims)
            # NOTE(review): flatten() only lines up with temp_rectangles if
            # predict() returns exactly one score per crop -- confirm.
            cls_scores = cls_scores.flatten()
            keep_inds = np.where(cls_scores > self.thresh[0])[0]
            if len(keep_inds) == 0:
                return None, None

            # np.vstack needs a real sequence: generator input has been
            # deprecated since NumPy 1.16 and is rejected by newer releases.
            boxes = np.vstack([temp_rectangles[ind] for ind in keep_inds])
            boxes[:, 4] = cls_scores[keep_inds]
            reg = reg[keep_inds].reshape(-1, 4)

            keep = py_nms(boxes, 0.5, 'Union')
            boxes = boxes[keep]
            # reg is indexed with the same keep list so that every box is
            # calibrated with its own regression output.
            boxes_c = self.calibrate_box(boxes, reg[keep])

        else:
            # Fully convolutional: run pnet once on each whole pyramid level.
            all_boxes = []
            while min(current_height, current_width) > net_size:
                cls_map, reg = self.pnet_detector.predict(im_resized)
                boxes = self.generate_bbox(cls_map[0, :, :], reg, current_scale, self.thresh[0])

                current_scale *= self.scale_factor
                im_resized = self.resize_image(im, current_scale)
                current_height, current_width, _ = im_resized.shape

                if boxes.size == 0:
                    continue
                # One pass per scale is enough: after greedy NMS at 0.5 every
                # surviving pair overlaps by at most 0.5, so an extra pass
                # with a looser threshold cannot remove anything.
                keep = py_nms(boxes[:, :5], 0.5, 'Union')
                all_boxes.append(boxes[keep])

            if not all_boxes:
                return None, None

            all_boxes = np.vstack(all_boxes)

            # Merge the detections from all scales. Repeating NMS with the
            # same threshold is a no-op, so a single pass is used.
            keep = py_nms(all_boxes[:, 0:5], 0.7, 'Union')
            all_boxes = all_boxes[keep]
            boxes = all_boxes[:, :5]

            bbw = all_boxes[:, 2] - all_boxes[:, 0] + 1
            bbh = all_boxes[:, 3] - all_boxes[:, 1] + 1

            # Refine the boxes: columns 5..8 are applied as offsets relative
            # to the box width (x coordinates) and height (y coordinates).
            boxes_c = np.vstack([all_boxes[:, 0] + all_boxes[:, 5] * bbw,
                                 all_boxes[:, 1] + all_boxes[:, 6] * bbh,
                                 all_boxes[:, 2] + all_boxes[:, 7] * bbw,
                                 all_boxes[:, 3] + all_boxes[:, 8] * bbh,
                                 all_boxes[:, 4]])
            boxes_c = boxes_c.T

        return boxes, boxes_c

    def detect_rnet(self, im, dets):
        """Get face candidates using rnet.

        Parameters:
        ----------
        im: numpy array
            input image array; its shape is unpacked as (height, width, channels)
        dets: numpy array
            detection results of pnet
        Returns:
        -------
        boxes: numpy array
            detected boxes before calibration
        boxes_c: numpy array
            boxes after calibration
        Both values are None when no box scores above ``self.thresh[1]``.
        """
        h, w, _ = im.shape
        dets = self.convert_to_square(dets)
        dets[:, 0:4] = np.round(dets[:, 0:4])

        # pad() supplies, per box, the slice bounds inside the crop buffer
        # (dy/edy, dx/edx), the slice bounds inside the image (y/ey, x/ex)
        # and the crop buffer size (tmpw, tmph).
        [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(dets, w, h)
        num_boxes = dets.shape[0]

        cropped_ims = np.zeros((num_boxes, 24, 24, 3), dtype=np.float32)
        for i in range(num_boxes):
            # Areas of the buffer not covered by the image stay zero.
            tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
            tmp[dy[i]:edy[i]+1, dx[i]:edx[i]+1, :] = im[y[i]:ey[i]+1, x[i]:ex[i]+1, :]
            # NOTE(review): this maps pixels to [0, 2] without centering --
            # confirm it matches the preprocessing used when rnet was trained.
            cropped_ims[i, :, :, :] = cv2.resize(tmp, (24, 24))/127.5

        cls_scores, reg = self.rnet_detector.predict(cropped_ims)
        keep_inds = np.where(cls_scores > self.thresh[1])[0]
        if len(keep_inds) == 0:
            return None, None

        boxes = dets[keep_inds]
        boxes[:, 4] = cls_scores[keep_inds]
        reg = reg[keep_inds]

        keep = py_nms(boxes, 0.5, "Union")
        boxes = boxes[keep]
        # Filter reg together with boxes. The indices returned by the second
        # NMS pass refer to the already-filtered boxes, so indexing the
        # unfiltered reg with them would pair boxes with the wrong offsets.
        reg = reg[keep]

        keep = py_nms(boxes, 0.7, "Minimum")
        boxes = boxes[keep]
        boxes_c = self.calibrate_box(boxes, reg[keep])
        return boxes, boxes_c

    def detect_onet(self, im, dets):
        """Get face candidates using onet.

        Parameters:
        ----------
        im: numpy array
            input image array; its shape is unpacked as (height, width, channels)
        dets: numpy array
            detection results of rnet
        Returns:
        -------
        boxes: numpy array
            detected boxes before calibration, row-aligned with boxes_c
        boxes_c: numpy array
            boxes after calibration
        Both values are None when no box scores above ``self.thresh[2]``.
        """
        h, w, _ = im.shape
        dets = self.convert_to_square(dets)
        dets[:, 0:4] = np.round(dets[:, 0:4])

        # pad() supplies, per box, the slice bounds inside the crop buffer
        # (dy/edy, dx/edx), the slice bounds inside the image (y/ey, x/ex)
        # and the crop buffer size (tmpw, tmph).
        [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(dets, w, h)
        num_boxes = dets.shape[0]

        cropped_ims = np.zeros((num_boxes, 48, 48, 3), dtype=np.float32)
        for i in range(num_boxes):
            # Areas of the buffer not covered by the image stay zero.
            tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
            tmp[dy[i]:edy[i]+1, dx[i]:edx[i]+1, :] = im[y[i]:ey[i]+1, x[i]:ex[i]+1, :]
            # NOTE(review): this maps pixels to [0, 2] without centering --
            # confirm it matches the preprocessing used when onet was trained.
            cropped_ims[i, :, :, :] = cv2.resize(tmp, (48, 48))/127.5

        cls_scores, reg = self.onet_detector.predict(cropped_ims)
        keep_inds = np.where(cls_scores > self.thresh[2])[0]
        if len(keep_inds) == 0:
            return None, None

        boxes = dets[keep_inds]
        boxes[:, 4] = cls_scores[keep_inds]
        reg = reg[keep_inds]

        # Unlike the earlier stages, NMS runs on the calibrated boxes here.
        boxes_c = self.calibrate_box(boxes, reg)

        # Apply every NMS selection to both arrays so that row k of boxes is
        # always the uncalibrated counterpart of row k of boxes_c, as in
        # detect_pnet and detect_rnet.
        keep = py_nms(boxes_c, 0.5, "Union")
        boxes, boxes_c = boxes[keep], boxes_c[keep]
        keep = py_nms(boxes_c, 0.7, "Minimum")
        boxes, boxes_c = boxes[keep], boxes_c[keep]
        return boxes, boxes_c
此处为NMS:
import numpy as np

def py_nms(dets, thresh, mode="Union"):
    """Greedy non-maximum suppression.

    Boxes are visited from the highest score to the lowest. Each visited box
    is kept, and every remaining box whose overlap with it is greater than
    ``thresh`` is discarded.

    Parameters:
    ----------
    dets: numpy array
        one row per box, columns [x1, y1, x2, y2, score]; coordinates are
        treated as inclusive (width is x2 - x1 + 1)
    thresh: float
        boxes with overlap <= thresh survive
    mode: str
        "Union":   overlap = intersection / union (IoU)
        "Minimum": overlap = intersection / area of the smaller box
    Returns:
    -------
    keep: list
        row indices into dets of the kept boxes, highest score first
    Raises:
    ------
    ValueError
        if mode is neither "Union" nor "Minimum"
    """
    # Reject unknown modes up front; otherwise the loop below would fail
    # with an obscure unbound-variable error the first time it runs.
    if mode not in ("Union", "Minimum"):
        raise ValueError(
            "mode must be 'Union' or 'Minimum', got %r" % (mode,))

    x1 = dets[:, 0]
    y1 = dets[:, 1]
    x2 = dets[:, 2]
    y2 = dets[:, 3]
    scores = dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)

    # Indices sorted by descending score.
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        # order[0] is the best-scoring box that has not been suppressed,
        # so it is always kept.
        i = order[0]
        keep.append(i)
        rest = order[1:]

        # Intersection of box i with every remaining box.
        xx1 = np.maximum(x1[i], x1[rest])
        yy1 = np.maximum(y1[i], y1[rest])
        xx2 = np.minimum(x2[i], x2[rest])
        yy2 = np.minimum(y2[i], y2[rest])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h

        if mode == "Union":
            ovr = inter / (areas[i] + areas[rest] - inter)
        else:
            ovr = inter / np.minimum(areas[i], areas[rest])

        # Only boxes that do not overlap box i too much go to the next
        # round; the others are absorbed by box i.
        order = rest[ovr <= thresh]
    return keep



  • 3
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 9
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 9
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值