MTCNN-tensorflow源码解析-gen_hard_example.py生成RNet和ONet的训练数据

最新推荐文章于 2021-08-07 10:17:49 发布

菜鸟知识搬运工

最新推荐文章于 2021-08-07 10:17:49 发布

阅读量1.7k

点赞数 1

分类专栏： MTCNN

本文链接：https://blog.csdn.net/qq_30815237/article/details/95665331

版权

MTCNN 专栏收录该内容

8 篇文章 4 订阅

订阅专栏

进入prepare_data文件夹，打开gen_hard_example.py脚本。该脚本既可用于生成RNet的训练数据，也可用于生成ONet的训练数据，只需把主函数中的 net = 'RNet' 改为 net = 'ONet'。该脚本只生成pos、neg、part三类样本；landmark样本由gen_landmark_aug_24.py和gen_landmark_aug_48.py脚本生成。

gen_hard_example.py代码如下：

#coding:utf-8
import sys
#sys.path.append("../")
from prepare_data.utils import convert_to_square

sys.path.insert(0,'..')
import numpy as np
import argparse
import os
import pickle as pickle
import cv2
from train_models.mtcnn_model import P_Net, R_Net, O_Net
from train_models.MTCNN_config import config
from prepare_data.loader import TestLoader
from Detection.detector import Detector
from Detection.fcn_detector import FcnDetector
from Detection.MtcnnDetector import MtcnnDetector
from utils import *
from prepare_data.data_utils import *
#net : 24(RNet)/48(ONet)
#data: dict()
def save_hard_example(net, data,save_path):
    """Turn saved detections into negative / positive / part training samples.

    Loads ``detections.pkl`` from ``save_path``, pairs each image's detected
    boxes with its ground-truth boxes and, for every usable detection, writes
    a resized crop to disk plus one line to the matching label file.

    Relies on module-level names set by the script entry point:
    ``image_size``, ``neg_dir``, ``pos_dir`` and ``part_dir``.

    :param net: integer formatted with ``%d`` into the ``no_LM<net>`` label
        directory name (the caller in this file passes ``image_size``).
    :param data: dict with ``'images'`` (image paths) and ``'bboxes'``
        (per-image lists of ``[x1, y1, x2, y2]`` ground-truth boxes).
    :param save_path: directory that contains ``detections.pkl``.
    :raises AssertionError: if the number of detection entries differs from
        the number of images.
    """
    im_idx_list = data['images']
    gt_boxes_list = data['bboxes']
    num_of_images = len(im_idx_list)

    print("processing %d images in total" % num_of_images)

    # label files: one text file per sample category
    neg_label_file = "../../DATA/no_LM%d/neg_%d.txt" % (net, image_size)
    pos_label_file = "../../DATA/no_LM%d/pos_%d.txt" % (net, image_size)
    part_label_file = "../../DATA/no_LM%d/part_%d.txt" % (net, image_size)

    # Read the detections before touching the label files, so a missing or
    # broken pickle does not truncate previously generated labels.
    # NOTE(review): pickle must only be used on trusted files; this one is
    # expected to be the file written by t_net.
    with open(os.path.join(save_path, 'detections.pkl'), 'rb') as det_file:
        det_boxes = pickle.load(det_file)
    print(len(det_boxes))
    print(num_of_images)
    assert len(det_boxes) == num_of_images, "incorrect detections or ground truths"

    # running indices of neg, pos and part crops, used as their image names
    n_idx = 0
    p_idx = 0
    d_idx = 0
    image_done = 0
    # `with` guarantees the label files are flushed and closed even if an
    # image fails half-way through.
    with open(neg_label_file, 'w') as neg_file, \
            open(pos_label_file, 'w') as pos_file, \
            open(part_label_file, 'w') as part_file:
        for im_idx, dets, gts in zip(im_idx_list, det_boxes, gt_boxes_list):
            gts = np.array(gts, dtype=np.float32).reshape(-1, 4)
            if image_done % 100 == 0:
                print("%d images done" % image_done)
            image_done += 1

            if dets.shape[0] == 0:
                continue
            img = cv2.imread(im_idx)
            if img is None:
                # cv2.imread returns None for a missing/unreadable file
                print("skip unreadable image: %s" % im_idx)
                continue
            # change to square
            dets = convert_to_square(dets)
            dets[:, 0:4] = np.round(dets[:, 0:4])
            neg_num = 0
            for box in dets:
                x_left, y_top, x_right, y_bottom, _ = box.astype(int)
                width = x_right - x_left + 1
                height = y_bottom - y_top + 1

                # ignore box that is too small or beyond image border
                if width < 20 or x_left < 0 or y_top < 0 or x_right > img.shape[1] - 1 or y_bottom > img.shape[0] - 1:
                    continue

                # IoU between the current box and all gt boxes; an image
                # without any gt box can only yield negatives.
                if gts.shape[0] == 0:
                    Iou = None
                    max_iou = 0.0
                else:
                    Iou = IoU(box, gts)
                    max_iou = np.max(Iou)

                if max_iou < 0.3:
                    if neg_num >= 60:
                        # per-image cap on negatives reached
                        continue
                elif max_iou < 0.4:
                    # between the negative and part thresholds: not used
                    continue

                cropped_im = img[y_top:y_bottom + 1, x_left:x_right + 1, :]
                resized_im = cv2.resize(cropped_im, (image_size, image_size),
                                        interpolation=cv2.INTER_LINEAR)

                if max_iou < 0.3:
                    # negative sample: IoU with every gt box is below 0.3
                    save_file = get_path(neg_dir, "%s.jpg" % n_idx)
                    neg_file.write(save_file + ' 0\n')
                    cv2.imwrite(save_file, resized_im)
                    n_idx += 1
                    neg_num += 1
                else:
                    # gt box with the highest IoU
                    idx = np.argmax(Iou)
                    assigned_gt = gts[idx]
                    x1, y1, x2, y2 = assigned_gt

                    # bbox regression label: gt corner minus detected corner,
                    # normalised by the detected box size
                    offset_x1 = (x1 - x_left) / float(width)
                    offset_y1 = (y1 - y_top) / float(height)
                    offset_x2 = (x2 - x_right) / float(width)
                    offset_y2 = (y2 - y_bottom) / float(height)

                    if max_iou >= 0.65:
                        # positive sample
                        save_file = get_path(pos_dir, "%s.jpg" % p_idx)
                        pos_file.write(save_file + ' 1 %.2f %.2f %.2f %.2f\n' % (
                            offset_x1, offset_y1, offset_x2, offset_y2))
                        cv2.imwrite(save_file, resized_im)
                        p_idx += 1

                    elif max_iou >= 0.4:
                        # part-face sample
                        save_file = os.path.join(part_dir, "%s.jpg" % d_idx)
                        part_file.write(save_file + ' -1 %.2f %.2f %.2f %.2f\n' % (
                            offset_x1, offset_y1, offset_x2, offset_y2))
                        cv2.imwrite(save_file, resized_im)
                        d_idx += 1

def t_net(prefix, epoch,
             batch_size, test_mode="PNet",
             thresh=[0.6, 0.6, 0.7], min_face_size=25,
             stride=2, slide_window=False, shuffle=False, vis=False):
    """Run the trained cascade on the WIDER training set and mine hard examples.

    Loads the detectors required by ``test_mode``, runs them over every image
    listed in the annotation file, pickles the detections and then calls
    ``save_hard_example`` to cut training crops for the next network.

    Relies on the module-level names ``data_dir`` and ``image_size`` set by the
    script entry point.

    :param prefix: checkpoint path prefixes for PNet, RNet and ONet, e.g.
        ``['../data/MTCNN_model/PNet_Landmark/PNet', ...]``.
    :param epoch: epoch number of each checkpoint, e.g. ``[18, 14, 16]``;
        combined with ``prefix`` as ``'<prefix>-<epoch>'``.
    :param batch_size: batch sizes for PNet (sliding window only), RNet and
        ONet, e.g. ``[2048, 256, 16]``.
    :param test_mode: ``"PNet"``, ``"RNet"`` or ``"ONet"``; selects which
        stages are loaded.
    :param thresh: per-stage thresholds handed to ``MtcnnDetector``. The
        default list is never mutated in this function.
    :param min_face_size: minimum face size handed to ``MtcnnDetector``.
    :param stride: stride handed to ``MtcnnDetector``.
    :param slide_window: use the sliding-window ``Detector`` for PNet instead
        of ``FcnDetector``.
    :param shuffle: accepted for compatibility; not used in this function.
    :param vis: accepted for compatibility; not used in this function.
    """
    detectors = [None, None, None]
    print("Test model: ", test_mode)
    # checkpoint names of the form '<prefix>-<epoch>'
    model_path = ['%s-%s' % (x, y) for x, y in zip(prefix, epoch)]
    print(model_path[0])
    # load pnet model
    if slide_window:
        PNet = Detector(P_Net, 12, batch_size[0], model_path[0])
    else:
        PNet = FcnDetector(P_Net, model_path[0])
    detectors[0] = PNet

    # load rnet model
    if test_mode in ["RNet", "ONet"]:
        print("==================================", test_mode)
        RNet = Detector(R_Net, 24, batch_size[1], model_path[1])
        detectors[1] = RNet

    # load onet model
    if test_mode == "ONet":
        print("==================================", test_mode)
        ONet = Detector(O_Net, 48, batch_size[2], model_path[2])
        detectors[2] = ONet

    basedir = '../../DATA/'
    # annotation file
    filename = './wider_face_train_bbx_gt.txt'
    # dict with 'images' (paths) and 'bboxes' (ground-truth boxes per image)
    data = read_annotation(basedir,filename)
    mtcnn_detector = MtcnnDetector(detectors=detectors, min_face_size=min_face_size,
                                   stride=stride, threshold=thresh, slide_window=slide_window)
    print("==================================")
    print('load test data')
    test_data = TestLoader(data['images'])
    print ('finish loading')
    print ('start detecting....')
    detections,_ = mtcnn_detector.detect_face(test_data)
    print ('finish detecting ')
    # Detections of PNet feed RNet training; detections of PNet+RNet feed ONet
    # training. Any other mode falls back to 'RNet', as before.
    save_net = "ONet" if test_mode == "RNet" else "RNet"
    # save detect result
    save_path = os.path.join(data_dir, save_net)
    print ('save_path is :')
    print(save_path)
    # exist_ok avoids the check-then-create race and also creates parents
    os.makedirs(save_path, exist_ok=True)

    save_file = os.path.join(save_path, "detections.pkl")
    with open(save_file, 'wb') as f:
        pickle.dump(detections, f,1)
    print("%s测试完成开始OHEM" % image_size)
    save_hard_example(image_size, data, save_path)

  #命令解析器，定义了一系列的参数，每个参数里面的'help'是该参数的具体描述
def parse_args():
    """Build the command-line parser for the script and parse ``sys.argv``.

    Every option has a default, so the script can be started without
    arguments. List-valued options (``--prefix``, ``--epoch``,
    ``--batch_size``, ``--thresh``) take one value per network in the order
    PNet, RNet, ONet.

    :return: the populated ``argparse.Namespace``.
    """
    parser = argparse.ArgumentParser(
        description='Test mtcnn',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    # (flag, keyword arguments) pairs, registered in this order
    option_specs = [
        ('--test_mode', dict(dest='test_mode',
                             help='test net type, can be pnet, rnet or onet',
                             default='RNet', type=str)),
        ('--prefix', dict(dest='prefix', help='prefix of model name', nargs="+",
                          default=['../data/MTCNN_model/PNet_No_Landmark/PNet',
                                   '../data/MTCNN_model/RNet_No_Landmark/RNet',
                                   '../data/MTCNN_model/ONet_No_Landmark/ONet'],
                          type=str)),
        ('--epoch', dict(dest='epoch', help='epoch number of model to load',
                         nargs="+", default=[18, 14, 16], type=int)),
        ('--batch_size', dict(dest='batch_size',
                              help='list of batch size used in prediction',
                              nargs="+", default=[2048, 256, 16], type=int)),
        ('--thresh', dict(dest='thresh',
                          help='list of thresh for pnet, rnet, onet',
                          nargs="+", default=[0.3, 0.1, 0.7], type=float)),
        ('--min_face', dict(dest='min_face',
                            help='minimum face size for detection',
                            default=20, type=int)),
        ('--stride', dict(dest='stride', help='stride of sliding window',
                          default=2, type=int)),
        # store_true flags: False unless the flag is present on the command line
        ('--sw', dict(dest='slide_window', help='use sliding window in pnet',
                      action='store_true')),
        ('--shuffle', dict(dest='shuffle', help='shuffle data on visualization',
                           action='store_true')),
        ('--vis', dict(dest='vis', help='turn on visualization',
                       action='store_true')),
    ]
    for flag, options in option_specs:
        parser.add_argument(flag, **options)

    return parser.parse_args()


if __name__ == '__main__':
    # Network whose training data is generated: 'RNet' or 'ONet'.
    net = 'ONet'
    # Side length of the crops written for that network.
    if net == "RNet":
        image_size = 24
    elif net == "ONet":
        image_size = 48
    base_dir = '../../DATA/WIDER_train'
    # Output root for this crop size, e.g. ../../DATA/no_LM48
    data_dir = '../../DATA/no_LM%s' % image_size

    neg_dir = get_path(data_dir, 'negative')
    pos_dir = get_path(data_dir, 'positive')
    part_dir = get_path(data_dir, 'part')
    # make sure the three output directories exist
    for dir_path in (neg_dir, pos_dir, part_dir):
        if os.path.exists(dir_path):
            continue
        os.makedirs(dir_path)

    args = parse_args()

    print('Called with argument:')
    print(args)
    t_net(prefix=args.prefix,              # checkpoint path prefixes
          epoch=args.epoch,                # checkpoint epoch numbers
          batch_size=args.batch_size,      # per-network batch sizes
          test_mode=args.test_mode,        # which stages to run: PNet / RNet / ONet
          thresh=args.thresh,              # per-network thresholds
          min_face_size=args.min_face,     # minimum face size
          stride=args.stride,              # sliding-window stride
          slide_window=args.slide_window,  # sliding-window PNet instead of FCN
          shuffle=args.shuffle,
          vis=False)

调用read_annotation函数，read_annotation在data_utils.py中，返回字典data，包含图像路径，以及图像中的人脸框，我们从文件wider_face_train_bbx_gt.txt中读取wider_face数据集的图像，人脸框个数，人脸框位置。它的人脸框保存格式是[x,y,w,h]

注意wider_face_train_bbx_gt.txt和wider_face_train.txt的区别：后者是我们生成PNet样本时用到的文件，它保存人脸框的格式是[x1,y1,x2,y2],如下图所示：

wider_face_train_bbx_gt.txt：

wider_face_train.txt：

def read_annotation(base_dir, label_path):
    """Read an annotation file into image paths and corner-format boxes.

    The file is read as repeated records of: one image-path line, one line
    with the number of boxes, then that many box lines whose first four
    space-separated fields are ``x y w h``. Reading stops at the first empty
    image-path line (end of file or a blank line).

    NOTE(review): a record with a box count of 0 that is still followed by a
    placeholder box line would have that line read as the next image path;
    verify against the annotation file actually used.

    :param base_dir: dataset root; each image path becomes
        ``base_dir + '/WIDER_train/images/' + <path from file>``.
    :param label_path: path of the annotation file,
        e.g. ``'./wider_face_train_bbx_gt.txt'``.
    :return: dict with ``'images'`` (list of image paths) and ``'bboxes'``
        (one list per image of ``[xmin, ymin, xmax, ymax]`` float lists).
    """
    images = []
    bboxes = []
    # `with` closes the label file even when a line fails to parse
    with open(label_path, 'r') as labelfile:
        while True:
            # image path, e.g. 0--Parade/0_Parade_marchingband_1_849.jpg
            imagepath = labelfile.readline().strip('\n')
            if not imagepath:
                break
            images.append(base_dir + '/WIDER_train/images/' + imagepath)

            # number of face boxes for this image
            nums = labelfile.readline().strip('\n')
            one_image_bboxes = []
            for _ in range(int(nums)):
                bb_info = labelfile.readline().strip('\n').split(' ')
                # only the first four fields (x, y, w, h) are needed
                face_box = [float(bb_info[k]) for k in range(4)]
                xmin = face_box[0]
                ymin = face_box[1]
                # convert width/height to the bottom-right corner
                xmax = xmin + face_box[2]
                ymax = ymin + face_box[3]
                one_image_bboxes.append([xmin, ymin, xmax, ymax])
            bboxes.append(one_image_bboxes)

    data = dict()
    data['images'] = images
    data['bboxes'] = bboxes
    return data

调用了类MtcnnDetector里面的方法detect_face()，并传入参数test_data，代码：

class MtcnnDetector(object):
    """Runs the configured PNet / RNet / ONet stages over a set of images."""

    def __init__(self,
                 detectors,
                 min_face_size=20,
                 stride=2,
                 threshold=[0.6, 0.7, 0.7],
                 scale_factor=0.79,
                 # scale_factor=0.709,#change
                 slide_window=False):
        """Store the stage detectors and detection settings.

        :param detectors: three-element sequence ``[pnet, rnet, onet]``; an
            entry that is ``None`` disables that stage.
        :param min_face_size: stored as ``self.min_face_size``.
        :param stride: stored as ``self.stride``.
        :param threshold: per-stage thresholds, stored as ``self.thresh``.
            The default list is shared between instances and is not mutated
            in this class body.
        :param scale_factor: stored as ``self.scale_factor``.
        :param slide_window: stored as ``self.slide_window``.
        """
        self.pnet_detector = detectors[0]
        self.rnet_detector = detectors[1]
        self.onet_detector = detectors[2]
        self.min_face_size = min_face_size
        self.stride = stride
        self.thresh = threshold
        self.scale_factor = scale_factor
        self.slide_window = slide_window

    def detect_face(self, test_data):
        """Run every enabled stage on each image and collect the final boxes.

        :param test_data: iterable of images that also exposes ``size``
            (used as the image count for progress and timing output).
        :return: ``(all_boxes, landmarks)``, two lists with one entry per
            image. When a stage returns no calibrated boxes the entry is an
            empty array in both lists; otherwise ``all_boxes`` gets the
            calibrated boxes of the last enabled stage and ``landmarks``
            gets the placeholder ``[1]``.
        """
        all_boxes = []  # calibrated boxes, one entry per image
        landmarks = []
        batch_idx = 0
        sum_time = 0
        t1_sum = 0
        t2_sum = 0
        t3_sum = 0
        num_of_img = test_data.size
        empty_array = np.array([])
        s_time = time.time()
        for databatch in test_data:
            # report progress and per-image time every 100 images
            batch_idx += 1
            if batch_idx % 100 == 0:
                c_time = (time.time() - s_time )/100
                print("%d out of %d images done" % (batch_idx ,test_data.size))
                print('%f seconds for each image' % c_time)
                s_time = time.time()
            im = databatch
            # pnet
            if self.pnet_detector:
                st = time.time()
                # landmark output is ignored
                boxes, boxes_c, landmark = self.detect_pnet(im)

                t1 = time.time() - st
                sum_time += t1
                t1_sum += t1
                if boxes_c is None:
                    print("boxes_c is None...")
                    # keep both lists aligned with the image order
                    all_boxes.append(empty_array)
                    landmarks.append(empty_array)

                    continue
            # rnet: refines the calibrated boxes of the previous stage
            if self.rnet_detector:
                t = time.time()
                # landmark output is ignored
                boxes, boxes_c, landmark = self.detect_rnet(im, boxes_c)
                t2 = time.time() - t
                sum_time += t2
                t2_sum += t2
                if boxes_c is None:
                    all_boxes.append(empty_array)
                    landmarks.append(empty_array)
                    continue
            # onet
            if self.onet_detector:
                t = time.time()
                boxes, boxes_c, landmark = self.detect_onet(im, boxes_c)
                t3 = time.time() - t
                sum_time += t3
                t3_sum += t3
                if boxes_c is None:
                    all_boxes.append(empty_array)
                    landmarks.append(empty_array)
                    continue
            all_boxes.append(boxes_c)
            # placeholder: real landmark output is not kept
            landmark = [1]
            landmarks.append(landmark)
        print('num of images', num_of_img)
        # avoid ZeroDivisionError when the data set is empty
        avg_over = num_of_img if num_of_img else 1
        print("time cost in average" +
            '{:.3f}'.format(sum_time/avg_over) +
            '  pnet {:.3f}  rnet {:.3f}  onet {:.3f}'.format(t1_sum/avg_over, t2_sum/avg_over,t3_sum/avg_over))
        print('boxes length:',len(all_boxes))
        return all_boxes, landmarks

上面的detect_face()方法调用了同一个类MtcnnDetector中的detect_pnet()方法，并传入参数im：

    def detect_pnet(self, im):
        """Collect face candidates by running PNet over an image pyramid.

        The image is first scaled by ``12 / self.min_face_size`` and then
        repeatedly by ``self.scale_factor`` until its shorter side is no
        longer larger than 12. Boxes of every level are filtered with NMS
        (threshold 0.5), merged, and filtered again (threshold 0.7).

        Parameters:
        ----------
        im: numpy array
            input image array (height, width, channels)
        Returns:
        -------
        boxes: numpy array
            detected boxes before calibration, columns x1, y1, x2, y2, score
        boxes_c: numpy array
            boxes after applying the regression offsets, same column layout
        The third element is always None. ``(None, None, None)`` is returned
        when no pyramid level yields a box.
        """
        h, w, c = im.shape  # requires a 3-D image array
        net_size = 12
        # first pyramid level: scale so that min_face_size maps to net_size
        scale = float(net_size) / self.min_face_size
        scaled_im = self.processed_image(im, scale)
        cur_h, cur_w, _ = scaled_im.shape

        pyramid_boxes = []
        while min(cur_h, cur_w) > net_size:
            # class probability map and box regression map for this level
            cls_cls_map, reg = self.pnet_detector.predict(scaled_im)
            # rows: x1, y1, x2, y2, score, then the four regression offsets
            level_boxes = self.generate_bbox(cls_cls_map[:, :, 1], reg, scale, self.thresh[0])

            # prepare the next, smaller pyramid level before filtering
            scale *= self.scale_factor
            scaled_im = self.processed_image(im, scale)
            cur_h, cur_w, _ = scaled_im.shape

            if level_boxes.size != 0:
                # NMS within a single pyramid level
                keep = py_nms(level_boxes[:, :5], 0.5, 'Union')
                pyramid_boxes.append(level_boxes[keep])

        if not pyramid_boxes:
            return None, None, None

        # NMS across all pyramid levels
        merged = np.vstack(pyramid_boxes)
        keep = py_nms(merged[:, 0:5], 0.7, 'Union')
        merged = merged[keep]
        boxes = merged[:, :5]

        box_w = merged[:, 2] - merged[:, 0] + 1
        box_h = merged[:, 3] - merged[:, 1] + 1

        # calibrated corners: raw corner + offset * box size
        calibrated_rows = [
            merged[:, 0] + merged[:, 5] * box_w,
            merged[:, 1] + merged[:, 6] * box_h,
            merged[:, 2] + merged[:, 7] * box_w,
            merged[:, 3] + merged[:, 8] * box_h,
            merged[:, 4],
        ]
        boxes_c = np.vstack(calibrated_rows).T

        return boxes, boxes_c, None

该函数中调用了 im_resized = self.processed_image(im, current_scale)，用于按 current_scale 缩放图像并对像素值做归一化，得到金字塔当前层的输入图像：

    def processed_image(self, img, scale):
        '''
        Resize the image by ``scale`` and shift/scale its pixel values.

        :param img: image array (height, width, channels)
        :param scale: factor applied to both height and width
        :return: resized image with values mapped through (x - 127.5) / 128
        '''
        src_h, src_w, _ = img.shape
        # target size is passed to cv2.resize as (width, height), truncated to int
        target_size = (int(src_w * scale), int(src_h * scale))
        scaled = cv2.resize(img, target_size, interpolation=cv2.INTER_LINEAR)
        # centres the values around zero; for 8-bit input this gives
        # roughly [-1, 1] (presumably the network's training normalisation)
        return (scaled - 127.5) / 128

根据最小人脸检测尺寸与size12*12的比值，对原图进行放大或缩小，作为金字塔的底层图像。

detect_pnet()方法调用了类MtcnnDetector里面的方法generate_bbox()，并传入参数cls_cls_map[:, :, 1], reg, current_scale, self.thresh[0]

    def generate_bbox(self, cls_map, reg, scale, threshold):
        """
            Turn score-map cells above the threshold into boxes.
        Parameters:
        ----------
            cls_map: numpy array , n x m
                detect score for each position
            reg: numpy array , n x m x 4
                regression offsets for each position
            scale: float number
                scale of this detection; coordinates are divided by it
            threshold: float number
                detect threshold (cells with score > threshold are kept)
        Returns:
        -------
            bbox array with one row per kept cell:
            x1, y1, x2, y2, score, then the four offsets;
            an empty array when no cell exceeds the threshold
        """
        # Fixed geometry of the score map. Per the original author's note,
        # one cell corresponds to a 12x12 input window and neighbouring
        # cells are 2 pixels apart.
        stride = 2
        cellsize = 12

        hits = np.where(cls_map > threshold)
        rows, cols = hits[0], hits[1]
        if rows.size == 0:
            return np.array([])

        # offsets of the kept cells, shape (4, number of kept cells)
        offsets = np.array([reg[rows, cols, k] for k in range(4)])
        scores = cls_map[rows, cols]

        # map cell indices back to coordinates at the given scale
        left = np.round((stride * cols) / scale)
        top = np.round((stride * rows) / scale)
        right = np.round((stride * cols + cellsize) / scale)
        bottom = np.round((stride * rows + cellsize) / scale)

        stacked = np.vstack([left, top, right, bottom, scores, offsets])
        return stacked.T

detect_pnet()方法还调用了类FcnDetector的predict()方法（即self.pnet_detector.predict），并传入参数im_resized：

class FcnDetector(object):
    """Wraps a network restored from a checkpoint for single-image prediction.

    net_factory: callable that builds the network from the image tensor
    model_path: checkpoint path to restore the parameters from
    """

    def __init__(self, net_factory, model_path):
        # build the network in its own graph
        inference_graph = tf.Graph()
        with inference_graph.as_default():
            # image data plus its width and height are fed at run time
            self.image_op = tf.placeholder(tf.float32, name='input_image')
            self.width_op = tf.placeholder(tf.int32, name='image_width')
            self.height_op = tf.placeholder(tf.int32, name='image_height')
            # reshape to a batch of one 3-channel image
            batched_image = tf.reshape(
                self.image_op, [1, self.height_op, self.width_op, 3])
            # the network's third output is not used here
            self.cls_prob, self.bbox_pred, _ = net_factory(batched_image, training=False)

            # soft placement, and GPU memory allocated on demand
            gpu_options = tf.GPUOptions(allow_growth=True)
            session_config = tf.ConfigProto(allow_soft_placement=True,
                                            gpu_options=gpu_options)
            self.sess = tf.Session(config=session_config)
            saver = tf.train.Saver()

            # the directory part of model_path must hold a valid checkpoint state
            checkpoint_dir = '/'.join(model_path.split('/')[:-1])
            checkpoint_state = tf.train.get_checkpoint_state(checkpoint_dir)
            print(model_path)
            has_checkpoint = checkpoint_state and checkpoint_state.model_checkpoint_path
            assert has_checkpoint, "the params dictionary is not valid"
            print("restore models' param")
            saver.restore(self.sess, model_path)

    def predict(self, databatch):
        """Run the network on one image array (height, width, channels).

        Returns the session results for ``self.cls_prob`` and ``self.bbox_pred``.
        """
        height, width, _ = databatch.shape
        feed = {self.image_op: databatch,
                self.width_op: width,
                self.height_op: height}
        cls_prob, bbox_pred = self.sess.run([self.cls_prob, self.bbox_pred],
                                            feed_dict=feed)
        return cls_prob, bbox_pred

save_hard_example函数用于从上一级网络（PNet，或PNet+RNet）输出的检测结果中提取矩形框，裁剪并保存为图像，同时将图像路径、标签和偏移量写入TXT文件，作为RNet、ONet的训练样本：

def save_hard_example(net, data,save_path):
    """Turn saved detections into negative / positive / part training samples.

    Loads ``detections.pkl`` from ``save_path``, pairs each image's detected
    boxes with its ground-truth boxes and, for every usable detection, writes
    a resized crop to disk plus one line to the matching label file.

    Relies on module-level names set by the script entry point:
    ``image_size``, ``neg_dir``, ``pos_dir`` and ``part_dir``.

    :param net: integer formatted with ``%d`` into the ``no_LM<net>`` label
        directory name (the caller in this file passes ``image_size``).
    :param data: dict with ``'images'`` (image paths) and ``'bboxes'``
        (per-image lists of ``[x1, y1, x2, y2]`` ground-truth boxes).
    :param save_path: directory that contains ``detections.pkl``.
    :raises AssertionError: if the number of detection entries differs from
        the number of images.
    """
    im_idx_list = data['images']
    gt_boxes_list = data['bboxes']
    num_of_images = len(im_idx_list)
    print("processing %d images in total" % num_of_images)

    # label files: each line holds image path, label and (for pos/part) offsets
    neg_label_file = "../../DATA/no_LM%d/neg_%d.txt" % (net, image_size)
    pos_label_file = "../../DATA/no_LM%d/pos_%d.txt" % (net, image_size)
    part_label_file = "../../DATA/no_LM%d/part_%d.txt" % (net, image_size)

    # Read the detections before touching the label files, so a missing or
    # broken pickle does not truncate previously generated labels.
    # NOTE(review): pickle must only be used on trusted files; this one is
    # expected to be the file written by t_net.
    with open(os.path.join(save_path, 'detections.pkl'), 'rb') as det_file:
        det_boxes = pickle.load(det_file)
    print(len(det_boxes))
    print(num_of_images)
    assert len(det_boxes) == num_of_images, "incorrect detections or ground truths"

    # running indices of neg, pos and part crops, used as their image names
    n_idx = 0
    p_idx = 0
    d_idx = 0
    image_done = 0
    # `with` guarantees the label files are flushed and closed even if an
    # image fails half-way through.
    with open(neg_label_file, 'w') as neg_file, \
            open(pos_label_file, 'w') as pos_file, \
            open(part_label_file, 'w') as part_file:
        # image path, detected boxes, ground-truth boxes
        for im_idx, dets, gts in zip(im_idx_list, det_boxes, gt_boxes_list):
            gts = np.array(gts, dtype=np.float32).reshape(-1, 4)
            if image_done % 100 == 0:
                print("%d images done" % image_done)
            image_done += 1

            if dets.shape[0] == 0:
                continue
            img = cv2.imread(im_idx)
            if img is None:
                # cv2.imread returns None for a missing/unreadable file
                print("skip unreadable image: %s" % im_idx)
                continue
            # change to square
            dets = convert_to_square(dets)
            dets[:, 0:4] = np.round(dets[:, 0:4])
            neg_num = 0
            # dets holds all boxes detected in this image; handle them one by one
            for box in dets:
                x_left, y_top, x_right, y_bottom, _ = box.astype(int)
                width = x_right - x_left + 1
                height = y_bottom - y_top + 1

                # ignore box that is too small or beyond image border
                if width < 20 or x_left < 0 or y_top < 0 or x_right > img.shape[1] - 1 or y_bottom > img.shape[0] - 1:
                    continue

                # IoU between the current box and all gt boxes; an image
                # without any gt box can only yield negatives.
                if gts.shape[0] == 0:
                    Iou = None
                    max_iou = 0.0
                else:
                    Iou = IoU(box, gts)
                    max_iou = np.max(Iou)

                if max_iou < 0.3:
                    if neg_num >= 60:
                        # per-image cap on negatives reached
                        continue
                elif max_iou < 0.4:
                    # between the negative and part thresholds: not used
                    continue

                cropped_im = img[y_top:y_bottom + 1, x_left:x_right + 1, :]
                # resize the crop to image_size x image_size
                resized_im = cv2.resize(cropped_im, (image_size, image_size),
                                        interpolation=cv2.INTER_LINEAR)

                if max_iou < 0.3:
                    # negative sample: IoU with every gt box is below 0.3
                    save_file = get_path(neg_dir, "%s.jpg" % n_idx)
                    neg_file.write(save_file + ' 0\n')
                    cv2.imwrite(save_file, resized_im)
                    n_idx += 1
                    neg_num += 1
                else:
                    # gt box with the highest IoU
                    idx = np.argmax(Iou)
                    assigned_gt = gts[idx]
                    x1, y1, x2, y2 = assigned_gt

                    # bbox regression label: gt corner minus detected corner,
                    # normalised by the detected box size
                    offset_x1 = (x1 - x_left) / float(width)
                    offset_y1 = (y1 - y_top) / float(height)
                    offset_x2 = (x2 - x_right) / float(width)
                    offset_y2 = (y2 - y_bottom) / float(height)

                    if max_iou >= 0.65:
                        # positive sample: path + label + offsets
                        save_file = get_path(pos_dir, "%s.jpg" % p_idx)
                        pos_file.write(save_file + ' 1 %.2f %.2f %.2f %.2f\n' % (
                            offset_x1, offset_y1, offset_x2, offset_y2))
                        cv2.imwrite(save_file, resized_im)
                        p_idx += 1

                    elif max_iou >= 0.4:
                        # part-face sample
                        save_file = os.path.join(part_dir, "%s.jpg" % d_idx)
                        part_file.write(save_file + ' -1 %.2f %.2f %.2f %.2f\n' % (
                            offset_x1, offset_y1, offset_x2, offset_y2))
                        cv2.imwrite(save_file, resized_im)
                        d_idx += 1

参考：https://blog.csdn.net/He_yuan_hong/article/details/85218240

菜鸟知识搬运工

关注

1
点赞
踩
11

收藏

觉得还不错? 一键收藏
1
评论
MTCNN-tensorflow源码解析-gen_hard_example.py生成RNet和ONet的训练数据

进入prepare_data文件夹打开gen_hard_example.py脚本，该脚本既可用于生成RNet，也可用于ONet,只要把主函数中：net = 'RNet' 改为 net = 'ONet'。该脚本只生成pos,neg,part三类样本。至于landmark样本，由gen_landmark_aug_24.py和gen_landmark_aug_48.py脚本生成gen_hard_e...
复制链接

扫一扫

专栏目录