基于TensorFlow的MTCNN人脸检测算法（生成PNet的人脸数据样本代码注解）

最新推荐文章于 2022-09-26 09:13:47 发布
Home丶Basic
最新推荐文章于 2022-09-26 09:13:47 发布
阅读量343
点赞数
分类专栏： MTCNN
本文链接：https://blog.csdn.net/weixin_44650248/article/details/91043380
版权
MTCNN 专栏收录该内容
6 篇文章 0 订阅
订阅专栏
代码源自 GitHub：https://github.com/AITTSMD/MTCNN-Tensorflow
该阶段代码取自该仓库主代码目录下的 MTCNN-Tensorflow-master/prepare_data/gen_12net_data.py
#coding:utf-8
import os   #满足不同系统路径适应要求
import cv2   
import numpy as np
import numpy.random as npr   #简化矩阵模块中随机函数表示为npr

from prepare_data.utils import IoU		#从...中加载IOU模块

# Input locations: the annotation list and the directory holding the images.
anno_file = "wider_face_train.txt"
im_dir = ".../DATA/WIDER_train/images"

# Output root, plus one sub-directory per sample class.
save_dir = ".../DATA/12"
pos_save_dir = ".../DATA/12/positive"    # where positive samples are stored
part_save_dir = ".../DATA/12/part"       # where part samples are stored
neg_save_dir = ".../DATA/12/negative"    # where negative samples are stored

# Make sure the output root and the three per-class directories exist.
# os.makedirs also creates missing parent directories (plain os.mkdir fails
# when e.g. ".../DATA" is absent), and exist_ok=True makes a re-run a no-op
# without a separate, race-prone os.path.exists() check.
for _out_dir in (save_dir, pos_save_dir, part_save_dir, neg_save_dir):
    os.makedirs(_out_dir, exist_ok=True)

# Open (and truncate) the three label list files under save_dir for writing:
#   f1 -> pos_12.txt, f2 -> neg_12.txt, f3 -> part_12.txt
f1, f2, f3 = (
    open(os.path.join(save_dir, list_name), 'w')
    for list_name in ('pos_12.txt', 'neg_12.txt', 'part_12.txt')
)

# Read the annotation file into a list with one entry per line; the main loop
# treats each line as one image, so the list length is reported as the
# picture count.
with open(anno_file, 'r') as f:
    annotations = list(f)

num = len(annotations)
print("%d pics in total" % num)

# Running counters for saved samples: positive, negative and part ("don't care").
p_idx = n_idx = d_idx = 0
# Number of images read so far and number of face boxes processed so far.
idx = box_idx = 0
def _save_crop_12(image, left, top, side, path):
    """Crop a square patch from *image*, resize it to 12x12 and write it to *path*.

    The patch covers rows ``top:top + side`` and columns ``left:left + side``
    of *image* (all channels). Resizing uses bilinear interpolation.
    """
    patch = image[top:top + side, left:left + side, :]
    cv2.imwrite(path, cv2.resize(patch, (12, 12), interpolation=cv2.INTER_LINEAR))


# Walk every annotated image and cut three kinds of 12x12 training crops:
#   * negative: max IoU against all ground-truth boxes < 0.3
#               (label 0, listed through f2)
#   * positive: IoU against its own face box >= 0.65
#               (label 1 plus four box offsets, listed through f1)
#   * part:     0.4 <= IoU < 0.65
#               (label -1 plus four box offsets, listed through f3)
# NOTE: npr.randint draws from the half-open range [low, high); all bounds are
# converted with int() explicitly (truncation toward zero) instead of relying
# on randint to truncate float arguments.
for annotation in annotations:
    # One line is "<image path> x1 y1 x2 y2 [x1 y1 x2 y2 ...]", space separated.
    annotation = annotation.strip().split(' ')
    im_path = annotation[0]

    # Everything after the path is box coordinates; reshape to one row of
    # (x_left, y_top, x_right, y_bottom) per ground-truth box.
    bbox = list(map(float, annotation[1:]))
    boxes = np.array(bbox, dtype=np.float32).reshape(-1, 4)

    img = cv2.imread(os.path.join(im_dir, im_path + '.jpg'))
    idx += 1
    if img is None:
        # cv2.imread signals a missing/unreadable file by returning None;
        # skip it instead of failing on img.shape below.
        print("skip unreadable image: %s" % im_path)
        continue

    height, width, channel = img.shape

    # Exclusive upper bound for the side length of a negative crop. randint
    # needs low < high, so images whose short side gives a bound <= 12 cannot
    # be sampled at all.
    max_neg_size = min(width, height) // 2
    if max_neg_size <= 12:
        print("skip too small image: %s" % im_path)
        continue

    # ---- 50 negatives taken from random positions anywhere in the image ----
    neg_num = 0
    while neg_num < 50:
        size = npr.randint(12, max_neg_size)
        # Top-left corner, chosen so the square stays inside the image.
        nx = npr.randint(0, width - size)
        ny = npr.randint(0, height - size)
        crop_box = np.array([nx, ny, nx + size, ny + size])
        # IoU comes from prepare_data.utils; presumably it returns the overlap
        # of crop_box with each ground-truth box -- verify against the helper.
        Iou = IoU(crop_box, boxes)

        # Keep the crop only if it overlaps every ground-truth box by < 0.3.
        if np.max(Iou) < 0.3:
            save_file = os.path.join(neg_save_dir, "%s.jpg" % n_idx)
            f2.write(".../DATA/12/negative/%s.jpg" % n_idx + ' 0\n')
            _save_crop_12(img, nx, ny, size, save_file)
            n_idx += 1
            neg_num += 1

    # ---- per-box sampling ----
    for box in boxes:
        # box is (x_left, y_top, x_right, y_bottom)
        x1, y1, x2, y2 = box
        w = x2 - x1 + 1     # ground-truth width
        h = y2 - y1 + 1     # ground-truth height

        # Ignore small faces (their annotations may be inaccurate) and boxes
        # whose top-left corner lies outside the image.
        if max(w, h) < 20 or x1 < 0 or y1 < 0:
            continue

        # Up to 5 more negatives, this time placed around the face box: the
        # offset ranges keep the crop's top-left corner within `size` pixels
        # left/above the box corner or inside the box extent.
        for _ in range(5):
            size = npr.randint(12, max_neg_size)
            # max(-size, -x1) keeps x1 + delta_x from going below zero.
            delta_x = npr.randint(int(max(-size, -x1)), int(w))
            delta_y = npr.randint(int(max(-size, -y1)), int(h))
            nx1 = int(max(0, x1 + delta_x))
            ny1 = int(max(0, y1 + delta_y))

            # Discard crops whose bottom-right corner leaves the image
            # (width/height here are the image dimensions, not the box's).
            if nx1 + size > width or ny1 + size > height:
                continue

            crop_box = np.array([nx1, ny1, nx1 + size, ny1 + size])
            Iou = IoU(crop_box, boxes)

            if np.max(Iou) < 0.3:
                save_file = os.path.join(neg_save_dir, "%s.jpg" % n_idx)
                f2.write(".../DATA/12/negative/%s.jpg" % n_idx + ' 0\n')
                _save_crop_12(img, nx1, ny1, size, save_file)
                n_idx += 1

        box_idx += 1

        # The centre jitter below is drawn from [int(-0.2 * side), int(0.2 * side));
        # for a side shorter than 5 px that range is empty and randint raises,
        # so such boxes get no positive/part samples. Both sides are checked.
        if w < 5 or h < 5:
            continue

        # ---- 20 attempts at positive / part samples around this face ----
        for _ in range(20):
            # Crop side between 0.8 * min(w, h) and ceil(1.25 * max(w, h)).
            size = npr.randint(int(min(w, h) * 0.8), int(np.ceil(1.25 * max(w, h))))

            # Jitter of the crop centre relative to the box centre.
            delta_x = npr.randint(int(-w * 0.2), int(w * 0.2))
            delta_y = npr.randint(int(-h * 0.2), int(h * 0.2))

            # Box centre + jitter - half the crop side gives the crop's
            # top-left corner, clamped at 0.
            nx1 = int(max(x1 + w / 2 + delta_x - size / 2, 0))
            ny1 = int(max(y1 + h / 2 + delta_y - size / 2, 0))
            nx2 = nx1 + size
            ny2 = ny1 + size

            # Discard crops that extend past the right/bottom image border.
            if nx2 > width or ny2 > height:
                continue

            crop_box = np.array([nx1, ny1, nx2, ny2])

            # Bounding-box regression targets: distance from each crop edge to
            # the matching ground-truth edge, normalised by the crop side.
            offset_x1 = (x1 - nx1) / float(size)
            offset_y1 = (y1 - ny1) / float(size)
            offset_x2 = (x2 - nx2) / float(size)
            offset_y2 = (y2 - ny2) / float(size)

            # Overlap is measured against this face box only (one-row array).
            box_ = box.reshape(1, -1)
            iou = IoU(crop_box, box_)

            if iou >= 0.65:
                # positive sample
                save_file = os.path.join(pos_save_dir, "%s.jpg" % p_idx)
                f1.write(".../DATA/12/positive/%s.jpg" % p_idx + ' 1 %.2f %.2f %.2f %.2f\n' % (offset_x1, offset_y1, offset_x2, offset_y2))
                _save_crop_12(img, nx1, ny1, size, save_file)
                p_idx += 1
            elif iou >= 0.4:
                # part sample
                save_file = os.path.join(part_save_dir, "%s.jpg" % d_idx)
                f3.write(".../DATA/12/part/%s.jpg" % d_idx + ' -1 %.2f %.2f %.2f %.2f\n' % (offset_x1, offset_y1, offset_x2, offset_y2))
                _save_crop_12(img, nx1, ny1, size, save_file)
                d_idx += 1

    # Progress report once per 100 images (not once per box).
    if idx % 100 == 0:
        print("%s images done, pos: %s part: %s neg: %s" % (idx, p_idx, d_idx, n_idx))
# Close the positive, negative and part list files, in that order.
for _list_file in (f1, f2, f3):
    _list_file.close()