【tensorflow】MTCNN网络Pnet数据生成（人脸数据）

最新推荐文章于 2021-04-06 17:53:56 发布
胖子工作室
最新推荐文章于 2021-04-06 17:53:56 发布
阅读量541
点赞数 1
分类专栏： tensorflow python 文章标签： Pnet数据生成
本文链接：https://blog.csdn.net/zhouzongzong/article/details/94892675
版权
python 同时被 2 个专栏收录
38 篇文章 0 订阅
订阅专栏
tensorflow
37 篇文章 0 订阅
订阅专栏
Pnet数据生成：数据源下载链接（ https://download.csdn.net/download/zhouzongzong/11290551 ）。
以下代码已经通过验证。
# coding:utf-8
import os
import cv2
import numpy as np
import numpy.random as npr
def IoU(box, boxes):
    """Return the intersection-over-union of one crop box with each ground-truth box.

    The two arguments use DIFFERENT coordinate layouts.

    Parameters
    ----------
    box : numpy array (or sequence), first four entries x1, y1, x2, y2
        The candidate (randomly cropped) box.
    boxes : numpy array, shape (n, 4), columns x1, x2, y1, y2
        The ground-truth face boxes, one per row.

    Returns
    -------
    numpy array, shape (n,)
        IoU between ``box`` and every row of ``boxes``.
    """
    crop_x1, crop_y1, crop_x2, crop_y2 = box[0], box[1], box[2], box[3]
    gt_x1 = boxes[:, 0]
    gt_x2 = boxes[:, 1]
    gt_y1 = boxes[:, 2]
    gt_y2 = boxes[:, 3]

    # Areas use inclusive pixel coordinates, hence the "+ 1" on every extent.
    crop_area = (crop_x2 - crop_x1 + 1) * (crop_y2 - crop_y1 + 1)
    gt_areas = (gt_x2 - gt_x1 + 1) * (gt_y2 - gt_y1 + 1)

    # Top-left / bottom-right corners of the overlap rectangle, per gt box.
    left = np.maximum(crop_x1, gt_x1)
    top = np.maximum(crop_y1, gt_y1)
    right = np.minimum(crop_x2, gt_x2)
    bottom = np.minimum(crop_y2, gt_y2)

    # Extents are clamped at zero so disjoint boxes yield an empty overlap.
    overlap_w = np.maximum(0, right - left + 1)
    overlap_h = np.maximum(0, bottom - top + 1)
    overlap = overlap_h * overlap_w

    # IoU = intersection / union, where union = area_a + area_b - intersection.
    return overlap / (crop_area + gt_areas - overlap)



# Inputs: the annotation list (one image per line, path followed by its face
# box) and the folder the images were extracted to.
anno_file = "C:/Desktop/train/trainImageList.txt"
im_dir = "C:\\Users\\Desktop\\train"

# Outputs: root folder for the 12x12 samples plus one sub-folder per class
# (positive, part, negative).
save_dir = "E:/MTCNN/12"
pos_save_dir = "E:/MTCNN/12/positive"
part_save_dir = "E:/MTCNN/12/part"
neg_save_dir = 'E:/MTCNN/12/negative'

# Create any output folder that does not exist yet (root first).
for _out_dir in (save_dir, pos_save_dir, part_save_dir, neg_save_dir):
    if not os.path.exists(_out_dir):
        os.makedirs(_out_dir)

def _save_crop(img, x1, y1, x2, y2, save_file):
    """Crop img[y1:y2, x1:x2], resize the crop to 12x12 and write it to save_file."""
    cropped_im = img[y1:y2, x1:x2, :]
    resized_im = cv2.resize(cropped_im, (12, 12), interpolation=cv2.INTER_LINEAR)
    cv2.imwrite(save_file, resized_im)


# Read the annotation list first so that a missing/unreadable annotation file
# does not truncate previously generated sample lists.
with open(anno_file, 'r') as f:
    annotations = f.readlines()  # one line per image
num = len(annotations)
print("%d pics in total" % num)

p_idx = 0  # number of positive samples written so far
n_idx = 0  # number of negative samples written so far
d_idx = 0  # number of part ("don't care") samples written so far
idx = 0  # number of images processed
box_idx = 0  # number of ground-truth boxes processed

# The three list files are opened in a single `with` so they are flushed and
# closed even if an exception interrupts the run.
# NOTE: the paths written into the list files are hard-coded and must be kept
# in sync with pos_save_dir / part_save_dir / neg_save_dir.
with open(os.path.join(save_dir, 'pos_12.txt'), 'w') as f1, \
        open(os.path.join(save_dir, 'neg_12.txt'), 'w') as f2, \
        open(os.path.join(save_dir, 'part_12.txt'), 'w') as f3:
    for annotation in annotations:
        print(annotation)
        annotation = annotation.split()
        # Expected fields: image path, then x1 x2 y1 y2 of the face box.
        # Blank or truncated lines are skipped instead of crashing.
        if len(annotation) < 5:
            continue
        im_path = annotation[0]
        # Only fields 1..4 are used, i.e. one ground-truth box per line.
        bbox = list(map(float, annotation[1:5]))
        print(bbox)
        boxes = np.array(bbox, dtype=np.float32).reshape(-1, 4)

        path = os.path.join(im_dir, im_path)
        print(path.replace('\\', '/'))
        img = cv2.imread(path)
        # cv2.imread returns None (no exception) for missing/corrupt files.
        if img is None:
            print("skip unreadable image: %s" % path)
            continue
        idx += 1

        height, width = img.shape[:2]

        # Negative crops have a side in [12, min(width, height) // 2); an image
        # whose shorter side is below 26 px leaves that range empty.
        max_neg_size = min(width, height) // 2
        if max_neg_size <= 12:
            print("skip too small image: %s" % path)
            continue

        # Keep cropping random squares until 5 negatives (IoU < 0.3 with every
        # ground-truth box) have been saved for this image.
        neg_num = 0
        while neg_num < 5:
            size = npr.randint(12, max_neg_size)
            # random top-left corner such that the crop stays inside the image
            nx = npr.randint(0, width - size)
            ny = npr.randint(0, height - size)
            crop_box = np.array([nx, ny, nx + size, ny + size])
            print(crop_box)

            if np.max(IoU(crop_box, boxes)) < 0.3:
                save_file = os.path.join(neg_save_dir, "%s.jpg" % n_idx)
                f2.write("E:/MTCNN/12/negative/%s.jpg" % n_idx + ' 0\n')
                _save_crop(img, nx, ny, nx + size, ny + size, save_file)
                n_idx += 1
                neg_num += 1

        for box in boxes:
            # ground-truth layout is (x_left, x_right, y_top, y_bottom)
            x1, x2, y1, y2 = box
            w = x2 - x1 + 1  # ground-truth width
            h = y2 - y1 + 1  # ground-truth height

            # Ignore small faces (their annotations may be inaccurate), boxes
            # whose top-left corner lies outside the image, and degenerate
            # boxes (which would give randint an empty range below).
            if max(w, h) < 20 or x1 < 0 or y1 < 0 or min(w, h) < 1:
                continue

            # Up to 2 more negatives, cropped near the face: the offset range
            # keeps the crop's top-left corner within reach of the box, and the
            # crop is kept only if its IoU with every gt box is below 0.3.
            for i in range(2):
                size = npr.randint(12, max_neg_size)
                # max(-size, -x1) keeps x1 + delta_x >= 0; bounds are made
                # explicit ints because randint expects integer limits.
                delta_x = npr.randint(int(max(-size, -x1)), int(w))
                delta_y = npr.randint(int(max(-size, -y1)), int(h))
                nx1 = int(max(0, x1 + delta_x))
                ny1 = int(max(0, y1 + delta_y))

                # Discard crops that run past the image (width/height here are
                # the image's dimensions, not the face box's).
                if nx1 + size > width or ny1 + size > height:
                    continue
                crop_box = np.array([nx1, ny1, nx1 + size, ny1 + size])

                if np.max(IoU(crop_box, boxes)) < 0.3:
                    save_file = os.path.join(neg_save_dir, "%s.jpg" % n_idx)
                    f2.write("E:/MTCNN/12/negative/%s.jpg" % n_idx + ' 0\n')
                    _save_crop(img, nx1, ny1, nx1 + size, ny1 + size, save_file)
                    n_idx += 1

            box_idx += 1

            # The centre jitter is drawn from [-0.2*w, 0.2*w) x [-0.2*h, 0.2*h);
            # a box narrower or shorter than 5 px makes one of those integer
            # ranges empty, so no positive/part samples are drawn from it.
            max_dx = int(w * 0.2)
            max_dy = int(h * 0.2)
            if max_dx < 1 or max_dy < 1:
                continue

            # 3 attempts at positive / part samples around the face.
            for i in range(3):
                # crop side in [0.8 * min(w, h), ceil(1.25 * max(w, h)))
                size = npr.randint(int(min(w, h) * 0.8),
                                   int(np.ceil(1.25 * max(w, h))))

                # offset of the crop centre relative to the box centre
                delta_x = npr.randint(-max_dx, max_dx)
                delta_y = npr.randint(-max_dy, max_dy)

                # top-left corner = jittered centre - size / 2, clamped at 0
                nx1 = int(max(x1 + w / 2 + delta_x - size / 2, 0))
                ny1 = int(max(y1 + h / 2 + delta_y - size / 2, 0))
                nx2 = nx1 + size
                ny2 = ny1 + size

                # discard crops that run past the image
                if nx2 > width or ny2 > height:
                    continue
                crop_box = np.array([nx1, ny1, nx2, ny2])

                # Bounding-box regression targets: offsets of the gt corners
                # from the crop corners, normalised by the crop size.
                offset_x1 = (x1 - nx1) / float(size)
                offset_y1 = (y1 - ny1) / float(size)
                offset_x2 = (x2 - nx2) / float(size)
                offset_y2 = (y2 - ny2) / float(size)

                # IoU against this gt box only (reshaped to a 1 x 4 array).
                iou = IoU(crop_box, box.reshape(1, -1))[0]
                if iou >= 0.65:  # positive sample
                    save_file = os.path.join(pos_save_dir, "%s.jpg" % p_idx)
                    f1.write("E:/MTCNN/12/positive/%s.jpg" % p_idx + ' 1 %.2f %.2f %.2f %.2f\n' % (
                        offset_x1, offset_y1, offset_x2, offset_y2))
                    _save_crop(img, nx1, ny1, nx2, ny2, save_file)
                    p_idx += 1
                elif iou >= 0.4:  # part sample
                    save_file = os.path.join(part_save_dir, "%s.jpg" % d_idx)
                    f3.write("E:/MTCNN/12/part/%s.jpg" % d_idx + ' -1 %.2f %.2f %.2f %.2f\n' % (
                        offset_x1, offset_y1, offset_x2, offset_y2))
                    _save_crop(img, nx1, ny1, nx2, ny2, save_file)
                    d_idx += 1

        # progress report once per 100 processed images
        if idx % 100 == 0:
            print("%s images done, pos: %s part: %s neg: %s" % (idx, p_idx, d_idx, n_idx))
在这里插入图片描述