MTCNN（Tensorflow）学习记录（生成P_Net人脸关键点的训练数据）

最新推荐文章于 2023-12-24 12:45:38 发布

He_yuan_hong

最新推荐文章于 2023-12-24 12:45:38 发布

阅读量3k

点赞数 3

文章标签： MTCNN Tensorflow

本文链接：https://blog.csdn.net/He_yuan_hong/article/details/84939321

版权

1 生成P_Net人脸关键点的训练数据

上一篇博客是生成P_Net人脸框的训练数据，这一篇博客是生成P_Net人脸关键点的训练数据。
进入prepare_data文件夹打开gen_landmark_aug_12，代码如下：

# coding: utf-8
import os
import random
from os.path import join, exists

import cv2
import numpy as np
import numpy.random as npr

from BBox_utils import getDataFromTxt, BBox
from Landmark_utils import rotate, flip
from utils import IoU
#首先导入各种包和库



def GenerateData(ftxt,data_path,net,argument=False):
    '''
    Crop annotated faces, optionally augment them, and write landmark samples.

    For every record returned by ``getDataFromTxt`` the ground-truth face box
    is cropped, resized to the network input size, and its landmarks are
    normalised relative to that box. When ``argument`` is true, randomly
    shifted crops (plus random mirrored / rotated variants) are added, and
    every sample whose normalised landmarks all lie strictly inside (0, 1) is
    written as ``<image_id>.jpg`` and listed in the label file with label -2.

    NOTE: this function reads two module-level names that are not parameters:
    ``OUTPUT`` (directory of ``landmark_<size>_aug.txt``) and ``dstdir``
    (directory the sample images are written to). Both must exist before the
    call. Nothing is written to disk when ``argument`` is false.

    :param ftxt: name/path of the text file that contains image path,
                bounding box, and landmarks
    :param data_path: forwarded to ``getDataFromTxt`` as ``data_path``
    :param net: one of "PNet", "RNet", "ONet"; selects the sample size
                (12, 24 or 48 pixels)
    :param argument: apply augmentation (and write samples) or not
    :return: ``(F_imgs, F_landmarks)`` of the last processed record (empty
             lists if there were no records); ``None`` if ``net`` is unknown
    '''
    net_sizes = {"PNet": 12, "RNet": 24, "ONet": 48}
    if net not in net_sizes:
        print('Net type error')
        return
    size = net_sizes[net]

    image_id = 0    # running index used to name the written samples
    idx = 0         # number of records seen, for progress output only
    # Bound up front so the final return is valid even when the annotation
    # list is empty (previously an UnboundLocalError).
    F_imgs, F_landmarks = [], []

    # `with` guarantees the label file is closed even if a record fails.
    with open(join(OUTPUT,"landmark_%s_aug.txt" %(size)),'w') as f:
        # each record: image path, bounding box, ground-truth landmarks
        data = getDataFromTxt(ftxt,data_path=data_path)
        for (imgPath, bbox, landmarkGt) in data:
            # samples collected for this record only
            F_imgs = []
            F_landmarks = []
            img = cv2.imread(imgPath)

            # cv2.imread returns None when the image cannot be read
            assert(img is not None)
            img_h,img_w,img_c = img.shape
            gt_box = np.array([bbox.left,bbox.top,bbox.right,bbox.bottom])  #(x1,y1,x2,y2)
            # crop the ground-truth face and resize it to the network input
            f_face = img[bbox.top:bbox.bottom+1,bbox.left:bbox.right+1]
            f_face = cv2.resize(f_face,(size,size))

            # Express each landmark as a fraction of the box width/height,
            # measured from the box's top-left corner, so the target does not
            # depend on where the face sits in the image or how big it is.
            landmark = np.zeros((5, 2))
            for index, one in enumerate(landmarkGt):
                rv = ((one[0]-gt_box[0])/(gt_box[2]-gt_box[0]), (one[1]-gt_box[1])/(gt_box[3]-gt_box[1]))
                landmark[index] = rv
            F_imgs.append(f_face)
            F_landmarks.append(landmark.reshape(10))

            if argument:
                idx = idx + 1
                if idx % 100 == 0:
                    print(idx, "images done")
                x1, y1, x2, y2 = gt_box
                gt_w = x2 - x1 + 1
                gt_h = y2 - y1 + 1
                # skip small faces and boxes starting outside the image;
                # note this also skips writing the un-augmented crop above
                if max(gt_w, gt_h) < 40 or x1 < 0 or y1 < 0:
                    continue

                # random shift: try 10 square crops jittered around the box
                for i in range(10):
                    bbox_size = npr.randint(int(min(gt_w, gt_h) * 0.8), np.ceil(1.25 * max(gt_w, gt_h)))
                    delta_x = npr.randint(-gt_w * 0.2, gt_w * 0.2)
                    delta_y = npr.randint(-gt_h * 0.2, gt_h * 0.2)
                    nx1 = int(max(x1+gt_w/2-bbox_size/2+delta_x,0))
                    ny1 = int(max(y1+gt_h/2-bbox_size/2+delta_y,0))

                    nx2 = nx1 + bbox_size
                    ny2 = ny1 + bbox_size
                    # discard crops that run past the right/bottom image edge
                    if nx2 > img_w or ny2 > img_h:
                        continue
                    crop_box = np.array([nx1,ny1,nx2,ny2])

                    cropped_im = img[ny1:ny2+1,nx1:nx2+1,:]
                    resized_im = cv2.resize(cropped_im, (size, size))
                    iou = IoU(crop_box, np.expand_dims(gt_box,0))
                    # only crops overlapping the face enough are kept
                    if iou > 0.65:
                        F_imgs.append(resized_im)
                        # normalise the landmarks relative to the new crop
                        crop_landmark = np.zeros((5, 2))
                        for index, one in enumerate(landmarkGt):
                            rv = ((one[0]-nx1)/bbox_size, (one[1]-ny1)/bbox_size)
                            crop_landmark[index] = rv
                        F_landmarks.append(crop_landmark.reshape(10))
                        landmark_ = F_landmarks[-1].reshape(-1,2)
                        # separate name: do not rebind the loop variable `bbox`
                        crop_bbox = BBox([nx1,ny1,nx2,ny2])

                        # mirror, with probability 1/2
                        if random.choice([0,1]) > 0:
                            face_flipped, landmark_flipped = flip(resized_im, landmark_)
                            face_flipped = cv2.resize(face_flipped, (size, size))
                            F_imgs.append(face_flipped)
                            F_landmarks.append(landmark_flipped.reshape(10))

                        # rotate by +5 and then by -5 (angle is passed straight
                        # to `rotate`), each with probability 1/2; every
                        # rotated sample is also stored mirrored
                        for alpha in (5, -5):
                            if random.choice([0,1]) > 0:
                                face_rotated_by_alpha, landmark_rotated = rotate(
                                    img, crop_bbox,
                                    crop_bbox.reprojectLandmark(landmark_), alpha)
                                # convert the rotated landmarks back with the
                                # crop box's projectLandmark
                                landmark_rotated = crop_bbox.projectLandmark(landmark_rotated)
                                face_rotated_by_alpha = cv2.resize(face_rotated_by_alpha, (size, size))
                                F_imgs.append(face_rotated_by_alpha)
                                F_landmarks.append(landmark_rotated.reshape(10))

                                face_flipped, landmark_flipped = flip(face_rotated_by_alpha, landmark_rotated)
                                face_flipped = cv2.resize(face_flipped, (size, size))
                                F_imgs.append(face_flipped)
                                F_landmarks.append(landmark_flipped.reshape(10))

                F_imgs, F_landmarks = np.asarray(F_imgs), np.asarray(F_landmarks)
                for i in range(len(F_imgs)):
                    # drop samples with any coordinate outside the open
                    # interval (0, 1)
                    if np.any(F_landmarks[i] <= 0) or np.any(F_landmarks[i] >= 1):
                        continue

                    # write the image and its label line:
                    # "<path> -2 <10 landmark values>"
                    cv2.imwrite(join(dstdir,"%d.jpg" %(image_id)), F_imgs[i])
                    landmarks = map(str,list(F_landmarks[i]))
                    f.write(join(dstdir,"%d.jpg" %(image_id))+" -2 "+" ".join(landmarks)+"\n")
                    image_id = image_id + 1

    return F_imgs,F_landmarks

if __name__ == '__main__':
    # Paths are module-level on purpose: GenerateData reads OUTPUT and dstdir
    # as globals rather than taking them as arguments.
    data_path = '../../DATA'
    OUTPUT = '../../DATA/12'
    dstdir = "../../DATA/12/train_PNet_landmark_aug"

    # Create the label directory first, then the image directory nested in it.
    for directory in (OUTPUT, dstdir):
        if not exists(directory):
            os.mkdir(directory)
    assert (exists(dstdir) and exists(OUTPUT))

    # Generate augmented landmark training data for PNet from the list of
    # landmark-annotated training images.
    net = "PNet"
    train_txt = "trainImageList.txt"
    imgs,landmarks = GenerateData(train_txt,data_path,net,argument=True )

这里用到了 getDataFromTxt、flip、rotate 和 IoU 这几个函数。输出结果有两部分：一个存放人脸图片的文件夹 train_PNet_landmark_aug，以及一个记录图片信息的标注文档 landmark_12_aug.txt。标注文档每行有 12 个字段：第 1 个是图片路径，第 2 个是标签值 -2（表示这是一条 landmark 数据），最后 10 个是 5 个人脸关键点相对于裁剪框归一化后的坐标。
下一篇博客将两份数据集合并在一起。

He_yuan_hong

关注

3
点赞
踩
9

收藏

觉得还不错? 一键收藏
15
评论
MTCNN（Tensorflow）学习记录（生成P_Net人脸关键点的训练数据）

1 训练PNet的数据的采样代码上一篇博客是生成P_Net人脸框的训练数据，这一篇博客是生成P_Net人脸关键点的训练数据。进入prepare_data文件夹打开gen_landmark_aug_12，代码如下：# coding: utf-8import osimport randomfrom os.path import join, existsimport cv2import...
复制链接

扫一扫