MTCNN Face Detection with TensorFlow: Generating PNet Facial Landmark Data (Annotated Code)

The code comes from GitHub: https://github.com/AITTSMD/MTCNN-Tensorflow
This stage corresponds to MTCNN-Tensorflow-master / prepare_data / gen_landmark_aug_12.py in that repository.

# coding: utf-8
import os
import random
from os.path import join, exists

import cv2
import numpy as np
import numpy.random as npr

from prepare_data.BBox_utils import getDataFromTxt, BBox
from prepare_data.Landmark_utils import rotate, flip
from prepare_data.utils import IoU



#generate (optionally augmented) face-landmark training samples
def GenerateData(ftxt,data_path,net,argument=False):
    '''
    :param ftxt: name/path of the text file that contains image path,
                bounding box, and landmarks
    :param data_path: root directory of the training data
    :param net: one of the nets in the cascade (PNet / RNet / ONet)
    :param argument: apply data augmentation or not
    :return: images and related landmarks
    Note: the output annotation file and crop directory come from the
    module-level globals OUTPUT and dstdir set in the __main__ block below.
    '''
    #determine the crop size from which network in the cascade we are preparing data for
    if net == "PNet":
        size = 12
    elif net == "RNet":
        size = 24
    elif net == "ONet":
        size = 48
    else:
        print('Net type error')
        return
    image_id = 0
    
    #open the output annotation file (one line per generated sample) for writing
    f = open(join(OUTPUT,"landmark_%s_aug.txt" %(size)),'w')
    #dstdir = "train_landmark_few"
    # get image path , bounding box, and landmarks from file 'ftxt'
    data = getDataFromTxt(ftxt,data_path=data_path)								# read (image path, bounding box, 5 landmarks) records
    idx = 0
    #image_path bbox landmark(5*2)
    for (imgPath, bbox, landmarkGt) in data:
        #print imgPath
        F_imgs = []																# cropped face patches
        F_landmarks = []														# their normalized landmarks
        #print(imgPath)
        img = cv2.imread(imgPath)												# read the image

        assert(img is not None)													# make sure the image was actually loaded
        img_h,img_w,img_c = img.shape											# image height, width and number of channels
        gt_box = np.array([bbox.left,bbox.top,bbox.right,bbox.bottom])			# ground-truth box (x1, y1, x2, y2)
        #get sub-image from bbox
        f_face = img[bbox.top:bbox.bottom+1,bbox.left:bbox.right+1]				# crop the face region
        # resize the gt image to specified size
        f_face = cv2.resize(f_face,(size,size))									# resize to size x size (12x12 for PNet)
        #initialize the landmark
        landmark = np.zeros((5, 2))												# initialize the 5 landmark points

        #normalize landmarks by dividing by the width and height of the ground-truth bounding box
        #landmarkGt is a list of (x, y) tuples; enumerate yields (index, point) pairs
        for index, one in enumerate(landmarkGt):
            # (( x - bbox.left)/ width of bounding box, (y - bbox.top)/ height of bounding box
            rv = ((one[0]-gt_box[0])/(gt_box[2]-gt_box[0]), (one[1]-gt_box[1])/(gt_box[3]-gt_box[1]))
            # put the normalized value into the new list landmark
            landmark[index] = rv
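        # a quick numeric check (hypothetical values): with gt_box = [100, 100, 200, 200]
        # and a ground-truth landmark at (150, 120), the normalized value is
        # ((150 - 100) / (200 - 100), (120 - 100) / (200 - 100)) = (0.5, 0.2),
        # i.e. each coordinate is expressed relative to the face box and lies in [0, 1]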
        
        F_imgs.append(f_face)
        F_landmarks.append(landmark.reshape(10))
        landmark = np.zeros((5, 2))        
        if argument:
            idx = idx + 1
            if idx % 100 == 0:
                print(idx, "images done")
            x1, y1, x2, y2 = gt_box  										# ground-truth box corners (x1, y1, x2, y2)
            #gt's width
            gt_w = x2 - x1 + 1												# width of the ground-truth box
            #gt's height
            gt_h = y2 - y1 + 1        										# height of the ground-truth box
            if max(gt_w, gt_h) < 40 or x1 < 0 or y1 < 0:					# skip faces that are too small or partially outside the image
                continue
            #random shift: build randomly shifted square crops around the ground-truth face box
            for i in range(10):
                bbox_size = npr.randint(int(min(gt_w, gt_h) * 0.8), np.ceil(1.25 * max(gt_w, gt_h)))
                delta_x = npr.randint(-gt_w * 0.2, gt_w * 0.2)
                delta_y = npr.randint(-gt_h * 0.2, gt_h * 0.2)
                nx1 = int(max(x1+gt_w/2-bbox_size/2+delta_x,0))
                ny1 = int(max(y1+gt_h/2-bbox_size/2+delta_y,0))

                nx2 = nx1 + bbox_size
                ny2 = ny1 + bbox_size
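                # example with hypothetical numbers: for a 100x100 ground-truth box,
                # bbox_size is drawn from [80, 125) and delta_x, delta_y from [-20, 20),
                # so the crop is re-centred within +/-20 px of the face centre and
                # rescaled to roughly 0.8x - 1.25x of the face size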
                if nx2 > img_w or ny2 > img_h:										# discard crops that extend beyond the image
                    continue
                crop_box = np.array([nx1,ny1,nx2,ny2])


                cropped_im = img[ny1:ny2+1,nx1:nx2+1,:]								# crop the patch
                resized_im = cv2.resize(cropped_im, (size, size))					# resize to size x size
                #cal iou between the random crop and the ground-truth box
                iou = IoU(crop_box, np.expand_dims(gt_box,0))
                if iou > 0.65:														# keep only crops that overlap the face well
                    F_imgs.append(resized_im)
                    #normalize
                    for index, one in enumerate(landmarkGt):
                        rv = ((one[0]-nx1)/bbox_size, (one[1]-ny1)/bbox_size)
                        landmark[index] = rv
                    F_landmarks.append(landmark.reshape(10))
                    landmark = np.zeros((5, 2))
                    landmark_ = F_landmarks[-1].reshape(-1,2)
                    bbox = BBox([nx1,ny1,nx2,ny2])                    

                    #mirror: randomly flip the crop horizontally
                    if random.choice([0,1]) > 0:
                        face_flipped, landmark_flipped = flip(resized_im, landmark_)	# flipped patch and landmarks
                        face_flipped = cv2.resize(face_flipped, (size, size))			# resize back to size x size
                        #c*h*w
                        F_imgs.append(face_flipped)										# save the flipped face
                        F_landmarks.append(landmark_flipped.reshape(10))				# save the flipped landmarks
                    #rotate: randomly rotate the crop
                    if random.choice([0,1]) > 0:
                        face_rotated_by_alpha, landmark_rotated = rotate(img, bbox, \
                                                                         bbox.reprojectLandmark(landmark_), 5)	# rotate by +5 degrees (counter-clockwise)
                        #project the rotated landmarks back into the crop's normalized coordinates
                        landmark_rotated = bbox.projectLandmark(landmark_rotated)
                        face_rotated_by_alpha = cv2.resize(face_rotated_by_alpha, (size, size))
                        F_imgs.append(face_rotated_by_alpha)
                        F_landmarks.append(landmark_rotated.reshape(10))
                
                        #flip the rotated patch horizontally as well
                        face_flipped, landmark_flipped = flip(face_rotated_by_alpha, landmark_rotated)
                        face_flipped = cv2.resize(face_flipped, (size, size))
                        F_imgs.append(face_flipped)
                        F_landmarks.append(landmark_flipped.reshape(10))                
                    
                    #rotate in the opposite direction
                    if random.choice([0,1]) > 0: 
                        face_rotated_by_alpha, landmark_rotated = rotate(img, bbox, \
                                                                         bbox.reprojectLandmark(landmark_), -5)	# rotate by -5 degrees (clockwise)
                        landmark_rotated = bbox.projectLandmark(landmark_rotated)
                        face_rotated_by_alpha = cv2.resize(face_rotated_by_alpha, (size, size))
                        F_imgs.append(face_rotated_by_alpha)
                        F_landmarks.append(landmark_rotated.reshape(10))
                
                        face_flipped, landmark_flipped = flip(face_rotated_by_alpha, landmark_rotated)
                        face_flipped = cv2.resize(face_flipped, (size, size))
                        F_imgs.append(face_flipped)
                        F_landmarks.append(landmark_flipped.reshape(10)) 
                    
            F_imgs, F_landmarks = np.asarray(F_imgs), np.asarray(F_landmarks)		# convert the accumulated lists to numpy arrays
            #print F_imgs.shape
            #print F_landmarks.shape
            for i in range(len(F_imgs)):
                #if image_id % 100 == 0:

                    #print('image id : ', image_id)

                #np.where(cond, b, c): returns b where cond is true, otherwise c
                if np.sum(np.where(F_landmarks[i] <= 0, 1, 0)) > 0:
                    continue

                if np.sum(np.where(F_landmarks[i] >= 1, 1, 0)) > 0:
                    continue

                #samples whose landmarks fall outside (0, 1) were discarded above; save the valid crop and write its landmark line
                cv2.imwrite(join(dstdir,"%d.jpg" %(image_id)), F_imgs[i])
                landmarks = map(str,list(F_landmarks[i]))
                f.write(join(dstdir,"%d.jpg" %(image_id))+" -2 "+" ".join(landmarks)+"\n")
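                # a written line looks like (landmark values hypothetical):
                # ../../DATA/12/train_PNet_landmark_aug/0.jpg -2 0.31 0.35 0.67 0.34 0.50 0.55 0.35 0.72 0.66 0.71
                # i.e. crop path, label -2 (marks a landmark sample), then the 10 normalized landmark values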
                image_id = image_id + 1
            
    #print F_imgs.shape
    #print F_landmarks.shape
    #F_imgs = processImage(F_imgs)
    #shuffle_in_unison_scary(F_imgs, F_landmarks)
    
    f.close()
    return F_imgs,F_landmarks
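
# --- Aside (not part of the original gen_landmark_aug_12.py) ----------------
# The IoU() call above comes from prepare_data/utils.py, which is not shown in
# this post. The sketch below only illustrates the same calling convention --
# one box [x1, y1, x2, y2] against an (N, 4) array of boxes, returning an
# array of overlap ratios -- and is an assumption about that helper, not the
# repository's actual implementation. It uses the numpy imported at the top.
def IoU_sketch(box, boxes):
    box_area = (box[2] - box[0] + 1) * (box[3] - box[1] + 1)
    areas = (boxes[:, 2] - boxes[:, 0] + 1) * (boxes[:, 3] - boxes[:, 1] + 1)
    xx1 = np.maximum(box[0], boxes[:, 0])          # intersection rectangle corners
    yy1 = np.maximum(box[1], boxes[:, 1])
    xx2 = np.minimum(box[2], boxes[:, 2])
    yy2 = np.minimum(box[3], boxes[:, 3])
    w = np.maximum(0, xx2 - xx1 + 1)               # clamp negative overlap to zero
    h = np.maximum(0, yy2 - yy1 + 1)
    inter = w * h
    return inter / (box_area + areas - inter)      # intersection over union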

if __name__ == '__main__':
    dstdir = "../../DATA/12/train_PNet_landmark_aug"
    OUTPUT = '../../DATA/12'
    data_path = '../../DATA'
    if not exists(OUTPUT):
        os.mkdir(OUTPUT)
    if not exists(dstdir):
        os.mkdir(dstdir)
    assert (exists(dstdir) and exists(OUTPUT))									# make sure the output directories exist
    # train data
    net = "PNet"
    #the file contains the names of all the landmark training data
    train_txt = "trainImageList.txt"
    imgs,landmarks = GenerateData(train_txt,data_path,net,argument=True)
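
The flip() and rotate() helpers are imported from prepare_data/Landmark_utils.py and are not reproduced in this post. For completeness, here is a minimal sketch of the horizontal-flip part, assuming the usual 5-point order (left eye, right eye, nose, left mouth corner, right mouth corner) and landmarks normalized to [0, 1]; the real helper in the repository may differ in its details.

import cv2
import numpy as np

def flip_sketch(face, landmark):
    """Mirror a face patch and its normalized (5, 2) landmarks horizontally (a sketch)."""
    face_flipped = cv2.flip(face, 1)                            # flip around the vertical axis
    landmark_ = np.asarray([(1 - x, y) for (x, y) in landmark]) # mirror x coordinates
    landmark_[[0, 1]] = landmark_[[1, 0]]                       # swap left/right eye (assumed point order)
    landmark_[[3, 4]] = landmark_[[4, 3]]                       # swap left/right mouth corner (assumed point order)
    return face_flipped, landmark_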
    
   
