【tensorflow】MTCNN网络Pnet数据生成（关键点数据）

最新推荐文章于 2021-04-10 17:14:47 发布
胖子工作室
最新推荐文章于 2021-04-10 17:14:47 发布
阅读量1.1k
点赞数
分类专栏： tensorflow python 文章标签：关键点数据 pnet
本文链接：https://blog.csdn.net/zhouzongzong/article/details/94911083
版权
python 同时被 2 个专栏收录
38 篇文章 0 订阅
订阅专栏
tensorflow
37 篇文章 0 订阅
订阅专栏
# coding: utf-8
import os
import random
from os.path import join, exists

import cv2
import numpy as np
import numpy.random as npr


def IoU(box, boxes):
    """Intersection-over-union of one box against a set of boxes.

    Coordinates are treated as inclusive pixel indices, hence the ``+ 1``
    when widths and heights are computed.

    Parameters:
    ----------
    box: numpy array, indexable as x1, y1, x2, y2 (extra entries such as a
        score are ignored)
        the single box to compare
    boxes: numpy array, shape (n, 4): x1, y1, x2, y2
        the boxes to compare against

    Returns:
    -------
    numpy.array, shape (n, )
        IoU of ``box`` with every row of ``boxes``
    """
    bx1, by1, bx2, by2 = box[0], box[1], box[2], box[3]
    lefts, tops = boxes[:, 0], boxes[:, 1]
    rights, bottoms = boxes[:, 2], boxes[:, 3]

    single_area = (bx2 - bx1 + 1) * (by2 - by1 + 1)
    many_areas = (rights - lefts + 1) * (bottoms - tops + 1)

    # Overlap rectangle; clamped to zero when the boxes do not intersect.
    overlap_w = np.maximum(0, np.minimum(bx2, rights) - np.maximum(bx1, lefts) + 1)
    overlap_h = np.maximum(0, np.minimum(by2, bottoms) - np.maximum(by1, tops) + 1)
    intersection = overlap_w * overlap_h

    union = single_area + many_areas - intersection
    return intersection / union



def flip(face, landmark):
    """
        Mirror a face horizontally together with its landmarks.

        The x coordinate of every landmark becomes ``1 - x`` (this presumes
        landmarks are normalised to the face width -- confirm with callers),
        then the left/right pairs are exchanged so that index 0 still refers
        to the left eye and index 3 to the left mouth corner.

        Returns (flipped_face, flipped_landmark).
    """
    mirrored_face = cv2.flip(face, 1)  # flip code 1: around the vertical axis
    mirrored_points = np.asarray([(1 - px, py) for px, py in landmark])
    # (0, 1): left eye <-> right eye, (3, 4): left mouth <-> right mouth
    for first, second in ((0, 1), (3, 4)):
        mirrored_points[[first, second]] = mirrored_points[[second, first]]
    return (mirrored_face, mirrored_points)


def rotate(img, bbox, landmark, alpha):
    """
        Rotate the whole image by ``alpha`` degrees around the centre of
        ``bbox`` and cut the face out of the rotated image.

        ``landmark`` holds absolute (x, y) image positions; the same affine
        transform is applied to each point.

        Returns (face, rotated_landmark), the landmark still being in
        absolute image coordinates.
    """
    centre_x = (bbox.left + bbox.right) / 2
    centre_y = (bbox.top + bbox.bottom) / 2
    rot_mat = cv2.getRotationMatrix2D((centre_x, centre_y), alpha, 1)

    # warpAffine takes the output size as (width, height), i.e. (cols, rows)
    rows, cols = img.shape[0], img.shape[1]
    rotated_img = cv2.warpAffine(img, rot_mat, (cols, rows))

    # Rows of the 2x3 affine matrix: x' = a*x + b*y + tx, y' = c*x + d*y + ty
    (a, b, tx), (c, d, ty) = rot_mat
    moved_points = np.asarray([(a * x + b * y + tx, c * x + d * y + ty)
                               for (x, y) in landmark])

    # crop the (unrotated) bbox region out of the rotated image
    face = rotated_img[bbox.top:bbox.bottom + 1, bbox.left:bbox.right + 1]
    return (face, moved_points)

def getDataFromTxt(txt, data_path, with_landmark=True):
    """
        Parse an annotation file into a list of samples.

        Each non-blank line is whitespace separated:
            <relative image path> <left> <right> <top> <bottom> [x1 y1 ... x5 y5]

        txt: path of the annotation file
        data_path: directory the relative image paths are joined onto
        with_landmark: when False the landmark columns are not read

        return [(img_path, bbox, landmark)]  (or [(img_path, bbox)] when
        with_landmark is False)
            img_path: joined path with backslashes replaced by '/'
            bbox: BBox built as (left, top, right, bottom); note the file
                  stores the columns as left, right, top, bottom
            landmark: numpy array of shape (5, 2) with absolute (x, y)
    """
    result = []
    with open(txt, 'r') as fd:
        for raw_line in fd:
            # split() tolerates tabs / repeated spaces and yields [] for
            # blank lines, which are skipped instead of raising IndexError
            components = raw_line.split()
            if not components:
                continue

            img_path = os.path.join(data_path, components[0]).replace('\\', '/')

            # reorder the file's (left, right, top, bottom) columns into
            # (x1, y1, x2, y2); values may be written as floats, so go
            # through float before truncating to int
            ordered = (components[1], components[3], components[2], components[4])
            bbox = [int(float(value)) for value in ordered]

            if not with_landmark:
                result.append((img_path, BBox(bbox)))
                continue

            # five (x, y) landmark pairs follow the box columns
            landmark = np.zeros((5, 2))
            for index in range(5):
                landmark[index] = (float(components[5 + 2 * index]),
                                   float(components[5 + 2 * index + 1]))
            result.append((img_path, BBox(bbox), landmark))
    return result

class BBox(object):
    """
        Bounding Box of face.

        Constructed from a 4-element sequence ordered
        (left, top, right, bottom).  ``x``/``y`` alias ``left``/``top`` and
        ``w``/``h`` are ``right - left`` / ``bottom - top``.
    """

    def __init__(self, bbox):
        self.left = bbox[0]
        self.top = bbox[1]
        self.right = bbox[2]
        self.bottom = bbox[3]

        self.x = bbox[0]
        self.y = bbox[1]
        self.w = bbox[2] - bbox[0]
        self.h = bbox[3] - bbox[1]

    def expand(self, scale=0.05):
        """Return a new BBox grown by ``scale`` of the width/height per side."""
        dx = int(self.w * scale)
        dy = int(self.h * scale)
        # The list must follow the constructor order (left, top, right,
        # bottom); passing (left, right, top, bottom) swaps top and right.
        return BBox([self.left - dx, self.top - dy,
                     self.right + dx, self.bottom + dy])

    # offset
    def project(self, point):
        """Convert an absolute (x, y) point into box-relative fractions."""
        x = (point[0] - self.x) / self.w
        y = (point[1] - self.y) / self.h
        return np.asarray([x, y])

    # absolute position(image (left,top))
    def reproject(self, point):
        """Convert a box-relative (x, y) fraction back to absolute position."""
        x = self.x + self.w * point[0]
        y = self.y + self.h * point[1]
        return np.asarray([x, y])

    # landmark: 5*2
    def reprojectLandmark(self, landmark):
        """Apply ``reproject`` to every point; returns an (n, 2) array."""
        p = np.zeros((len(landmark), 2))
        for i, point in enumerate(landmark):
            p[i] = self.reproject(point)
        return p

    # change to offset according to bbox
    def projectLandmark(self, landmark):
        """Apply ``project`` to every point; returns an (n, 2) array."""
        p = np.zeros((len(landmark), 2))
        for i, point in enumerate(landmark):
            p[i] = self.project(point)
        return p

    # f_bbox = bbox.subBBox(-0.05, 1.05, -0.05, 1.05)
    # self.w bounding-box width
    # self.h bounding-box height
    def subBBox(self, leftR, rightR, topR, bottomR):
        """Return the sub-box whose edges sit at the given ratios.

        ``leftR``/``rightR`` are fractions of the width measured from
        ``left``; ``topR``/``bottomR`` are fractions of the height measured
        from ``top``.
        """
        left = self.left + self.w * leftR
        right = self.left + self.w * rightR
        top = self.top + self.h * topR
        bottom = self.top + self.h * bottomR
        # constructor order is (left, top, right, bottom)
        return BBox([left, top, right, bottom])


# Training-sample generation (the required packages are imported at the top of the file)


def _normalize_landmarks(points, x0, y0, width, height):
    """Return a fresh (5, 2) array of ``points`` relative to a box origin/size."""
    normalized = np.zeros((5, 2))
    for index, one in enumerate(points):
        normalized[index] = ((one[0] - x0) / width, (one[1] - y0) / height)
    return normalized


def GenerateData(ftxt, data_path, net, argument=False, output_dir=None, dst_dir=None):
    '''
    Build landmark training samples for one network of the cascade.

    For every annotation the ground-truth face is cropped, resized and its
    landmarks normalised by the box.  When ``argument`` is true, two random
    square crops around the face are additionally tried; those with
    IoU > 0.65 are kept and randomly mirrored / rotated by +5 and -5 degrees
    (each rotation is also stored mirrored).  Samples whose normalised
    landmarks are not all strictly inside (0, 1) are dropped; the others are
    written as ``<dst_dir>/<id>.jpg`` and listed in
    ``<output_dir>/landmark_<size>_aug.txt`` as
    ``<image path> -2 <10 landmark values>``.

    Note that samples are only written when ``argument`` is true, and that
    faces whose longer side is below 40 px, or whose box starts at a negative
    coordinate, are skipped entirely.

    :param ftxt: name/path of the text file that contains image path,
                bounding box, and landmarks
    :param data_path: directory the image paths listed in ``ftxt`` are
                relative to
    :param net: one of "PNet", "RNet", "ONet" (sample size 12, 24, 48)
    :param argument: apply augmentation (and write the samples) or not
    :param output_dir: directory of the label file; defaults to the
                module-level ``OUTPUT``
    :param dst_dir: directory the sample images are written to; defaults to
                the module-level ``dstdir``
    :return:  images and related landmarks of the last processed annotation
                (empty lists when ``ftxt`` holds no annotation), or ``None``
                when ``net`` is not recognised
    '''
    if net == "PNet":
        size = 12
    elif net == "RNet":
        size = 24
    elif net == "ONet":
        size = 48
    else:
        print('Net type error')
        return

    # Fall back to the globals set by the script entry point so existing
    # callers keep working, while importers can pass the directories in.
    if output_dir is None:
        output_dir = OUTPUT
    if dst_dir is None:
        dst_dir = dstdir

    label_path = join(output_dir, "landmark_%s_aug.txt" % (size))
    image_id = 0  # running index used to name the written samples
    idx = 0  # number of annotations processed, for progress output
    # Defined up front so the final return works for an empty annotation list.
    F_imgs, F_landmarks = [], []

    # The context manager closes the label file even if a read/resize fails.
    with open(label_path, 'w') as f:
        print(label_path)
        # image_path bbox landmark(5*2)
        data = getDataFromTxt(ftxt, data_path=data_path)
        for (imgPath, bbox, landmarkGt) in data:
            F_imgs = []
            F_landmarks = []
            img = cv2.imread(imgPath)
            print(imgPath)
            assert img is not None, "failed to read image: %s" % imgPath
            img_h, img_w, _ = img.shape
            gt_box = np.array([bbox.left, bbox.top, bbox.right, bbox.bottom])  # (x1,y1,x2,y2)

            # ground-truth face, resized to the network input size
            f_face = img[bbox.top:bbox.bottom + 1, bbox.left:bbox.right + 1]
            f_face = cv2.resize(f_face, (size, size))
            # landmarks relative to the ground-truth box
            gt_landmark = _normalize_landmarks(landmarkGt, gt_box[0], gt_box[1],
                                               gt_box[2] - gt_box[0],
                                               gt_box[3] - gt_box[1])
            F_imgs.append(f_face)
            F_landmarks.append(gt_landmark.reshape(10))

            if not argument:
                continue

            idx = idx + 1
            if idx % 100 == 0:
                print(idx, "images done")
            x1, y1, x2, y2 = gt_box
            gt_w = x2 - x1 + 1
            gt_h = y2 - y1 + 1
            if max(gt_w, gt_h) < 40 or x1 < 0 or y1 < 0:
                continue

            # random shift: try two square crops around the face
            for _ in range(2):
                # randint expects integer bounds, so cast them explicitly
                bbox_size = npr.randint(int(min(gt_w, gt_h) * 0.8),
                                        int(np.ceil(1.25 * max(gt_w, gt_h))))
                delta_x = npr.randint(int(-gt_w * 0.2), int(gt_w * 0.2))
                delta_y = npr.randint(int(-gt_h * 0.2), int(gt_h * 0.2))
                nx1 = int(max(x1 + gt_w / 2 - bbox_size / 2 + delta_x, 0))
                ny1 = int(max(y1 + gt_h / 2 - bbox_size / 2 + delta_y, 0))
                nx2 = nx1 + bbox_size
                ny2 = ny1 + bbox_size
                if nx2 > img_w or ny2 > img_h:  # crop leaves the image
                    continue
                crop_box = np.array([nx1, ny1, nx2, ny2])

                cropped_im = img[ny1:ny2 + 1, nx1:nx2 + 1, :]
                resized_im = cv2.resize(cropped_im, (size, size))
                iou = IoU(crop_box, np.expand_dims(gt_box, 0))
                if not iou[0] > 0.65:
                    continue

                F_imgs.append(resized_im)
                # landmarks relative to the random crop
                F_landmarks.append(
                    _normalize_landmarks(landmarkGt, nx1, ny1, bbox_size, bbox_size).reshape(10))
                landmark_ = F_landmarks[-1].reshape(-1, 2)
                # separate name: do not shadow the annotation's bbox
                crop_bbox = BBox([nx1, ny1, nx2, ny2])

                # random mirror
                if random.choice([0, 1]) > 0:
                    face_flipped, landmark_flipped = flip(resized_im, landmark_)
                    face_flipped = cv2.resize(face_flipped, (size, size))
                    F_imgs.append(face_flipped)
                    F_landmarks.append(landmark_flipped.reshape(10))

                # random rotation by +5 then -5 degrees; every rotated sample
                # is stored together with its mirrored copy
                for alpha in (5, -5):
                    if random.choice([0, 1]) > 0:
                        face_rotated_by_alpha, landmark_rotated = rotate(
                            img, crop_bbox, crop_bbox.reprojectLandmark(landmark_), alpha)
                        # back to offsets relative to the crop box
                        landmark_rotated = crop_bbox.projectLandmark(landmark_rotated)
                        face_rotated_by_alpha = cv2.resize(face_rotated_by_alpha, (size, size))
                        F_imgs.append(face_rotated_by_alpha)
                        F_landmarks.append(landmark_rotated.reshape(10))

                        face_flipped, landmark_flipped = flip(face_rotated_by_alpha, landmark_rotated)
                        face_flipped = cv2.resize(face_flipped, (size, size))
                        F_imgs.append(face_flipped)
                        F_landmarks.append(landmark_flipped.reshape(10))

            F_imgs, F_landmarks = np.asarray(F_imgs), np.asarray(F_landmarks)
            for sample_img, sample_landmark in zip(F_imgs, F_landmarks):
                # keep only samples whose landmarks all lie strictly in (0, 1)
                if np.any(sample_landmark <= 0) or np.any(sample_landmark >= 1):
                    continue
                sample_path = join(dst_dir, "%d.jpg" % (image_id))
                cv2.imwrite(sample_path, sample_img)
                landmarks = map(str, list(sample_landmark))
                f.write(sample_path + " -2 " + " ".join(landmarks) + "\n")
                image_id = image_id + 1

    return F_imgs, F_landmarks


if __name__ == '__main__':
    # dstdir and OUTPUT are module-level names that GenerateData reads:
    # dstdir receives the sample images, OUTPUT the landmark label file.
    dstdir = "E:/MTCNN/12/train_PNet_landmark_aug"
    OUTPUT = 'E:/MTCNN/12'
    # Raw string: the previous literal contained the invalid escape '\z'.
    # The resulting path value is unchanged.
    data_path = r'C:\Users\zonglei.zhou\Desktop\train'
    # makedirs also creates missing parent directories and does not fail
    # when the directory already exists.
    for directory in (OUTPUT, dstdir):
        os.makedirs(directory, exist_ok=True)
    # train data
    net = "PNet"
    # the file contains the names of all the landmark training data
    train_txt = "C:/Desktop/train/trainImageList.txt"
    imgs, landmarks = GenerateData(train_txt, data_path, net, argument=True)
在这里插入图片描述
胖子工作室
关注
0
点赞
踩
1

收藏

觉得还不错? 一键收藏
打赏
0
评论
【tensorflow】MTCNN网络Pnet数据生成（关键点数据）

# coding: utf-8import osimport randomfrom os.path import join, existsimport cv2import numpy as npimport numpy.random as nprdef IoU(box, boxes): """Compute IoU between detect box and gt b...
复制链接

扫一扫