# coding: utf-8
import os
import random
from os.path import join, exists
import cv2
import numpy as np
import numpy.random as npr
def IoU(box, boxes):
    """Intersection-over-union of one box against a set of boxes.

    Corner coordinates are treated as inclusive, which is why every side
    length is computed as ``max - min + 1``.

    Parameters
    ----------
    box : numpy array, shape (5,) -- x1, y1, x2, y2, score
        The candidate box. Only the first four entries are read.
    boxes : numpy array, shape (n, 4) -- x1, y1, x2, y2
        The ground-truth boxes to compare against.

    Returns
    -------
    numpy array, shape (n,)
        IoU between ``box`` and each row of ``boxes``.
    """
    bx1, by1, bx2, by2 = box[0], box[1], box[2], box[3]
    gx1, gy1 = boxes[:, 0], boxes[:, 1]
    gx2, gy2 = boxes[:, 2], boxes[:, 3]

    candidate_area = (bx2 - bx1 + 1) * (by2 - by1 + 1)
    gt_areas = (gx2 - gx1 + 1) * (gy2 - gy1 + 1)

    # Corners of the overlap rectangle (may be inverted when disjoint).
    left = np.maximum(bx1, gx1)
    top = np.maximum(by1, gy1)
    right = np.minimum(bx2, gx2)
    bottom = np.minimum(by2, gy2)

    # Clamp to zero so disjoint boxes contribute no intersection.
    overlap_w = np.maximum(0, right - left + 1)
    overlap_h = np.maximum(0, bottom - top + 1)
    intersection = overlap_w * overlap_h

    return intersection / (candidate_area + gt_areas - intersection)
def flip(face, landmark):
    """Mirror a face crop left-to-right together with its landmarks.

    Parameters
    ----------
    face : image array accepted by ``cv2.flip``
    landmark : iterable of (x, y) pairs
        Each x is replaced by ``1 - x``, so x is presumably normalised to
        the crop width -- confirm against the caller.

    Returns
    -------
    tuple
        ``(flipped_face, flipped_landmark)`` where the landmark is a numpy
        array with rows 0/1 and rows 3/4 exchanged.
    """
    mirrored_face = cv2.flip(face, 1)

    mirrored_points = np.asarray([(1 - px, py) for px, py in landmark])
    # After mirroring, "left" and "right" features trade places:
    # rows 0 <-> 1 are the eyes, rows 3 <-> 4 are the mouth corners.
    for first, second in ((0, 1), (3, 4)):
        mirrored_points[[first, second]] = mirrored_points[[second, first]]

    return (mirrored_face, mirrored_points)
def rotate(img, bbox, landmark, alpha):
    """Rotate the whole image about the box centre and crop the box again.

    Parameters
    ----------
    img : image array
    bbox : object with ``left``, ``top``, ``right``, ``bottom`` attributes
    landmark : iterable of (x, y) pairs in absolute image coordinates
    alpha : rotation angle in degrees, passed to ``cv2.getRotationMatrix2D``

    Returns
    -------
    tuple
        ``(face, landmark_)``: the crop of the rotated image at the
        original box position, and the landmarks mapped through the same
        affine transform (still absolute coordinates).
    """
    centre_x = (bbox.left + bbox.right) / 2
    centre_y = (bbox.top + bbox.bottom) / 2
    affine = cv2.getRotationMatrix2D((centre_x, centre_y), alpha, 1)

    # warpAffine takes the output size as (width, height), i.e. (cols, rows).
    height, width = img.shape[0], img.shape[1]
    rotated_img = cv2.warpAffine(img, affine, (width, height))

    # Apply the 2x3 affine matrix to every landmark point.
    moved_points = []
    for px, py in landmark:
        new_x = affine[0][0] * px + affine[0][1] * py + affine[0][2]
        new_y = affine[1][0] * px + affine[1][1] * py + affine[1][2]
        moved_points.append((new_x, new_y))
    rotated_landmark = np.asarray(moved_points)

    face = rotated_img[bbox.top:bbox.bottom + 1, bbox.left:bbox.right + 1]
    return (face, rotated_landmark)
def getDataFromTxt(txt, data_path, with_landmark=True):
    """Parse an annotation list file into image paths, boxes and landmarks.

    Every non-blank line is whitespace-separated and laid out as::

        <image path> <left> <right> <top> <bottom> [x1 y1 x2 y2 ... x5 y5]

    Parameters
    ----------
    txt : path of the annotation file
    data_path : directory the image paths are joined onto
    with_landmark : when False, the landmark columns are not read

    Returns
    -------
    list
        ``(img_path, BBox)`` tuples when ``with_landmark`` is False,
        otherwise ``(img_path, BBox, landmark)`` where ``landmark`` is a
        ``(5, 2)`` float array of (x, y) values exactly as written in the
        file (no normalisation is applied here).

    Raises
    ------
    IndexError
        If a non-blank line has fewer columns than required.
    """
    result = []
    with open(txt, 'r') as fd:
        for line in fd:
            # split() without an argument tolerates tabs and repeated spaces.
            components = line.split()
            if not components:
                # Blank lines (e.g. a trailing newline) carry no annotation.
                continue

            img_path = os.path.join(data_path, components[0]).replace('\\', '/')

            # The file stores the box as left, right, top, bottom, while
            # BBox expects [left, top, right, bottom]; values may be
            # written as floats, so go through float() before truncating.
            left = int(float(components[1]))
            right = int(float(components[2]))
            top = int(float(components[3]))
            bottom = int(float(components[4]))
            bbox = BBox([left, top, right, bottom])

            if not with_landmark:
                result.append((img_path, bbox))
                continue

            # Ten numbers follow the box: five (x, y) pairs.
            flat = [float(components[5 + k]) for k in range(10)]
            landmark = np.asarray(flat).reshape(5, 2)
            result.append((img_path, bbox, landmark))
    return result
class BBox(object):
    """Axis-aligned bounding box of a face.

    Constructed from a 4-element sequence ``[left, top, right, bottom]``.
    ``x``/``y`` alias the top-left corner and ``w``/``h`` are the plain
    coordinate differences ``right - left`` and ``bottom - top``.
    """

    def __init__(self, bbox):
        self.left = bbox[0]
        self.top = bbox[1]
        self.right = bbox[2]
        self.bottom = bbox[3]

        self.x = bbox[0]
        self.y = bbox[1]
        self.w = bbox[2] - bbox[0]
        self.h = bbox[3] - bbox[1]

    def expand(self, scale=0.05):
        """Return a new box grown on every side by ``scale`` of its size.

        The horizontal margin is ``int(w * scale)`` and the vertical
        margin is ``int(h * scale)``.
        """
        dx = int(self.w * scale)
        dy = int(self.h * scale)
        # The constructor order is [left, top, right, bottom].
        return BBox([self.left - dx, self.top - dy,
                     self.right + dx, self.bottom + dy])

    def project(self, point):
        """Convert an absolute (x, y) point to an offset relative to the box.

        The result is expressed in units of the box width and height.
        """
        x = (point[0] - self.x) / self.w
        y = (point[1] - self.y) / self.h
        return np.asarray([x, y])

    def reproject(self, point):
        """Convert a box-relative (x, y) offset back to absolute coordinates."""
        x = self.x + self.w * point[0]
        y = self.y + self.h * point[1]
        return np.asarray([x, y])

    def reprojectLandmark(self, landmark):
        """Apply :meth:`reproject` to every point; returns an (n, 2) array."""
        p = np.zeros((len(landmark), 2))
        for i, point in enumerate(landmark):
            p[i] = self.reproject(point)
        return p

    def projectLandmark(self, landmark):
        """Apply :meth:`project` to every point; returns an (n, 2) array."""
        p = np.zeros((len(landmark), 2))
        for i, point in enumerate(landmark):
            p[i] = self.project(point)
        return p

    def subBBox(self, leftR, rightR, topR, bottomR):
        """Return a box whose edges are given as ratios of this box.

        Each ratio is measured from this box's left (for ``leftR`` and
        ``rightR``) or top (for ``topR`` and ``bottomR``) edge, in units
        of the width or height. For example
        ``bbox.subBBox(-0.05, 1.05, -0.05, 1.05)`` pads the box by 5%.
        """
        left = self.left + self.w * leftR
        right = self.left + self.w * rightR
        top = self.top + self.h * topR
        bottom = self.top + self.h * bottomR
        # The constructor order is [left, top, right, bottom].
        return BBox([left, top, right, bottom])
# (Tutorial note) The required packages and libraries are imported first, at the top of the file.
def GenerateData(ftxt, data_path, net, argument=False):
    """Generate landmark training samples for one stage of the cascade.

    For every annotated face the ground-truth crop is resized to the
    network input size and its landmarks are normalised by the box size.
    With ``argument=True`` extra samples are produced from randomly
    shifted crops (kept when their IoU with the ground truth exceeds
    0.65), each of which may additionally be mirrored and rotated by
    +5 / -5 degrees (with a mirrored copy of each rotation). Samples
    whose landmarks all lie strictly inside (0, 1) are written as JPEGs
    and listed in the label file with the label ``-2``.

    This function reads two module-level names that are not parameters:
    ``OUTPUT`` (directory of the label file ``landmark_<size>_aug.txt``)
    and ``dstdir`` (directory the sample images are written to).

    :param ftxt: path of the text file that contains image paths,
                 bounding boxes and landmarks
    :param data_path: directory the image paths in ``ftxt`` are relative to
    :param net: one of "PNet", "RNet", "ONet" (input size 12, 24, 48)
    :param argument: apply augmentation or not
    :return: ``(F_imgs, F_landmarks)`` for the LAST processed image only
             (empty lists when ``ftxt`` has no entries), or ``None`` when
             ``net`` is not a known network name
    """
    if net == "PNet":
        size = 12
    elif net == "RNet":
        size = 24
    elif net == "ONet":
        size = 48
    else:
        print('Net type error')
        return

    label_path = join(OUTPUT, "landmark_%s_aug.txt" % (size))
    image_id = 0  # running index used to name the saved sample images
    idx = 0       # number of source images seen while augmenting

    # Defined up front so an empty annotation list still returns a pair
    # instead of failing on unbound names.
    F_imgs, F_landmarks = [], []

    # The context manager closes the label file even if an image fails
    # to load or an OpenCV call raises part-way through.
    with open(label_path, 'w') as f:
        print(label_path)
        # Each entry: image path, BBox, landmark array (5 x 2).
        data = getDataFromTxt(ftxt, data_path=data_path)

        for (imgPath, bbox, landmarkGt) in data:
            F_imgs = []
            F_landmarks = []

            img = cv2.imread(imgPath)
            print(imgPath)
            # cv2.imread returns None when the file cannot be read.
            assert (img is not None)
            img_h, img_w, _ = img.shape
            gt_box = np.array([bbox.left, bbox.top, bbox.right, bbox.bottom])  # (x1, y1, x2, y2)

            # Ground-truth face crop, resized to the network input size.
            f_face = img[bbox.top:bbox.bottom + 1, bbox.left:bbox.right + 1]
            f_face = cv2.resize(f_face, (size, size))

            # Normalise each landmark by the ground-truth box:
            # ((x - left) / width, (y - top) / height).
            landmark = np.zeros((5, 2))
            for index, one in enumerate(landmarkGt):
                landmark[index] = ((one[0] - gt_box[0]) / (gt_box[2] - gt_box[0]),
                                   (one[1] - gt_box[1]) / (gt_box[3] - gt_box[1]))
            F_imgs.append(f_face)
            F_landmarks.append(landmark.reshape(10))

            if argument:
                idx = idx + 1
                if idx % 100 == 0:
                    print(idx, "images done")

                x1, y1, x2, y2 = gt_box
                gt_w = x2 - x1 + 1
                gt_h = y2 - y1 + 1
                # Skip faces that are too small or start outside the image.
                # Note this also skips saving the unaugmented crop above.
                if max(gt_w, gt_h) < 40 or x1 < 0 or y1 < 0:
                    continue

                # Random shift: draw square crops around the face.
                for i in range(2):
                    # Bounds are made explicit integers; randint is an
                    # integer API and the originals were floats.
                    bbox_size = npr.randint(int(min(gt_w, gt_h) * 0.8),
                                            int(np.ceil(1.25 * max(gt_w, gt_h))))
                    delta_x = npr.randint(int(-gt_w * 0.2), int(gt_w * 0.2))
                    delta_y = npr.randint(int(-gt_h * 0.2), int(gt_h * 0.2))
                    nx1 = int(max(x1 + gt_w / 2 - bbox_size / 2 + delta_x, 0))
                    ny1 = int(max(y1 + gt_h / 2 - bbox_size / 2 + delta_y, 0))
                    nx2 = nx1 + bbox_size
                    ny2 = ny1 + bbox_size
                    # Discard crops that run past the image border.
                    if nx2 > img_w or ny2 > img_h:
                        continue

                    crop_box = np.array([nx1, ny1, nx2, ny2])
                    cropped_im = img[ny1:ny2 + 1, nx1:nx2 + 1, :]
                    resized_im = cv2.resize(cropped_im, (size, size))

                    iou = IoU(crop_box, np.expand_dims(gt_box, 0))
                    if not (iou > 0.65):
                        continue

                    F_imgs.append(resized_im)
                    # Normalise the landmarks relative to the random crop.
                    landmark = np.zeros((5, 2))
                    for index, one in enumerate(landmarkGt):
                        landmark[index] = ((one[0] - nx1) / bbox_size,
                                           (one[1] - ny1) / bbox_size)
                    F_landmarks.append(landmark.reshape(10))
                    landmark_ = F_landmarks[-1].reshape(-1, 2)
                    # Separate name so the ground-truth loop variable
                    # `bbox` is not overwritten.
                    crop_bbox = BBox([nx1, ny1, nx2, ny2])

                    # Random mirror.
                    if random.choice([0, 1]) > 0:
                        face_flipped, landmark_flipped = flip(resized_im, landmark_)
                        face_flipped = cv2.resize(face_flipped, (size, size))
                        F_imgs.append(face_flipped)
                        F_landmarks.append(landmark_flipped.reshape(10))

                    # Random rotation by +5 and then -5 degrees (per the
                    # OpenCV docs a positive angle is counter-clockwise).
                    # Each accepted rotation also contributes its mirror.
                    for alpha in (5, -5):
                        if random.choice([0, 1]) > 0:
                            face_rotated_by_alpha, landmark_rotated = rotate(
                                img, crop_bbox,
                                crop_bbox.reprojectLandmark(landmark_), alpha)
                            # Back to offsets relative to the crop box.
                            landmark_rotated = crop_bbox.projectLandmark(landmark_rotated)
                            face_rotated_by_alpha = cv2.resize(face_rotated_by_alpha, (size, size))
                            F_imgs.append(face_rotated_by_alpha)
                            F_landmarks.append(landmark_rotated.reshape(10))

                            face_flipped, landmark_flipped = flip(face_rotated_by_alpha, landmark_rotated)
                            face_flipped = cv2.resize(face_flipped, (size, size))
                            F_imgs.append(face_flipped)
                            F_landmarks.append(landmark_flipped.reshape(10))

                # NOTE(review): the source this was restored from had lost
                # its indentation; the save step below is assumed to sit
                # inside the `if argument:` branch -- confirm.
                F_imgs, F_landmarks = np.asarray(F_imgs), np.asarray(F_landmarks)
                for sample_img, sample_landmark in zip(F_imgs, F_landmarks):
                    # Keep only samples whose landmarks all fall strictly
                    # inside the crop, i.e. every value in (0, 1).
                    if np.any(sample_landmark <= 0) or np.any(sample_landmark >= 1):
                        continue
                    sample_path = join(dstdir, "%d.jpg" % (image_id))
                    cv2.imwrite(sample_path, sample_img)
                    f.write(sample_path + " -2 " + " ".join(map(str, sample_landmark)) + "\n")
                    image_id = image_id + 1

    return F_imgs, F_landmarks
if __name__ == '__main__':
    # dstdir and OUTPUT are module-level names that GenerateData reads:
    # dstdir receives the generated sample images, OUTPUT the label file.
    dstdir = "E:/MTCNN/12/train_PNet_landmark_aug"
    OUTPUT = 'E:/MTCNN/12'
    # Every backslash is escaped; the previous literal contained the
    # invalid escape sequence '\z'. The resulting string is unchanged.
    data_path = 'C:\\Users\\zonglei.zhou\\Desktop\\train'

    # makedirs also creates missing parent directories and is a no-op
    # when the directory already exists.
    os.makedirs(OUTPUT, exist_ok=True)
    os.makedirs(dstdir, exist_ok=True)

    # train data
    net = "PNet"
    # the file contains the names of all the landmark training data
    train_txt = "C:/Desktop/train/trainImageList.txt"
    imgs, landmarks = GenerateData(train_txt, data_path, net, argument=True)
# [tensorflow] MTCNN network: PNet data generation (landmark data)
# Latest recommended article published on 2021-04-10 17:14:47