# Source: GitHub, https://github.com/AITTSMD/MTCNN-Tensorflow
# This stage's code is taken from MTCNN-Tensorflow-master/prepare_data/gen_landmark_aug_12.py in that repository.
# coding: utf-8
import os
import random
from os.path import join, exists
import cv2
import numpy as np
import numpy.random as npr
from prepare_data.BBox_utils import getDataFromTxt, BBox
from prepare_data.Landmark_utils import rotate, flip
from prepare_data.utils import IoU
# Generate the (optionally augmented) landmark training samples for one network of the cascade.
def _normalized_landmarks(points, x0, y0, width, height):
    """Return a (5, 2) array of `points` shifted by (x0, y0) and divided by (width, height)."""
    landmark = np.zeros((5, 2))
    for index, one in enumerate(points):
        landmark[index] = ((one[0] - x0) / width, (one[1] - y0) / height)
    return landmark


def _random_shift(extent):
    """Draw an integer shift in [-int(0.2 * extent), int(0.2 * extent)); 0 if that range is empty."""
    bound = int(extent * 0.2)
    if bound <= 0:
        # npr.randint raises ValueError when low >= high (very narrow boxes).
        return 0
    return npr.randint(-bound, bound)


def _append_rotated(img, crop_bbox, landmark_, angle, size, faces, landmarks):
    """Append the crop rotated with `angle`, plus its flipped copy, to `faces`/`landmarks`."""
    face_rotated, landmark_rotated = rotate(
        img, crop_bbox, crop_bbox.reprojectLandmark(landmark_), angle)
    # Bring the rotated landmarks back into the crop's normalized coordinates.
    landmark_rotated = crop_bbox.projectLandmark(landmark_rotated)
    face_rotated = cv2.resize(face_rotated, (size, size))
    faces.append(face_rotated)
    landmarks.append(landmark_rotated.reshape(10))
    # Every rotated sample is also stored flipped.
    face_flipped, landmark_flipped = flip(face_rotated, landmark_rotated)
    face_flipped = cv2.resize(face_flipped, (size, size))
    faces.append(face_flipped)
    landmarks.append(landmark_flipped.reshape(10))


def GenerateData(ftxt, data_path, net, argument=False, output_dir=None, dst_dir=None):
    """Generate landmark training samples for one network of the cascade.

    For every entry returned by ``getDataFromTxt`` the ground-truth face is
    cropped and resized to the network input size, and its five landmarks are
    normalized by the ground-truth box. When ``argument`` is true, up to ten
    randomly shifted square crops per face are added (each optionally flipped
    and rotated), and all samples whose landmarks lie strictly inside (0, 1)
    are written as ``<image_id>.jpg`` into the image directory and listed in
    ``landmark_<size>_aug.txt`` with the label field ``-2``.
    Nothing is written for an image when ``argument`` is false.

    :param ftxt: name/path of the text file that contains image path,
                 bounding box, and landmarks
    :param data_path: forwarded to ``getDataFromTxt`` as ``data_path``
    :param net: one of "PNet", "RNet", "ONet" (crop size 12, 24, 48)
    :param argument: apply augmentation (and write samples) or not
    :param output_dir: directory for the annotation text file; defaults to the
                       module-level global ``OUTPUT``
    :param dst_dir: directory for the written crops; defaults to the
                    module-level global ``dstdir``
    :return: ``(F_imgs, F_landmarks)`` of the last processed image (empty
             lists when there is no data), or ``None`` for an unknown ``net``
    """
    sizes = {"PNet": 12, "RNet": 24, "ONet": 48}
    if net not in sizes:
        print('Net type error')
        return
    size = sizes[net]

    # Backward compatible: fall back to the globals set by the script entry point.
    if output_dir is None:
        output_dir = OUTPUT
    if dst_dir is None:
        dst_dir = dstdir

    image_id = 0
    idx = 0
    # Pre-bound so the final return is defined even when `data` is empty.
    F_imgs, F_landmarks = [], []

    with open(join(output_dir, "landmark_%s_aug.txt" % (size)), 'w') as f:
        # get image path, bounding box, and landmarks from file 'ftxt'
        data = getDataFromTxt(ftxt, data_path=data_path)
        # each entry: image_path, bbox, landmark (5 x 2)
        for (imgPath, bbox, landmarkGt) in data:
            F_imgs = []
            F_landmarks = []
            img = cv2.imread(imgPath)
            assert img is not None, "cannot read image: %s" % imgPath
            img_h, img_w = img.shape[:2]
            gt_box = np.array([bbox.left, bbox.top, bbox.right, bbox.bottom])

            # Ground-truth face crop, resized to the network input size.
            f_face = img[bbox.top:bbox.bottom + 1, bbox.left:bbox.right + 1]
            f_face = cv2.resize(f_face, (size, size))
            # Normalize landmarks by the ground-truth box:
            # ((x - left) / (right - left), (y - top) / (bottom - top)).
            landmark = _normalized_landmarks(
                landmarkGt, gt_box[0], gt_box[1],
                gt_box[2] - gt_box[0], gt_box[3] - gt_box[1])
            F_imgs.append(f_face)
            F_landmarks.append(landmark.reshape(10))

            if not argument:
                continue

            idx = idx + 1
            if idx % 100 == 0:
                print(idx, "images done")
            x1, y1, x2, y2 = gt_box
            gt_w = x2 - x1 + 1
            gt_h = y2 - y1 + 1
            # Skip faces that are too small or whose box starts outside the image.
            if max(gt_w, gt_h) < 40 or x1 < 0 or y1 < 0:
                continue

            # Crop-size bounds do not change between attempts; explicit ints
            # match the truncation randint applies to float bounds.
            min_size = int(min(gt_w, gt_h) * 0.8)
            max_size = int(np.ceil(1.25 * max(gt_w, gt_h)))
            # random shift: square crops around the ground-truth box
            for _ in range(10):
                bbox_size = npr.randint(min_size, max_size)
                delta_x = _random_shift(gt_w)
                delta_y = _random_shift(gt_h)
                nx1 = int(max(x1 + gt_w / 2 - bbox_size / 2 + delta_x, 0))
                ny1 = int(max(y1 + gt_h / 2 - bbox_size / 2 + delta_y, 0))
                nx2 = nx1 + bbox_size
                ny2 = ny1 + bbox_size
                # Discard crops that extend past the image.
                if nx2 > img_w or ny2 > img_h:
                    continue
                crop_box = np.array([nx1, ny1, nx2, ny2])
                cropped_im = img[ny1:ny2 + 1, nx1:nx2 + 1, :]
                resized_im = cv2.resize(cropped_im, (size, size))
                iou = IoU(crop_box, np.expand_dims(gt_box, 0))
                if iou > 0.65:
                    F_imgs.append(resized_im)
                    # Normalize landmarks relative to the random crop.
                    landmark_ = _normalized_landmarks(
                        landmarkGt, nx1, ny1, bbox_size, bbox_size)
                    F_landmarks.append(landmark_.reshape(10))
                    # Separate name so the loop's `bbox` is not rebound.
                    crop_bbox = BBox([nx1, ny1, nx2, ny2])

                    # mirror (applied at random)
                    if random.choice([0, 1]) > 0:
                        face_flipped, landmark_flipped = flip(resized_im, landmark_)
                        face_flipped = cv2.resize(face_flipped, (size, size))
                        F_imgs.append(face_flipped)
                        F_landmarks.append(landmark_flipped.reshape(10))
                    # rotate with angle argument 5 (applied at random)
                    if random.choice([0, 1]) > 0:
                        _append_rotated(img, crop_bbox, landmark_, 5, size,
                                        F_imgs, F_landmarks)
                    # rotate with angle argument -5 (applied at random)
                    if random.choice([0, 1]) > 0:
                        _append_rotated(img, crop_bbox, landmark_, -5, size,
                                        F_imgs, F_landmarks)

            F_imgs, F_landmarks = np.asarray(F_imgs), np.asarray(F_landmarks)
            for face, points in zip(F_imgs, F_landmarks):
                # Keep only samples whose landmarks all lie strictly inside (0, 1).
                if np.any(points <= 0) or np.any(points >= 1):
                    continue
                image_path = join(dst_dir, "%d.jpg" % (image_id))
                cv2.imwrite(image_path, face)
                f.write(image_path + " -2 " + " ".join(map(str, points)) + "\n")
                image_id = image_id + 1

    return F_imgs, F_landmarks
if __name__ == '__main__':
    # NOTE: GenerateData reads `dstdir` and `OUTPUT` as module-level globals,
    # so both must be defined here before it is called.
    dstdir = "../../DATA/12/train_PNet_landmark_aug"
    OUTPUT = '../../DATA/12'
    data_path = '../../DATA'
    # makedirs also creates missing parent directories and raises if a
    # directory cannot be created, so no separate existence assert is needed.
    for directory in (OUTPUT, dstdir):
        if not exists(directory):
            os.makedirs(directory)
    # train data
    net = "PNet"
    # the file contains the names of all the landmark training data
    train_txt = "trainImageList.txt"
    imgs, landmarks = GenerateData(train_txt, data_path, net, argument=True)