最近跑通了MTCNN的训练代码,本文对其中用于生成positive、negative、part样本的gen_48net_data2.py代码进行解读。
项目地址:https://github.com/dlunion/mtcnn
对应代码地址:https://github.com/dlunion/mtcnn/blob/master/train/gen_48net_data2.py
-
import errno
import os
import sys

# On Windows, add the Python library directory to the module search path.
sys.path.append('D:\\Anaconda2\\libs')

import cv2
import numpy as np
import numpy.random as npr

from utils import IoU
-
-
# stdsize: side length (in pixels) every saved crop is resized to; the
# positive, negative and part samples all share this size.
stdsize = 48

# Annotation list and the directory holding the source images.
anno_file = "E:/face_alignment/data/CelebA/Anno/mtcnn_train_label_2.txt"
im_dir = "E:/face_alignment/data/CelebA/Img/img_celeba.7z/img_celeba/"

# Output directories, all rooted at a folder named after stdsize.
pos_save_dir = str(stdsize) + "/positive"
part_save_dir = str(stdsize) + "/part"
neg_save_dir = str(stdsize) + '/negative'
save_dir = "./" + str(stdsize)
-
-
def mkr(dr):
    """Create directory *dr* if it does not exist yet.

    Missing parent directories are created as well. A path that already
    exists is left untouched, and losing a creation race to another
    process is not treated as an error.

    Args:
        dr: Path of the directory to create.

    Raises:
        OSError: If the directory cannot be created for any reason other
            than it already existing.
    """
    if os.path.exists(dr):
        return
    try:
        os.makedirs(dr)
    except OSError as exc:
        # The path may have appeared between the check and the call.
        if exc.errno != errno.EEXIST:
            raise
-
-
# Create the output root and the three per-class sample directories.
mkr(save_dir)
mkr(pos_save_dir)
mkr(part_save_dir)
mkr(neg_save_dir)

# Open the three generated list files (positive / negative / part). Each
# receives one line per saved sample; they stay open for the whole run and
# are closed at the end of the script.
f1 = open(os.path.join(save_dir, 'pos_' + str(stdsize) + '.txt'), 'w')
f2 = open(os.path.join(save_dir, 'neg_' + str(stdsize) + '.txt'), 'w')
f3 = open(os.path.join(save_dir, 'part_' + str(stdsize) + '.txt'), 'w')
-
# Load the annotation file of the source images, one line per entry.
with open(anno_file, 'r') as f:
    annotations = f.readlines()

num = len(annotations)
# A single parenthesised argument prints identically on Python 2 and 3.
print("%d pics in total" % num)

# Running counters used to name the saved samples and report progress.
p_idx = 0  # positive samples written so far
n_idx = 0  # negative samples written so far
d_idx = 0  # part samples written so far
idx = 0  # annotation lines processed so far
box_idx = 0  # ground-truth boxes processed so far
-
-
# For every annotated source image, generate negative, positive and part
# crops from the labelled boxes and write the matching label lines.
# NOTE(review): IoU comes from utils and is assumed to return one overlap
# value per row of the boxes array - confirm against utils.IoU.
for annotation in annotations:  # one annotation line per source image
    annotation = annotation.strip().split(' ')  # fields are space separated
    im_path = annotation[0]  # first field: image file name
    # list(...) keeps the slicing below working when map() is lazy.
    bbox = list(map(float, annotation[1:-10]))  # everything before the last 10 fields
    pts = list(map(float, annotation[-10:]))  # last 10 fields: landmark coordinates
    # One row of four values per box.
    boxes = np.array(bbox, dtype=np.float32).reshape(-1, 4)
    im_path = im_dir + im_path
    img = cv2.imread(im_path)
    idx += 1
    if idx % 100 == 0:
        print("%s images done" % idx)

    if img is None:
        # cv2.imread yields None for a missing or unreadable file; skip the
        # entry instead of failing on img.shape.
        continue
    height, width = img.shape[:2]

    # ---- negative samples: up to 100 per source image ----
    # Crop sizes are drawn from [40, min(width, height) // 2); randint needs
    # a non-empty range, so images that are too small yield no negatives.
    max_neg_size = min(width, height) // 2
    neg_quota = 100 if max_neg_size > 40 else 0
    neg_num = 0
    while neg_num < neg_quota:
        size = npr.randint(40, max_neg_size)
        # Top-left corner chosen so the square crop stays inside the image.
        nx = npr.randint(0, width - size)
        ny = npr.randint(0, height - size)
        crop_box = np.array([nx, ny, nx + size, ny + size])

        # Overlap of the random crop with every annotated box.
        Iou = IoU(crop_box, boxes)

        # Keep the crop as a negative only if it overlaps every ground-truth
        # box by less than 0.3; the crop/resize work is done only then.
        if np.max(Iou) < 0.3:
            cropped_im = img[ny:ny + size, nx:nx + size, :]
            resized_im = cv2.resize(cropped_im, (stdsize, stdsize),
                                    interpolation=cv2.INTER_LINEAR)
            save_file = os.path.join(neg_save_dir, "%s.jpg" % n_idx)
            # List entry: sample path followed by class label 0.
            f2.write(str(stdsize) + "/negative/%s" % n_idx + ' 0\n')
            cv2.imwrite(save_file, resized_im)
            n_idx += 1
            neg_num += 1

    backupPts = pts[:]  # pristine landmark copy, re-used for every crop
    for box in boxes:
        # box is (x_left, y_top, x_right, y_bottom)
        x1, y1, x2, y2 = box
        w = x2 - x1 + 1
        h = y2 - y1 + 1

        # Ignore small faces (their ground-truth boxes may be inaccurate)
        # and boxes starting outside the image.
        if max(w, h) < 12 or x1 < 0 or y1 < 0:
            continue

        # Integer sampling bounds, computed once per box. randint requires
        # integer bounds with low < high, so boxes too narrow to allow any
        # centre shift are skipped.
        min_size = int(min(w, h) * 0.8)
        max_size = int(np.ceil(1.25 * max(w, h)))
        max_dx = int(w * 0.2)
        max_dy = int(h * 0.2)
        if max_dx < 1 or max_dy < 1:
            continue

        box_ = box.reshape(1, -1)  # single-row form passed to IoU

        # Draw 50 random crops around the box: IoU >= 0.65 -> positive,
        # 0.4 <= IoU < 0.65 -> part face, anything else is discarded.
        for i in range(50):
            size = npr.randint(min_size, max_size)

            # Shift of the crop centre relative to the box centre.
            delta_x = npr.randint(-max_dx, max_dx)
            delta_y = npr.randint(-max_dy, max_dy)

            # Top-left corner of the shifted crop, clamped to the image.
            # size is an int, so floor division is used explicitly.
            nx1 = max(x1 + w / 2 + delta_x - size // 2, 0)
            ny1 = max(y1 + h / 2 + delta_y - size // 2, 0)
            nx2 = nx1 + size
            ny2 = ny1 + size

            # Drop crops that extend past the image.
            if nx2 > width or ny2 > height:
                continue
            crop_box = np.array([nx1, ny1, nx2, ny2])

            overlap = IoU(crop_box, box_)
            if not (overlap >= 0.4):
                # Neither positive nor part: nothing to save.
                continue

            # Box regression targets: box corners relative to the crop's
            # top-left corner, in units of the crop size
            # (i.e. x1 = nx1 + size * offset_x1).
            offset_x1 = (x1 - nx1) / float(size)
            offset_y1 = (y1 - ny1) / float(size)
            offset_x2 = (x2 - nx1) / float(size)
            offset_y2 = (y2 - ny1) / float(size)

            # Landmarks expressed relative to the crop's top-left corner and
            # normalised by the crop size; pts alternates x and y values.
            pts = backupPts[:]
            for k in range(len(pts) // 2):
                pts[k * 2] = (pts[k * 2] - nx1) / float(size)
                pts[k * 2 + 1] = (pts[k * 2 + 1] - ny1) / float(size)
            landmark_text = "".join(" %f" % value for value in pts)

            cropped_im = img[int(ny1):int(ny2), int(nx1):int(nx2), :]
            resized_im = cv2.resize(cropped_im, (stdsize, stdsize),
                                    interpolation=cv2.INTER_LINEAR)

            if overlap >= 0.65:
                # Positive sample: path, label 1, box offsets, landmarks.
                save_file = os.path.join(pos_save_dir, "%s.jpg" % p_idx)
                f1.write(str(stdsize) + "/positive/%s" % p_idx
                         + ' 1 %f %f %f %f' % (offset_x1, offset_y1,
                                               offset_x2, offset_y2)
                         + landmark_text + "\n")
                cv2.imwrite(save_file, resized_im)
                p_idx += 1
            else:
                # Part face: path, label -1, box offsets, landmarks.
                save_file = os.path.join(part_save_dir, "%s.jpg" % d_idx)
                f3.write(str(stdsize) + "/part/%s" % d_idx
                         + ' -1 %f %f %f %f' % (offset_x1, offset_y1,
                                                offset_x2, offset_y2)
                         + landmark_text + "\n")
                cv2.imwrite(save_file, resized_im)
                d_idx += 1

        box_idx += 1
        print("%s images done, pos: %s part: %s neg: %s"
              % (idx, p_idx, d_idx, n_idx))
-
-
# Close the positive, negative and part list files, in that order.
for list_file in (f1, f2, f3):
    list_file.close()