transform.py
import os
import sys
sys.path.append(os.getcwd())
from wider_loader import WIDER
import cv2
import time
"""
Convert the WIDER FACE .mat annotation file into a plain-text (.txt) annotation file.
"""
# Directory holding the original WIDER FACE training images.
path_to_image = './data_set/face_detection/WIDER_train/images'
# MATLAB annotation file to convert.
file_to_label = './data_set/face_detection/wider_face_split/wider_face_split/wider_face_train.mat'
# Plain-text annotation file produced by this script.
target_file = './anno_store/anno_train.txt'

wider = WIDER(file_to_label, path_to_image)

line_count = 0
box_count = 0
print('start transforming....')
t = time.time()

# One output line per image: the image name followed by every value of every
# bounding box, all separated by single spaces.
with open(target_file, 'w+') as f:
    # wider.next() is consumed as an iterable of per-image records;
    # press ctrl-C to stop the process early.
    for data in wider.next():
        line_count += 1
        fields = [str(data.image_name)]
        for box in data.bboxes:
            box_count += 1
            fields.extend(str(value) for value in box)
        # The newline is joined as its own field, so a space precedes it.
        fields.append('\n')
        f.write(' '.join(fields))

st = time.time() - t
print('end transforming')
print('spend time:%d' % st)
print('total line(images):%d' % line_count)
print('total boxes(faces):%d' % box_count)
gen_Pnet_train_data.py
"""
2018-10-20 15:50:20
generate positive, negative and part-face images of size 12*12 to feed into PNet
"""
import sys
import numpy as np
import cv2
import os
sys.path.append(os.getcwd())
import numpy as np
from mtcnn.data_preprocess.utils import IoU
prefix = ''
# Annotation list: one line per image, "<image path> x y w h x y w h ...".
anno_file = "./anno_store/anno_train.txt"
im_dir = "./data_set/face_detection/WIDER_train/images"
pos_save_dir = "./data_set/train/12/positive"  # crops with IoU >= 0.65
part_save_dir = "./data_set/train/12/part"     # crops with 0.4 <= IoU < 0.65
neg_save_dir = './data_set/train/12/negative'  # crops with IoU < 0.3

# makedirs (not mkdir) so that missing parent directories are created as well.
for save_dir in (pos_save_dir, part_save_dir, neg_save_dir):
    os.makedirs(save_dir, exist_ok=True)


def _max_iou(crop_box, gt_boxes):
    """Return the largest IoU between crop_box and any box in gt_boxes.

    Returns 0.0 when gt_boxes is empty, so that an image without annotated
    faces still yields negatives instead of failing on an empty array.
    """
    if len(gt_boxes) == 0:
        return 0.0
    return np.max(IoU(crop_box, gt_boxes))


def _save_crop(img, left, top, right, bottom, save_file):
    """Crop img[top:bottom, left:right], resize it to 12x12 and write it to save_file."""
    cropped_im = img[int(top): int(bottom), int(left): int(right), :]
    resized_im = cv2.resize(cropped_im, (12, 12), interpolation=cv2.INTER_LINEAR)
    cv2.imwrite(save_file, resized_im)


# anno_file: labels of the WIDER FACE training data, one image per line.
with open(anno_file, 'r') as f:
    annotations = f.readlines()
num = len(annotations)
print("%d pics in total" % num)

p_idx = 0  # positive samples written so far
n_idx = 0  # negative samples written so far
d_idx = 0  # part-face samples written so far
idx = 0    # images visited so far
box_idx = 0

# Label files for positive, negative and part samples; the context manager
# guarantees they are flushed and closed even if an image fails midway.
with open(os.path.join('./anno_store', 'pos_12.txt'), 'w') as f1, \
        open(os.path.join('./anno_store', 'neg_12.txt'), 'w') as f2, \
        open(os.path.join('./anno_store', 'part_12.txt'), 'w') as f3:
    for annotation in annotations:
        annotation = annotation.strip().split(' ')
        im_path = os.path.join(prefix, annotation[0])
        print(im_path)
        bbox = list(map(float, annotation[1:]))
        # One row per face: (x, y, w, h).
        boxes = np.array(bbox, dtype=np.int32).reshape(-1, 4)
        img = cv2.imread(im_path)
        idx += 1
        if idx % 100 == 0:
            print(idx, "images done")
        if img is None:
            # cv2.imread returns None instead of raising for unreadable files.
            print("skip unreadable image: %s" % im_path)
            continue

        height, width = img.shape[:2]
        # Exclusive upper bound for the random crop size; randint(12, max_size)
        # needs max_size > 12, otherwise it raises "low >= high".
        max_size = min(width, height) // 2
        if max_size <= 12:
            print("skip too small image: %s" % im_path)
            continue

        # Keep cropping random squares until 50 negatives are collected.
        # NOTE: this loops until 50 crops pass the IoU test, so an image that
        # is almost entirely covered by faces can take a long time.
        neg_num = 0
        while neg_num < 50:
            size = np.random.randint(12, max_size)
            # (nx, ny) is the top-left corner of the random crop.
            nx = np.random.randint(0, width - size)
            ny = np.random.randint(0, height - size)
            # crop_box is given as corners: [x1, y1, x2, y2].
            crop_box = np.array([nx, ny, nx + size, ny + size])
            # IoU with every ground-truth box must be below 0.3.
            if _max_iou(crop_box, boxes) < 0.3:
                save_file = os.path.join(neg_save_dir, "%s.jpg" % n_idx)
                f2.write(save_file + ' 0\n')
                _save_crop(img, nx, ny, nx + size, ny + size, save_file)
                n_idx += 1
                neg_num += 1

        for box in boxes:
            # The annotation stores (x_left, y_top, width, height).
            x1, y1, w, h = box
            x2 = x1 + w - 1
            y2 = y1 + h - 1
            # Ignore faces whose longer side is below 40 px (their ground
            # truth may be inaccurate), boxes starting outside the image, and
            # boxes thinner than 5 px: for those the +-20% offset range below
            # collapses to an empty interval and randint would raise.
            if max(w, h) < 40 or min(w, h) < 5 or x1 < 0 or y1 < 0:
                continue

            # Negatives that overlap the face only slightly (hard negatives).
            for i in range(5):
                size = np.random.randint(12, max_size)
                # Offsets of the crop corner relative to (x1, y1); the lower
                # bound keeps x1 + delta_x and y1 + delta_y non-negative.
                delta_x = np.random.randint(max(-size, -x1), w)
                delta_y = np.random.randint(max(-size, -y1), h)
                nx1 = max(0, x1 + delta_x)
                ny1 = max(0, y1 + delta_y)
                # Discard crops that reach past the right/bottom image border.
                if nx1 + size > width or ny1 + size > height:
                    continue
                crop_box = np.array([nx1, ny1, nx1 + size, ny1 + size])
                if _max_iou(crop_box, boxes) < 0.3:
                    save_file = os.path.join(neg_save_dir, "%s.jpg" % n_idx)
                    f2.write(save_file + ' 0\n')
                    _save_crop(img, nx1, ny1, nx1 + size, ny1 + size, save_file)
                    n_idx += 1

            # Positive and part-face candidates around the face.
            box_ = box.reshape(1, -1)
            for i in range(20):
                # Crop size in [0.8 * min(w, h), 1.25 * max(w, h)).
                size = np.random.randint(int(min(w, h) * 0.8),
                                         int(np.ceil(1.25 * max(w, h))))
                # Offset of the crop centre relative to the face centre.
                delta_x = np.random.randint(int(-w * 0.2), int(w * 0.2))
                delta_y = np.random.randint(int(-h * 0.2), int(h * 0.2))
                # (x1 + w / 2, y1 + h / 2) is the centre of the face box.
                nx1 = max(x1 + w / 2 + delta_x - size / 2, 0)
                ny1 = max(y1 + h / 2 + delta_y - size / 2, 0)
                nx2 = nx1 + size
                ny2 = ny1 + size
                if nx2 > width or ny2 > height:
                    continue
                crop_box = np.array([nx1, ny1, nx2, ny2])
                # Regression targets: offset of each face corner from the
                # matching crop corner, normalised by the crop size.
                offset_x1 = (x1 - nx1) / float(size)
                offset_y1 = (y1 - ny1) / float(size)
                offset_x2 = (x2 - nx2) / float(size)
                offset_y2 = (y2 - ny2) / float(size)

                # Evaluate the overlap once and reuse it for both thresholds.
                iou = np.max(IoU(crop_box, box_))
                if iou >= 0.65:
                    # Positive sample, label 1.
                    save_file = os.path.join(pos_save_dir, "%s.jpg" % p_idx)
                    f1.write(save_file + ' 1 %.2f %.2f %.2f %.2f\n' % (offset_x1, offset_y1, offset_x2, offset_y2))
                    _save_crop(img, nx1, ny1, nx2, ny2, save_file)
                    p_idx += 1
                elif iou >= 0.4:
                    # Part-face sample, label -1.
                    save_file = os.path.join(part_save_dir, "%s.jpg" % d_idx)
                    f3.write(save_file + ' -1 %.2f %.2f %.2f %.2f\n' % (offset_x1, offset_y1, offset_x2, offset_y2))
                    _save_crop(img, nx1, ny1, nx2, ny2, save_file)
                    d_idx += 1
            box_idx += 1
        print("%s images done, pos: %s part: %s neg: %s" % (idx, p_idx, d_idx, n_idx))
train_P_net.py
import argparse
import sys
import os
sys.path.append(os.getcwd())
from mtcnn.core.imagedb import ImageDB
from mtcnn.train_net.train import train_pnet
import mtcnn.config as config
# Hard-coded settings passed to train_net() by the __main__ block of this script.
annotation_file = './anno_store/imglist_anno_12.txt'  # training annotation list
model_store_path = './model_store'  # passed to train_net as model_store_path
end_epoch = 10
frequent = 200
lr = 0.01
batch_size = 512
use_cuda = False
def train_net(annotation_file, model_store_path,
              end_epoch=16, frequent=200, lr=0.01, batch_size=128, use_cuda=False):
    """Build the image database from annotation_file and train PNet on it.

    The database loaded from ``annotation_file`` is passed through
    ``append_flipped_images`` before training. All remaining arguments are
    forwarded unchanged to ``train_pnet`` (``lr`` is passed as ``base_lr``).
    """
    imagedb = ImageDB(annotation_file)
    gt_imdb = imagedb.append_flipped_images(imagedb.load_imdb())
    train_pnet(
        model_store_path=model_store_path,
        end_epoch=end_epoch,
        imdb=gt_imdb,
        batch_size=batch_size,
        frequent=frequent,
        base_lr=lr,
        use_cuda=use_cuda,
    )
def parse_args():
    """Parse the command-line options for PNet training.

    Returns:
        argparse.Namespace with the attributes ``annotation_file``,
        ``model_store_path``, ``end_epoch``, ``frequent``, ``lr``,
        ``batch_size``, ``use_cuda`` and ``prefix_path``. Defaults come from
        ``mtcnn.config`` except for ``frequent`` (200) and ``prefix_path``
        (None).
    """
    def _str2bool(value):
        # argparse's type=bool is a trap: bool("False") is True because any
        # non-empty string is truthy. Parse the text explicitly instead.
        if isinstance(value, bool):
            return value
        text = value.strip().lower()
        if text in ('true', 't', 'yes', 'y', '1'):
            return True
        if text in ('false', 'f', 'no', 'n', '0'):
            return False
        raise argparse.ArgumentTypeError('boolean value expected, got %r' % value)

    parser = argparse.ArgumentParser(description='Train PNet',
                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--anno_file', dest='annotation_file',
                        default=os.path.join(config.ANNO_STORE_DIR, config.PNET_TRAIN_IMGLIST_FILENAME),
                        help='training data annotation file', type=str)
    parser.add_argument('--model_path', dest='model_store_path', help='training model store directory',
                        default=config.MODEL_STORE_DIR, type=str)
    parser.add_argument('--end_epoch', dest='end_epoch', help='end epoch of training',
                        default=config.END_EPOCH, type=int)
    parser.add_argument('--frequent', dest='frequent', help='frequency of logging',
                        default=200, type=int)
    parser.add_argument('--lr', dest='lr', help='learning rate',
                        default=config.TRAIN_LR, type=float)
    parser.add_argument('--batch_size', dest='batch_size', help='train batch size',
                        default=config.TRAIN_BATCH_SIZE, type=int)
    parser.add_argument('--gpu', dest='use_cuda', help='train with gpu',
                        default=config.USE_CUDA, type=_str2bool)
    # dest was an empty string, which stored the value under an unusable
    # attribute name; expose it as args.prefix_path instead.
    parser.add_argument('--prefix_path', dest='prefix_path',
                        help='training data annotation images prefix root path', type=str)
    return parser.parse_args()
if __name__ == '__main__':
    # The run is configured by the module-level settings above rather than by
    # the command line; parse_args() is defined in this file but not used here.
    print('train Pnet argument:')
    train_net(
        annotation_file,
        model_store_path,
        end_epoch=end_epoch,
        frequent=frequent,
        lr=lr,
        batch_size=batch_size,
        use_cuda=use_cuda,
    )
mtcnn_test.py
import cv2
from mtcnn.core.detect import create_mtcnn_net, MtcnnDetector
from mtcnn.core.vision import vis_face
if __name__ == '__main__':
    # Only the PNet weights are supplied; rnet and onet are whatever
    # create_mtcnn_net returns when their model paths are omitted
    # (presumably None -- confirm in mtcnn.core.detect).
    pnet, rnet, onet = create_mtcnn_net(p_model_path="./model_store/pnet_epoch.pt", use_cuda=False)
    mtcnn_detector = MtcnnDetector(pnet=pnet, rnet=rnet, onet=onet, min_face_size=24)

    image_path = "./112.jpg"
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread signals failure by returning None; fail here with a
        # clear message instead of an obscure error inside cvtColor.
        raise OSError("cannot read image: %s" % image_path)

    # Detection runs on the BGR image as loaded; the RGB copy (named img_bg
    # for historical reasons) is the one handed to vis_face.
    img_bg = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    bboxs, landmarks = mtcnn_detector.detect_face(img)

    save_name = 'r_4.jpg'
    vis_face(img_bg, bboxs, landmarks, save_name)