代码下载地址:
这里采用第三方的MXNet实现版本:https://github.com/pangyupo/mxnet_mtcnn_face_detection
参考网页
https://blog.csdn.net/u014380165/article/details/78906898
概要:
代码主要是说怎么使用MTCNN算法进行人脸检测,不涉及到训练过程
主要包含三个脚本:main.py、mtcnn_detector.py、helper.py。
main.py是代码的入口
mtcnn_detector.py是主要的执行函数
helper.py是一系列辅助函数
代码修改
代码直接运行会有问题,添加了一些修改,已经提交到本人git上
1.在main.py中添加一个main函数包含所有代码,然后
if __name__ == "__main__":
main()
否则使用multiprocessing库时会报错:在以spawn方式启动子进程的平台(如Windows)上,子进程会重新导入主模块,入口代码必须放在该判断之内
2.python3中使用from itertools import izip有错误
改为
# Python 2 provides itertools.izip; Python 3 removed it because the built-in
# zip is already lazy, so fall back to zip there.
try:
    from itertools import izip
except ImportError:  # Python 3.x
    izip = zip
3.还有一些地方需要把float转为int(例如传给cv2绘图函数的坐标),运行代码时根据报错信息逐个修改即可
代码解析
main.py
主要代码就是main函数,在代码里面详细解释
def main():
    """
    Run the MTCNN detector on ``test2.jpg``, then show and save the results.

    The detector is built with these arguments:
        model_folder: folder holding the model files
        num_worker: number of processes to use
        accurate_landmark: whether to compute high-precision landmarks

    For every detection the aligned face chip is shown and written to
    ``chip_<i>.png``; the boxes and landmarks are drawn on a copy of the
    input image, which is displayed until a key is pressed.
    """
    detector = MtcnnDetector(model_folder='model', ctx=mx.cpu(0), num_worker=4, accurate_landmark=False)
    img = cv2.imread('test2.jpg')

    # Main detection call; it returns None when nothing usable is found.
    results = detector.detect_face(img)

    # Show and save the results.
    if results is not None:
        total_boxes = results[0]
        points = results[1]

        # Extract aligned face chips.
        # NOTE(review): 144 and 0.37 are presumably the chip size and the
        # padding ratio -- confirm against extract_image_chips.
        chips = detector.extract_image_chips(img, points, 144, 0.37)
        for i, chip in enumerate(chips):
            cv2.imshow('chip_'+str(i), chip)
            cv2.imwrite('chip_'+str(i)+'.png', chip)

        draw = img.copy()
        for b in total_boxes:
            cv2.rectangle(draw, (int(b[0]), int(b[1])), (int(b[2]), int(b[3])), (255, 255, 255))

        # Each landmark row is laid out as x1..x5 followed by y1..y5.
        for p in points:
            for i in range(5):
                # OpenCV drawing functions need integer pixel coordinates,
                # so the float landmarks are cast like the box corners above.
                cv2.circle(draw, (int(p[i]), int(p[i + 5])), 1, (0, 0, 255), 2)

        cv2.imshow("detection result", draw)
        cv2.waitKey(0)
mtcnn_detector.py
首先看初始化
def __init__(self,
             model_folder='.',            # location of the trained models
             minsize = 20,                # smallest face size to detect
             threshold = [0.6, 0.7, 0.8], # one threshold per network stage
             factor = 0.709,              # image scaling ratio per pyramid step
             num_worker = 1,              # size of the process pool used for detection
             accurate_landmark = False,   # whether to compute high-precision landmarks
             ctx=mx.cpu()):
    """
        Initialize the detector

        Parameters:
        ----------
            model_folder : string
                path for the models
            minsize : float number
                minimal face to detect
            threshold : sequence of 3 float numbers
                detect threshold for 3 stages
            factor: float number
                scale factor for image pyramid
            num_worker: int number
                number of processes we use for first stage
            accurate_landmark: bool
                use accurate landmark localization or not
            ctx:
                context passed to every model load
    """
    self.num_worker = num_worker
    self.accurate_landmark = accurate_landmark

    # Load 4 models from folder.
    # NOTE(review): the literal 1 passed to load() is presumably the
    # checkpoint epoch -- confirm against mx.model.FeedForward.load.
    models = ['det1', 'det2', 'det3', 'det4']
    models = [os.path.join(model_folder, f) for f in models]

    # One copy of the first-stage network per worker process.
    self.PNets = [
        mx.model.FeedForward.load(models[0], 1, ctx=ctx)
        for _ in range(num_worker)
    ]

    self.Pool = Pool(num_worker)

    self.RNet = mx.model.FeedForward.load(models[1], 1, ctx=ctx)
    self.ONet = mx.model.FeedForward.load(models[2], 1, ctx=ctx)
    self.LNet = mx.model.FeedForward.load(models[3], 1, ctx=ctx)

    self.minsize = float(minsize)
    self.factor = float(factor)
    # Copy so that editing self.threshold never mutates the shared default
    # list (or the caller's list).
    self.threshold = list(threshold)
detect_face
主要函数,分为初始化,3个阶段,高精度计算
1初始化
def detect_face(self, img):
"""
detect face over img
Parameters:
----------
img: numpy array, bgr order of shape (1, 3, n, m)
input image
Retures:
-------
bboxes: numpy array, n x 5 (x1,y2,x2,y2,score)
bboxes
points: numpy array, n x 10 (x1, x2 ... x5, y1, y2 ..y5)
landmarks
"""
# check input
MIN_DET_SIZE = 12 #设计最小检测的图片大小
if img is None:
return None
# only works for color image
if len(img.shape) != 3:#长、宽、颜色
return None
# detected boxes
total_boxes = []
height, width, _ = img.shape
minl = min( height, width)
"""
构建一系列 scales,使得图片最终大小为MIN_DET_SIZE
"""
# get all the valid scales
scales = []
m = MIN_DET_SIZE/self.minsize
minl *= m
factor_count = 0
while minl > MIN_DET_SIZE:
scales.append(m*self.factor**factor_count)
minl *= self.factor
factor_count += 1
2.第一阶段,使用PNet构造一系列box
使用了几个函数
"""
将 number根据进程池大小(num_worker)分段
比如输入number=10,num_worker=4
输出[[0,1,2,3],[4,5,6,7],[8,9]]
"""
def slice_index(self, number):
    """
        Split the indexes 0..number-1 into consecutive batches of at most
        self.num_worker items; only the last batch may be shorter.

        Example: number=10 with num_worker=4 gives
        [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9]].

        Parameters:
        ----------
            number: int number
                how many indexes to split
        Returns:
        -------
            list of lists of int
    """
    # Materialize the indexes so every batch is a real list on Python 3 as
    # well (slicing a bare range would yield range objects there).
    indexes = list(range(number))
    batch_size = self.num_worker
    return [indexes[start:start + batch_size]
            for start in range(0, len(indexes), batch_size)]
"""
nms算法
将输入的多个box根据置信度排序
如果置信度高的box和另外一个box的重合度大于阈值,删除置信度低的box
"""
def nms(boxes, overlap_threshold, mode='Union'):
    """
        non max suppression

        Boxes are visited from the highest score to the lowest. Each visited
        box is kept, and every remaining box whose overlap with it exceeds
        overlap_threshold is dropped.

        Parameters:
        ----------
            boxes: array-like, n x 5
                one row per box: x1, y1, x2, y2, score
                (the start corner is expected to be smaller than the end corner)
            overlap_threshold: float number
                threshold of overlap
            mode: string
                how to compute overlap ratio: 'Min' divides the intersection
                by the smaller of the two areas, any other value ('Union')
                divides it by the area of the union
        Returns:
        -------
            list with the row indexes of the selected boxes, highest score first
    """
    boxes = np.asarray(boxes)

    # if there are no boxes, return an empty list
    if len(boxes) == 0:
        return []

    # Integer boxes are converted to floats. Unsigned integers are included
    # because the subtractions below would otherwise wrap around.
    if boxes.dtype.kind in "iu":
        boxes = boxes.astype("float")

    # initialize the list of picked indexes
    pick = []

    # grab the coordinates of the bounding boxes
    x1, y1, x2, y2, score = [boxes[:, i] for i in range(5)]
    area = (x2 - x1 + 1) * (y2 - y1 + 1)

    # indexes sorted by ascending score, so the best box is always last
    idxs = np.argsort(score)

    # keep looping while some indexes still remain in the indexes list
    while len(idxs) > 0:
        # take the highest-scoring remaining box and keep it
        i = idxs[-1]
        rest = idxs[:-1]
        pick.append(i)

        # np.maximum / np.minimum work element-wise (they are not
        # reductions): this gives the intersection rectangle of box i with
        # every remaining box. Boxes that do not intersect end up with a
        # zero width or height, hence a zero intersection area.
        xx1 = np.maximum(x1[i], x1[rest])
        yy1 = np.maximum(y1[i], y1[rest])
        xx2 = np.minimum(x2[i], x2[rest])
        yy2 = np.minimum(y2[i], y2[rest])

        # compute the width and height of the intersection
        w = np.maximum(0, xx2 - xx1 + 1)
        h = np.maximum(0, yy2 - yy1 + 1)
        inter = w * h  # overlapping area

        if mode == 'Min':
            overlap = inter / np.minimum(area[i], area[rest])
        else:
            overlap = inter / (area[i] + area[rest] - inter)

        # drop the picked box and every box that overlaps it too much
        idxs = rest[~(overlap > overlap_threshold)]

    return pick
'''
第一阶段代码
'''
sliced_index = self.slice_index(len(scales))
total_boxes = []
'''
total_boxes是一个box的list,box的shape为K*9,K就是bbox的数量,9包含4个坐标点信息,一个置信度score和4个用来调整前面4个坐标点的偏移信息
偏移信息是相对于box宽、高的比例(百分比)形式。
'''
for batch in sliced_index:
#将多个数据放入进程池里面,这里是4个进程
local_boxes = self.Pool.map( detect_first_stage_warpper, \
izip(repeat(img), self.PNets[:len(batch)], [scales[i] for i in batch], repeat(self.threshold[0])) )
total_boxes.extend(local_boxes)
# remove the Nones
total_boxes = [ i for i in total_boxes if i is not None]
if len(total_boxes) == 0:
return None
total_boxes = np.vstack(total_boxes)
if total_boxes.size == 0:
return None
#使用nms算法删除box
# merge the detection from first stage
pick = nms(total_boxes[:, 0:5], 0.7, 'Union')
total_boxes = total_boxes[pick]
bbw = total_boxes[:, 2] - total_boxes[:, 0] + 1
bbh = total_boxes[:, 3] - total_boxes[:, 1] + 1
#按照偏移值重新计算box
# refine the bboxes
total_boxes = np.vstack([total_boxes[:, 0]+total_boxes[:, 5] * bbw,
total_boxes[:, 1]+total_boxes[:, 6] * bbh,
total_boxes[:, 2]+total_boxes[:, 7] * bbw,
total_boxes[:, 3]+total_boxes[:, 8] * bbh,
total_boxes[:, 4]
])
total_boxes = total_boxes.T
total_boxes = self.convert_to_square(total_boxes)
total_boxes[:, 0:4] = np.round(total_boxes[:, 0:4])
第二、三阶段的处理流程类似,这里不再赘述。