MTCNN算法及代码笔记

最新推荐文章于 2023-03-19 15:28:37 发布

zm147451753

最新推荐文章于 2023-03-19 15:28:37 发布

阅读量903

点赞数

本文链接：https://blog.csdn.net/zm147451753/article/details/87891242

版权

代码下载地址：

这里采用第三方的MXNet实现版本：https://github.com/pangyupo/mxnet_mtcnn_face_detection

参考网页

https://blog.csdn.net/u014380165/article/details/78906898

概要：

代码主要是说怎么使用MTCNN算法进行人脸检测，不涉及到训练过程

主要包含三个脚本：main.py、mtcnn_detector.py、helper.py。

main.py是代码的入口

mtcnn_detector.py是主要的执行函数

helper.py一系列辅助函数

代码修改

代码直接运行会有问题，添加了一些修改，已经提交到本人git上

1.在main.py中添加一个main函数包含所有代码，然后

if __name__ == "__main__":
    main()

因为使用multiprocessing库时会报错

2.python3中使用from itertools import izip有错误

改为

try:
    from itertools import izip
except ImportError:  # python3.x
    izip = zip

3.还有一些地方需要把float转为int（例如main.py中传给cv2.circle的关键点坐标必须是整数），运行代码时按报错提示逐一修改即可

代码解析

main.py

主要代码就是main函数，在代码里面详细解释

def main():
    '''
    Run the MTCNN detector on test2.jpg, save the aligned face chips and
    show the detected boxes and landmarks.

    MtcnnDetector arguments used below:
        model_folder: folder that holds the model files
        num_worker: number of processes to use
        accurate_landmark: whether to compute high-precision landmarks
    '''
    detector = MtcnnDetector(model_folder='model', ctx=mx.cpu(0), num_worker=4, accurate_landmark=False)

    img = cv2.imread('test2.jpg')

    # run detector (main entry point of the detection pipeline)
    results = detector.detect_face(img)

    # detect_face returns None when nothing usable was found
    if results is None:
        return

    total_boxes = results[0]
    points = results[1]

    # extract aligned face chips, then show and save each of them
    chips = detector.extract_image_chips(img, points, 144, 0.37)
    for i, chip in enumerate(chips):
        cv2.imshow('chip_'+str(i), chip)
        cv2.imwrite('chip_'+str(i)+'.png', chip)

    draw = img.copy()
    for b in total_boxes:
        cv2.rectangle(draw, (int(b[0]), int(b[1])), (int(b[2]), int(b[3])), (255, 255, 255))

    for p in points:
        # each row holds x1..x5 followed by y1..y5
        for i in range(5):
            # OpenCV drawing functions need integer pixel coordinates,
            # so cast here exactly like the rectangle corners above
            cv2.circle(draw, (int(p[i]), int(p[i + 5])), 1, (0, 0, 255), 2)

    cv2.imshow("detection result", draw)
    cv2.waitKey(0)

mtcnn_detector.py

首先看初始化

def __init__(self,
             model_folder='.',  # location of the trained models
             minsize = 20,  # minimal face size to detect
             threshold = [0.6, 0.7, 0.8],  # thresholds for the 3 networks
             factor = 0.709,  # scale ratio between image pyramid levels
             num_worker = 1,  # size of the process pool used for detection
             accurate_landmark = False,  # compute high-precision landmarks or not
             ctx=mx.cpu()):
    """
        Initialize the detector

        Parameters:
        ----------
            model_folder : string
                path for the models
            minsize : float number
                minimal face to detect
            threshold : list of float numbers
                detect threshold for 3 stages; a copy is stored on the instance
            factor: float number
                scale factor for image pyramid
            num_worker: int number
                number of processes we use for first stage
            accurate_landmark: bool
                use accurate landmark localization or not
            ctx:
                context object forwarded to every model load call
    """
    self.num_worker = num_worker
    self.accurate_landmark = accurate_landmark

    # load 4 models from folder
    models = ['det1', 'det2', 'det3', 'det4']
    models = [os.path.join(model_folder, f) for f in models]

    # one copy of the first-stage network per worker
    # NOTE(review): the literal 1 is presumably the checkpoint epoch - confirm against the MXNet API
    self.PNets = []
    for _ in range(num_worker):
        worker_net = mx.model.FeedForward.load(models[0], 1, ctx=ctx)
        self.PNets.append(worker_net)

    self.Pool = Pool(num_worker)

    self.RNet = mx.model.FeedForward.load(models[1], 1, ctx=ctx)
    self.ONet = mx.model.FeedForward.load(models[2], 1, ctx=ctx)
    self.LNet = mx.model.FeedForward.load(models[3], 1, ctx=ctx)

    self.minsize = float(minsize)
    self.factor = float(factor)
    # Copy the thresholds: the default argument is one list object shared by
    # every call, so storing it directly would let a change made through one
    # detector leak into all other detectors and into the default itself.
    self.threshold = list(threshold)

detect_face

主要函数，分为初始化，3个阶段，高精度计算

1初始化

def detect_face(self, img):
    """
        detect face over img

        This excerpt covers only the input checks and the construction of
        the image-pyramid scales; the code that produces the return values
        documented below lies outside this excerpt.

        Parameters:
        ----------
            img: numpy array
                input image; must be 3-dimensional, its shape is unpacked
                below as (height, width, channels)
                NOTE(review): the original text says "bgr order" - presumably
                an image as loaded by cv2.imread; confirm against the caller
        Returns:
        -------
            None when img is None or is not a 3-dimensional array, otherwise
            (per the original documentation):
            bboxes: numpy array, n x 5 (x1,y1,x2,y2,score)
                bboxes
            points: numpy array, n x 10 (x1, x2 ... x5, y1, y2 ..y5)
                landmarks
    """

    # check input
    MIN_DET_SIZE = 12  # scales are generated only while the scaled shorter side stays above this size

    if img is None:
        return None

    # only works for color image
    if len(img.shape) != 3:  # height, width, colour channels
        return None

    # detected boxes
    total_boxes = []

    height, width, _ = img.shape
    minl = min( height, width)
    """
    Build a series of scales so that the image is shrunk step by step down to MIN_DET_SIZE
    """
    # get all the valid scales
    scales = []
    # m rescales the image so that a face of self.minsize pixels becomes MIN_DET_SIZE pixels
    m = MIN_DET_SIZE/self.minsize
    minl *= m
    factor_count = 0
    # each further scale shrinks by self.factor; stop once the scaled
    # shorter side is no longer larger than MIN_DET_SIZE
    while minl > MIN_DET_SIZE:
        scales.append(m*self.factor**factor_count)
        minl *= self.factor
        factor_count += 1

2.第一阶段，使用PNet构造一系列box

使用了几个函数

"""

将 number根据进程池大小(num_worker)分段

比如输入number=10，num_worker=4

输出[[0,1,2,3],[4,5,6,7],[8,9]]

"""

def slice_index(self, number):
    """
        Split the indexes 0 .. number-1 into consecutive chunks of at most
        self.num_worker entries; only the last chunk may be shorter.

        Example: number=10 with num_worker=4 gives
        [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9]].

        Parameters:
        ----------
            number: int number
                how many indexes to split
        Returns:
        -------
            list of lists of int; empty when number is 0
    """
    # Materialise the indexes first: slicing a Python 3 range yields range
    # objects, whereas real lists are returned here on every Python version.
    indexes = list(range(number))
    step = self.num_worker
    return [indexes[start:start + step] for start in range(0, len(indexes), step)]

"""

nms算法

将输入的多个box根据置信度排序

如果置信度高的box和另外一个box的重合度大于阈值，删除置信度低的box

"""



def nms(boxes, overlap_threshold, mode='Union'):
    """
        non max suppression

        Boxes are visited from the highest score to the lowest. Every visited
        box is kept, and all remaining boxes whose overlap ratio with it is
        larger than overlap_threshold are dropped.

        Parameters:
        ----------
            boxes: numpy array (or array-like) n x 5
                input bbox array, one row per box: x1, y1, x2, y2, score
                (start corner, end corner, confidence; the start corner is
                expected to be the smaller one)
            overlap_threshold: float number
                threshold of overlap
            mode: string
                how to compute overlap ratio: 'Min' divides the intersection
                by the smaller of the two areas, any other value divides it
                by the union ('Union')
        Returns:
        -------
            list with the row indexes of the selected bbox, in the order
            they were picked (highest score first)
    """
    boxes = np.asarray(boxes)

    # if there are no boxes, return an empty list
    if len(boxes) == 0:
        return []

    # Integer-like boxes are converted to floats. Unsigned and bool dtypes are
    # included because x2 - x1 would wrap around instead of going negative,
    # which makes disjoint boxes look like they overlap.
    if boxes.dtype.kind in "iub":
        boxes = boxes.astype("float")

    # grab the coordinates and scores of the bounding boxes
    x1, y1, x2, y2, score = [boxes[:, col] for col in range(5)]
    area = (x2 - x1 + 1) * (y2 - y1 + 1)

    # indexes sorted by ascending score, so the best candidate is always last
    order = np.argsort(score)

    pick = []
    while len(order) > 0:
        best = order[-1]
        rest = order[:-1]
        pick.append(best)

        # Element-wise max/min against every remaining box gives the corners
        # of each intersection rectangle.
        xx1 = np.maximum(x1[best], x1[rest])
        yy1 = np.maximum(y1[best], y1[rest])
        xx2 = np.minimum(x2[best], x2[rest])
        yy2 = np.minimum(y2[best], y2[rest])

        # width and height are clamped at 0, so disjoint boxes have area 0
        w = np.maximum(0, xx2 - xx1 + 1)
        h = np.maximum(0, yy2 - yy1 + 1)
        inter = w * h  # overlapping area

        if mode == 'Min':
            overlap = inter / np.minimum(area[best], area[rest])
        else:
            overlap = inter / (area[best] + area[rest] - inter)

        # keep only the candidates that do not exceed the threshold
        order = rest[~(overlap > overlap_threshold)]

    return pick

'''

第一阶段代码

'''

# Excerpt from the body of detect_face: first stage of the detection.
# Group the scale indexes into batches (see slice_index).
sliced_index = self.slice_index(len(scales))
total_boxes = []

'''
total_boxes is a list of box arrays. Each array has shape K x 9, K being the
number of bboxes: 4 corner coordinates, one confidence score and 4 offsets
used to adjust the 4 coordinates. The offsets are fractions of the box
width / height.
'''
for batch in sliced_index:
    # run one batch of scales through the process pool (4 processes in the
    # article's example), pairing each scale with its own PNet copy
    local_boxes = self.Pool.map( detect_first_stage_warpper, \
            izip(repeat(img), self.PNets[:len(batch)], [scales[i] for i in batch], repeat(self.threshold[0])) )
    total_boxes.extend(local_boxes)

# remove the Nones
total_boxes = [ i for i in total_boxes if i is not None]

if len(total_boxes) == 0:
    return None

total_boxes = np.vstack(total_boxes)

if total_boxes.size == 0:
    return None

# merge the detection from first stage: nms drops overlapping boxes
pick = nms(total_boxes[:, 0:5], 0.7, 'Union')
total_boxes = total_boxes[pick]

bbw = total_boxes[:, 2] - total_boxes[:, 0] + 1
bbh = total_boxes[:, 3] - total_boxes[:, 1] + 1

# refine the bboxes: shift every corner by its offset (columns 5-8) scaled
# by the box width / height; the score column is carried over unchanged
total_boxes = np.vstack([total_boxes[:, 0]+total_boxes[:, 5] * bbw,
                         total_boxes[:, 1]+total_boxes[:, 6] * bbh,
                         total_boxes[:, 2]+total_boxes[:, 7] * bbw,
                         total_boxes[:, 3]+total_boxes[:, 8] * bbh,
                         total_boxes[:, 4]
                         ])

# vstack produced 5 x K, transpose back to one row per box
total_boxes = total_boxes.T
total_boxes = self.convert_to_square(total_boxes)
total_boxes[:, 0:4] = np.round(total_boxes[:, 0:4])

第二三阶段类似，可以忽略了