2.mtcnn人脸检测

最新推荐文章于 2022-07-29 16:35:47 发布

我是小z呀

最新推荐文章于 2022-07-29 16:35:47 发布

阅读量200

点赞数

分类专栏： AIGC

本文链接：https://blog.csdn.net/weixin_38226321/article/details/107531511

版权

AIGC 专栏收录该内容

47 篇文章 3 订阅

订阅专栏

**MTCNN 通过构建图像金字塔来进行目标检测。思考：为什么只对原图按比例缩小？我觉得也可以按比例放大，把放大和缩小后的图像一起送入网络提取特征，效果会不会更好？**

1.网络结构

from tensorflow.keras.layers import Conv2D,Input,MaxPool2D,Reshape,Activation,Flatten,Dense,Permute,PReLU
from tensorflow.keras.models import Model,Sequential
import numpy as np
import cv2
def create_Pnet(weight_path):
    """Build the P-Net model and load its weights from ``weight_path``.

    The network contains only convolution, PReLU and pooling layers and its
    input is declared with unspecified height and width, so it can be applied
    to 3-channel images of any spatial size.

    Args:
        weight_path: Path of the weight file; weights are matched to layers
            by layer name (``by_name=True``).

    Returns:
        A Keras ``Model`` with two outputs: a 2-channel softmax map and a
        4-channel bounding-box regression map.
    """
    inputs = Input(shape=[None, None, 3])

    # Shared trunk, applied strictly in this order.
    trunk = [
        Conv2D(10, (3, 3), strides=1, padding='valid', name='conv1'),
        PReLU(shared_axes=[1, 2], name='PReLU1'),
        MaxPool2D(pool_size=2),
        Conv2D(16, (3, 3), strides=1, padding='valid', name='conv2'),
        PReLU(shared_axes=[1, 2], name='PReLU2'),
        Conv2D(32, (3, 3), strides=1, padding='valid', name='conv3'),
        PReLU(shared_axes=[1, 2], name='PReLU3'),
    ]
    features = inputs
    for layer in trunk:
        features = layer(features)

    # Two 1x1 convolution heads share the trunk features.
    classifier = Conv2D(2, (1, 1), activation='softmax', name='conv4-1')(features)
    bbox_regress = Conv2D(4, (1, 1), name='conv4-2')(features)

    model = Model([inputs], [classifier, bbox_regress])
    model.load_weights(weight_path, by_name=True)
    return model

def create_Rnet(weight_path):
    """Build the R-Net model and load its weights from ``weight_path``.

    Args:
        weight_path: Path of the weight file; weights are matched to layers
            by layer name (``by_name=True``).

    Returns:
        A Keras ``Model`` taking 24x24x3 inputs and producing two outputs:
        a 2-way softmax and a 4-value bounding-box regression.
    """
    inputs = Input(shape=[24, 24, 3])

    # Layers are applied strictly in this order; trailing comments give the
    # per-sample output shape after each stage.
    pipeline = [
        Conv2D(28, (3, 3), strides=1, padding='valid', name='conv1'),
        PReLU(shared_axes=[1, 2], name='prelu1'),
        MaxPool2D(pool_size=3, strides=2, padding='same'),  # 11, 11, 28
        Conv2D(48, (3, 3), strides=1, padding='valid', name='conv2'),
        PReLU(shared_axes=[1, 2], name='prelu2'),
        MaxPool2D(pool_size=3, strides=2),  # 4, 4, 48
        Conv2D(64, (2, 2), strides=1, padding='valid', name='conv3'),
        PReLU(shared_axes=[1, 2], name='prelu3'),  # 3, 3, 64
        Permute((3, 2, 1)),  # 3, 3, 64 -> 64, 3, 3
        Flatten(),  # 576
        Dense(128, name='conv4'),
        PReLU(name='prelu4'),  # 128
    ]
    features = inputs
    for layer in pipeline:
        features = layer(features)

    classifier = Dense(2, activation='softmax', name='conv5-1')(features)
    bbox_regress = Dense(4, name='conv5-2')(features)

    model = Model([inputs], [classifier, bbox_regress])
    model.load_weights(weight_path, by_name=True)
    return model
def create_Onet(weight_path):
    """Build the O-Net model and load its weights from ``weight_path``.

    Args:
        weight_path: Path of the weight file; weights are matched to layers
            by layer name (``by_name=True``).

    Returns:
        A Keras ``Model`` taking 48x48x3 inputs and producing three outputs:
        a 2-way softmax, a 4-value bounding-box regression and a 10-value
        landmark regression.
    """
    inputs = Input(shape=[48, 48, 3])

    # Layers are applied strictly in this order; trailing comments give the
    # per-sample output shape after each stage.
    pipeline = [
        Conv2D(32, (3, 3), strides=1, padding='valid', name='conv1'),
        PReLU(shared_axes=[1, 2], name='prelu1'),
        MaxPool2D(pool_size=3, strides=2, padding='same'),  # 23, 23, 32
        Conv2D(64, (3, 3), strides=1, padding='valid', name='conv2'),
        PReLU(shared_axes=[1, 2], name='prelu2'),
        MaxPool2D(pool_size=3, strides=2),  # 10, 10, 64
        Conv2D(64, (3, 3), strides=1, padding='valid', name='conv3'),
        PReLU(shared_axes=[1, 2], name='prelu3'),
        MaxPool2D(pool_size=2),  # 4, 4, 64
        Conv2D(128, (2, 2), strides=1, padding='valid', name='conv4'),
        PReLU(shared_axes=[1, 2], name='prelu4'),  # 3, 3, 128
        Permute((3, 2, 1)),  # 128, 3, 3
        Flatten(),  # 1152
        Dense(256, name='conv5'),
        PReLU(name='prelu5'),  # 256
    ]
    features = inputs
    for layer in pipeline:
        features = layer(features)

    classifier = Dense(2, activation='softmax', name='conv6-1')(features)
    bbox_regress = Dense(4, name='conv6-2')(features)
    landmark_regress = Dense(10, name='conv6-3')(features)

    model = Model([inputs], [classifier, bbox_regress, landmark_regress])
    model.load_weights(weight_path, by_name=True)

    return model

2.网络过程

2.1构建图像金字塔输入到Pnet

# Build the image pyramid: compute the scale factors to apply to an image.
def calculateScales(img):
    """Return the list of pyramid scale factors for ``img``.

    The image is first (conceptually) normalised so that it is neither too
    large nor too small: if both sides exceed 500 px the shorter side is
    brought to 500; if both sides are below 500 px the longer side is brought
    to 500; otherwise no initial scaling is applied. Starting from that
    initial scale, each further level is multiplied by 0.709 until the
    shorter side of the scaled image would drop below 12 px.

    Args:
        img: Array-like with a 3-element ``shape`` of (height, width, channels).

    Returns:
        A list of floats ``[s, s*0.709, s*0.709**2, ...]`` where ``s`` is the
        initial scale. The list is empty when the shorter side of the
        normalised image is already below 12 px.
    """
    h, w, _ = img.shape

    # Initial scale. The original code assigned this to a misspelled name, so
    # the scale silently stayed at 1.0 and the normalisation never happened.
    pr_scale = 1.0
    if min(w, h) > 500:
        pr_scale = 500.0 / min(h, w)  # shorter side becomes 500
    elif max(w, h) < 500:
        pr_scale = 500.0 / max(h, w)  # longer side becomes 500
    w = int(w * pr_scale)
    h = int(h * pr_scale)

    scales = []
    factor = 0.709
    factor_count = 0
    minl = min(h, w)
    # Keep shrinking until the shorter side falls below 12 px.
    while minl >= 12:
        scales.append(pr_scale * pow(factor, factor_count))
        minl *= factor
        factor_count += 1
    return scales
# Turn each rectangle into a square.
def rect2square(rectangles):
    """Expand every box to a square whose side is its longer edge.

    Each square keeps the centre of the original box. The array is modified
    in place and also returned.

    Args:
        rectangles: 2-D NumPy array whose first four columns are
            x1, y1, x2, y2; any further columns are left untouched.

    Returns:
        The same array object, with columns 0-3 overwritten.
    """
    width = rectangles[:, 2] - rectangles[:, 0]
    height = rectangles[:, 3] - rectangles[:, 1]
    side = np.maximum(width, height)

    # Shift the top-left corner so the square stays centred on the box.
    rectangles[:, 0] = rectangles[:, 0] + width * 0.5 - side * 0.5
    rectangles[:, 1] = rectangles[:, 1] + height * 0.5 - side * 0.5
    # Bottom-right corner = top-left corner + side length on both axes.
    rectangles[:, 2:4] = rectangles[:, 0:2] + side[:, np.newaxis]
    return rectangles
# Non-maximum suppression.
def NMS(rectangles, threshold):
    """Greedy non-maximum suppression over scored boxes.

    Boxes are visited from highest to lowest score; each visited box is kept
    and every remaining box whose IoU with it exceeds ``threshold`` is
    discarded. Areas and overlaps use the inclusive ``+1`` pixel convention.

    Args:
        rectangles: Sequence of rows ``[x1, y1, x2, y2, score, ...]``; extra
            columns are carried through unchanged.
        threshold: Maximum IoU a box may have with a kept box and survive.

    Returns:
        The kept rows as a list of lists, ordered by descending score. An
        empty input is returned as-is.
    """
    if len(rectangles) == 0:
        return rectangles

    boxes = np.array(rectangles)
    left, top, right, bottom, scores = (boxes[:, k] for k in range(5))
    areas = np.multiply(right - left + 1, bottom - top + 1)

    # Ascending by score, so the best remaining candidate is always last.
    order = np.array(scores.argsort())
    keep = []
    while len(order) > 0:
        best, rest = order[-1], order[:-1]

        # Intersection of the best box with every other remaining box.
        ix1 = np.maximum(left[best], left[rest])
        iy1 = np.maximum(top[best], top[rest])
        ix2 = np.minimum(right[best], right[rest])
        iy2 = np.minimum(bottom[best], bottom[rest])
        inter = np.maximum(0.0, ix2 - ix1 + 1) * np.maximum(0.0, iy2 - iy1 + 1)

        iou = inter / (areas[best] + areas[rest] - inter)
        keep.append(best)
        order = rest[iou <= threshold]

    return boxes[keep].tolist()

2.2对Pnet的输出进行处理以输入到Rnet

# Decode and filter the P-Net output for one pyramid level.
def detect_face_12net(cls_prob,roi,_out_side,scale,width,height,threshold):
    """Turn one P-Net output map into candidate face boxes.

    Args:
        cls_prob: 2-D map of face probabilities for one pyramid level.
        roi: 3-D regression map whose last axis holds at least four offsets
            per cell (it is indexed 0-3 after the axis swap below).
        _out_side: Longer side of the probability map; used to derive the
            stride between neighbouring cells.
        scale: Factor that maps pyramid-level coordinates back to the
            original image (the caller in this file passes ``1/scale``).
        width: Upper clipping bound for x coordinates.
        height: Upper clipping bound for y coordinates.
        threshold: Minimum face probability for a cell to be kept.

    Returns:
        Result of ``NMS(..., 0.3)`` over the clipped square boxes, each row
        being ``[x1, y1, x2, y2, score]``.
    """
    # Swap axes so the first index of both maps runs along the x direction
    # (assuming the inputs are laid out rows-first, as in the caller).
    cls_prob = np.swapaxes(cls_prob, 0, 1)
    roi = np.swapaxes(roi, 0, 2)

    # Cell spacing; 0 when the map has a single cell along its longer side.
    stride = float(2 * _out_side - 1) / (_out_side - 1) if _out_side != 1 else 0

    # Cells whose face probability reaches the threshold.
    hit_x, hit_y = np.where(cls_prob >= threshold)
    cells = np.array([hit_x, hit_y]).T

    # Each cell maps to a window spanning offsets 0..11 at pyramid scale,
    # then rescaled by ``scale``.
    top_left = np.fix((stride * cells + 0) * scale)
    bottom_right = np.fix((stride * cells + 11) * scale)
    boxes = np.concatenate((top_left, bottom_right), axis=1)

    # Regression offsets are applied as multiples of 12 * scale.
    offsets = np.array([roi[k][hit_x, hit_y] for k in range(4)]).T
    scores = np.array([cls_prob[hit_x, hit_y]]).T
    boxes = boxes + offsets * 12.0 * scale

    candidates = rect2square(np.concatenate((boxes, scores), axis=1))

    # Clip to the given bounds and drop boxes that become empty.
    kept = []
    for row in candidates:
        left = int(max(0, row[0]))
        top = int(max(0, row[1]))
        right = int(min(width, row[2]))
        bottom = int(min(height, row[3]))
        if right > left and bottom > top:
            kept.append([left, top, right, bottom, row[4]])
    return NMS(kept, 0.3)

2.3对Rnet的输出进行处理以输入到Onet

# Filter and refine boxes using the R-Net output.
def filter_face_24net(cls_prob,roi,rectangles,width,height,threshold):
    """Keep boxes that R-Net scores above ``threshold`` and refine them.

    Args:
        cls_prob: 2-D array with one row per box; column 1 is used as the
            face probability.
        roi: 2-D array with one row per box; columns 0-3 are offsets for
            x1, y1, x2, y2, expressed as fractions of box width / height.
        rectangles: Boxes that were fed to R-Net, rows starting with
            ``x1, y1, x2, y2``.
        width: Upper clipping bound for x coordinates.
        height: Upper clipping bound for y coordinates.
        threshold: Minimum face probability for a box to be kept.

    Returns:
        Result of ``NMS(..., 0.3)`` over the refined, squared and clipped
        boxes, each row being ``[x1, y1, x2, y2, score]``.
    """
    def as_column(values):
        # 1-D vector -> (n, 1) column, ready for concatenation.
        return np.array([values]).T

    face_prob = cls_prob[:, 1]
    selected = np.where(face_prob >= threshold)[0]
    boxes = np.array(rectangles)

    # Coordinates of the selected input boxes and their regression offsets.
    left, top, right, bottom = (boxes[selected, k] for k in range(4))
    d_left, d_top, d_right, d_bottom = (roi[selected, k] for k in range(4))

    box_w = right - left
    box_h = bottom - top

    # Shift each edge by its offset scaled with the box size.
    refined = np.concatenate(
        (
            as_column(left + d_left * box_w),
            as_column(top + d_top * box_h),
            as_column(right + d_right * box_w),
            as_column(bottom + d_bottom * box_h),
            as_column(face_prob[selected]),
        ),
        axis=1,
    )
    refined = rect2square(refined)

    # Clip to the given bounds and drop boxes that become empty.
    kept = []
    for row in refined:
        x1 = int(max(0, row[0]))
        y1 = int(max(0, row[1]))
        x2 = int(min(width, row[2]))
        y2 = int(min(height, row[3]))
        if x2 > x1 and y2 > y1:
            kept.append([x1, y1, x2, y2, row[4]])
    return NMS(kept, 0.3)

2.4对Onet的输出进行处理

# Filter and refine boxes (and landmarks) using the O-Net output.
def filter_face_48net(cls_prob,roi,pts,rectangles,width,height,threshold):
    """Keep boxes that O-Net scores above ``threshold``; add landmarks.

    Args:
        cls_prob: 2-D array with one row per box; column 1 is used as the
            face probability.
        roi: 2-D array with one row per box; columns 0-3 are offsets for
            x1, y1, x2, y2, expressed as fractions of box width / height.
        pts: 2-D array with one row per box; columns 0-4 are scaled by the
            box width and added to x1, columns 5-9 are scaled by the box
            height and added to y1 (five landmark points).
        rectangles: Boxes that were fed to O-Net, rows starting with
            ``x1, y1, x2, y2``.
        width: Upper clipping bound for x coordinates.
        height: Upper clipping bound for y coordinates.
        threshold: Minimum face probability for a box to be kept.

    Returns:
        Result of ``NMS(..., 0.3)`` over the refined and clipped boxes. Each
        row is ``[x1, y1, x2, y2, score, px0, py0, ..., px4, py4]``.
    """
    def as_column(values):
        # 1-D vector -> (n, 1) column, ready for concatenation.
        return np.array([values]).T

    face_prob = cls_prob[:, 1]
    selected = np.where(face_prob >= threshold)[0]
    boxes = np.array(rectangles)

    left, top, right, bottom = (boxes[selected, k] for k in range(4))
    d_left, d_top, d_right, d_bottom = (roi[selected, k] for k in range(4))

    box_w = right - left
    box_h = bottom - top

    # Landmarks are placed relative to the *unrefined* box, interleaved as
    # x0, y0, x1, y1, ... in the output row.
    landmark_columns = []
    for k in range(5):
        landmark_columns.append(as_column(box_w * pts[selected, k] + left))
        landmark_columns.append(as_column(box_h * pts[selected, k + 5] + top))

    # Shift each box edge by its offset scaled with the box size.
    box_columns = [
        as_column(left + d_left * box_w),
        as_column(top + d_top * box_h),
        as_column(right + d_right * box_w),
        as_column(bottom + d_bottom * box_h),
        as_column(face_prob[selected]),
    ]
    refined = np.concatenate(tuple(box_columns + landmark_columns), axis=1)

    # Clip the box to the given bounds and drop boxes that become empty;
    # score and landmark values are carried through unchanged.
    kept = []
    for row in refined:
        x1 = int(max(0, row[0]))
        y1 = int(max(0, row[1]))
        x2 = int(min(width, row[2]))
        y2 = int(min(height, row[3]))
        if x2 > x1 and y2 > y1:
            kept.append([x1, y1, x2, y2, *row[4:15]])
    return NMS(kept, 0.3)

3.构建网络输入到输出的mtcnn类

class mtcnn():
    """Three-stage face detector chaining P-Net, R-Net and O-Net."""

    def __init__(self):
        """Build the three networks and load their weights from ``model_data/``."""
        self.Pnet = create_Pnet('model_data/pnet.h5')
        self.Rnet = create_Rnet('model_data/rnet.h5')
        self.Onet = create_Onet('model_data/onet.h5')

    def detectFace(self, img, threshold):
        """Detect faces in ``img``.

        Args:
            img: Image array of shape (height, width, channels).
            threshold: Indexable with three score thresholds, used for the
                P-Net, R-Net and O-Net stages respectively.

        Returns:
            The output of ``filter_face_48net``: a list of rows
            ``[x1, y1, x2, y2, score, px0, py0, ..., px4, py4]``. An empty
            result is returned early if a stage leaves no candidates.
        """
        # Normalise pixel values with (x - 127.5) / 127.5.
        normalized = (img.copy() - 127.5) / 127.5
        origin_h, origin_w, _ = normalized.shape

        def crop_and_resize(boxes, side):
            # Cut every box out of the normalised image and resize it to a
            # side x side patch; returns the patches stacked into one array.
            patches = []
            for box in boxes:
                patch = normalized[int(box[1]):int(box[3]), int(box[0]):int(box[2])]
                patches.append(cv2.resize(patch, (side, side)))
            return np.array(patches)

        # ---- Stage 1: P-Net on every pyramid level ----
        scales = calculateScales(img)
        pnet_outputs = []
        for scale in scales:
            target_h = int(origin_h * scale)
            target_w = int(origin_w * scale)
            resized = cv2.resize(normalized, (target_w, target_h))
            batch = resized.reshape(1, *resized.shape)  # add the batch axis
            pnet_outputs.append(self.Pnet.predict(batch))

        rectangles = []
        for scale, output in zip(scales, pnet_outputs):
            # output[0] is the classifier head, output[1] the box head;
            # [0] selects the single image in the batch and channel 1 of the
            # classifier is used as the face probability.
            cls_prob = output[0][0][:, :, 1]
            roi = output[1][0]
            out_h, out_w = cls_prob.shape
            out_side = max(out_h, out_w)
            print(cls_prob.shape)
            # 1/scale maps this level's coordinates back to the original image.
            rectangles.extend(
                detect_face_12net(cls_prob, roi, out_side, 1 / scale,
                                  origin_w, origin_h, threshold[0])
            )
        rectangles = NMS(rectangles, 0.7)

        if len(rectangles) == 0:
            return rectangles

        # ---- Stage 2: R-Net refines the P-Net candidates (24x24 crops) ----
        rnet_out = self.Rnet.predict(crop_and_resize(rectangles, 24))
        cls_prob = np.array(rnet_out[0])
        roi_prob = np.array(rnet_out[1])
        rectangles = filter_face_24net(cls_prob, roi_prob, rectangles,
                                       origin_w, origin_h, threshold[1])
        if len(rectangles) == 0:
            return rectangles

        # ---- Stage 3: O-Net gives final boxes and landmarks (48x48 crops) ----
        onet_out = self.Onet.predict(crop_and_resize(rectangles, 48))
        cls_prob = onet_out[0]  # face probabilities
        roi_prob = onet_out[1]  # box offsets
        pts_prob = onet_out[2]  # landmark coordinates

        rectangles = filter_face_48net(cls_prob, roi_prob, pts_prob, rectangles,
                                       origin_w, origin_h, threshold[2])

        return rectangles

4.根据预训练权重与模型结构进行实际检测

# Run detection on a sample image using the networks built above and their
# pretrained weights, then draw, save and display the result.
img = cv2.imread('img/timg.jpg')
if img is None:
    # cv2.imread reports a missing or unreadable file by returning None
    # instead of raising; fail here with a clear message rather than with an
    # AttributeError later on.
    raise FileNotFoundError("could not read image 'img/timg.jpg'")

model = mtcnn()
threshold = [0.5, 0.6, 0.7]  # score thresholds for P-Net, R-Net, O-Net
rectangles = model.detectFace(img, threshold)

draw = img.copy()
for rectangle in rectangles:
    if rectangle is None:
        continue
    # Face bounding box.
    cv2.rectangle(draw,
                  (int(rectangle[0]), int(rectangle[1])),
                  (int(rectangle[2]), int(rectangle[3])),
                  (255, 0, 0), 1)
    # Five landmark points stored as (x, y) pairs in columns 5..14.
    for i in range(5, 15, 2):
        cv2.circle(draw, (int(rectangle[i + 0]), int(rectangle[i + 1])), 2, (0, 255, 0))

cv2.imwrite('img/out.jpg', draw)
cv2.imshow('test', draw)
c = cv2.waitKey(0)