**MTCNN 通过构建图像金字塔来进行目标检测。疑问:为什么只对原图按比例缩小?我觉得也可以按比例放大,把放大和缩小后的图像一起送入网络提取特征,效果会不会更好?**
1.网络结构
from tensorflow.keras.layers import Conv2D,Input,MaxPool2D,Reshape,Activation,Flatten,Dense,Permute,PReLU
from tensorflow.keras.models import Model,Sequential
import numpy as np
import cv2
def create_Pnet(weight_path):
    """Build the P-Net model and load its weights.

    The input's spatial size is left unspecified (``None``) and every layer
    is a convolution or pooling layer, so the model accepts images of any
    size and produces one prediction per output cell.

    Args:
        weight_path: Path of the weight file. Weights are matched to layers
            by layer name (``by_name=True``).

    Returns:
        A Keras ``Model`` with two outputs: a 2-channel softmax map and a
        4-channel box-regression map.
    """
    image = Input(shape=[None, None, 3])

    # Shared trunk; PReLU slopes are shared across the two spatial axes.
    trunk = [
        Conv2D(10, (3, 3), strides=1, padding='valid', name='conv1'),
        PReLU(shared_axes=[1, 2], name='PReLU1'),
        MaxPool2D(pool_size=2),
        Conv2D(16, (3, 3), strides=1, padding='valid', name='conv2'),
        PReLU(shared_axes=[1, 2], name='PReLU2'),
        Conv2D(32, (3, 3), strides=1, padding='valid', name='conv3'),
        PReLU(shared_axes=[1, 2], name='PReLU3'),
    ]
    features = image
    for layer in trunk:
        features = layer(features)

    # Two 1x1 convolution heads on top of the shared features.
    face_prob = Conv2D(2, (1, 1), activation='softmax', name='conv4-1')(features)
    box_offsets = Conv2D(4, (1, 1), name='conv4-2')(features)

    pnet = Model([image], [face_prob, box_offsets])
    pnet.load_weights(weight_path, by_name=True)
    return pnet
def create_Rnet(weight_path):
    """Build the R-Net model (24x24x3 input) and load its weights.

    Args:
        weight_path: Path of the weight file. Weights are matched to layers
            by layer name (``by_name=True``).

    Returns:
        A Keras ``Model`` with two outputs: a 2-way softmax and a 4-value
        box regression.
    """
    crop = Input(shape=[24, 24, 3])

    trunk = [
        Conv2D(28, (3, 3), strides=1, padding='valid', name='conv1'),
        PReLU(shared_axes=[1, 2], name='prelu1'),
        MaxPool2D(pool_size=3, strides=2, padding='same'),   # -> 11, 11, 28
        Conv2D(48, (3, 3), strides=1, padding='valid', name='conv2'),
        PReLU(shared_axes=[1, 2], name='prelu2'),
        MaxPool2D(pool_size=3, strides=2),                   # -> 4, 4, 48
        Conv2D(64, (2, 2), strides=1, padding='valid', name='conv3'),
        PReLU(shared_axes=[1, 2], name='prelu3'),            # -> 3, 3, 64
        # Reverse the axis order before flattening: 3, 3, 64 -> 64, 3, 3.
        # NOTE(review): presumably required so the flattened order matches
        # the layout the pretrained dense weights expect - confirm.
        Permute((3, 2, 1)),
        Flatten(),                                           # -> 576
        Dense(128, name='conv4'),
        PReLU(name='prelu4'),                                # -> 128
    ]
    features = crop
    for layer in trunk:
        features = layer(features)

    face_prob = Dense(2, activation='softmax', name='conv5-1')(features)
    box_offsets = Dense(4, name='conv5-2')(features)

    rnet = Model([crop], [face_prob, box_offsets])
    rnet.load_weights(weight_path, by_name=True)
    return rnet
def create_Onet(weight_path):
    """Build the O-Net model (48x48x3 input) and load its weights.

    Args:
        weight_path: Path of the weight file. Weights are matched to layers
            by layer name (``by_name=True``).

    Returns:
        A Keras ``Model`` with three outputs: a 2-way softmax, a 4-value
        box regression and a 10-value landmark regression.
    """
    crop = Input(shape=[48, 48, 3])

    trunk = [
        Conv2D(32, (3, 3), strides=1, padding='valid', name='conv1'),
        PReLU(shared_axes=[1, 2], name='prelu1'),
        MaxPool2D(pool_size=3, strides=2, padding='same'),   # -> 23, 23, 32
        Conv2D(64, (3, 3), strides=1, padding='valid', name='conv2'),
        PReLU(shared_axes=[1, 2], name='prelu2'),
        MaxPool2D(pool_size=3, strides=2),                   # -> 10, 10, 64
        Conv2D(64, (3, 3), strides=1, padding='valid', name='conv3'),
        PReLU(shared_axes=[1, 2], name='prelu3'),
        MaxPool2D(pool_size=2),                              # -> 4, 4, 64
        Conv2D(128, (2, 2), strides=1, padding='valid', name='conv4'),
        PReLU(shared_axes=[1, 2], name='prelu4'),            # -> 3, 3, 128
        # Reverse the axis order before flattening: 3, 3, 128 -> 128, 3, 3.
        # NOTE(review): presumably required so the flattened order matches
        # the layout the pretrained dense weights expect - confirm.
        Permute((3, 2, 1)),
        Flatten(),                                           # -> 1152
        Dense(256, name='conv5'),
        PReLU(name='prelu5'),                                # -> 256
    ]
    features = crop
    for layer in trunk:
        features = layer(features)

    face_prob = Dense(2, activation='softmax', name='conv6-1')(features)
    box_offsets = Dense(4, name='conv6-2')(features)
    landmarks = Dense(10, name='conv6-3')(features)

    onet = Model([crop], [face_prob, box_offsets, landmarks])
    onet.load_weights(weight_path, by_name=True)
    return onet
2.网络过程
2.1构建图像金字塔输入到Pnet
#构建图像金字塔,计算图像的缩放比例
def calculateScales(img):
    """Return the resize factors that make up the image pyramid.

    The image is first normalised by a base factor: if its shorter side is
    larger than 500 px the shorter side is brought down to 500, and if its
    longer side is smaller than 500 px the longer side is brought up to 500.
    Starting from that base factor, each further level is 0.709 times the
    previous one, until the shorter side would drop below 12 px.

    Args:
        img: Array-like image; only ``img.shape[:2]`` (height, width) is
            read.

    Returns:
        A list of floats, largest factor first. Empty if the shorter side
        of the normalised image is already below 12 px.
    """
    h, w = img.shape[:2]

    # Base factor. The original assigned this to a misspelled name, so the
    # factor actually used stayed at 1.0 and the normalisation never applied.
    pr_scale = 1.0
    if min(w, h) > 500:
        pr_scale = 500.0 / min(h, w)   # shorter side becomes 500
    elif max(w, h) < 500:
        pr_scale = 500.0 / max(h, w)   # longer side becomes 500
    w = int(w * pr_scale)
    h = int(h * pr_scale)

    scales = []
    factor = 0.709
    factor_count = 0
    minl = min(h, w)
    # Keep shrinking while the shorter side is still at least 12 px.
    while minl >= 12:
        scales.append(pr_scale * pow(factor, factor_count))
        minl *= factor
        factor_count += 1
    return scales
# 将长方形调整为正方形
def rect2square(rectangles):
    """Grow each box into a square that shares the original box's centre.

    The side of the square is the larger of the box's width and height.
    Only columns 0-3 (x1, y1, x2, y2) are read and rewritten; any further
    columns are left untouched.

    Args:
        rectangles: 2-D NumPy array with at least four columns. It is
            modified in place.

    Returns:
        The same array object that was passed in.
    """
    w = rectangles[:, 2] - rectangles[:, 0]
    h = rectangles[:, 3] - rectangles[:, 1]
    side = np.maximum(w, h)
    # Shift the top-left corner so the centre stays where it was.
    rectangles[:, 0] = rectangles[:, 0] + w * 0.5 - side * 0.5
    rectangles[:, 1] = rectangles[:, 1] + h * 0.5 - side * 0.5
    # Bottom-right = new top-left + side (broadcast over both coordinates).
    rectangles[:, 2:4] = rectangles[:, 0:2] + side[:, np.newaxis]
    return rectangles
# 非极大抑制
#-------------------------------------#
def NMS(rectangles, threshold):
    """Greedy non-maximum suppression.

    Repeatedly keeps the highest-scoring remaining box and drops every other
    remaining box whose overlap ratio (intersection over union, with the
    "+1" pixel convention for widths and heights) with it exceeds
    ``threshold``.

    Args:
        rectangles: Sequence of rows ``[x1, y1, x2, y2, score, ...]``; extra
            columns are carried through unchanged.
        threshold: Maximum overlap ratio a box may have with an already
            kept box and still survive.

    Returns:
        The kept rows as a list of lists, highest score first. An empty
        input is returned as is.
    """
    if len(rectangles) == 0:
        return rectangles

    boxes = np.array(rectangles)
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    scores = boxes[:, 4]
    area = (x2 - x1 + 1) * (y2 - y1 + 1)

    order = scores.argsort()   # ascending, so the best box is last
    pick = []
    while len(order) > 0:
        best, rest = order[-1], order[:-1]
        # Intersection of the best box with every other remaining box.
        xx1 = np.maximum(x1[best], x1[rest])
        yy1 = np.maximum(y1[best], y1[rest])
        xx2 = np.minimum(x2[best], x2[rest])
        yy2 = np.minimum(y2[best], y2[rest])
        inter = np.maximum(0.0, xx2 - xx1 + 1) * np.maximum(0.0, yy2 - yy1 + 1)
        overlap = inter / (area[best] + area[rest] - inter)
        pick.append(best)
        order = rest[overlap <= threshold]
    return boxes[pick].tolist()
2.2对Pnet的输出进行处理以输入到Rnet
#对Pnet的输出进行筛选
def detect_face_12net(cls_prob, roi, _out_side, scale, width, height, threshold):
    """Turn one P-Net output map into candidate boxes.

    Every cell of ``cls_prob`` that reaches ``threshold`` is mapped to a
    12-pixel window (cell * stride .. cell * stride + 11), multiplied by
    ``scale``, shifted by the regression offsets, squared, clipped to the
    image and finally passed through NMS with an overlap threshold of 0.3.

    Args:
        cls_prob: 2-D score map, indexed (row, column).
        roi: 3-D regression map whose last axis holds the four offsets.
        _out_side: Larger side of the score map; used to derive the stride.
        scale: Factor that converts pyramid-level coordinates to the
            coordinate frame of ``width`` / ``height``.
        width: Upper clipping bound for x coordinates.
        height: Upper clipping bound for y coordinates.
        threshold: Minimum score for a cell to produce a candidate.

    Returns:
        Whatever ``NMS`` returns for the surviving
        ``[x1, y1, x2, y2, score]`` rows.
    """
    # Swap to (column, row) indexing so the first index is the x position.
    cls_prob = np.swapaxes(cls_prob, 0, 1)
    # Bring the offset axis to the front: roi[k] is the k-th offset map.
    roi = np.swapaxes(roi, 0, 2)

    stride = float(2 * _out_side - 1) / (_out_side - 1) if _out_side != 1 else 0

    xs, ys = np.where(cls_prob >= threshold)
    cells = np.array([xs, ys]).T

    # Window corners expressed in the target coordinate frame.
    top_left = np.fix((stride * cells + 0) * scale)
    bottom_right = np.fix((stride * cells + 11) * scale)
    boxes = np.concatenate((top_left, bottom_right), axis=1)

    offsets = np.array([roi[k][xs, ys] for k in range(4)]).T
    scores = np.array([cls_prob[xs, ys]]).T
    boxes = boxes + offsets * 12.0 * scale

    candidates = rect2square(np.concatenate((boxes, scores), axis=1))

    kept = []
    for box in candidates:
        left = int(max(0, box[0]))
        top = int(max(0, box[1]))
        right = int(min(width, box[2]))
        bottom = int(min(height, box[3]))
        # Drop boxes that collapse to nothing after clipping.
        if right > left and bottom > top:
            kept.append([left, top, right, bottom, box[4]])
    return NMS(kept, 0.3)
2.3对Rnet的输出进行处理以输入到Onet
#对rnet的输出进行修剪
def filter_face_24net(cls_prob, roi, rectangles, width, height, threshold):
    """Refine and filter candidate boxes using the R-Net outputs.

    Boxes whose score (column 1 of ``cls_prob``) reaches ``threshold`` are
    kept, shifted by their regression offsets (scaled by the box width for
    x and by the box height for y), squared, clipped to the image and passed
    through NMS with an overlap threshold of 0.3.

    Args:
        cls_prob: 2-D array, one row per candidate; column 1 is the score.
        roi: 2-D array, one row of four offsets per candidate.
        rectangles: The candidates that were fed to the network, as rows
            starting with ``x1, y1, x2, y2``.
        width: Upper clipping bound for x coordinates.
        height: Upper clipping bound for y coordinates.
        threshold: Minimum score for a candidate to survive.

    Returns:
        Whatever ``NMS`` returns for the surviving
        ``[x1, y1, x2, y2, score]`` rows.
    """
    prob = cls_prob[:, 1]
    keep = np.where(prob >= threshold)[0]

    boxes = np.array(rectangles)[keep]
    deltas = roi[keep]

    x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    w = x2 - x1
    h = y2 - y1

    # Apply the offsets relative to the size of the box the network saw.
    refined = np.array([
        x1 + deltas[:, 0] * w,
        y1 + deltas[:, 1] * h,
        x2 + deltas[:, 2] * w,
        y2 + deltas[:, 3] * h,
        prob[keep],
    ]).T
    refined = rect2square(refined)

    kept = []
    for box in refined:
        left = int(max(0, box[0]))
        top = int(max(0, box[1]))
        right = int(min(width, box[2]))
        bottom = int(min(height, box[3]))
        # Drop boxes that collapse to nothing after clipping.
        if right > left and bottom > top:
            kept.append([left, top, right, bottom, box[4]])
    return NMS(kept, 0.3)
2.4对Onet的输出进行处理
# 对onet处理后的结果进行处理
def filter_face_48net(cls_prob, roi, pts, rectangles, width, height, threshold):
    """Produce final boxes and landmarks from the O-Net outputs.

    Boxes whose score (column 1 of ``cls_prob``) reaches ``threshold`` are
    kept. Landmarks are converted to absolute coordinates using the box the
    network saw (before refinement); the box itself is then shifted by its
    regression offsets, clipped to the image and passed through NMS with an
    overlap threshold of 0.3.

    Args:
        cls_prob: 2-D array, one row per candidate; column 1 is the score.
        roi: 2-D array, one row of four offsets per candidate.
        pts: 2-D array, one row of ten values per candidate; columns 0-4
            are scaled by the box width and columns 5-9 by the box height.
        rectangles: The candidates that were fed to the network, as rows
            starting with ``x1, y1, x2, y2``.
        width: Upper clipping bound for x coordinates.
        height: Upper clipping bound for y coordinates.
        threshold: Minimum score for a candidate to survive.

    Returns:
        Whatever ``NMS`` returns for the surviving rows
        ``[x1, y1, x2, y2, score, px0, py0, ..., px4, py4]``.
    """
    prob = cls_prob[:, 1]
    keep = np.where(prob >= threshold)[0]

    boxes = np.array(rectangles)[keep]
    deltas = roi[keep]
    marks = pts[keep]

    x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    w = x2 - x1
    h = y2 - y1

    columns = [
        x1 + deltas[:, 0] * w,
        y1 + deltas[:, 1] * h,
        x2 + deltas[:, 2] * w,
        y2 + deltas[:, 3] * h,
        prob[keep],
    ]
    # Interleave landmark x and y: the network emits five x values followed
    # by five y values, all relative to the unrefined box.
    for k in range(5):
        columns.append(w * marks[:, k] + x1)
        columns.append(h * marks[:, k + 5] + y1)
    candidates = np.array(columns).T

    kept = []
    for row in candidates:
        left = int(max(0, row[0]))
        top = int(max(0, row[1]))
        right = int(min(width, row[2]))
        bottom = int(min(height, row[3]))
        # Drop boxes that collapse to nothing after clipping.
        if right > left and bottom > top:
            kept.append([left, top, right, bottom, *row[4:15]])
    return NMS(kept, 0.3)
3.构建网络输入到输出的mtcnn类
class mtcnn():
    """Three-stage face detector that chains P-Net, R-Net and O-Net."""

    def __init__(self, pnet_path='model_data/pnet.h5',
                 rnet_path='model_data/rnet.h5',
                 onet_path='model_data/onet.h5'):
        """Build the three networks and load their weights.

        Args:
            pnet_path: Weight file for P-Net.
            rnet_path: Weight file for R-Net.
            onet_path: Weight file for O-Net.
        """
        self.Pnet = create_Pnet(pnet_path)
        self.Rnet = create_Rnet(rnet_path)
        self.Onet = create_Onet(onet_path)

    @staticmethod
    def _crop_batch(image, rectangles, size):
        """Cut every rectangle out of ``image`` and resize it to size x size."""
        crops = []
        for rectangle in rectangles:
            crop = image[int(rectangle[1]):int(rectangle[3]),
                         int(rectangle[0]):int(rectangle[2])]
            crops.append(cv2.resize(crop, (size, size)))
        return np.array(crops)

    def detectFace(self, img, threshold):
        """Detect faces in ``img``.

        Args:
            img: 3-D image array (height, width, channels). Pixel values are
                normalised as ``(v - 127.5) / 127.5`` before inference.
            threshold: Sequence of three score thresholds, used for P-Net,
                R-Net and O-Net respectively.

        Returns:
            A list of rows ``[x1, y1, x2, y2, score, px0, py0, ..., px4,
            py4]`` in the coordinates of ``img``; an empty result if any
            stage leaves no candidate. When the stage that empties the list
            is P-Net or R-Net, the rows never reach O-Net and the return
            value is simply that stage's empty output.
        """
        copy_img = (img - 127.5) / 127.5
        origin_h, origin_w, _ = copy_img.shape
        scales = calculateScales(img)

        # Stage 1: run P-Net on every pyramid level and decode its output
        # right away, so the raw maps do not have to be kept around.
        rectangles = []
        for scale in scales:
            hs = int(origin_h * scale)
            ws = int(origin_w * scale)
            scale_img = cv2.resize(copy_img, (ws, hs))
            inputs = scale_img.reshape(1, *scale_img.shape)
            output = self.Pnet.predict(inputs)
            cls_prob = output[0][0][:, :, 1]   # channel 1 of the softmax map
            roi = output[1][0]
            out_h, out_w = cls_prob.shape
            out_side = max(out_h, out_w)
            # 1 / scale maps pyramid-level coordinates back to the input image.
            rectangle = detect_face_12net(cls_prob, roi, out_side, 1 / scale,
                                          origin_w, origin_h, threshold[0])
            rectangles.extend(rectangle)
        rectangles = NMS(rectangles, 0.7)
        if len(rectangles) == 0:
            return rectangles

        # Stage 2: R-Net re-scores and refines 24x24 crops of the candidates.
        predict_24_batch = self._crop_batch(copy_img, rectangles, 24)
        out = self.Rnet.predict(predict_24_batch)
        cls_prob = np.array(out[0])
        roi_prob = np.array(out[1])
        rectangles = filter_face_24net(cls_prob, roi_prob, rectangles,
                                       origin_w, origin_h, threshold[1])
        if len(rectangles) == 0:
            return rectangles

        # Stage 3: O-Net gives the final boxes plus five landmarks per face.
        predict_batch = self._crop_batch(copy_img, rectangles, 48)
        output = self.Onet.predict(predict_batch)
        cls_prob = output[0]
        roi_prob = output[1]
        pts_prob = output[2]
        rectangles = filter_face_48net(cls_prob, roi_prob, pts_prob, rectangles,
                                       origin_w, origin_h, threshold[2])
        return rectangles
4.根据预训练权重与模型结构进行实际检测
#根据搭建的网络和预训练参数进行预测
# Demo: detect faces in one image, draw the boxes and landmarks, then save
# and display the result.
img = cv2.imread('img/timg.jpg')
if img is None:
    # cv2.imread signals failure by returning None rather than raising.
    raise FileNotFoundError("cannot read image 'img/timg.jpg'")
model = mtcnn()
threshold = [0.5, 0.6, 0.7]   # score thresholds for P-Net, R-Net, O-Net
rectangles = model.detectFace(img, threshold)
draw = img.copy()
for rectangle in rectangles:
    # Face box.
    cv2.rectangle(draw,
                  (int(rectangle[0]), int(rectangle[1])),
                  (int(rectangle[2]), int(rectangle[3])),
                  (255, 0, 0), 1)
    # Five landmarks, stored as (x, y) pairs in columns 5..14.
    for i in range(5, 15, 2):
        cv2.circle(draw, (int(rectangle[i]), int(rectangle[i + 1])), 2, (0, 255, 0))
cv2.imwrite('img/out.jpg', draw)
cv2.imshow('test', draw)
c = cv2.waitKey(0)
参考:https://blog.csdn.net/weixin_44791964/article/details/103530206