论文:CenterFace: Joint Face Detection and Alignment Using Face as Point
Github:https://github.com/Star-Clouds/CenterFace
论文基于CenterNet进行改进,提出了anchor free形式的人脸检测框架,可以同时实现人脸检测+关键点检测。精度和速度都优于主流的MTCNN、FaceBoxes等框架。
主要贡献:
- 提出了anchor free的人脸检测设计,将人脸检测问题转化为关键点估计问题。相比之前的检测算法,该方法的模型输出的下采样率仅为4。
- 基于多任务学习策略,同时学习人脸检测+关键点定位
- 网络结构采用了FPN结构
- 大量的实验表明,速度和精度都空前的好
级联检测器的缺点:
- 推理速度受图片中人脸数目的影响,当人脸数目增多的时候,推理速度也会大大降低。
- 每一个模型都单独训练,训练过程繁琐。非端到端的训练模式,整体精度有限。
网络结构:
网络主干采用MobileNetV2结构,MobileNetV2共进行了5次下采样(输出为原图的1/32);在MobileNetV2的最后一层之后,增加了3个上采样层(共放大8倍)。因此最终输出相当于只进行了2次下采样,输出特征图的宽高为原图的1/4。
Loss函数:
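人脸分类loss(focal loss的变体,$Y$为高斯热图标签,$\hat{Y}$为预测热图,$N$为人脸中心点个数),
$$L_c = -\frac{1}{N}\sum_{xy}\begin{cases}(1-\hat{Y}_{xy})^{\alpha}\log(\hat{Y}_{xy}) & \text{if } Y_{xy}=1\\(1-Y_{xy})^{\beta}(\hat{Y}_{xy})^{\alpha}\log(1-\hat{Y}_{xy}) & \text{otherwise}\end{cases}$$
其中,α = 2 ,β = 4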
人脸框中心点偏移loss,
人脸框宽,高的loss,
关键点的loss,
整体loss,
实验结果:
推理速度,
FDDB精度,
WIDER FACE 精度,
Onnx推理:
Onnx模型格式,可以方便的使用程序进行op的增删改查操作。包括节点的增加,去除,输入输出维度的修改等。同时,基于onnx runtime的推理可以获得比基于pytorch推理略快的速度。缺点就是整个graph已经固定,不支持动态输入大小。
首先使用change_onnx.py修改作者提供的onnx模型的输入维度,
import onnx

# Script: rewrite the input shape of the author's CenterFace ONNX model and
# save the patched graph as out.onnx.

# Replacement sizes keyed by axis index of the input tensor. Axis 1 is left
# untouched. (Presumably the layout is N, C, H, W, i.e. batch=1, height=480,
# width=640 -- confirm against the inference code that feeds the model.)
FIXED_DIMS = {0: 1, 2: 480, 3: 640}

model = onnx.load("../models/onnx/centerface.onnx")

# The model is a protobuf structure, so it is walked with the standard
# python-for-protobuf accessors. Only the first graph input is visited: the
# loop body ends with an unconditional break.
for graph_input in model.graph.input:
    print(graph_input.name, end=": ")
    tensor_type = graph_input.type.tensor_type
    if not tensor_type.HasField("shape"):
        print("unknown rank", end="")
    else:
        for axis, dim in enumerate(tensor_type.shape.dim):
            if dim.HasField("dim_value"):
                # Concrete dimension: overwrite it when a fixed size is
                # configured for this axis, then echo the resulting value.
                if axis in FIXED_DIMS:
                    dim.dim_value = FIXED_DIMS[axis]
                print(dim.dim_value, end=", ")
            elif dim.HasField("dim_param"):
                # Symbolic dimension: reported but not modified.
                print(dim.dim_param, end=", ")
            else:
                # Dimension with neither a value nor a symbolic name.
                print("?", end=", ")
    print()
    break

# Validate the patched graph before writing it out.
onnx.checker.check_model(model)
onnx.save(model, 'out.onnx')
"""
model = onnx.load('models/centerface.onnx')
model.graph.input[0].type.tensor_type.shape.dim[0].dim_param = '?'
model.graph.input[0].type.tensor_type.shape.dim[1].dim_param = '3'
model.graph.input[0].type.tensor_type.shape.dim[2].dim_param = '?'
model.graph.input[0].type.tensor_type.shape.dim[3].dim_param = '?'
onnx.save(model, 'dynamic_model.onnx')
"""
模型另存在out.onnx,和原始的centerface.onnx的输入维度进行对比,
推理代码,centerface.py,
import numpy as np
import cv2
import datetime
import torch
import onnxruntime
import onnx
class CenterFace(object):
    """CenterFace detector backed by a fixed-input-size ONNX model.

    Calling an instance resizes the image to ``INPUT_WIDTH x INPUT_HEIGHT``,
    runs the ONNX session, decodes the heatmap / scale / offset / landmark
    outputs into boxes and maps them back to the coordinates of the original
    image.

    Args:
        landmarks: When true, calls return ``(dets, lms)``; otherwise only
            ``dets``.
        model_path: Path of the ONNX file to load (defaults to ``out.onnx``).
    """

    # Input resolution the instance resizes every frame to.
    INPUT_HEIGHT = 480
    INPUT_WIDTH = 640
    # Factor used to map output-map cells back to input pixels.
    STRIDE = 4
    # IoU at or above which a lower-scored box is suppressed in decode().
    NMS_THRESHOLD = 0.3

    def __init__(self, landmarks=True, model_path="out.onnx"):
        self.landmarks = landmarks
        self.session = onnxruntime.InferenceSession(model_path)
        self.inputs = self.session.get_inputs()[0].name
        # Not used by inference_opencv (it requests all outputs with None);
        # kept so existing code reading this attribute keeps working.
        self.outputs = ["537", "538", "539", '540']
        self.img_h_new, self.img_w_new, self.scale_h, self.scale_w = 0, 0, 0, 0

    def __call__(self, img, height, width, threshold=0.5):
        """Detect faces in ``img``.

        Args:
            img: Image array as read by OpenCV (converted BGR -> RGB inside).
            height: Height of ``img`` in pixels.
            width: Width of ``img`` in pixels.
            threshold: Minimum heatmap score for a detection.

        Returns:
            ``(dets, lms)`` when ``self.landmarks`` is true, else ``dets``.
            ``dets`` has shape (n, 5): x1, y1, x2, y2, score. ``lms`` has
            shape (n, 10): five interleaved (x, y) points.
        """
        # The network input is fixed, so the scale factors are simply the
        # ratio between the fixed input size and the original image size.
        self.img_h_new = self.INPUT_HEIGHT
        self.img_w_new = self.INPUT_WIDTH
        self.scale_h = self.INPUT_HEIGHT / height
        self.scale_w = self.INPUT_WIDTH / width
        return self.inference_opencv(img, threshold)

    def inference_opencv(self, img, threshold):
        """Preprocess ``img``, run the ONNX session and post-process outputs.

        Prints the elapsed time of preprocessing plus inference.
        """
        begin = datetime.datetime.now()
        image = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        image = cv2.resize(image, (self.img_w_new, self.img_h_new))
        # HWC -> CHW, then add the batch axis.
        input_image = np.expand_dims(image.transpose(2, 0, 1), 0).astype(np.float32)
        heatmap, scale, offset, lms = self.session.run(None, {self.inputs: input_image})
        end = datetime.datetime.now()
        print("cpu times = ", end - begin)
        return self.postprocess(heatmap, lms, offset, scale, threshold)

    def transform(self, h, w):
        """Round ``h`` and ``w`` up to multiples of 32.

        Returns:
            ``(new_h, new_w, new_h / h, new_w / w)``.
        """
        img_h_new, img_w_new = int(np.ceil(h / 32) * 32), int(np.ceil(w / 32) * 32)
        scale_h, scale_w = img_h_new / h, img_w_new / w
        return img_h_new, img_w_new, scale_h, scale_w

    def postprocess(self, heatmap, lms, offset, scale, threshold):
        """Decode raw outputs and rescale them to original-image coordinates.

        Uses ``self.img_h_new`` / ``self.img_w_new`` as clamp bounds and
        ``self.scale_h`` / ``self.scale_w`` to undo the resize.
        """
        size = (self.img_h_new, self.img_w_new)
        if self.landmarks:
            dets, lms = self.decode(heatmap, scale, offset, lms, size, threshold=threshold)
        else:
            dets = self.decode(heatmap, scale, offset, None, size, threshold=threshold)

        if len(dets) > 0:
            # Even columns are x coordinates, odd columns are y coordinates.
            dets[:, 0:4:2] /= self.scale_w
            dets[:, 1:4:2] /= self.scale_h
            if self.landmarks:
                lms[:, 0:10:2] /= self.scale_w
                lms[:, 1:10:2] /= self.scale_h
        else:
            dets = np.empty(shape=[0, 5], dtype=np.float32)
            if self.landmarks:
                lms = np.empty(shape=[0, 10], dtype=np.float32)

        if self.landmarks:
            return dets, lms
        return dets

    def decode(self, heatmap, scale, offset, landmark, size, threshold=0.1):
        """Turn network outputs into NMS-filtered boxes (and landmarks).

        Args:
            heatmap: Score map; squeezed to 2-D before thresholding.
            scale: Indexed as ``scale[0, k]``; channel 0 is the log box
                height, channel 1 the log box width.
            offset: Indexed as ``offset[0, k]``; channel 0 is the y offset,
                channel 1 the x offset of the box centre.
            landmark: Indexed as ``landmark[0, 2*j]`` (y) and
                ``landmark[0, 2*j + 1]`` (x), relative to the box; ignored
                when ``self.landmarks`` is false.
            size: ``(height, width)`` used to clamp the boxes.
            threshold: Minimum heatmap score.

        Returns:
            ``(boxes, lms)`` or ``boxes`` as float32 arrays of shape (n, 5)
            and (n, 10); empty arrays when nothing exceeds ``threshold``.
        """
        heatmap = np.squeeze(heatmap)
        scale0, scale1 = scale[0, 0, :, :], scale[0, 1, :, :]
        offset0, offset1 = offset[0, 0, :, :], offset[0, 1, :, :]
        rows, cols = np.where(heatmap > threshold)
        stride = self.STRIDE

        boxes, lms = [], []
        for r, c in zip(rows, cols):
            s0 = np.exp(scale0[r, c]) * stride  # box height in input pixels
            s1 = np.exp(scale1[r, c]) * stride  # box width in input pixels
            o0, o1 = offset0[r, c], offset1[r, c]
            score = heatmap[r, c]
            # Top-left corner from the cell centre, clamped to the image.
            x1 = min(max(0, (c + o1 + 0.5) * stride - s1 / 2), size[1])
            y1 = min(max(0, (r + o0 + 0.5) * stride - s0 / 2), size[0])
            boxes.append([x1, y1, min(x1 + s1, size[1]), min(y1 + s0, size[0]), score])
            if self.landmarks:
                lm = []
                for j in range(5):
                    lm.append(landmark[0, j * 2 + 1, r, c] * s1 + x1)
                    lm.append(landmark[0, j * 2, r, c] * s0 + y1)
                lms.append(lm)

        if boxes:
            boxes = np.asarray(boxes, dtype=np.float32)
            keep = self.nms(boxes[:, :4], boxes[:, 4], self.NMS_THRESHOLD)
            boxes = boxes[keep, :]
            if self.landmarks:
                lms = np.asarray(lms, dtype=np.float32)[keep, :]
        else:
            # Return arrays (not lists) so callers always get the same type.
            boxes = np.empty((0, 5), dtype=np.float32)
            lms = np.empty((0, 10), dtype=np.float32)

        if self.landmarks:
            return boxes, lms
        return boxes

    def nms(self, boxes, scores, nms_thresh):
        """Greedy non-maximum suppression.

        Args:
            boxes: Array of shape (n, 4) holding x1, y1, x2, y2.
            scores: Array of n scores.
            nms_thresh: A box is dropped when its IoU with an already kept,
                higher-scored box is ``>= nms_thresh``.

        Returns:
            List of kept indices into ``boxes``, highest score first.
        """
        x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
        areas = (x2 - x1 + 1) * (y2 - y1 + 1)
        order = np.argsort(scores)[::-1]

        keep = []
        while order.size > 0:
            best, rest = order[0], order[1:]
            keep.append(best)
            # Intersection of the best box with every remaining candidate.
            inter_w = np.maximum(0, np.minimum(x2[best], x2[rest]) - np.maximum(x1[best], x1[rest]) + 1)
            inter_h = np.maximum(0, np.minimum(y2[best], y2[rest]) - np.maximum(y1[best], y1[rest]) + 1)
            inter = inter_w * inter_h
            iou = inter / (areas[best] + areas[rest] - inter)
            order = rest[iou < nms_thresh]
        return keep
推理代码,demo.py
import cv2
import scipy.io as sio
import os
from centerface import CenterFace
def camera():
    """Run face detection on webcam 0 and show annotated frames live.

    Boxes are drawn in green and the five landmarks in red. The loop ends
    when 'q' is pressed or when the camera stops delivering frames.

    Raises:
        RuntimeError: If the very first frame cannot be read.
    """
    cap = cv2.VideoCapture(0)
    try:
        # The first frame is only used to learn the capture resolution.
        ret, frame = cap.read()
        if not ret:
            raise RuntimeError("cannot read a frame from camera 0")
        h, w = frame.shape[:2]
        centerface = CenterFace()
        while True:
            ret, frame = cap.read()
            if not ret:
                # A failed grab yields no usable frame; stop instead of
                # passing it on to the detector.
                break
            dets, lms = centerface(frame, h, w, threshold=0.35)
            for det in dets:
                boxes, score = det[:4], det[4]
                cv2.rectangle(frame, (int(boxes[0]), int(boxes[1])), (int(boxes[2]), int(boxes[3])), (2, 255, 0), 1)
            for lm in lms:
                for i in range(0, 5):
                    cv2.circle(frame, (int(lm[i * 2]), int(lm[i * 2 + 1])), 2, (0, 0, 255), -1)
            cv2.imshow('out', frame)
            # Press Q on keyboard to stop recording
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release the device even if detection or drawing raised.
        cap.release()
def test_image():
    """Detect faces in 'test.jpg' and write the annotated image to 'out.jpg'.

    Raises:
        FileNotFoundError: If 'test.jpg' cannot be read.
    """
    image_path = 'test.jpg'
    frame = cv2.imread(image_path)
    if frame is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise FileNotFoundError(image_path)
    h, w = frame.shape[:2]
    landmarks = True
    centerface = CenterFace(landmarks=landmarks)
    if landmarks:
        dets, lms = centerface(frame, h, w, threshold=0.35)
    else:
        # height and width are required positional arguments of the detector.
        dets = centerface(frame, h, w, threshold=0.35)
    for det in dets:
        boxes, score = det[:4], det[4]
        cv2.rectangle(frame, (int(boxes[0]), int(boxes[1])), (int(boxes[2]), int(boxes[3])), (2, 255, 0), 1)
    if landmarks:
        for lm in lms:
            for i in range(0, 5):
                cv2.circle(frame, (int(lm[i * 2]), int(lm[i * 2 + 1])), 2, (0, 0, 255), -1)
    cv2.imwrite('out.jpg', frame)
def test_image_tensorrt():
    """Detect faces in '000388.jpg' with the TensorRT backend and display them.

    Blocks on a key press after showing the annotated image.

    Raises:
        FileNotFoundError: If '000388.jpg' cannot be read.
    """
    image_path = '000388.jpg'
    frame = cv2.imread(image_path)
    if frame is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise FileNotFoundError(image_path)
    h, w = 480, 640  # must be 480* 640
    landmarks = True
    # NOTE(review): this needs a CenterFace whose constructor accepts
    # `backend`; a constructor taking only `landmarks` will raise TypeError
    # here -- confirm which centerface module is imported.
    centerface = CenterFace(landmarks=landmarks, backend="tensorrt")
    if landmarks:
        dets, lms = centerface(frame, h, w, threshold=0.35)
    else:
        # height and width are required positional arguments of the detector.
        dets = centerface(frame, h, w, threshold=0.35)
    for det in dets:
        boxes, score = det[:4], det[4]
        cv2.rectangle(frame, (int(boxes[0]), int(boxes[1])), (int(boxes[2]), int(boxes[3])), (2, 255, 0), 1)
    if landmarks:
        for lm in lms:
            for i in range(0, 5):
                cv2.circle(frame, (int(lm[i * 2]), int(lm[i * 2 + 1])), 2, (0, 0, 255), -1)
    cv2.imshow('out', frame)
    cv2.waitKey(0)
def test_widerface():
    """Run the detector over the WIDER FACE validation set and dump results.

    For every image listed in 'wider_face_val.mat' a text file is written to
    'save_out/<event>/<image>.txt' containing the image name, the number of
    detections and one 'x y w h score' line per detection.

    Raises:
        FileNotFoundError: If a listed image cannot be read.
    """
    image_root = 'widerface/WIDER_val/images/'
    wider_face_mat = sio.loadmat('widerface/wider_face_split/wider_face_val.mat')
    event_list = wider_face_mat['event_list']
    file_list = wider_face_mat['file_list']
    save_path = 'save_out/'

    # One detector serves every event; building it inside the loop would
    # create a new inference session per event for no benefit.
    landmarks = True
    centerface = CenterFace(landmarks=landmarks)

    for index, event in enumerate(event_list):
        file_list_item = file_list[index][0]
        im_dir = event[0][0]
        out_dir = os.path.join(save_path, im_dir)
        os.makedirs(out_dir, exist_ok=True)
        for num, file in enumerate(file_list_item):
            im_name = file[0][0]
            zip_name = '%s/%s.jpg' % (im_dir, im_name)
            image_path = os.path.join(image_root, zip_name)
            print(image_path)
            img = cv2.imread(image_path)
            if img is None:
                # cv2.imread signals failure by returning None.
                raise FileNotFoundError(image_path)
            h, w = img.shape[:2]
            if landmarks:
                dets, lms = centerface(img, h, w, threshold=0.05)
            else:
                # height and width are required positional arguments.
                dets = centerface(img, h, w, threshold=0.05)
            # `with` closes the result file even if formatting a row fails.
            with open(os.path.join(out_dir, im_name + '.txt'), 'w') as f:
                f.write('{:s}\n'.format(zip_name))
                f.write('{:d}\n'.format(len(dets)))
                for b in dets:
                    x1, y1, x2, y2, s = b
                    # Each row is written as x, y, width, height, score.
                    f.write('{:.1f} {:.1f} {:.1f} {:.1f} {:.3f}\n'.format(x1, y1, (x2 - x1 + 1), (y2 - y1 + 1), s))
            print('event:%d num:%d' % (index + 1, num + 1))
if __name__ == "__main__":
    # Script entry point: runs the single-image demo. The other demos are
    # left here as switchable alternatives.
    # camera()
    test_image()
    # test_widerface()
最终效果,
人脸检测小江湖:
以下为自己的测试结果,时间包括网络推理+后处理时间;face++为服务接口测试,对比不是很公平。
方法 | MTCNN | ultra-face | Retina-Face-mobilenetv1 | Retina-Face-resnet50 | Centerface-mobileNetv2 | face++ |
模型大小 | 2.9M | 1M | 1.8M | 105M | 7.2M | - |
CPU(640*480) Intel(R) Xeon(R) Silver 4216 | 600ms | 200ms | 140ms | 2000ms | 130ms | 10ms |
GPU(640*480) 2080TI | 110ms | 18ms | 38ms | 50ms | 8ms | |
CPU(1280*720) Intel(R) Xeon(R) Silver 4216 | 1000ms | 500ms | 350ms | 3500ms | 300ms | 10ms |
GPU(1280*720) 2080TI | 200ms | 40ms | 100ms | 120ms | 25ms | |
CPU(1920*1080) Intel(R) Xeon(R) Silver 4216 | 1600ms | - | 800ms | 8000ms | 750ms | 10ms |
GPU(1920*1080) 2080TI | 330ms | - | 200ms | 250ms | 50ms | |
精度 | 框略好,5点好 | 框检出少 | 框差,5点差 | 框误检大,5点一般 | 框好,5点非常差 | 框好,点好 |