基于MTCNN+arcface的人脸检测和人脸识别

置顶老于找工作

已于 2022-05-13 10:40:54 修改

阅读量5.2k

点赞数

文章标签： pytorch 深度学习神经网络

于 2022-04-21 13:12:08 首次发布

本文链接：https://blog.csdn.net/weixin_46611502/article/details/124318670

版权

本文介绍了一种基于PyTorch的MTCNN和ArcFace的人脸识别系统，包括人脸检测和识别两个步骤。通过PNet、RNet、ONet三层神经网络实现从图像中定位人脸，并利用ArcFace模型进行精确识别。程序在复杂环境下表现出一定的鲁棒性，具有较高的识别成功率和较低的误识别率。

摘要由CSDN通过智能技术生成

有些时候，你真的为一些人感到脸盲。比如下面三位。
请添加图片描述

请添加图片描述
在这里插入图片描述

他们是谁？？？？？？
不过我们可以借助神经网络来实现人脸识别。
人脸识别主要分为两个步骤，人脸检测（face-detective)和人脸识别（face—recognition）前一步告诉你怎么在一张大图中找到他的脸，第二步用找到的人脸和数据库去对比确定是谁。
本文采用pytorch版本的MTCNN进行人脸检测，arcface进行人脸识别。本程序在较为严苛的阈值要求且在复杂环境下，仍能保证一定的鲁棒性。
下面是三人的识别效果图。
请添加图片描述
在这里插入图片描述

废话不说直接上代码。
代码头没啥好讲的

import argparse
from utils.utils import generate_bbox, py_nms, convert_to_square
from utils.utils import pad, calibrate_box, processed_image
from arc_face import *
from torch.nn import DataParallel
import os
import cv2
import numpy as np
parser = argparse.ArgumentParser()
parser.add_argument('--model_path', type=str, default='infer_models_weights',      help='PNet、RNet、ONet三个模型文件存在的文件夹路径')
args = parser.parse_args()
device = torch.device("cuda")

获取P R N 三个模型

# 获取P模型
pnet = torch.jit.load(os.path.join(args.model_path, 'PNet.pth'))
pnet.to(device)
softmax_p = torch.nn.Softmax(dim=0)
pnet.eval()

# 获取R模型
rnet = torch.jit.load(os.path.join(args.model_path, 'RNet.pth'))
rnet.to(device)
softmax_r = torch.nn.Softmax(dim=-1)
rnet.eval()

# 获取R模型
onet = torch.jit.load(os.path.join(args.model_path, 'ONet.pth'))
onet.to(device)
softmax_o = torch.nn.Softmax(dim=-1)
onet.eval()

使用prn 预测并获取人脸识别结果，boxes_c为人脸框坐标

# 使用PNet模型预测
def predict_pnet(infer_data):
    # 添加待预测的图片
    infer_data = torch.tensor(infer_data, dtype=torch.float32, device=device)
    infer_data = torch.unsqueeze(infer_data, dim=0)
    # 执行预测
    cls_prob, bbox_pred, _ = pnet(infer_data)
    cls_prob = torch.squeeze(cls_prob)
    cls_prob = softmax_p(cls_prob)
    bbox_pred = torch.squeeze(bbox_pred)
    return cls_prob.detach().cpu().numpy(), bbox_pred.detach().cpu().numpy()
# 使用RNet模型预测
def predict_rnet(infer_data):
    # 添加待预测的图片
    infer_data = torch.tensor(infer_data, dtype=torch.float32, device=device)
    # 执行预测
    cls_prob, bbox_pred, _ = rnet(infer_data)
    cls_prob = softmax_r(cls_prob)
    return cls_prob.detach().cpu().numpy(), bbox_pred.detach().cpu().numpy()
# 使用ONet模型预测
def predict_onet(infer_data):
    # 添加待预测的图片
    infer_data = torch.tensor(infer_data, dtype=torch.float32, device=device)
    # 执行预测
    cls_prob, bbox_pred, landmark_pred = onet(infer_data)
    cls_prob = softmax_o(cls_prob)
    return cls_prob.detach().cpu().numpy(), bbox_pred.detach().cpu().numpy(), landmark_pred.detach().cpu().numpy()
# 获取PNet网络输出结果
def detect_pnet(im, min_face_size, scale_factor, thresh):
    """通过pnet筛选box和landmark
    参数：
      im:输入图像[h,2,3]
    """
    net_size = 12
    # 人脸和输入图像的比率
    current_scale = float(net_size) / min_face_size
    im_resized = processed_image(im, current_scale)
    _, current_height, current_width = im_resized.shape
    all_boxes = list()
    # 图像金字塔
    while min(current_height, current_width) > net_size:
        # 类别和box
        cls_cls_map, reg = predict_pnet(im_resized)
        boxes = generate_bbox(cls_cls_map[1, :, :], reg, current_scale, thresh)
        current_scale *= scale_factor  # 继续缩小图像做金字塔
        im_resized = processed_image(im, current_scale)
        _, current_height, current_width = im_resized.shape
        if boxes.size == 0:
            continue
        # 非极大值抑制留下重复低的box
        keep = py_nms(boxes[:, :5], 0.5, mode='Union')
        boxes = boxes[keep]
        all_boxes.append(boxes)
    if len(all_boxes) == 0:
        return None
    all_boxes = np.vstack(all_boxes)
    # 将金字塔之后的box也进行非极大值抑制
    keep = py_nms(all_boxes[:, 0:5], 0.7, mode='Union')
    all_boxes = all_boxes[keep]
    # box的长宽
    bbw = all_boxes[:, 2] - all_boxes[:, 0] + 1
    bbh = all_boxes[:, 3] - all_boxes[:, 1] + 1
    # 对应原图的box坐标和分数
    boxes_c = np.vstack([all_boxes[:, 0] + all_boxes[:, 5] * bbw,
                         all_boxes[:, 1] + all_boxes[:, 6] * bbh,
                         all_boxes[:, 2] + all_boxes[:, 7] * bbw,
                         all_boxes[:, 3] + all_boxes[:, 8] * bbh,
                         all_boxes[:, 4]])
    boxes_c = boxes_c.T
    del all_boxes
    return boxes_c
# 获取RNet网络输出结果
def detect_rnet(im, dets, thresh):
    h, w, c = im.shape
    # 将pnet的box变成包含它的正方形，可以避免信息损失
    dets = convert_to_square(dets)
    dets[:, 0:4] = np.round(dets[:, 0:4])
    # 调整超出图像的box
    [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = pad(dets, w, h)
    delete_size = np.ones_like(tmpw) * 20
    ones = np.ones_like(tmpw)
    zeros = np.zeros_like(tmpw)
    num_boxes = np.sum(np.where((np.minimum(tmpw, tmph) >= delete_size), ones, zeros))
    cropped_ims = np.zeros((num_boxes, 3, 24, 24), dtype=np.float32)
    for i in range(int(num_boxes)):
        # 将pnet生成的box相对与原图进行裁剪，超出部分用0补
        if tmph[i] < 20 or tmpw[i] < 20:
            continue
        tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
        try:
            tmp[dy[i]:edy[i] + 1, dx[i]:edx[i] + 1, :] = im[y[i]:ey[i] + 1, x[i]:ex[i] + 1, :]
            img = cv2.resize(tmp, (24, 24), interpolation=cv2.INTER_LINEAR)
            img = img.transpose((2, 0, 1))
            img = (img - 127.5) / 128
            cropped_ims[i, :, :, :] = img
        except:
            continue
    cls_scores, reg = predict_rnet(cropped_ims)
    cls_scores = cls_scores[:, 1]
    keep_inds = np.where(cls_scores > thresh)[0]
    if len(keep_inds) > 0:
        boxes = dets[keep_inds]
        boxes[:, 4] = cls_scores[keep_inds]
        reg = reg[keep_inds]
    else:
        return None
    keep = py_nms(boxes, 0.6, mode='Union')
    boxes = boxes[keep]
    # 对pnet截取的图像的坐标进行校准，生成rnet的人脸框对于原图的绝对坐标
    boxes_c = calibrate_box(boxes, reg[keep])
    return boxes_c
# 获取ONet模型预测结果
def detect_onet(im, dets, thresh):
    """将onet的选框继续筛选基本和rnet差不多但多返回了landmark"""
    h, w, c = im.shape
    dets = convert_to_square(dets)
    dets[:, 0:4] = np.round(dets[:, 0:4])
    [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = pad(dets, w, h)
    num_boxes = dets.shape[0]
    cropped_ims = np.zeros((num_boxes, 3, 48, 48), dtype=np.float32)
    for i in range(num_boxes):
        tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
        tmp[dy[i]:edy[i] + 1, dx[i]:edx[i] + 1, :] = im[y[i]:ey[i] + 1, x[i]:ex[i] + 1, :]
        img = cv2.resize(tmp, (48, 48), interpolation=cv2.INTER_LINEAR)
        img = img.transpose((2, 0, 1))
        img = (img - 127.5) / 128
        cropped_ims[i, :, :, :] = img
    cls_scores, reg, landmark = predict_onet(cropped_ims)
    cls_scores = cls_scores[:, 1]
    keep_inds = np.where(cls_scores > thresh)[0]
    if len(keep_inds) > 0:
        boxes = dets[keep_inds]
        boxes[:, 4] = cls_scores[keep_inds]
        reg = reg[keep_inds]
        landmark = landmark[keep_inds]
    else:
        return None, None
    w = boxes[:, 2] - boxes[:, 0] + 1
    h = boxes[:, 3] - boxes[:, 1] + 1
    landmark[:, 0::2] = (np.tile(w, (5, 1)) * landmark[:, 0::2].T + np.tile(boxes[:, 0], (5, 1)) - 1).T
    landmark[:, 1::2] = (np.tile(h, (5, 1)) * landmark[:, 1::2].T + np.tile(boxes[:, 1], (5, 1)) - 1).T
    boxes_c = calibrate_box(boxes, reg)
    keep = py_nms(boxes_c, 0.6, mode='Minimum')
    boxes_c = boxes_c[keep]
    landmark = landmark[keep]
    return boxes_c, landmark

人脸检测调用程序

# 人脸检测
def face_detective(im):
    # 调用第一个模型预测
    boxes_c = detect_pnet(im, 20, 0.79, 0.9)
    if boxes_c is None:
        return None, None
    # 调用第二个模型预测
    boxes_c = detect_rnet(im, boxes_c, 0.6)
    if boxes_c is None:
        return None, None
    # 调用第三个模型预测
    boxes_c, landmark = detect_onet(im, boxes_c, 0.7)
    if boxes_c is None:
        return None, None
    return boxes_c, landmark

人脸特征提取程序

def load_image(img_path):
    image = cv2.imread(img_path, 0)
    if image is None:
        return None
    image = cv2.resize(image,(128,128))
    image = np.dstack((image, np.fliplr(image)))
    image = image.transpose((2, 0, 1))
    image = image[:, np.newaxis, :, :]
    image = image.astype(np.float32, copy=False)
    image -= 127.5
    image /= 127.5
    return image
def get_featuresdict(model, dir):
    list1 = os.listdir(dir)
    person_dict = {}
    for i,each in enumerate(list1):
        image = load_image(f"pic/{each}")
        data = torch.from_numpy(image)
        data = data.to(torch.device("cuda"))
        output = model(data)  # 获取特征
        output = output.data.cpu().numpy()
        # 获取不重复图片 并分组
        fe_1 = output[0]
        fe_2 = output[1]
        feature = np.hstack((fe_1, fe_2))
        person_dict[each] = feature
    return person_dict

比对两个特征的余弦距离
和
画人脸框标注人名

def cosin_metric(x1, x2):
    #计算余弦距离
    return np.dot(x1, x2) / (np.linalg.norm(x1) * np.linalg.norm(x2))
def draw_face(img, boxes_c,label):
    corpbbox = [int(boxes_c[0]), int(boxes_c[1]), int(boxes_c[2]), int(boxes_c[3])]
    # 画人脸框
    cv2.rectangle(img, (corpbbox[0], corpbbox[1]),
                  (corpbbox[2], corpbbox[3]), (255, 0, 0), 2)
    # 填写识别名字
    cv2.putText(img, label,
                (corpbbox[0], corpbbox[1] - 2),
                cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 0, 255), 2)

人脸识别程序，返回人名

# 人脸识别
def face_recognition(img):
    img0 = img
    boxes_c, landmarks = face_detective(img)
    label2= "none"
    if boxes_c is not None:
        for i, det in enumerate(boxes_c):
            det[det < 0] = 0  # 坐标会有负值，一律给0
            face_img = img[int(det[1]):int(det[3]), int(det[0]):int(det[2])]
            face_img = cv2.resize(face_img, (128, 128))
            face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2GRAY)
            face_img = np.dstack((face_img, np.fliplr(face_img)))
            face_img = face_img.transpose((2, 0, 1))
            face_img = face_img[:, np.newaxis, :, :]
            face_img = face_img.astype(np.float32, copy=False)
            face_img -= 127.5
            face_img /= 127.5
            face_data = torch.from_numpy(face_img)
            face_data = face_data.to(device)
            _output = arcface_model(face_data)  # 获取特征
            _output = _output.data.cpu().numpy()
            fe_1 = _output[0]
            fe_2 = _output[1]
            _feature = np.hstack((fe_1, fe_2))
            label = "none"
            list3 = os.listdir(dir)
            max_f = 0
            t = 0
            for i, each in enumerate(list3):
                t = cosin_metric(features[each], _feature)
                if t > max_f:
                    max_f = t
                    max_n = each
                if (max_f > 0.45):#门限阈值
                    label = max_n[:-4]
            print('可信度:'+str(max_f))
            draw_face(img0, det, label)
            if label!= "none":
                label2=label
    return (img0,label2)

图像旋转程序，用来矫正人脸歪斜

# 旋转angle角度，缺失背景白色（255, 255, 255）填充
def rotate_bound_white_bg(image, angle):
    (h, w) = image.shape[:2]
    (cX, cY) = (w // 2, h // 2)
    M = cv2.getRotationMatrix2D((cX, cY), -angle, 1.0)
    cos = np.abs(M[0, 0])
    sin = np.abs(M[0, 1])
    nW = int((h * sin) + (w * cos))
    nH = int((h * cos) + (w * sin))
    M[0, 2] += (nW / 2) - cX
    M[1, 2] += (nH / 2) - cY
    return cv2.warpAffine(image, M, (nW, nH), borderValue=(255, 255, 255))

主程序，支持摄像头识别和文件夹内的图片识别。使用时，
source为空时启动摄像头识别，输入文件名时，启动图片识别，并把结果输出到output文件夹内。

if __name__ == '__main__':
    save_img = False
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    dir = "pic"#图片库
    arcface_model = resnet_face18(False)
    out='output'#输出库
    arcface_model = DataParallel(arcface_model)
    arcface_model.load_state_dict(torch.load(r'infer_models_weights/resnet18_110.pth'), strict=False)
    arcface_model.to(device).eval()
    features = get_featuresdict(arcface_model, dir)
    vid_path, vid_writer = None, None
    source='pubajia'#空为摄像头采集，可填文件夹
    if source =='':
        cap = cv2.VideoCapture(0)
        while True:
            ret, img = cap.read()
            if ret:
                img0,label = face_recognition(img)#识别人脸
                cv2.imshow('face rec', img0)
                cv2.waitKey(100)
    else:
        n=0
        m=0
        error=0
        for filename in os.listdir(source):
            print('当前帧:' +str(m+1))
            img = cv2.imread(source+'/'+filename)
            img0,label=face_recognition(img)
            if label== "none":
                print('当前空标帧:'+str(n))
                img0 = rotate_bound_white_bg(img, 15)#识别识别旋转15°
                img0, label = face_recognition(img0)
                if label== "none":
                    print('第二次空标帧:'+str(n))
                    img0 = rotate_bound_white_bg(img, -15)#识别识别反向旋转15°
                    img0, label = face_recognition(img)
            if label==source:
                n=n+1
                print('当前有效帧' + str(n))
            elif label!="none":
                error=error+1
                print('当前错误帧' + str(error))
            cv2.imshow('face rec', img0)
            cv2.waitKey(100)
            m = m + 1
            cv2.imwrite(out + '/' + str(m) + '.jpg', img0)
        print('识别成功率' +str( n/m))
        print('张冠李戴率' + str(error/m))