ONNX-Based Face Recognition Example (SCRFD/ArcFace), Python Version

Face recognition involves three steps: face detection, face alignment, and feature extraction.

1. Dependencies

numpy==1.18.0
onnxruntime==1.13.1
onnxruntime_directml==1.10.0
opencv_python==4.6.0.66
scikit_image==0.19.3
scikit_learn==1.1.3
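
These pins can be saved as a requirements.txt and installed with pip install -r requirements.txt. Note that onnxruntime and onnxruntime_directml both provide the same onnxruntime package, so in practice you install one or the other depending on whether you want the DirectML GPU backend. A quick sanity check (a minimal sketch) to confirm which execution providers your installed build exposes:

import onnxruntime

# Prints provider names such as 'CPUExecutionProvider' or 'DmlExecutionProvider',
# depending on which onnxruntime package is installed.
print(onnxruntime.get_available_providers())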

2. Face Detection

from __future__ import division
import datetime
import numpy as np
import onnxruntime
import os.path
import cv2


class SCRFD:
    def __init__(self, model_file, providers=None, options=None, nms_thresh=0.4):
        assert os.path.exists(model_file)

        if providers is None:
            providers = ['CPUExecutionProvider']
        if options is None:
            options = onnxruntime.SessionOptions()

        self.session = onnxruntime.InferenceSession(model_file, providers=providers, sess_options=options)
        input_cfg = self.session.get_inputs()[0]
        input_shape = input_cfg.shape
        input_name = input_cfg.name
        self.input_size = tuple(input_shape[2:4][::-1])
        self.input_name = input_name
        self.nms_thresh = nms_thresh
        self.center_cache = {}

    def forward(self, img, thresh):
        scores_list = []
        bboxes_list = []
        kpss_list = []
        input_size = tuple(img.shape[0:2][::-1])
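        # Preprocess: normalize pixels to (x - 127.5) / 128 and convert BGR to RGB (swapRB=True)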
        blob = cv2.dnn.blobFromImage(img, 1.0 / 128, input_size, (127.5, 127.5, 127.5), swapRB=True)
        net_outs = self.session.run([], {self.input_name: blob})
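        # The model has 9 outputs grouped by FPN stride (8/16/32):
        # indices 0-2 are scores, 3-5 are bbox distances, 6-8 are keypoint distances.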

        input_height = blob.shape[2]
        input_width = blob.shape[3]
        _feat_stride_fpn = [8, 16, 32]
        for idx, stride in enumerate(_feat_stride_fpn):
            scores = net_outs[idx][0]
            bbox_preds = net_outs[idx + 3 * 1][0] * stride
            kps_preds = net_outs[idx + 3 * 2][0] * stride

            height = input_height // stride
            width = input_width // stride
            key = (height, width, stride)
            if key in self.center_cache:
                anchor_centers = self.center_cache[key]
            else:
                anchor_centers = np.stack(np.mgrid[:height, :width][::-1], axis=-1).astype(np.float32)
                anchor_centers = (anchor_centers * stride).reshape((-1, 2))
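                # SCRFD predicts two anchors per spatial location, so duplicate each center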
                anchor_centers = np.stack([anchor_centers] * 2, axis=1).reshape((-1, 2))
                if len(self.center_cache) < 100:
                    self.center_cache[key] = anchor_centers

            pos_inds = np.where(scores >= thresh)[0]
            if len(pos_inds) > 0:
                bboxes = self.distance2bbox(anchor_centers, bbox_preds)
                pos_scores = scores[pos_inds]
                pos_bboxes = bboxes[pos_inds]
                scores_list.append(pos_scores)
                bboxes_list.append(pos_bboxes)
                kpss = self.distance2kps(anchor_centers, kps_preds)
                kpss = kpss.reshape((kpss.shape[0], -1, 2))
                pos_kpss = kpss[pos_inds]
                kpss_list.append(pos_kpss)
        return scores_list, bboxes_list, kpss_list

    def detect(self, img, threshold=0.5, max_num=0, metric='default'):
        input_size = self.input_size
        im_ratio = float(img.shape[0]) / img.shape[1]
        model_ratio = float(input_size[1]) / input_size[0]
        if im_ratio > model_ratio:
            new_height = input_size[1]
            new_width = int(new_height / im_ratio)
        else:
            new_width = input_size[0]
            new_height = int(new_width * im_ratio)
        det_scale = float(new_height) / img.shape[0]
        resized_img = cv2.resize(img, (new_width, new_height))
        det_img = np.zeros((input_size[1], input_size[0], 3), dtype=np.uint8)
        det_img[:new_height, :new_width, :] = resized_img

        scores_list, bboxes_list, kpss_list = self.forward(det_img, threshold)

        if len(scores_list) == 0:
            return np.empty(0), np.empty(0)

        scores = np.vstack(scores_list)
        scores_ravel = scores.ravel()
        order = scores_ravel.argsort()[::-1]
        bboxes = np.vstack(bboxes_list) / det_scale
        kpss = np.vstack(kpss_list) / det_scale
        pre_det = np.hstack((bboxes, scores)).astype(np.float32, copy=False)
        pre_det = pre_det[order, :]
        keep = self.nms(pre_det)
        det = pre_det[keep, :]
        kpss = kpss[order, :, :]
        kpss = kpss[keep, :, :]

        if max_num > 0 and det.shape[0] > max_num:
            area = (det[:, 2] - det[:, 0]) * (det[:, 3] - det[:, 1])
            img_center = img.shape[0] // 2, img.shape[1] // 2
            offsets = np.vstack([
                (det[:, 0] + det[:, 2]) / 2 - img_center[1],
                (det[:, 1] + det[:, 3]) / 2 - img_center[0]
            ])
            offset_dist_squared = np.sum(np.power(offsets, 2.0), 0)
            if metric == 'max':
                values = area
            else:
                values = area - offset_dist_squared * 2.0  # extra weight on centering
            bindex = np.argsort(values)[::-1]
            bindex = bindex[0:max_num]
            det = det[bindex, :]
            if kpss is not None:
                kpss = kpss[bindex, :]
        return det, kpss

    def nms(self, dets):
        thresh = self.nms_thresh
        x1 = dets[:, 0]
        y1 = dets[:, 1]
        x2 = dets[:, 2]
        y2 = dets[:, 3]
        scores = dets[:, 4]

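        # The +1 terms follow the legacy integer-pixel box area convention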
        areas = (x2 - x1 + 1) * (y2 - y1 + 1)
        order = scores.argsort()[::-1]

        keep = []
        while order.size > 0:
            i = order[0]
            keep.append(i)
            xx1 = np.maximum(x1[i], x1[order[1:]])
            yy1 = np.maximum(y1[i], y1[order[1:]])
            xx2 = np.minimum(x2[i], x2[order[1:]])
            yy2 = np.minimum(y2[i], y2[order[1:]])

            w = np.maximum(0.0, xx2 - xx1 + 1)
            h = np.maximum(0.0, yy2 - yy1 + 1)
            inter = w * h
            ovr = inter / (areas[i] + areas[order[1:]] - inter)

            inds = np.where(ovr <= thresh)[0]
            order = order[inds + 1]

        return keep

    def distance2bbox(self, points, distance, max_shape=None):
        """Decode distance prediction to bounding box.

        Args:
            points (np.ndarray): Shape (n, 2), [x, y].
            distance (np.ndarray): Distance from the given point to 4
                boundaries (left, top, right, bottom).
            max_shape (tuple): Shape of the image.

        Returns:
            np.ndarray: Decoded bboxes.
        """
        x1 = points[:, 0] - distance[:, 0]
        y1 = points[:, 1] - distance[:, 1]
        x2 = points[:, 0] + distance[:, 2]
        y2 = points[:, 1] + distance[:, 3]
        if max_shape is not None:
            x1 = np.clip(x1, 0, max_shape[1])
            y1 = np.clip(y1, 0, max_shape[0])
            x2 = np.clip(x2, 0, max_shape[1])
            y2 = np.clip(y2, 0, max_shape[0])
        return np.stack([x1, y1, x2, y2], axis=-1)

    def distance2kps(self, points, distance, max_shape=None):
        preds = []
        for i in range(0, distance.shape[1], 2):
            px = points[:, i % 2] + distance[:, i]
            py = points[:, i % 2 + 1] + distance[:, i + 1]
            if max_shape is not None:
                px = np.clip(px, 0, max_shape[1])
                py = np.clip(py, 0, max_shape[0])
            preds.append(px)
            preds.append(py)
        return np.stack(preds, axis=-1)
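
A minimal usage sketch of the detector on its own (the model path and image are placeholders; any SCRFD ONNX model with a fixed input shape works the same way):

if __name__ == '__main__':
    detector = SCRFD(model_file='scrfd_10g_bnkps_shape640x640.onnx')  # placeholder path
    img = cv2.imread('test.jpg')  # placeholder image
    dets, kpss = detector.detect(img, threshold=0.5)
    # dets is an (N, 5) array of [x1, y1, x2, y2, score];
    # kpss is an (N, 5, 2) array of facial landmarks
    for (x1, y1, x2, y2, score), kps in zip(dets, kpss):
        cv2.rectangle(img, (int(x1), int(y1)), (int(x2), int(y2)), (255, 0, 0), 2)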

3. Face Alignment

import numpy as np
import cv2
from skimage import transform

################### Reference landmarks of the template face ###################
REFERENCE_FACIAL_POINTS = np.array([
    # template for a 112×112 aligned face
    [38.2946, 51.6963],
    [73.5318, 51.5014],
    [56.0252, 71.7366],
    [41.5493, 92.3655],
    [70.7299, 92.2041]
], np.float32)


def align(image, landmark, dsize=(112, 112)):
    # Estimate the similarity transform from the detected landmarks to the template
    trans = transform.SimilarityTransform()
    trans.estimate(landmark, REFERENCE_FACIAL_POINTS)
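    # trans.params is a 3x3 homogeneous matrix; keep the top two rows as a 2x3 affine matrix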
    M = trans.params[:2, :]

    # The commented-out block below is an equivalent closed-form least-squares solve
    # Q = np.zeros((10, 4))
    # S = REFERENCE_FACIAL_POINTS.reshape(-1, 1)
    # for i in range(5):
    #     x, y = landmark[i]
    #     Q[i * 2 + 0] = x, y, 1, 0
    #     Q[i * 2 + 1] = y, -x, 0, 1
    # M = (np.linalg.inv(Q.T @ Q) @ Q.T @ S).squeeze()
    # M = np.array([
    #     [M[0], M[1], M[2]],
    #     [-M[1], M[0], M[3]]
    # ])
    
    face_img = cv2.warpAffine(image, M, dsize, borderValue=3.0)
    
    return face_img
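
A quick sketch verifying that the commented-out normal-equation solve matches skimage's estimate (the five-point landmark array is a made-up example; REFERENCE_FACIAL_POINTS is the module-level template above):

import numpy as np
from skimage import transform

landmark = np.array([[200., 150.], [280., 148.], [240., 200.],
                     [210., 250.], [270., 248.]], np.float32)  # hypothetical detection

trans = transform.SimilarityTransform()
trans.estimate(landmark, REFERENCE_FACIAL_POINTS)
M_skimage = trans.params[:2, :]

# Solve the same similarity transform directly via linear least squares
Q = np.zeros((10, 4))
S = REFERENCE_FACIAL_POINTS.reshape(-1, 1)
for i in range(5):
    x, y = landmark[i]
    Q[i * 2 + 0] = x, y, 1, 0
    Q[i * 2 + 1] = y, -x, 0, 1
a, b, tx, ty = np.linalg.lstsq(Q, S, rcond=None)[0].squeeze()
M_manual = np.array([[a, b, tx], [-b, a, ty]])

print(np.allclose(M_skimage, M_manual, atol=1e-4))  # expected: True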

4. Feature Extraction

import numpy as np
import cv2
import os.path
from sklearn import preprocessing
import onnxruntime
import alignface  # the alignment module from section 3 (saved as alignface.py)

class ArcFace:
    def __init__(self, model_file, providers=None, options=None):
        assert os.path.exists(model_file)

        self.input_mean = 127.5
        self.input_std = 127.5

        if providers is None:
            providers = ['CPUExecutionProvider']
        if options is None:
            options = onnxruntime.SessionOptions()

        self.session = onnxruntime.InferenceSession(model_file, providers=providers, sess_options=options)
        input_cfg = self.session.get_inputs()[0]
        input_shape = input_cfg.shape
        input_name = input_cfg.name
        self.input_size = tuple(input_shape[2:4][::-1])
        self.input_name = input_name

    def get_feature(self, img, landmark):
        _img = alignface.align(img, landmark=landmark)
        embedding = self.forward(_img).flatten()
        embedding = np.array(embedding).reshape((1, -1))
        embedding = preprocessing.normalize(embedding)
        return embedding

    def forward(self, imgs):
        if not isinstance(imgs, list):
            imgs = [imgs]
        input_size = self.input_size

        blob = cv2.dnn.blobFromImages(imgs, 1.0 / self.input_std, input_size,
                                      (self.input_mean, self.input_mean, self.input_mean), swapRB=True)
        net_out = self.session.run([], {self.input_name: blob})[0]
        return net_out
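
A minimal sketch of the extractor on its own (paths and the landmark array are placeholders; in the full pipeline the landmarks come from the SCRFD detector above):

if __name__ == '__main__':
    recognizer = ArcFace(model_file='w600k_r50.onnx')  # placeholder path
    img = cv2.imread('face.jpg')                       # placeholder image
    landmark = np.array([[200., 150.], [280., 148.], [240., 200.],
                         [210., 250.], [270., 248.]], np.float32)  # hypothetical landmarks
    embedding = recognizer.get_feature(img, landmark)
    print(embedding.shape)  # (1, 512) for the w600k_r50 model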

5. Face Recognition

import cv2
import datetime
import os
import numpy as np
import onnxruntime
from scrfd import SCRFD      # the detector from section 2 (saved as scrfd.py)
from arcface import ArcFace  # the extractor from section 4 (saved as arcface.py)


class FaceRecognition:
    def __init__(self, dete_model=None, reco_model=None, ctx_id=0, dete_threshold=0.50, reco_threshold=1.24):
        # Face recognition utility class
        # param ctx_id: non-negative selects a GPU by ID; negative runs on CPU
        # param dete_threshold: face detection threshold
        # param reco_threshold: face recognition (feature distance) threshold

        providers = ['CPUExecutionProvider']
        options = onnxruntime.SessionOptions()
        if ctx_id >= 0:
            providers = ['DmlExecutionProvider']
            options.enable_mem_pattern = False
            options.execution_mode = onnxruntime.ExecutionMode.ORT_SEQUENTIAL

        self.detector = SCRFD(model_file=dete_model, providers=providers, options=options)
        self.recognizer = ArcFace(model_file=reco_model, providers=providers, options=options)

        self.reco_threshold = reco_threshold
        self.dete_threshold = dete_threshold

        self.faces_embedding = []

    # Load the faces stored in the face database directory
    def load_faces(self, face_db_path):
        if not os.path.exists(face_db_path):
            os.makedirs(face_db_path)
        for root, dirs, files in os.walk(face_db_path):
            for file in files:
                input_image = cv2.imdecode(np.fromfile(os.path.join(root, file), dtype=np.uint8), 1)  # handles non-ASCII paths
                user_id = file.split(".")[0]
                result = self.register(input_image, user_id)
                if result != 'success':
                    print(file + ':' + result)

    # Recognize all faces in an image against the database
    def recognize(self, image):
        dets, landmarks = self.detector.detect(image, threshold=self.dete_threshold)
        results = list()
        for det, landmark in zip(dets, landmarks):
            embedding = self.recognizer.get_feature(image, landmark)

            for com_face in self.faces_embedding:
                r = self.feature_compare(embedding, com_face["feature"])
                print("recognize: {}/{}".format(com_face["user_id"], r))
                if r < self.reco_threshold:
                    result = dict()
                    result["user_id"] = com_face["user_id"]
                    result["bbox"] = (np.array(det)[:4]).astype(np.int32).tolist()
                    result["landmark"] = np.array(landmark).astype(np.int32).tolist()
                    results.append(result)
        return results

    @staticmethod
    def feature_compare(feature1, feature2):
        diff = np.subtract(feature1, feature2)
        dist = np.sum(np.square(diff), 1)
        return dist

    # Register a new face into the database
    def register(self, image, user_id):
        bboxes, landmarks = self.detector.detect(image, threshold=self.dete_threshold)

        if bboxes.shape[0] == 0:
            return 'no face detected'
        if bboxes.shape[0] > 1:
            return 'multiple faces detected'

        # Check whether this face is already registered
        embedding = self.recognizer.get_feature(image, landmarks[0])
        for com_face in self.faces_embedding:
            r = self.feature_compare(embedding, com_face["feature"])
            if r < self.reco_threshold:
                return com_face["user_id"] + ' already exists'

        # Registration accepted: optionally persist the image, and add the feature to the database
        # cv2.imencode('.png', image)[1].tofile(os.path.join(self.face_db, '%s.png' % user_id))
        self.faces_embedding.append({
            "user_id": user_id,
            "feature": embedding
        })
        return "success"

Note: the providers list can be adjusted to match your own hardware environment.
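
For example, a CUDA build of onnxruntime (the onnxruntime-gpu package) exposes 'CUDAExecutionProvider' instead of 'DmlExecutionProvider'. A minimal sketch of picking the best available provider at runtime:

import onnxruntime

available = onnxruntime.get_available_providers()
if 'CUDAExecutionProvider' in available:
    providers = ['CUDAExecutionProvider']
elif 'DmlExecutionProvider' in available:
    providers = ['DmlExecutionProvider']
else:
    providers = ['CPUExecutionProvider']
print('using providers:', providers)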

6. Example

if __name__ == '__main__':

    dete_model = r'X:\xxx\scrfd_10g_bnkps_shape640x640.onnx'
    reco_model = r'X:\xxx\w600k_r50.onnx'
    face_db_path = r'X:\xxx\face_db'

    face_reco = FaceRecognition(dete_model=dete_model, reco_model=reco_model, ctx_id=0)
    face_reco.load_faces(face_db_path=face_db_path)

    img = cv2.imread(r'X:\xxx\test.jpg')

    ta = datetime.datetime.now()
    results = face_reco.recognize(img)
    print('total cost (ms):', (datetime.datetime.now() - ta).total_seconds() * 1000)

    for result in results:
        x1, y1, x2, y2 = result["bbox"]
        cv2.rectangle(img, (x1, y1), (x2, y2), (255, 0, 0), 2)
        cv2.putText(img=img, text=result["user_id"], org=(x1, y1 - 10),
                    fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=0.5, color=(0, 255, 0), thickness=1)
        for landmark in result["landmark"]:
            cv2.circle(img, tuple(landmark), 1, (0, 0, 255), 2)

    cv2.imshow('Image', img)
    cv2.waitKey()
    cv2.destroyAllWindows()
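
Since get_feature L2-normalizes every embedding, the squared Euclidean distance computed by feature_compare relates to cosine similarity by ||a - b||^2 = 2 - 2*cos(a, b), so the default reco_threshold of 1.24 corresponds to accepting a cosine similarity of roughly 0.38 or higher. A tiny sketch of that identity:

import numpy as np

a = np.random.randn(512); a /= np.linalg.norm(a)
b = np.random.randn(512); b /= np.linalg.norm(b)
print(np.isclose(np.sum((a - b) ** 2), 2 - 2 * np.dot(a, b)))  # True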

Notes:

  1. SCRFD model download: https://onedrive.live.com/redir?resid=4A83B6B633B029CC!5543&authkey=!ACwXX1RtoJZotbE&e=F6i5Vm
  2. ArcFace model download: https://drive.google.com/file/d/1qXsQJ8ZT42_xSmWIYy85IcidpiZudOCB/view?usp=sharing