深度神经网络——基于 DNN 的人脸检测与识别 OpenCV v4.8.0

最新推荐文章于 2024-07-04 12:56:30 发布

HIRSHDZ327

最新推荐文章于 2024-07-04 12:56:30 发布

阅读量329

点赞数 1

分类专栏： OpenCV 4.8.0 说明文档文章标签： dnn opencv 人工智能

OpenCV 4.8.0 说明文档专栏收录该内容

139 篇文章 10 订阅

订阅专栏

上一个教程 ： 高级 API：文本检测模型和文本识别模型

下一个教程 ： 转换 PyTorch 分类模型并使用 OpenCV Python 发布

原作者	Chengrui Wang, Yuantao Feng
兼容性	OpenCV >= 4.5.4

简介

本节将介绍用于人脸检测的 cv::FaceDetectorYN 类和用于人脸识别的 cv::FaceRecognizerSF 类。

模型

本模块需要两个预先训练好的模型（ONNX 格式）：

人脸检测
- 大小：338KB
- 在 WIDER Face Val set 上的结果： 0.830（易）, 0.824（中）, 0.708（难）
人脸识别
- 大小： 36.9MB
- 结果：

数据库	准确率	阈值 (normL2)	阈值 (余弦)
LFW	99.60%	1.128	0.363
CALFW	93.95%	1.149	0.340
CPLFW	91.05%	1.204	0.275
AgeDB-30	94.90%	1.202	0.277
CFP-FP	94.80%	1.253	0.212

代码

C++

可下载代码：点击此处
代码一览：

#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/objdetect.hpp>
#include <iostream>
using namespace cv;
using namespace std;
static
void visualize(Mat& input, int frame, Mat& faces, double fps, int thickness = 2)
{
    std::string fpsString = cv::format("FPS : %.2f", (float)fps);
    if (frame >= 0)
        cout << "Frame " << frame << ", ";
    cout << "FPS: " << fpsString << endl;
    for (int i = 0; i < faces.rows; i++)
    {
        // 打印结果
        cout << "Face " << i
             << ", top-left coordinates: (" << faces.at<float>(i, 0) << ", " << faces.at<float>(i, 1) << "), "
             << "box width: " << faces.at<float>(i, 2)  << ", box height: " << faces.at<float>(i, 3) << ", "
             << "score: " << cv::format("%.2f", faces.at<float>(i, 14))
             << endl;
        // 绘制边界框
        rectangle(input, Rect2i(int(faces.at<float>(i, 0)), int(faces.at<float>(i, 1)), int(faces.at<float>(i, 2)), int(faces.at<float>(i, 3))), Scalar(0, 255, 0), thickness)；
        // 绘制地标
        circle(input, Point2i(int(faces.at<float>(i, 4)), int(faces.at<float>(i, 5))), 2, Scalar(255, 0, 0), thickness);
        circle(input, Point2i(int(faces.at<float>(i, 6)), int(faces.at<float>(i, 7))), 2, Scalar(0, 0, 255), thickness);
        circle(input, Point2i(int(faces.at<float>(i, 8)), int(faces.at<float>(i, 9))), 2, Scalar(0, 255, 0), thickness);
        circle(input, Point2i(int(faces.at<float>(i, 10)), int(faces.at<float>(i, 11))), 2, Scalar(255, 0, 255), thickness);
        circle(input, Point2i(int(faces.at<float>(i, 12)), int(faces.at<float>(i, 13))), 2, Scalar(0, 255, 255), thickness);
    }
    putText(input, fpsString, Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0), 2);
}
int main(int argc, char** argv)
{
    CommandLineParser parser(argc, argv,
        "{help  h           |            | Print this message}"
        "{image1 i1         |            | Path to the input image1. Omit for detecting through VideoCapture}"
        "{image2 i2         |            | Path to the input image2. When image1 and image2 parameters given then the program try to find a face on both images and runs face recognition algorithm}"
        "{video v           | 0          | Path to the input video}"
        "{scale sc          | 1.0        | Scale factor used to resize input video frames}"
        "{fd_model fd       | face_detection_yunet_2021dec.onnx| Path to the model. Download yunet.onnx in https://github.com/opencv/opencv_zoo/tree/master/models/face_detection_yunet}"
        "{fr_model fr       | face_recognition_sface_2021dec.onnx | Path to the face recognition model. Download the model at https://github.com/opencv/opencv_zoo/tree/master/models/face_recognition_sface}"
        "{score_threshold   | 0.9        | Filter out faces of score < score_threshold}"
        "{nms_threshold     | 0.3        | Suppress bounding boxes of iou >= nms_threshold}"
        "{top_k             | 5000       | Keep top_k bounding boxes before NMS}"
        "{save s            | false      | Set true to save results. This flag is invalid when using camera}"
    );
    if (parser.has("help"))
    {
        parser.printMessage();
        return 0;
    }
    String fd_modelPath = parser.get<String>("fd_model");
    String fr_modelPath = parser.get<String>("fr_model");
    float scoreThreshold = parser.get<float>("score_threshold");
    float nmsThreshold = parser.get<float>("nms_threshold");
    int topK = parser.get<int>("top_k");
    bool save = parser.get<bool>("save");
    float scale = parser.get<float>("scale");
    double cosine_similar_thresh = 0.363;
    double l2norm_similar_thresh = 1.128;
    // 初始化 FaceDetectorYN
    Ptr<FaceDetectorYN> detector = FaceDetectorYN::create(fd_modelPath, "", Size(320, 320), scoreThreshold, nmsThreshold, topK);
    TickMeter tm;
    // 如果输入是图像
    if (parser.has("image1"))
    {
        String input1 = parser.get<String>("image1");
        Mat image1 = imread(samples::findFile(input1));
        if (image1.empty())
        {
            std::cerr << "Cannot read image: " << input1 << std::endl;
            return 2;
        }
        int imageWidth = int(image1.cols * scale);
        int imageHeight = int(image1.rows * scale);
        resize(image1, image1, Size(imageWidth, imageHeight));
        tm.start();
        // 在推理之前设置输入大小
        detector->setInputSize(image1.size());
        Mat faces1;
        detector->detect(image1, faces1);
        if (faces1.rows < 1)
        {
            std::cerr << "Cannot find a face in " << input1 << std::endl;
            return 1;
        }
        tm.stop();
        // 在输入图像上绘制结果
        visualize(image1, -1, faces1, tm.getFPS())；
        // 如果保存为 true，则保存结果
        if (save)
        {
            cout << "Saving result.jpg...\n";
            imwrite("result.jpg", image1);
        }
        // 可视化结果
        imshow("image1", image1)；
        pollKey(); // 处理用户界面事件以显示内容
        if (parser.has("image2"))
        {
            String input2 = parser.get<String>("image2");
            Mat image2 = imread(samples::findFile(input2));
            if (image2.empty())
            {
                std::cerr << "Cannot read image2: " << input2 << std::endl;
                return 2;
            }
            tm.reset();
            tm.start();
            detector->setInputSize(image2.size());
            Mat faces2;
            detector->detect(image2, faces2);
            if (faces2.rows < 1)
            {
                std::cerr << "Cannot find a face in " << input2 << std::endl;
                return 1;
            }
            tm.stop();
            visualize(image2, -1, faces2, tm.getFPS());
            if (save)
            {
                cout << "Saving result2.jpg...\n";
                imwrite("result2.jpg", image2);
            }
            imshow("image2", image2);
            pollKey();
            // 初始化 FaceRecognizerSF
            Ptr<FaceRecognizerSF> faceRecognizer = FaceRecognizerSF::create(fr_modelPath, "")；
            // 通过检测到的第一个人脸对齐和裁剪人脸图像。
            Mat aligned_face1, aligned_face2;
            faceRecognizer->alignCrop(image1, faces1.row(0), aligned_face1);
            faceRecognizer->alignCrop(image2, faces2.row(0), aligned_face2);
            // 使用给定的对齐面进行特征提取
            Mat feature1, feature2;
            faceRecognizer->feature(aligned_face1, feature1);
            feature1 = feature1.clone();
            faceRecognizer->feature(aligned_face2, feature2);
            feature2 = feature2.clone();
            double cos_score = faceRecognizer->match(feature1, feature2, FaceRecognizerSF::DisType::FR_COSINE);
            double L2_score = faceRecognizer->match(feature1, feature2, FaceRecognizerSF::DisType::FR_NORM_L2);
            if (cos_score >= cosine_similar_thresh)
            {
                std::cout << "They have the same identity;";
            }
            else
            {
                std::cout << "They have different identities;";
            }
            std::cout << " Cosine Similarity: " << cos_score << ", threshold: " << cosine_similar_thresh << ". (higher value means higher similarity, max 1.0)\n";
            if (L2_score <= l2norm_similar_thresh)
            {
                std::cout << "They have the same identity;";
            }
            else
            {
                std::cout << "They have different identities.";
            }
            std::cout << " NormL2 Distance: " << L2_score << ", threshold: " << l2norm_similar_thresh << ". (lower value means higher similarity, min 0.0)\n";
        }
        cout << "Press any key to exit..." << endl;
        waitKey(0);
    }
    else
    {
        int frameWidth, frameHeight;
        VideoCapture capture;
        std::string video = parser.get<string>("video");
        if (video.size() == 1 && isdigit(video[0]))
            capture.open(parser.get<int>("video"));
        else
            capture.open(samples::findFileOrKeep(video));  // 保留 GStreamer 管道
        if (capture.isOpened())
        {
            frameWidth = int(capture.get(CAP_PROP_FRAME_WIDTH) * scale);
            frameHeight = int(capture.get(CAP_PROP_FRAME_HEIGHT) * scale);
            cout << "Video " << video
                << ": width=" << frameWidth
                << ", height=" << frameHeight
                << endl;
        }
        else
        {
            cout << "Could not initialize video capturing: " << video << "\n";
            return 1;
        }
        detector->setInputSize(Size(frameWidth, frameHeight));
        cout << "Press 'SPACE' to save frame, any other key to exit..." << endl;
        int nFrame = 0;
        for (;;)
        {
            // 获取帧
            Mat frame；
            if (!capture.read(frame))
            {
                cerr << "Can't grab frame！Stop\n"；
                break；
            }
            resize(frame, frame, Size(frameWidth, frameHeight))；
            // 推理
            Mat faces;
            tm.start();
            detector->detect(frame, faces);
            tm.stop();
            Mat result = frame.clone();
            // 在输入图像上绘制结果
            visualize(result, nFrame, faces, tm.getFPS())；
            // 可视化结果
            imshow("Live", result);
            int key = waitKey(1);
            bool saveFrame = save;
            if (key == ' ')
            {
                saveFrame = true;
                key = 0;  // 处理
            }
            if (saveFrame)
            {
                std::string frame_name = cv::format("frame_%05d.png", nFrame);
                std::string result_name = cv::format("result_%05d.jpg", nFrame);
                cout << "Saving '" << frame_name << "' and '" << result_name << "' ...\n";
                imwrite(frame_name, frame);
                imwrite(result_name, result);
            }
            ++nFrame;
            if (key > 0)
                break;
        }
        cout << "Processed " << nFrame << " frames" << endl;
    }
    cout << "Done." << endl;
    return 0;
}

Python

可下载代码：点击此处
代码一览：

import argparse
import numpy as np
import cv2 as cv
def str2bool(v):
    if v.lower() in ['on', 'yes', 'true', 'y', 't']:
        return True
    elif v.lower() in ['off', 'no', 'false', 'n', 'f']:
        return False
    else:
        raise NotImplementedError
parser = argparse.ArgumentParser()
parser.add_argument('--image1', '-i1', type=str, help='Path to the input image1. Omit for detecting on default camera.')
parser.add_argument('--image2', '-i2', type=str, help='Path to the input image2. When image1 and image2 parameters given then the program try to find a face on both images and runs face recognition algorithm.')
parser.add_argument('--video', '-v', type=str, help='Path to the input video.')
parser.add_argument('--scale', '-sc', type=float, default=1.0, help='Scale factor used to resize input video frames.')
parser.add_argument('--face_detection_model', '-fd', type=str, default='face_detection_yunet_2021dec.onnx', help='Path to the face detection model. Download the model at https://github.com/opencv/opencv_zoo/tree/master/models/face_detection_yunet')
parser.add_argument('--face_recognition_model', '-fr', type=str, default='face_recognition_sface_2021dec.onnx', help='Path to the face recognition model. Download the model at https://github.com/opencv/opencv_zoo/tree/master/models/face_recognition_sface')
parser.add_argument('--score_threshold', type=float, default=0.9, help='Filtering out faces of score < score_threshold.')
parser.add_argument('--nms_threshold', type=float, default=0.3, help='Suppress bounding boxes of iou >= nms_threshold.')
parser.add_argument('--top_k', type=int, default=5000, help='Keep top_k bounding boxes before NMS.')
parser.add_argument('--save', '-s', type=str2bool, default=False, help='Set true to save results. This flag is invalid when using camera.')
args = parser.parse_args()
def visualize(input, faces, fps, thickness=2):
    if faces[1] is not None:
        for idx, face in enumerate(faces[1]):
            print('Face {}, top-left coordinates: ({:.0f}, {:.0f}), box width: {:.0f}, box height {:.0f}, score: {:.2f}'.format(idx, face[0], face[1], face[2], face[3], face[-1]))
            coords = face[:-1].astype(np.int32)
            cv.rectangle(input, (coords[0], coords[1]), (coords[0]+coords[2], coords[1]+coords[3]), (0, 255, 0), thickness)
            cv.circle(input, (coords[4], coords[5]), 2, (255, 0, 0), thickness)
            cv.circle(input, (coords[6], coords[7]), 2, (0, 0, 255), thickness)
            cv.circle(input, (coords[8], coords[9]), 2, (0, 255, 0), thickness)
            cv.circle(input, (coords[10], coords[11]), 2, (255, 0, 255), thickness)
            cv.circle(input, (coords[12], coords[13]), 2, (0, 255, 255), thickness)
    cv.putText(input, 'FPS: {:.2f}'.format(fps), (1, 16), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
if __name__ == '__main__':
    
    detector = cv.FaceDetectorYN.create(
        args.face_detection_model,
        "",
        (320, 320),
        args.score_threshold,
        args.nms_threshold,
        args.top_k
    )
    
    tm = cv.TickMeter()
    # 如果输入是图像
    if args.image1 is not None:
        img1 = cv.imread(cv.samples.findFile(args.image1))
        img1Width = int(img1.shape[1]*args.scale)
        img1Height = int(img1.shape[0]*args.scale)
        img1 = cv.resize(img1, (img1Width, img1Height))
        tm.start()
        
        detector.setInputSize((img1Width, img1Height))
        faces1 = detector.detect(img1)
        
        tm.stop()
        assert faces1[1] is not None, 'Cannot find a face in {}'.format(args.image1)
        # 在输入图像上绘制结果
        visualize(img1, faces1, tm.getFPS())
        # 如果保存为 true，则保存结果
        if args.save:
            print('Results saved to result.jpg\n')
            cv.imwrite('result.jpg', img1)
        # 在新窗口中显示结果
        cv.imshow("image1", img1)
        if args.image2 is not None:
            img2 = cv.imread(cv.samples.findFile(args.image2))
            tm.reset()
            tm.start()
            detector.setInputSize((img2.shape[1], img2.shape[0]))
            faces2 = detector.detect(img2)
            tm.stop()
            assert faces2[1] is not None, 'Cannot find a face in {}'.format(args.image2)
            visualize(img2, faces2, tm.getFPS())
            cv.imshow("image2", img2)
            
            recognizer = cv.FaceRecognizerSF.create(
            args.face_recognition_model,"")
            
            
            face1_align = recognizer.alignCrop(img1, faces1[1][0])
            face2_align = recognizer.alignCrop(img2, faces2[1][0])
            # 提取特征
            face1_feature = recognizer.feature(face1_align)
            face2_feature = recognizer.feature(face2_align)
            
            cosine_similarity_threshold = 0.363
            l2_similarity_threshold = 1.128
            
            cosine_score = recognizer.match(face1_feature, face2_feature, cv.FaceRecognizerSF_FR_COSINE)
            l2_score = recognizer.match(face1_feature, face2_feature, cv.FaceRecognizerSF_FR_NORM_L2)
            
            msg = 'different identities'
            if cosine_score >= cosine_similarity_threshold:
                msg = 'the same identity'
            print('They have {}. Cosine Similarity: {}, threshold: {} (higher value means higher similarity, max 1.0).'.format(msg, cosine_score, cosine_similarity_threshold))
            msg = 'different identities'
            if l2_score <= l2_similarity_threshold:
                msg = 'the same identity'
            print('They have {}. NormL2 Distance: {}, threshold: {} (lower value means higher similarity, min 0.0).'.format(msg, l2_score, l2_similarity_threshold))
        cv.waitKey(0)
    else: # 省略输入，调用默认摄像机
        if args.video is not None:
            deviceId = args.video
        else:
            deviceId = 0
        cap = cv.VideoCapture(deviceId)
        frameWidth = int(cap.get(cv.CAP_PROP_FRAME_WIDTH)*args.scale)
        frameHeight = int(cap.get(cv.CAP_PROP_FRAME_HEIGHT)*args.scale)
        detector.setInputSize([frameWidth, frameHeight])
        while cv.waitKey(1) < 0:
            hasFrame, frame = cap.read()
            if not hasFrame:
                print('No frames grabbed!')
                break
            frame = cv.resize(frame, (frameWidth, frameHeight))
            # 推断
            tm.start()
            faces = detector.detect(frame) # faces 是一个元组
            tm.stop()
            # 在输入图像上绘制结果
            visualize(frame, faces, tm.getFPS())
            # 可视化结果
            cv.imshow('Live', frame)
    cv.destroyAllWindows()

说明

C++

    // Initialize FaceDetectorYN
    Ptr<FaceDetectorYN> detector = FaceDetectorYN::create(fd_modelPath, "", Size(320, 320), scoreThreshold, nmsThreshold, topK);

        // Set input size before inference
        detector->setInputSize(image1.size());
        Mat faces1;
        detector->detect(image1, faces1);
        if (faces1.rows < 1)
        {
            std::cerr << "Cannot find a face in " << input1 << std::endl;
            return 1;
        }

Python

    detector = cv.FaceDetectorYN.create(
        args.face_detection_model,
        "",
        (320, 320),
        args.score_threshold,
        args.nms_threshold,
        args.top_k
    )

        # Set input size before inference
        detector.setInputSize((img1Width, img1Height))
        faces1 = detector.detect(img1)

检测输出的人脸是一个 CV_32F 类型的二维数组，其行是检测到的人脸实例，列是人脸的位置和 5 个人脸地标。每行的格式如下

x1，y1，w，h，x_re，y_re，x_le，y_le，x_nt，y_nt，x_rcm，y_rcm，x_lcm，y_lcm

其中，x1、y1、w、h 分别为人脸边界框的左上角坐标、宽度和高度，{x, y}_{re, le, nt, rcm, lcm} 分别代表右眼、左眼、鼻尖、右嘴角和左嘴角的坐标。

人脸识别

人脸检测后，运行以下代码从面部图像中提取人脸特征。
C++

            // Initialize FaceRecognizerSF
            Ptr<FaceRecognizerSF> faceRecognizer = FaceRecognizerSF::create(fr_modelPath, "");

            // Aligning and cropping facial image through the first face of faces detected.
            Mat aligned_face1, aligned_face2;
            faceRecognizer->alignCrop(image1, faces1.row(0), aligned_face1);
            faceRecognizer->alignCrop(image2, faces2.row(0), aligned_face2);
            // Run feature extraction with given aligned_face
            Mat feature1, feature2;
            faceRecognizer->feature(aligned_face1, feature1);
            feature1 = feature1.clone();
            faceRecognizer->feature(aligned_face2, feature2);
            feature2 = feature2.clone();

Python

            recognizer = cv.FaceRecognizerSF.create(
            args.face_recognition_model,"")

            # Align faces
            face1_align = recognizer.alignCrop(img1, faces1[1][0])
            face2_align = recognizer.alignCrop(img2, faces2[1][0])
            # Extract features
            face1_feature = recognizer.feature(face1_align)
            face2_feature = recognizer.feature(face2_align)

获得两张人脸图像的人脸特征 feature1 和 feature2 后，运行下面的代码来计算两张人脸的身份差异。
C++

            double cos_score = faceRecognizer->match(feature1, feature2, FaceRecognizerSF::DisType::FR_COSINE);
            double L2_score = faceRecognizer->match(feature1, feature2, FaceRecognizerSF::DisType::FR_NORM_L2);

Python

            cosine_score = recognizer.match(face1_feature, face2_feature, cv.FaceRecognizerSF_FR_COSINE)
            l2_score = recognizer.match(face1_feature, face2_feature, cv.FaceRecognizerSF_FR_NORM_L2)

例如，如果余弦距离大于或等于 0.363，或 normL2 距离小于或等于 1.128，则两张脸具有相同的身份。

参考文献

致谢

感谢余教授和冯教授训练并提供人脸检测模型。

感谢邓教授、钟博士生和王硕士训练并提供的人脸识别模型。

HIRSHDZ327

关注

1
点赞
踩
4

收藏

觉得还不错? 一键收藏
0
评论
深度神经网络——基于 DNN 的人脸检测与识别 OpenCV v4.8.0

检测输出的人脸是一个 CV_32F 类型的二维数组，其行是检测到的人脸实例，列是人脸的位置和 5 个人脸地标。获得两张人脸图像的人脸特征 feature1 和 feature2 后，运行下面的代码来计算两张人脸的身份差异。例如，如果余弦距离大于或等于 0.363，或 normL2 距离小于或等于 1.128，则两张脸具有相同的身份。分别代表右眼、左眼、鼻尖、右嘴角和左嘴角的坐标。人脸检测后，运行以下代码从面部图像中提取人脸特征。分别为人脸边界框的左上角坐标、宽度和高度，训练并提供的人脸识别模型。
复制链接

扫一扫