人脸识别（三）

最新推荐文章于 2024-01-14 18:33:20 发布

绝世神棍

最新推荐文章于 2024-01-14 18:33:20 发布

阅读量425

点赞数 1

分类专栏：图像识别 C++

本文链接：https://blog.csdn.net/comhaqs/article/details/103873827

版权

C++ 同时被 2 个专栏收录

51 篇文章 2 订阅

订阅专栏

图像识别

7 篇文章 0 订阅

订阅专栏

源码位置：https://github.com/comhaqs/face_find.git 分支： develop_step2

第二阶段，通过 OpenCV 实现人脸识别。OpenCV 的 Mat 本身并不限定像素格式，但 OpenCV 的大多数接口默认按 BGR 通道顺序处理彩色图像，所以需要进行图像格式转换。这里 ffmpeg 解码出来的是 AV_PIX_FMT_YUV420P，先转换成 AV_PIX_FMT_BGR24，以便 OpenCV 进行人脸识别和人脸检测；完成检测并标记人脸后，再转换成 AV_PIX_FMT_RGBA 供 QImage 显示。其实 OpenCV 也可以进行图像格式转换，不过这里全部使用 ffmpeg 来完成图像格式转换。

/// Loads the OpenCV Haar cascade used for frontal-face detection.
///
/// mp_cascade is only assigned when the model file exists AND was parsed
/// successfully, so a null mp_cascade always means "detection unavailable".
face_detection::face_detection()
{
    // Face detection model shipped with OpenCV; its detection rate is fairly low.
    const std::string face_file("./haarcascade_frontalface_alt2.xml");
    if(!boost::filesystem::exists(face_file)){
        LOG_ERROR("找不到对应的文件检测模型文件:"<<face_file);
        return;
    }

    auto p_cascade = std::make_shared<cv::CascadeClassifier>();
    // load() returns false for unreadable/corrupt files; keeping such a
    // classifier around would make detectMultiScale fail later.
    if(!p_cascade->load(face_file)){
        LOG_ERROR("加载人脸检测模型文件失败:"<<face_file);
        return;
    }
    mp_cascade = p_cascade;
}


bool face_detection::detection_face(std::vector<info_face_ptr> & faces, unsigned char* p_data, int width, int height, bool flag_dectection){
    // 这里p_data的图像格式是AV_PIX_FMT_BGR24
    if(!mp_cascade){
        return false;
    }

    // 太耗资源了，所以这里25帧后才检测一次
    static int c = 0;
    if(0 != (c++ % 25)){
        return true;
    }

    // 转换成灰度图像，消耗资源相对比较少
    cv::Mat bgr(cv::Size(width, height), CV_8UC3);
    bgr.data = p_data;
    cv::Mat gray;
    gray.create(bgr.size(), bgr.type());
    cv::cvtColor(bgr, gray, cv::COLOR_BGR2GRAY);

    // 人脸检测
    std::vector<cv::Rect> rect;
    mp_cascade->detectMultiScale(gray, rect, 1.1, 3, 0);
    for (auto& r : rect)
    {
        cv::rectangle(bgr, r, CV_RGB(255, 0, 0), 2);
        auto p_info = std::make_shared<info_face>();
        p_info->p_data = p_data;
        p_info->x = r.x;
        p_info->y = r.y;
        p_info->width = r.width;
        p_info->height = r.height;
        faces.push_back(p_info);
    }
    return true;
}

下面是解码类

#include "media_decoder.h"
#include "utility_tool.h"
#include <vector>
#include "face_detection.h"
#include <opencv2/opencv.hpp>

// Null-safe release helpers: free the object if the pointer is set, then reset
// the pointer. Wrapped in do { } while(0) so that `FREE_X(p);` is a single
// statement and stays valid inside an unbraced if/else.
#define FREE_SWS(P) do { if(nullptr != (P)){ sws_freeContext(P); (P) = nullptr; } } while(0)
#define FREE_FRAME(P) do { if(nullptr != (P)){ av_frame_free(&(P)); (P) = nullptr; } } while(0)
#define FREE_DATA(P) do { if(nullptr != (P)){ av_free(P); (P) = nullptr; } } while(0)

/// Stores the media URL and the callback that receives frame data.
/// Nothing is opened or decoded here; the worker thread is created by start().
media_decoder::media_decoder(const std::string& url, fun_type fun)
    : m_url(url)
    , m_fun(fun)
{
}

/// Stops the worker thread (if any) before the object goes away.
/// Destroying a std::thread that is still joinable calls std::terminate,
/// so the thread is asked to stop and then joined here.
media_decoder::~media_decoder(){
    if(mp_flag_stop){
        *mp_flag_stop = true;
    }
    if(m_thread.joinable()){
        m_thread.join();
    }
}

/// Launches the decoding worker thread for m_url.
/// If a previous worker has not been joined yet it is stopped and joined
/// first, because assigning to a joinable std::thread calls std::terminate.
void media_decoder::start(){
    if(m_thread.joinable()){
        if(mp_flag_stop){
            *mp_flag_stop = true;
        }
        m_thread.join();
    }

    // A fresh flag per run; the worker holds its own shared_ptr copy.
    mp_flag_stop = std::make_shared<std::atomic_bool>(false);
    m_thread = std::thread(&media_decoder::handle_media, m_url, m_fun, mp_flag_stop);
}

/// Requests the worker thread to stop and waits for it to finish.
/// Safe to call before start() or more than once: join() is only invoked on
/// a joinable thread (an unconditional join() would throw std::system_error).
void media_decoder::stop(){
    if(mp_flag_stop){
        *mp_flag_stop = true;
    }
    if(m_thread.joinable()){
        m_thread.join();
    }
}

/// Worker-thread entry point: runs the decode loop with a 960x540 output size
/// and makes sure no exception escapes the thread (an escaping exception would
/// terminate the whole process).
void media_decoder::handle_media(std::string url, fun_type fun, std::shared_ptr<std::atomic_bool> p_stop){
    // Size of the RGBA frames handed to the callback.
    constexpr int width_out = 960;
    constexpr int height_out = 540;
    try {
        work(url, fun, width_out, height_out, p_stop);
    } catch (const std::exception& e) {
        LOG_ERROR("发生错误:"<<e.what());
    } catch (...) {
        // Anything not derived from std::exception must be swallowed here too.
        LOG_ERROR("发生未知错误");
    }
}

void media_decoder::work(std::string url, fun_type fun, int width_out, int height_out, std::shared_ptr<std::atomic_bool> p_stop){
    face_detection_ptr p_detection = std::make_shared<face_detection>();
    AVFormatContext *p_context = nullptr;
    // 打开视频
    auto ret = avformat_open_input(&p_context, url.c_str(), nullptr, nullptr);
    if (0 != ret)
    {
        LOG_ERROR("打开url失败;  错误代码:" << ret << "; 路径:" << url);
        return ;
    }
    // 获取视频信息
    ret = avformat_find_stream_info(p_context, nullptr);
    if (0 > ret)
    {
        LOG_ERROR("获取流信息失败; 错误代码:" << ret);
        return;
    }

    // 查找视频流
    AVCodec *pi_code = nullptr;
    auto index_video = av_find_best_stream(p_context, AVMEDIA_TYPE_VIDEO, -1, -1, &(pi_code), 0);
    if (0 > index_video)
    {
        LOG_ERROR("获取视频索引失败:" << index_video);
        return;
    }
    auto p_video_stream = p_context->streams[index_video];
    auto p_video_code_ctx = avcodec_alloc_context3(pi_code);
    // 复制视频流中相关参数到视频上下文，不然p_video_code_ctx某些参数会丢失
    ret = avcodec_parameters_to_context(p_video_code_ctx, p_video_stream->codecpar);
    if(0 > ret){
        return;
    }
    ret = avcodec_open2(p_video_code_ctx, pi_code, nullptr);
    if(0 > ret){
        return;
    }

    // 申请了三个Frame，一个用来存储解码后的视频帧，一个用来存储OpenCV的帧AV_PIX_FMT_BGR24，一个存储AV_PIX_FMT_RGBA用来显示

    AVFrame *p_frame_yuv = av_frame_alloc();
    int num_yuv = av_image_get_buffer_size(p_video_code_ctx->pix_fmt, p_video_code_ctx->width, p_video_code_ctx->height, 1);
    uint8_t* p_data_yuv = static_cast<uint8_t *>(av_malloc(static_cast<std::size_t>(num_yuv)*sizeof(uint8_t)));
    av_image_fill_arrays(p_frame_yuv->data, p_frame_yuv->linesize, p_data_yuv, p_video_code_ctx->pix_fmt, p_video_code_ctx->width, p_video_code_ctx->height, 1);

    AVPixelFormat pix_fmt_bgr = AV_PIX_FMT_BGR24;
    int width_bgr = p_video_code_ctx->width, height_bgr = p_video_code_ctx->height;
    AVFrame *p_frame_bgr = av_frame_alloc();
    int num_bgr = av_image_get_buffer_size(pix_fmt_bgr, width_bgr, height_bgr, 1);
    uint8_t* p_data_bgr = static_cast<uint8_t *>(av_malloc(static_cast<std::size_t>(num_bgr)*sizeof(uint8_t)));
    av_image_fill_arrays(p_frame_bgr->data, p_frame_bgr->linesize, p_data_bgr, pix_fmt_bgr, width_bgr, height_bgr, 1);

    AVPixelFormat pix_fmt_out = AV_PIX_FMT_RGBA;
    AVFrame *p_frame_out = av_frame_alloc();
    int num_out = av_image_get_buffer_size(pix_fmt_out, width_out, height_out, 1);
    uint8_t* p_data_out = static_cast<uint8_t *>(av_malloc(static_cast<std::size_t>(num_out)*sizeof(uint8_t)));
    av_image_fill_arrays(p_frame_out->data, p_frame_out->linesize, p_data_out, pix_fmt_out, width_out, height_out, 1);

    // 获取图像转换相关对象
    struct SwsContext *p_sws_context_bgr = nullptr;
    p_sws_context_bgr = sws_getCachedContext(p_sws_context_bgr, p_video_code_ctx->width, p_video_code_ctx->height, p_video_code_ctx->pix_fmt,
        width_bgr, height_bgr, pix_fmt_bgr, SWS_BICUBIC, nullptr, nullptr, nullptr);
    if(nullptr == p_sws_context_bgr){
        return;
    }
    struct SwsContext *p_sws_context_out = nullptr;
    p_sws_context_out = sws_getCachedContext(p_sws_context_out, width_bgr, height_bgr, pix_fmt_bgr,
        width_out, height_out, pix_fmt_out, SWS_BICUBIC, nullptr, nullptr, nullptr);
    if(nullptr == p_sws_context_out){
        return;
    }
    while(!*p_stop){
        AVPacket pkt;
        ret = av_read_frame(p_context, &pkt);
        if (0 > ret){
            break;
        }
        if (pkt.stream_index != index_video){
            continue;
        }
        // 这里使用了新版的解码函数，avcodec_send_packet后不一定能avcodec_receive_frame到帧，涉及到I、P、B帧的解码流程
        int re = avcodec_send_packet(p_video_code_ctx ,&pkt);
        if (0 > ret)
        {
            av_packet_unref(&pkt);
            continue;
        }
        while(!*p_stop){
            // 要反复调用avcodec_receive_frame，直到无法获取到帧
            re = avcodec_receive_frame(p_video_code_ctx, p_frame_yuv);
            if (re != 0)
            {
                break;
            }

            if(nullptr != p_sws_context_bgr){
                auto h = sws_scale(p_sws_context_bgr, p_frame_yuv->data, p_frame_yuv->linesize, 0, p_video_code_ctx->height, p_frame_bgr->data, p_frame_bgr->linesize);
                if(0 < h && fun){
                    std::vector<info_face_ptr> faces;
                    p_detection->detection_face(faces, p_data_bgr, width_bgr, height_bgr);

                    // 需要将AV_PIX_FMT_BGR24转换为AV_PIX_FMT_RGBA，因为QImage需要这种格式
                    h = sws_scale(p_sws_context_out, p_frame_bgr->data, p_frame_bgr->linesize, 0, height_bgr, p_frame_out->data, p_frame_out->linesize);

                    auto p_info = std::make_shared<info_data>();
                    p_info->p_data = p_data_out;
                    p_info->width = width_out;
                    p_info->height = height_out;
                    p_info->data_max = static_cast<std::size_t>(num_out);
                    fun(p_info);
                }
                // 如果不延迟，因为解码的速度很快，显示又是依赖于解码，会导致播放速度非常快，这里不应该写死，应该按照视频流的time_base来计算延迟
                std::this_thread::sleep_for(std::chrono::milliseconds(40));
            }else{
                LOG_ERROR("获取缩放上下文失败; 宽度:"<<p_frame_yuv->width<<"; 高度:"<<p_frame_yuv->height<<"; 帧格式:"<<static_cast<AVPixelFormat>(p_frame_yuv->format));
                break;
            }
        }
        // 释放数据包
        av_packet_unref(&pkt);
    }

    // 清理相关对象
    FREE_SWS(p_sws_context_bgr);
    FREE_SWS(p_sws_context_out);
    FREE_FRAME(p_frame_out);
    FREE_DATA(p_data_out);
    FREE_FRAME(p_frame_bgr);
    FREE_DATA(p_data_bgr);
    FREE_FRAME(p_frame_yuv);
    FREE_DATA(p_data_yuv);
    if(nullptr != p_context){
        avformat_close_input(&p_context);
        p_context = nullptr;
    }
}