// Detection, tracking and inference

#include <fstream>
#include <sstream>
#include <iostream>
#include <vector>
#include <string>
#include <windows.h>
#include <opencv2/imgproc.hpp>
#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>

#include <math.h>
#include <codecvt>
#include <cassert>


#include "BYTETracker.h"
#include "scrfd.h"
#include "mtcnn.h"
// Approximate affine (similarity) transform fitted from x0 to dst by
// least squares.
//
// Solves for the 4 parameters [a b c d] of a similarity transform:
//   [ x -y 1 0 ] [a]       [x']
//   [ y  x 0 1 ] [b]   =   [y']
//   [ ...      ] [c]       [...]
//               [d]
// and packs them into the 2x3 matrix [ a -b c ; b a d ] accepted by
// cv::warpAffine.
//
// x0:  source landmark points
// dst: destination landmark points (same count as x0)
// Returns a 2x3 CV_32FC1 transform matrix.
cv::Mat getsrc_roi(const std::vector<cv::Point2f>& x0, const std::vector<cv::Point2f>& dst)
{
    // BUG FIX: the original sized the system from dst but indexed x0,
    // which is out of bounds when x0 is shorter; use the common count.
    const int size = static_cast<int>((std::min)(x0.size(), dst.size()));
    cv::Mat A = cv::Mat::zeros(size * 2, 4, CV_32FC1);
    cv::Mat B = cv::Mat::zeros(size * 2, 1, CV_32FC1);

    for (int i = 0; i < size; i++)
    {
        // Row 2i:   [ x  -y  1  0 ] -> x'
        A.at<float>(i * 2, 0) = x0[i].x;
        A.at<float>(i * 2, 1) = -x0[i].y;
        A.at<float>(i * 2, 2) = 1;
        A.at<float>(i * 2, 3) = 0;
        // Row 2i+1: [ y   x  0  1 ] -> y'
        A.at<float>(i * 2 + 1, 0) = x0[i].y;
        A.at<float>(i * 2 + 1, 1) = x0[i].x;
        A.at<float>(i * 2 + 1, 2) = 0;
        A.at<float>(i * 2 + 1, 3) = 1;

        B.at<float>(i * 2) = dst[i].x;
        B.at<float>(i * 2 + 1) = dst[i].y;
    }

    // Least-squares solve. DECOMP_SVD tolerates a rank-deficient system
    // (degenerate landmark layouts) where the original (A^T A)^-1 A^T B
    // inverted a possibly singular matrix and produced garbage.
    cv::Mat R;
    cv::solve(A, B, R, cv::DECOMP_SVD);

    // roi = [ a -b c ; b a d ]
    cv::Mat roi = cv::Mat::zeros(2, 3, CV_32FC1);
    roi.at<float>(0, 0) = R.at<float>(0, 0);
    roi.at<float>(0, 1) = -R.at<float>(1, 0);
    roi.at<float>(0, 2) = R.at<float>(2, 0);
    roi.at<float>(1, 0) = R.at<float>(1, 0);
    roi.at<float>(1, 1) = R.at<float>(0, 0);
    roi.at<float>(1, 2) = R.at<float>(3, 0);
    return roi;
}

// Face alignment: warps the detected face in imageAlign onto the
// canonical 112x112 crop using the 5 landmarks stored in finalBboxAlign
// (ppoint[0..4] = x coords, ppoint[5..9] = y coords).
cv::Mat faceAlign(cv::Mat& imageAlign, Bbox& finalBboxAlign)
{
    // Canonical 5-point landmark layout (x0..x4 then y0..y4) for the
    // 112x112 reference crop.
    double dst_landmark[10] = {
        38.2946, 73.5318, 55.0252, 41.5493, 70.7299,
        51.6963, 51.5014, 71.7366, 92.3655, 92.2041 };
    vector<cv::Point2f> coord5points;
    vector<cv::Point2f> facePointsByMtcnn;
    for (int i = 0; i < 5; i++) {
        coord5points.push_back(cv::Point2f(dst_landmark[i], dst_landmark[i + 5]));
    }

    for (int j = 0; j < 5; j++)
    {
        // BUG FIX: the original built cv::Point (integer coordinates),
        // silently truncating the sub-pixel landmark positions before
        // the affine fit; keep them as floats.
        facePointsByMtcnn.push_back(cv::Point2f(finalBboxAlign.ppoint[j], finalBboxAlign.ppoint[j + 5]));
    }

    cv::Mat warp_mat = cv::estimateAffinePartial2D(facePointsByMtcnn, coord5points);
    if (warp_mat.empty()) {
        // Fallback: hand-rolled least-squares similarity fit.
        warp_mat = getsrc_roi(facePointsByMtcnn, coord5points);
    }
    warp_mat.convertTo(warp_mat, CV_32FC1);
    cv::Mat alignFace = cv::Mat::zeros(112, 112, imageAlign.type());
    warpAffine(imageAlign, alignFace, warp_mat, alignFace.size());
    return alignFace;
}



// Extracts a 128-D face embedding (blob "fc1") from a MobileFaceNet
// ncnn model. img is resized to the network's 112x112 BGR input.
//
// Returns a heap-allocated float[128]; the CALLER owns the buffer and
// must delete[] it.
float* getFeatByMobileFaceNetNCNN(ncnn::Extractor ex, cv::Mat img)
{
    // Value-initialize so unfilled tail entries are 0, not garbage.
    float* feat = new float[128]();
    ncnn::Mat in = ncnn::Mat::from_pixels_resize(img.data, ncnn::Mat::PIXEL_BGR, img.cols, img.rows, 112, 112);
    ex.input("data", in);
    ncnn::Mat out;
    ex.extract("fc1", out);

    // BUG FIX: clamp the copy to 128 elements — the original wrote
    // out.w floats into a fixed 128-float buffer, a heap overflow
    // whenever the model's output is wider than expected.
    const int n = out.w < 128 ? out.w : 128;
    for (int j = 0; j < n; j++)
    {
        feat[j] = out[j];
    }
    return feat;
}


// Squares a face box around its center and clamps it to the image.
//
// facebox:     input rectangle
// margin_rows: image height (rows), used to clamp the bottom edge
// margin_cols: image width (cols), used to clamp the right edge
// max_b:       true  -> side = max(width, height) (grow to square)
//              false -> side = min(width, height) (shrink to square)
//
// NOTE(review): inside this function rectNot's width/height fields are
// (ab)used to store the bottom-right CORNER (x2, y2) rather than
// extents; the final cv::Rect converts corners back to width/height.
// The recursive call re-squares with the min side when border clamping
// has made the box non-square.
static cv::Rect SquarePadding(cv::Rect facebox, int margin_rows, int margin_cols, bool max_b)
{
    // Box center.
    int c_x = facebox.x + facebox.width / 2;
    int c_y = facebox.y + facebox.height / 2;
    // Half the side length of the squared box.
    int large = 0;
    if (max_b)
        large = (std::max)(facebox.height, facebox.width) / 2;
    else
        large = (std::min)(facebox.height, facebox.width) / 2;
    // rectNot packs corners (x1, y1, x2, y2) into (x, y, width, height).
    cv::Rect rectNot(c_x - large, c_y - large, c_x + large, c_y + large);
    // Clamp top-left corner to the image origin.
    rectNot.x = (std::max)(0, rectNot.x);
    rectNot.y = (std::max)(0, rectNot.y);
    // Clamp bottom-right corner to the image bounds.
    rectNot.height = (std::min)(rectNot.height, margin_rows - 1);
    rectNot.width = (std::min)(rectNot.width, margin_cols - 1);
    // Clamping may have broken squareness; retry once, shrinking to the min side.
    if (rectNot.height - rectNot.y != rectNot.width - rectNot.x)
        return SquarePadding(cv::Rect(rectNot.x, rectNot.y, rectNot.width - rectNot.x, rectNot.height - rectNot.y), margin_rows, margin_cols, false);

    return cv::Rect(rectNot.x, rectNot.y, rectNot.width - rectNot.x, rectNot.height - rectNot.y);
}

// Reads one image, resizes it to targetSize and appends it to the video.
// Unreadable/missing files are skipped with a message instead of letting
// cv::resize abort on an empty Mat (the original crashed on the first
// bad image path).
void processImage(const std::string& imagePath, cv::VideoWriter& videoWriter, const cv::Size& targetSize) {
    cv::Mat image = cv::imread(imagePath);
    if (image.empty()) {
        std::cerr << "Failed to read image: " << imagePath << std::endl;
        return;
    }

    // Normalize the frame to the writer's expected dimensions.
    cv::resize(image, image, targetSize);

    videoWriter.write(image);
}

//读取文件夹下的图片合并成视频
void processFolder(const std::string& folderPath, cv::VideoWriter& videoWriter, const cv::Size& targetSize) {
    cv::String pattern = folderPath + "/*.jpg"; // 匹配 JPG 格式的图片
    std::vector<cv::String> imagePaths;
    cv::glob(pattern, imagePaths);

    for (const auto& imagePath : imagePaths) {
        processImage(imagePath, videoWriter, targetSize);
    }
}

// Appends the path of every immediate subdirectory of folderPath to
// directoryNames, using Win32 FindFirstFile enumeration ("." and ".."
// are skipped). Does nothing when the folder cannot be enumerated.
void getDirectoryNames(const std::string& folderPath, std::vector<std::string>& directoryNames)
{
    const std::string searchPattern = folderPath + "\\*";

    WIN32_FIND_DATAA findData;
    HANDLE hFind = FindFirstFileA(searchPattern.c_str(), &findData);
    if (hFind == INVALID_HANDLE_VALUE)
        return; // nothing matched / folder missing

    do
    {
        const std::string entryName = findData.cFileName;
        const bool isDirectory = (findData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0;
        if (isDirectory && entryName != "." && entryName != "..")
            directoryNames.push_back(folderPath + "/" + entryName);
    } while (FindNextFileA(hFind, &findData));

    FindClose(hFind);
}


// One labelled (ground-truth) or detected object in image coordinates.
struct Object_Label {    
    double o_x, o_y;  // center coordinates
    double o_width, o_height;  // width and height
    int name;  // track id (set from STrack::track_id for detections)
};

// Parses one ground-truth annotation per line from a comma-separated
// txt file. Expected field order per line:
//   frame/pic id, center x, center y, width, height
// The leading id field is consumed but not stored (as in the original).
// Malformed lines are skipped with a warning instead of letting
// std::stod throw out of the function; returns an empty vector when the
// file cannot be opened.
//
// (The original's token.substr(token.find(L',') + 1) was a no-op:
// tokens split on ',' contain no comma, so find() returns npos and
// npos + 1 == 0.)
std::vector<Object_Label> ReadObjectsFromTxt(const std::wstring& filePath)
{
    std::wifstream inputFile(filePath);

    if (!inputFile)
    {
        std::wcerr << L"Failed to open input file: " << filePath << std::endl;
        return {};
    }

    std::vector<Object_Label> objects;
    std::wstring line;
    while (std::getline(inputFile, line))
    {
        std::wstringstream ss(line);
        std::wstring token;

        // Skip the leading frame/pic-id field.
        if (!std::getline(ss, token, L','))
            continue;

        Object_Label obj{};
        double* fields[4] = { &obj.o_x, &obj.o_y, &obj.o_width, &obj.o_height };
        bool ok = true;
        for (double* field : fields)
        {
            if (!std::getline(ss, token, L','))
            {
                ok = false;
                break;
            }
            try
            {
                *field = std::stod(token);
            }
            catch (const std::exception&)
            {
                ok = false;
                break;
            }
        }

        if (ok)
            objects.push_back(obj);
        else
            std::wcerr << L"Skipping malformed annotation line: " << line << std::endl;
    }

    return objects;
}


// Intersection-over-Union of two axis-aligned boxes given as
// (top-left x, top-left y, width, height).
// Returns a value in [0, 1]; 0.0 when the union has zero area (the
// original divided 0/0 for two degenerate boxes, yielding NaN).
double calculateIOU(double x1, double y1, double w1, double h1, double x2, double y2, double w2, double h2) {
    double left = std::max(x1, x2);
    double top = std::max(y1, y2);
    double right = std::min(x1 + w1, x2 + w2);
    double bottom = std::min(y1 + h1, y2 + h2);

    double intersection = std::max(0.0, right - left) * std::max(0.0, bottom - top);
    double union_area = w1 * h1 + w2 * h2 - intersection;

    // Guard degenerate (zero-area) inputs.
    return union_area > 0.0 ? intersection / union_area : 0.0;
}


// Entry point. For each sequence subfolder under ./img:
//   1. stitch the folder's *.jpg frames into output_video.avi,
//   2. run SCRFD face detection + ONet landmark refinement +
//      MobileFaceNet embedding + ByteTrack tracking frame by frame,
//   3. match each confirmed track against the folder's ground-truth
//      annotations (IoU >= 0.5 AND center distance <= 20 px),
//   4. accumulate MOT metrics (MOTA, MOTP, IDF1, HOTA) and dump one
//      line per folder to xreal.txt.
int main(int argc, char *argv[])
{

    std::string modelPath = "./models";
    ncnn::Net squeezenet;
    SCRFD* detector = new SCRFD(modelPath);
    ONet* detector_mtcnn = new ONet(modelPath);
    squeezenet.load_param("./models/mobilefacenet.param");
    squeezenet.load_model("./models/mobilefacenet.bin");
    ncnn::Extractor ex = squeezenet.create_extractor();


    // Per-folder metric lines are written here at the end.
    ofstream out("xreal.txt");
    


    vector<double> motaxreal;//per-folder MOTA, dumped to the txt file
    vector<double> motpxreal;//per-folder MOTP, dumped to the txt file
    vector<int> misxreal;//per-folder missed-detection counts, dumped to the txt file
    vector<int> falsexreal;//per-folder false-detection counts, dumped to the txt file
    vector<int> N_gtxreal;//per-folder ground-truth totals, dumped to the txt file
    vector<int> IDSWxreal;//per-folder ID-switch counts, dumped to the txt file
    vector<int> N_trxreal;//per-folder matched-detection totals, dumped to the txt file
    vector<int> IDF1xreal;//per-folder IDF1 -- NOTE(review): idf1 is a double in [0,1]; an int vector truncates it to 0 or 1
    vector<int> HOTAaxreal;//per-folder HOTA -- NOTE(review): same int-truncation problem as IDF1xreal






    // Input folder and output video file name.
    std::string inputFolder = "./img";
    std::string outputVideo = "output_video.avi";
    std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;

    std::vector<std::string> onedirectoryNames;
    std::vector<std::string> finaldirectoryNames;//subdirectory paths under the current sequence folder
    // Target image size.
    //cv::Size targetSize(640, 480);
    getDirectoryNames(inputFolder, onedirectoryNames);
    for (const auto& path : onedirectoryNames) {
        // Read the annotation info stored in this subdirectory.
        std::wstring directory = converter.from_bytes(path);
        std::wstring searchPath = directory + L"\\*.txt";
        std::vector<Object_Label> objects_label;
        // The counters below accumulate MOT metrics for ONE folder.
        int misDetection = 0;//missed-target count (original note labelled this "FP")
        int falseDetection = 0;//false-detection count (original note labelled this "FN")
        int N_gt = 0;//total ground-truth target count (incremented once per frame)
        int N_tr = 0;//total matched detections (TP)
        double IDFP = 0.0;//detections whose id drifted away from the first id
        double IDFN = 0.0;
        double IDSW = 0.0;//number of id switches compared with the previous frame
        double IDTP = 1.0;//first matched id is firstId; IDTP counts detections keeping firstId (TPA)
        double mota = 0.0;
        double sum_distance = 0.0;//sum of matched center distances (for MOTP)
        double motp = 0.0;
        double idf1 = 0.0;
        double Ac = 0.0;//HOTA association-accuracy accumulator
        double HOTAa = 0.0;
        


        WIN32_FIND_DATAW fileData;
        HANDLE hFind = FindFirstFileW(searchPath.c_str(), &fileData);

        if (hFind != INVALID_HANDLE_VALUE)
        {
            do
            {
                // NOTE(review): if several .txt files match, only the LAST
                // one's annotations survive (objects_label is overwritten).
                std::wstring filePath = directory + L"\\" + fileData.cFileName;
                objects_label = ReadObjectsFromTxt(filePath);

                // Process the objects
                //for (const auto& obj : objects_label)
                //{
                //    // Do something with the object data
                //    std::cout << "Object: x=" << obj.o_x << ", y=" << obj.o_y
                //        << ", width=" << obj.o_width << ", height=" << obj.o_height << std::endl;
                //}
            } while (FindNextFileW(hFind, &fileData) != 0);

            FindClose(hFind);
        }


        getDirectoryNames(path, finaldirectoryNames);
        for (int i = 0; i < finaldirectoryNames.size(); i++) {
            
            // Read the first image to get the video's width and height.
            std::vector<cv::String> imagePaths;

            cv::glob(finaldirectoryNames[i], imagePaths);
            cv::Mat firstImage = cv::imread(imagePaths[0]);
            int width = firstImage.cols;
            int height = firstImage.rows;
            cv::Size targetSize(width, height);

            // Create the output video writer.
            cv::VideoWriter videoWriter(outputVideo, cv::VideoWriter::fourcc('M', 'J', 'P', 'G'), 25, cv::Size(width, height));

            if (!videoWriter.isOpened()) {
                std::cout << "无法创建输出视频编写器!" << std::endl;
                return 1;
            }

            // Write the folder's images into the video.
            processFolder(finaldirectoryNames[i], videoWriter, targetSize);


            // Release the writer so the file can be re-opened for reading.
            videoWriter.release();

            cv::VideoCapture mVideoCapture(outputVideo);


            //cv::VideoCapture mVideoCapture(0);
            if (!mVideoCapture.isOpened()) {
                std::cout << "fail to openn!" << std::endl;
                return 1;
            }
            cv::Mat frame;
            mVideoCapture >> frame;
            int num_frames = 0;
            int fps = 30;
            BYTETracker tracker(fps, 3000);
            bool firstFrame = true;//true until the first frame whose detection matches the annotation
            int firstId = -1; //track id recorded in that first matched frame, used to check later id drift
            

            while (true)
            {
                num_frames++;
                mVideoCapture >> frame;

                if (frame.empty()) {
                    // End of video reached.
                    break;
                }

                std::vector<FaceObject> faceobjects;
                std::vector<Bbox> finalBbox;
                std::vector<cv::Rect> bbox;



                auto start = std::chrono::system_clock::now();
                detector->detect_scrfd(frame, faceobjects);
                auto end = std::chrono::system_clock::now();
                auto detect_time = std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();//ms
                //detector->draw_faceobjects(frame, faceobjects);

                for (int i = 0; i < faceobjects.size(); i++) {
                    cv::Mat faceROI_Image;
                    frame(faceobjects[i].rect).copyTo(faceROI_Image);
                    ncnn::Mat in = ncnn::Mat::from_pixels_resize(faceROI_Image.data,
                        ncnn::Mat::PIXEL_BGR, faceROI_Image.cols, faceROI_Image.rows, 48, 48);
                    // Refine the detection with ONet (landmarks + box).
                    Bbox faceBbox = detector_mtcnn->onetDetect(in, faceobjects[i].rect.x,
                        faceobjects[i].rect.y, faceROI_Image.cols, faceROI_Image.rows);
                    //faceBbox.score = faceobjects[i].prob;
                    finalBbox.push_back(faceBbox);
                    /*for (int j = 0; j < 10; j++) {
                        std::cout << "faceBbox[0].ppoint[" <<j<<"]=" << faceBbox.ppoint[j] << std::endl;
                    } */
                    for (int j = 0; j < 5; j++)
                    {
                        cv::Point p = cv::Point(faceBbox.ppoint[j], faceBbox.ppoint[j + 5]);

                        int r = 255;
                        int g = 255;
                        int b = 0;
                        cv::Scalar color = cv::Scalar(r, g, b);
                        //cv::circle(frame, p, 2, color, 2);
                    }
                }
                // Attach face-recognition embeddings so tracking can fuse them with IoU.
                const int num_box = finalBbox.size();
                //std::vector<cv::Rect> bbox;
                bbox.resize(num_box);
                bool matchLibrary = FALSE; // NOTE(review): never read afterwards

                for (int i = 0; i < num_box; i++) {
                    bbox[i] = cv::Rect(finalBbox[i].x1, finalBbox[i].y1,
                        finalBbox[i].x2 - finalBbox[i].x1 + 1, finalBbox[i].y2 - finalBbox[i].y1 + 1);
                    bbox[i] = SquarePadding(bbox[i], frame.rows, frame.cols, true);

                    // Align the face using the landmarks in finalBbox[i].
                    cv::Mat alignedFace = faceAlign(frame, finalBbox[i]);
                    //cv::imshow("alignedFace", alignedFace);
                    float* featDetect = getFeatByMobileFaceNetNCNN(ex, alignedFace);
                    // NOTE(review): featDetect is heap-allocated and never
                    // delete[]d -- leaks 128 floats per detection.
                    for (int j = 0; j < 128; j++)
                    {
                        finalBbox[i].fects[j] = featDetect[j];
                        //cout << i << " " << featDetect[i] << "\n";
                    }
                }

                //detector->draw_faceobjects(frame, faceobjects);
                //std::cout << "--------------detecting---------------" << std::endl;

                start = std::chrono::system_clock::now();
                std::vector<STrack> output_stracks = tracker.update(finalBbox);
                end = std::chrono::system_clock::now();
                auto track_time = std::chrono::duration_cast<std::chrono::microseconds>(end - start).count();//us

                //std::cout << "output_stracks.size()" << output_stracks.size() << std::endl;

                // Evaluate the tracker output for this frame (MOTA bookkeeping).
                std::vector<Object_Label> detections;
                Object_Label object_label;
                bool correct_detection = false;
                N_gt++;
                int lastId;//previous frame's id, for detecting id switches
                // NOTE(review): lastId is re-declared every frame and only
                // assigned in the first matched frame, so the comparison
                // against det.name below reads an uninitialized int in all
                // later frames -- undefined behavior.
                double TPA = 1;
                double FNA = 0;
                double FPA = 0;


                bbox.resize(output_stracks.size());
                for (unsigned long i = 0; i < output_stracks.size(); i++)
                {
                    std::vector<float> tlwh = output_stracks[i].tlwh;
                    // Skip tiny boxes and implausibly wide ("vertical" ratio) boxes.
                    bool vertical = tlwh[2] / tlwh[3] > 1.6;
                    if (tlwh[2] * tlwh[3] > 30 && !vertical)
                    {
                        bbox[i] = cv::Rect(tlwh[0], tlwh[1], tlwh[2] + 1, tlwh[3] + 1);
                        bbox[i] = SquarePadding(bbox[i], frame.rows, frame.cols, true);
                        cv::Scalar s = tracker.get_color(output_stracks[i].track_id);
                        cv::putText(frame, cv::format("%d %.1f%%", output_stracks[i].track_id, 100 * output_stracks[i].score),
                            cv::Point(bbox[i].x, bbox[i].y - 5), 0, 0.6, cv::Scalar(0, 0, 255), 1, cv::LINE_AA);

                        cv::rectangle(frame, cv::Rect(bbox[i].x, bbox[i].y, bbox[i].width, bbox[i].height), s, 2);

                        // Convert the track box (top-left + size) into the
                        // center-based Object_Label layout for matching.
                        object_label.name = output_stracks[i].track_id;
                        object_label.o_x = static_cast<double>(tlwh[0] + tlwh[2] / 2.0);
                        object_label.o_y = static_cast<double>(tlwh[1] + tlwh[3] / 2.0);
                        object_label.o_width = static_cast<double>(tlwh[2]);
                        object_label.o_height = static_cast<double>(tlwh[3]);
                        detections.push_back(object_label);
                    }

                }

                cv::putText(frame, cv::format("detect ms:%ld  track us:%ld  current frame:%d", detect_time, track_time, num_frames),
                    cv::Point(1, 40), cv::FONT_HERSHEY_PLAIN, 1, cv::Scalar(255, 255, 255), 1, 8);
                cv::imshow("bytetracker", frame);

                
                
                
                // Match each kept track against the frame's ground truth.
                // The current frame's annotation is always objects_label[0];
                // the head of the list is popped at the end of the loop body.
                for (const Object_Label& det : detections) {
                    double iou = calculateIOU(
                        det.o_x - det.o_width / 2.0, 
                        det.o_y - det.o_height / 2.0, 
                        det.o_width, det.o_height,
                        objects_label[0].o_x - objects_label[0].o_width / 2.0,
                        objects_label[0].o_y - objects_label[0].o_height / 2.0,
                        objects_label[0].o_width, objects_label[0].o_height);
                    double distance = 
                        std::sqrt(std::pow(det.o_x - objects_label[0].o_x, 2) + std::pow(det.o_y - objects_label[0].o_y, 2));
                   
                    if (iou >= 0.5 && distance <= 20.0) {
                        correct_detection = true;
                        N_tr++; //number of successfully matched detections (TP)
                        sum_distance += distance;
                        if (firstFrame == true) {
                            firstId = det.name;
                            lastId = det.name;
                            firstFrame = false;
                        }else {
                            if (firstId != det.name) {
                                IDFP++;
                                IDFN++;

                                FPA ++;
                                FNA ++;

                            }
                            else {
                                IDTP++; 

                                TPA ++;
                            }
                        }
                        if (firstFrame == false) {
                            if (det.name != lastId) {
                                IDSW++;
                            }
                        }
                        // Per-match association accuracy term for HOTA.
                        Ac += (abs(TPA) / (abs(TPA) + abs(FNA) + abs(FPA)));
                    }
                }
                
                if (correct_detection == false) {
                    misDetection++;
                    falseDetection++;
                }
                if (cv::waitKey(30) == 27) // Wait for 'esc' key press to exit
                {
                    break;
                }
                // Pop the first annotation row so the next frame matches the next row.
                // NOTE(review): erasing from an empty vector is UB if the video
                // has more frames than annotation rows.
                objects_label.erase(objects_label.begin());
                faceobjects.clear();
                finalBbox.clear();
            }
            mVideoCapture.release();

            std::cout << "视频播放完成!" << std::endl;
        }

        // Fold this folder's counters into the final metrics.
        mota = 1 - (double)(misDetection + falseDetection + 2 * IDSW) / (double)N_gt;
        motp = sum_distance / (double)N_tr;

        
        idf1 = (2 * IDTP ) / (2 * IDTP + IDFP + IDFN);

        HOTAa = std::sqrt(abs(Ac / (double)(abs(N_tr) + abs(misDetection) + abs(falseDetection))));

        std::cout << "mota = " << mota << std::endl;
        std::cout << "motp = " << motp << std::endl;
        std::cout << "idf1 = " << idf1 << std::endl;
        std::cout << "HOTAa = " << HOTAa << std::endl;




        // Store this folder's metrics for the final dump.
        motaxreal.push_back(mota);
        motpxreal.push_back(motp);
        misxreal.push_back(misDetection);
        falsexreal.push_back(falseDetection);
        N_gtxreal.push_back(N_gt);
        IDSWxreal.push_back(IDSW);
        N_trxreal.push_back(N_tr);
        IDF1xreal.push_back(idf1);
        HOTAaxreal.push_back(HOTAa);


        finaldirectoryNames.clear();

    }


    // One line per folder in column order:
    for (int i = 0; i < onedirectoryNames.size(); i++)
    {
        //MOTA  MOTP  IDF1  HOTA  FP  FN  N_gt  IDs  N_tr
        out << i << "  " << motaxreal[i] 
                 << "  " << motpxreal[i]
                 << "  " << IDF1xreal[i]
                 << "  " << HOTAaxreal[i]
                 << "  " << misxreal[i]
                 << "  " << falsexreal[i]
                 << "  " << N_gtxreal[i]
                 << "  " << IDSWxreal[i]
                 << "  " << N_trxreal[i]               
                 <<"\n";
    }
  
   
    cv::destroyAllWindows();
}
#include <fstream>
#include <sstream>
#include <iostream>
#include <vector>
#include <string>
#include <windows.h>
#include <opencv2/imgproc.hpp>
#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>

#include <math.h>
#include <codecvt>
#include <cassert>


#include "BYTETracker.h"
#include "scrfd.h"
#include "mtcnn.h"
// Approximate affine (similarity) transform fitted from x0 to dst by
// least squares.
//
// Solves for the 4 parameters [a b c d] of a similarity transform:
//   [ x -y 1 0 ] [a]       [x']
//   [ y  x 0 1 ] [b]   =   [y']
//   [ ...      ] [c]       [...]
//               [d]
// and packs them into the 2x3 matrix [ a -b c ; b a d ] accepted by
// cv::warpAffine.
//
// x0:  source landmark points
// dst: destination landmark points (same count as x0)
// Returns a 2x3 CV_32FC1 transform matrix.
cv::Mat getsrc_roi(const std::vector<cv::Point2f>& x0, const std::vector<cv::Point2f>& dst)
{
    // BUG FIX: the original sized the system from dst but indexed x0,
    // which is out of bounds when x0 is shorter; use the common count.
    const int size = static_cast<int>((std::min)(x0.size(), dst.size()));
    cv::Mat A = cv::Mat::zeros(size * 2, 4, CV_32FC1);
    cv::Mat B = cv::Mat::zeros(size * 2, 1, CV_32FC1);

    for (int i = 0; i < size; i++)
    {
        // Row 2i:   [ x  -y  1  0 ] -> x'
        A.at<float>(i * 2, 0) = x0[i].x;
        A.at<float>(i * 2, 1) = -x0[i].y;
        A.at<float>(i * 2, 2) = 1;
        A.at<float>(i * 2, 3) = 0;
        // Row 2i+1: [ y   x  0  1 ] -> y'
        A.at<float>(i * 2 + 1, 0) = x0[i].y;
        A.at<float>(i * 2 + 1, 1) = x0[i].x;
        A.at<float>(i * 2 + 1, 2) = 0;
        A.at<float>(i * 2 + 1, 3) = 1;

        B.at<float>(i * 2) = dst[i].x;
        B.at<float>(i * 2 + 1) = dst[i].y;
    }

    // Least-squares solve. DECOMP_SVD tolerates a rank-deficient system
    // (degenerate landmark layouts) where the original (A^T A)^-1 A^T B
    // inverted a possibly singular matrix and produced garbage.
    cv::Mat R;
    cv::solve(A, B, R, cv::DECOMP_SVD);

    // roi = [ a -b c ; b a d ]
    cv::Mat roi = cv::Mat::zeros(2, 3, CV_32FC1);
    roi.at<float>(0, 0) = R.at<float>(0, 0);
    roi.at<float>(0, 1) = -R.at<float>(1, 0);
    roi.at<float>(0, 2) = R.at<float>(2, 0);
    roi.at<float>(1, 0) = R.at<float>(1, 0);
    roi.at<float>(1, 1) = R.at<float>(0, 0);
    roi.at<float>(1, 2) = R.at<float>(3, 0);
    return roi;
}

// Face alignment: warps the detected face in imageAlign onto the
// canonical 112x112 crop using the 5 landmarks stored in finalBboxAlign
// (ppoint[0..4] = x coords, ppoint[5..9] = y coords).
cv::Mat faceAlign(cv::Mat& imageAlign, Bbox& finalBboxAlign)
{
    // Canonical 5-point landmark layout (x0..x4 then y0..y4) for the
    // 112x112 reference crop.
    double dst_landmark[10] = {
        38.2946, 73.5318, 55.0252, 41.5493, 70.7299,
        51.6963, 51.5014, 71.7366, 92.3655, 92.2041 };
    vector<cv::Point2f> coord5points;
    vector<cv::Point2f> facePointsByMtcnn;
    for (int i = 0; i < 5; i++) {
        coord5points.push_back(cv::Point2f(dst_landmark[i], dst_landmark[i + 5]));
    }

    for (int j = 0; j < 5; j++)
    {
        // BUG FIX: the original built cv::Point (integer coordinates),
        // silently truncating the sub-pixel landmark positions before
        // the affine fit; keep them as floats.
        facePointsByMtcnn.push_back(cv::Point2f(finalBboxAlign.ppoint[j], finalBboxAlign.ppoint[j + 5]));
    }

    cv::Mat warp_mat = cv::estimateAffinePartial2D(facePointsByMtcnn, coord5points);
    if (warp_mat.empty()) {
        // Fallback: hand-rolled least-squares similarity fit.
        warp_mat = getsrc_roi(facePointsByMtcnn, coord5points);
    }
    warp_mat.convertTo(warp_mat, CV_32FC1);
    cv::Mat alignFace = cv::Mat::zeros(112, 112, imageAlign.type());
    warpAffine(imageAlign, alignFace, warp_mat, alignFace.size());
    return alignFace;
}



// Extracts a 128-D face embedding (blob "fc1") from a MobileFaceNet
// ncnn model. img is resized to the network's 112x112 BGR input.
//
// Returns a heap-allocated float[128]; the CALLER owns the buffer and
// must delete[] it.
float* getFeatByMobileFaceNetNCNN(ncnn::Extractor ex, cv::Mat img)
{
    // Value-initialize so unfilled tail entries are 0, not garbage.
    float* feat = new float[128]();
    ncnn::Mat in = ncnn::Mat::from_pixels_resize(img.data, ncnn::Mat::PIXEL_BGR, img.cols, img.rows, 112, 112);
    ex.input("data", in);
    ncnn::Mat out;
    ex.extract("fc1", out);

    // BUG FIX: clamp the copy to 128 elements — the original wrote
    // out.w floats into a fixed 128-float buffer, a heap overflow
    // whenever the model's output is wider than expected.
    const int n = out.w < 128 ? out.w : 128;
    for (int j = 0; j < n; j++)
    {
        feat[j] = out[j];
    }
    return feat;
}


// Squares a face box around its center and clamps it to the image.
//
// facebox:     input rectangle
// margin_rows: image height (rows), used to clamp the bottom edge
// margin_cols: image width (cols), used to clamp the right edge
// max_b:       true  -> side = max(width, height) (grow to square)
//              false -> side = min(width, height) (shrink to square)
//
// NOTE(review): inside this function rectNot's width/height fields are
// (ab)used to store the bottom-right CORNER (x2, y2) rather than
// extents; the final cv::Rect converts corners back to width/height.
// The recursive call re-squares with the min side when border clamping
// has made the box non-square.
static cv::Rect SquarePadding(cv::Rect facebox, int margin_rows, int margin_cols, bool max_b)
{
    // Box center.
    int c_x = facebox.x + facebox.width / 2;
    int c_y = facebox.y + facebox.height / 2;
    // Half the side length of the squared box.
    int large = 0;
    if (max_b)
        large = (std::max)(facebox.height, facebox.width) / 2;
    else
        large = (std::min)(facebox.height, facebox.width) / 2;
    // rectNot packs corners (x1, y1, x2, y2) into (x, y, width, height).
    cv::Rect rectNot(c_x - large, c_y - large, c_x + large, c_y + large);
    // Clamp top-left corner to the image origin.
    rectNot.x = (std::max)(0, rectNot.x);
    rectNot.y = (std::max)(0, rectNot.y);
    // Clamp bottom-right corner to the image bounds.
    rectNot.height = (std::min)(rectNot.height, margin_rows - 1);
    rectNot.width = (std::min)(rectNot.width, margin_cols - 1);
    // Clamping may have broken squareness; retry once, shrinking to the min side.
    if (rectNot.height - rectNot.y != rectNot.width - rectNot.x)
        return SquarePadding(cv::Rect(rectNot.x, rectNot.y, rectNot.width - rectNot.x, rectNot.height - rectNot.y), margin_rows, margin_cols, false);

    return cv::Rect(rectNot.x, rectNot.y, rectNot.width - rectNot.x, rectNot.height - rectNot.y);
}

// Reads one image, resizes it to targetSize and appends it to the video.
// Unreadable/missing files are skipped with a message instead of letting
// cv::resize abort on an empty Mat (the original crashed on the first
// bad image path).
void processImage(const std::string& imagePath, cv::VideoWriter& videoWriter, const cv::Size& targetSize) {
    cv::Mat image = cv::imread(imagePath);
    if (image.empty()) {
        std::cerr << "Failed to read image: " << imagePath << std::endl;
        return;
    }

    // Normalize the frame to the writer's expected dimensions.
    cv::resize(image, image, targetSize);

    videoWriter.write(image);
}

//读取文件夹下的图片合并成视频
void processFolder(const std::string& folderPath, cv::VideoWriter& videoWriter, const cv::Size& targetSize) {
    cv::String pattern = folderPath + "/*.jpg"; // 匹配 JPG 格式的图片
    std::vector<cv::String> imagePaths;
    cv::glob(pattern, imagePaths);

    for (const auto& imagePath : imagePaths) {
        processImage(imagePath, videoWriter, targetSize);
    }
}

// Appends the path of every immediate subdirectory of folderPath to
// directoryNames, using Win32 FindFirstFile enumeration ("." and ".."
// are skipped). Does nothing when the folder cannot be enumerated.
void getDirectoryNames(const std::string& folderPath, std::vector<std::string>& directoryNames)
{
    const std::string searchPattern = folderPath + "\\*";

    WIN32_FIND_DATAA findData;
    HANDLE hFind = FindFirstFileA(searchPattern.c_str(), &findData);
    if (hFind == INVALID_HANDLE_VALUE)
        return; // nothing matched / folder missing

    do
    {
        const std::string entryName = findData.cFileName;
        const bool isDirectory = (findData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0;
        if (isDirectory && entryName != "." && entryName != "..")
            directoryNames.push_back(folderPath + "/" + entryName);
    } while (FindNextFileA(hFind, &findData));

    FindClose(hFind);
}


// One labelled (ground-truth) or detected object in image coordinates.
struct Object_Label {    
    double o_x, o_y;  // center coordinates
    double o_width, o_height;  // width and height
    int name;  // track id (set from STrack::track_id for detections)
};

// Parses one ground-truth annotation per line from a comma-separated
// txt file. Expected field order per line (this variant has no leading
// frame-id field):
//   center x, center y, width, height
// Malformed lines are skipped with a warning instead of letting
// std::stod throw out of the function; returns an empty vector when the
// file cannot be opened.
//
// (The original's token.substr(token.find(L',') + 1) was a no-op:
// tokens split on ',' contain no comma, so find() returns npos and
// npos + 1 == 0.)
std::vector<Object_Label> ReadObjectsFromTxt(const std::wstring& filePath)
{
    std::wifstream inputFile(filePath);

    if (!inputFile)
    {
        std::wcerr << L"Failed to open input file: " << filePath << std::endl;
        return {};
    }

    std::vector<Object_Label> objects;
    std::wstring line;
    while (std::getline(inputFile, line))
    {
        std::wstringstream ss(line);
        std::wstring token;

        Object_Label obj{};
        double* fields[4] = { &obj.o_x, &obj.o_y, &obj.o_width, &obj.o_height };
        bool ok = true;
        for (double* field : fields)
        {
            if (!std::getline(ss, token, L','))
            {
                ok = false;
                break;
            }
            try
            {
                *field = std::stod(token);
            }
            catch (const std::exception&)
            {
                ok = false;
                break;
            }
        }

        if (ok)
            objects.push_back(obj);
        else
            std::wcerr << L"Skipping malformed annotation line: " << line << std::endl;
    }

    return objects;
}



// Intersection-over-Union of two axis-aligned boxes given as
// (top-left x, top-left y, width, height).
// Returns a value in [0, 1]; 0.0 when the union has zero area (the
// original divided 0/0 for two degenerate boxes, yielding NaN).
double calculateIOU(double x1, double y1, double w1, double h1, double x2, double y2, double w2, double h2) {
    double left = std::max(x1, x2);
    double top = std::max(y1, y2);
    double right = std::min(x1 + w1, x2 + w2);
    double bottom = std::min(y1 + h1, y2 + h2);

    double intersection = std::max(0.0, right - left) * std::max(0.0, bottom - top);
    double union_area = w1 * h1 + w2 * h2 - intersection;

    // Guard degenerate (zero-area) inputs.
    return union_area > 0.0 ? intersection / union_area : 0.0;
}




// Pipeline: for every sequence folder under ./video, re-encode its frame
// images into a temporary MJPG AVI, run SCRFD face detection + ONet landmark
// refinement + MobileFaceNet embedding + ByteTrack tracking on the video,
// match tracker output against the per-frame annotation .txt files, and
// accumulate MOTA/MOTP/IDF1/HOTA metrics, finally dumped to xreal1.txt.
int main(int argc, char *argv[])
{
    std::string modelPath = "./models";
    ncnn::Net squeezenet;
    SCRFD* detector = new SCRFD(modelPath);
    ONet* detector_mtcnn = new ONet(modelPath);
    squeezenet.load_param("./models/mobilefacenet.param");
    squeezenet.load_model("./models/mobilefacenet.bin");
    ncnn::Extractor ex = squeezenet.create_extractor();

    ofstream out("xreal1.txt");

    // One entry per processed sequence; written to xreal1.txt at the end.
    vector<double> motaxreal;   // MOTA
    vector<double> motpxreal;   // MOTP
    vector<int> misxreal;       // missed detections (FN)
    vector<int> falsexreal;     // false detections (FP)
    vector<int> N_gtxreal;      // ground-truth object count
    vector<int> IDSWxreal;      // identity switches
    vector<int> N_trxreal;      // matched detections (TP)
    vector<double> IDF1xreal;   // IDF1
    vector<double> HOTAaxreal;  // HOTA

    // Expected layout: ./video/<sequence>/<frames-subdir>/*.jpg plus
    // per-frame annotation ./video/<sequence>/*.txt files.
    std::string inputFolder = "./video";
    std::string outputVideo = "output_video.avi";
    std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;

    std::vector<std::string> onedirectoryNames;
    std::vector<std::string> finaldirectoryNames; // frame sub-directories of the current sequence
    getDirectoryNames(inputFolder, onedirectoryNames);
    for (const auto& path : onedirectoryNames) {
        // Per-sequence metric accumulators.
        int misDetection = 0;   // FN: annotated objects never matched by a track
        int falseDetection = 0; // FP: tracks never matched by an annotation
        int N_gt = 0;           // total annotated objects
        int N_tr = 0;           // TP: successful track/annotation matches
        double IDFP = 0.0;
        double IDFN = 0.0;
        double IDSW = 0.0;      // id switches relative to the previous frame
        double IDTP = 0.0;      // matches that kept their first-frame id
        double mota = 0.0;
        double sum_distance = 0.0; // summed center distance over all TP matches (for MOTP)
        double motp = 0.0;
        double idf1 = 0.0;
        double Ac = 0.0;        // per-frame association accuracy, summed (for HOTA)
        double HOTAa = 0.0;

        // Enumerate the per-frame annotation files of this sequence.
        // NOTE(review): FindFirstFileW order is filesystem-dependent; this
        // assumes the enumeration order matches frame order — confirm.
        std::wstring directory = converter.from_bytes(path);
        std::wstring searchPath = directory + L"\\*.txt";

        WIN32_FIND_DATAW fileData;
        HANDLE hFind = FindFirstFileW(searchPath.c_str(), &fileData);
        std::vector<std::vector<Object_Label>> allObjects; // annotations, one vector per frame
        if (hFind != INVALID_HANDLE_VALUE)
        {
            do
            {
                std::wstring filePath = directory + L"\\" + fileData.cFileName;
                std::vector<Object_Label> objects_Label = ReadObjectsFromTxt(filePath);
                allObjects.push_back(objects_Label);
            } while (FindNextFileW(hFind, &fileData) != 0);

            FindClose(hFind);
        }

        getDirectoryNames(path, finaldirectoryNames);
        for (int i = 0; i < finaldirectoryNames.size(); i++) {

            // Probe the first image for the video geometry.
            std::vector<cv::String> imagePaths;
            cv::glob(finaldirectoryNames[i], imagePaths);
            // FIX: an empty frame folder made imagePaths[0] undefined behavior.
            if (imagePaths.empty()) {
                std::cout << "no images found in " << finaldirectoryNames[i] << std::endl;
                continue;
            }
            cv::Mat firstImage = cv::imread(imagePaths[0]);
            if (firstImage.empty()) {
                std::cout << "failed to read first image " << imagePaths[0] << std::endl;
                continue;
            }
            int width = firstImage.cols;
            int height = firstImage.rows;
            cv::Size targetSize(width, height);

            // Re-encode the frame folder into a temporary MJPG video at 10 fps.
            cv::VideoWriter videoWriter(outputVideo, cv::VideoWriter::fourcc('M', 'J', 'P', 'G'), 10, cv::Size(width, height));

            if (!videoWriter.isOpened()) {
                std::cout << "无法创建输出视频编写器!" << std::endl;
                return 1;
            }

            processFolder(finaldirectoryNames[i], videoWriter, targetSize);

            videoWriter.release();

            cv::VideoCapture mVideoCapture(outputVideo);

            if (!mVideoCapture.isOpened()) {
                std::cout << "fail to openn!" << std::endl;
                return 1;
            }
            cv::Mat frame;
            mVideoCapture >> frame; // NOTE(review): discards the first frame before the loop — confirm intended
            int num_frames = 0;
            int fps = 30;
            BYTETracker tracker(fps, 3000);
            bool firstFrame = true; // true until the first frame whose detections match an annotation
            vector<int> first_ids;  // track ids recorded on the first matched frame (reference ids)
            first_ids.resize(1);
            // FIX: last_ids must persist across frames to mean "previous frame's
            // ids". It was previously declared inside the frame loop, so the
            // IDSW comparison always ran against a freshly-reset {0} vector.
            vector<int> last_ids;
            last_ids.resize(1);

            while (true)
            {
                num_frames++;
                mVideoCapture >> frame;

                if (frame.empty()) {
                    break; // end of video
                }

                std::vector<FaceObject> faceobjects;
                std::vector<Bbox> finalBbox;
                std::vector<cv::Rect> bbox;

                // --- face detection (SCRFD) ---
                auto start = std::chrono::system_clock::now();
                detector->detect_scrfd(frame, faceobjects);
                auto end = std::chrono::system_clock::now();
                auto detect_time = std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count(); // ms

                // --- landmark refinement (ONet) per detected face ---
                for (size_t fi = 0; fi < faceobjects.size(); fi++) {
                    cv::Mat faceROI_Image;
                    frame(faceobjects[fi].rect).copyTo(faceROI_Image);
                    ncnn::Mat in = ncnn::Mat::from_pixels_resize(faceROI_Image.data,
                        ncnn::Mat::PIXEL_BGR, faceROI_Image.cols, faceROI_Image.rows, 48, 48);
                    Bbox faceBbox = detector_mtcnn->onetDetect(in, faceobjects[fi].rect.x,
                        faceobjects[fi].rect.y, faceROI_Image.cols, faceROI_Image.rows);
                    finalBbox.push_back(faceBbox);
                }

                // --- face alignment + MobileFaceNet embedding per box ---
                const int num_box = finalBbox.size();
                bbox.resize(num_box);

                for (int bi = 0; bi < num_box; bi++) {
                    bbox[bi] = cv::Rect(finalBbox[bi].x1, finalBbox[bi].y1,
                        finalBbox[bi].x2 - finalBbox[bi].x1 + 1, finalBbox[bi].y2 - finalBbox[bi].y1 + 1);
                    bbox[bi] = SquarePadding(bbox[bi], frame.rows, frame.cols, true);

                    // Align via the ONet landmarks, then embed with MobileFaceNet
                    // so the tracker can fuse appearance with IoU.
                    cv::Mat alignedFace = faceAlign(frame, finalBbox[bi]);
                    float* featDetect = getFeatByMobileFaceNetNCNN(ex, alignedFace);
                    for (int j = 0; j < 128; j++)
                    {
                        finalBbox[bi].fects[j] = featDetect[j];
                    }
                }

                // --- tracking (ByteTrack) ---
                start = std::chrono::system_clock::now();
                std::vector<STrack> output_stracks = tracker.update(finalBbox);
                end = std::chrono::system_clock::now();
                auto track_time = std::chrono::duration_cast<std::chrono::microseconds>(end - start).count(); // us

                // Convert surviving tracks into center-format detections and draw them.
                std::vector<Object_Label> detections;
                Object_Label object_det;

                bbox.resize(output_stracks.size());
                for (unsigned long ti = 0; ti < output_stracks.size(); ti++)
                {
                    std::vector<float> tlwh = output_stracks[ti].tlwh;
                    // Drop tiny boxes and boxes with width/height ratio > 1.6.
                    bool vertical = tlwh[2] / tlwh[3] > 1.6;
                    if (tlwh[2] * tlwh[3] > 30 && !vertical)
                    {
                        bbox[ti] = cv::Rect(tlwh[0], tlwh[1], tlwh[2] + 1, tlwh[3] + 1);
                        bbox[ti] = SquarePadding(bbox[ti], frame.rows, frame.cols, true);
                        cv::Scalar s = tracker.get_color(output_stracks[ti].track_id);
                        cv::putText(frame, cv::format("%d %.1f%%", output_stracks[ti].track_id, 100 * output_stracks[ti].score),
                            cv::Point(bbox[ti].x, bbox[ti].y - 5), 0, 0.6, cv::Scalar(0, 0, 255), 1, cv::LINE_AA);

                        cv::rectangle(frame, cv::Rect(bbox[ti].x, bbox[ti].y, bbox[ti].width, bbox[ti].height), s, 2);

                        object_det.name = output_stracks[ti].track_id;
                        object_det.o_x = static_cast<double>(tlwh[0] + tlwh[2] / 2.0);
                        object_det.o_y = static_cast<double>(tlwh[1] + tlwh[3] / 2.0);
                        object_det.o_width = static_cast<double>(tlwh[2]);
                        object_det.o_height = static_cast<double>(tlwh[3]);
                        detections.push_back(object_det);
                    }
                }

                cv::putText(frame, cv::format("detect ms:%ld  track us:%ld  current frame:%d", detect_time, track_time, num_frames),
                    cv::Point(1, 40), cv::FONT_HERSHEY_PLAIN, 1, cv::Scalar(255, 255, 255), 1, 8);
                cv::imshow("bytetracker", frame);

                // --- metric accumulation against this frame's annotations ---
                // FIX: allObjects shrinks by one entry per frame; when the video
                // has more frames than annotation files, allObjects[0] was an
                // out-of-range access. Skip scoring once annotations run out.
                if (!allObjects.empty())
                {
                    int objects_Label_size = allObjects[0].size(); // unmatched annotations remaining
                    N_gt += objects_Label_size;
                    int detections_size = detections.size();       // unmatched detections remaining
                    vector<int> det_names; // ids of this frame's matched detections
                    double TPA = 0;
                    double FNA = 0;
                    double FPA = 0;

                    // Greedy IoU + center-distance matching between tracks and annotations.
                    for (int di = 0; di < detections.size(); di++)
                    {
                        const Object_Label& det = detections[di];
                        for (auto& object_label : allObjects[0]) {

                            double iou = calculateIOU(
                                det.o_x - det.o_width / 2.0,
                                det.o_y - det.o_height / 2.0,
                                det.o_width, det.o_height,
                                object_label.o_x - object_label.o_width / 2.0,
                                object_label.o_y - object_label.o_height / 2.0,
                                object_label.o_width, object_label.o_height);
                            double distance =
                                std::sqrt(std::pow(det.o_x - object_label.o_x, 2) + std::pow(det.o_y - object_label.o_y, 2));

                            if (iou >= 0.7 && distance <= 20.0) {
                                N_tr++;
                                sum_distance += distance;
                                objects_Label_size--;
                                detections_size--;
                                object_label.o_height = 0; // zero out so it cannot match twice
                                object_label.o_width = 0;

                                if (firstFrame == true) {
                                    // First matched frame establishes the reference ids.
                                    first_ids.push_back(det.name);
                                    last_ids.push_back(det.name);
                                    IDTP++;
                                    TPA++;
                                }
                                else {
                                    det_names.push_back(det.name);
                                }
                            }
                        }
                    }

                    if (firstFrame == false) {
                        for (int x = 0; x < det_names.size(); x++) {
                            // FIX: both bounds checks used <=, reading one element
                            // past the end of first_ids / last_ids (UB).
                            if (x < first_ids.size()) {
                                if (det_names[x] != first_ids[x]) {
                                    IDFP++;
                                    IDFN++;

                                    FPA++;
                                    FNA++;
                                }
                                else {
                                    IDTP++;

                                    TPA++;
                                }
                            }
                            if (x < last_ids.size()) {
                                if (det_names[x] != last_ids[x]) {
                                    IDSW++; // id changed vs. the previous frame
                                }
                            }
                        }
                        // FIX: extend first_ids with ids of newly-appeared tracks.
                        // The old loop bound (det_names.size() - first_ids.size())
                        // shrank as it pushed, so it stopped halfway through.
                        while (first_ids.size() < det_names.size()) {
                            first_ids.push_back(det_names[first_ids.size()]);
                        }
                        last_ids = det_names; // remember this frame's ids for the next IDSW check
                    }
                    // Per-frame association accuracy; epsilon keeps 0/0 finite.
                    Ac += ((abs(TPA) + 0.001) / ((abs(TPA) + abs(FNA) + abs(FPA) + 0.001)));
                    misDetection += objects_Label_size; // annotations left unmatched this frame
                    falseDetection += detections_size;  // detections left unmatched this frame

                    // Consume this frame's annotations.
                    allObjects.erase(allObjects.begin());
                }

                if (cv::waitKey(30) == 27) // 'esc' aborts this video
                {
                    break;
                }
                firstFrame = false;
            }
            mVideoCapture.release();

            std::cout << "视频播放完成!" << std::endl;
        }

        // Sequence-level metrics.
        // NOTE(review): standard MOTA weights IDSW by 1, not 2 — confirm the
        // factor of 2 is intentional. Also N_gt == 0 or N_tr == 0 still yields
        // inf/NaN here, matching the original behavior.
        mota = 1 - (double)(misDetection + falseDetection + 2 * IDSW) / (double)N_gt;
        motp = sum_distance / (double)N_tr;

        idf1 = (2 * IDTP) / (2 * IDTP + IDFP + IDFN);

        HOTAa = std::sqrt(abs(Ac / (double)(abs(N_tr) + abs(misDetection) + abs(falseDetection) + 0.001)));

        std::cout << "mota = " << mota << std::endl;
        std::cout << "motp = " << motp << std::endl;
        std::cout << "idf1 = " << idf1 << std::endl;
        std::cout << "HOTAa = " << HOTAa << std::endl;

        motaxreal.push_back(mota);
        motpxreal.push_back(motp);
        misxreal.push_back(misDetection);
        falsexreal.push_back(falseDetection);
        N_gtxreal.push_back(N_gt);
        IDSWxreal.push_back(IDSW);
        N_trxreal.push_back(N_tr);
        IDF1xreal.push_back(idf1);
        HOTAaxreal.push_back(HOTAa);

        finaldirectoryNames.clear(); // reuse the buffer for the next sequence
    }

    // One line per sequence:
    // index  MOTA  MOTP  IDF1  HOTA  FN  FP  N_gt  IDSW  N_tr
    for (int i = 0; i < onedirectoryNames.size(); i++)
    {
        out << i << "  " << motaxreal[i]
                 << "  " << motpxreal[i]
                 << "  " << IDF1xreal[i]
                 << "  " << HOTAaxreal[i]
                 << "  " << misxreal[i]
                 << "  " << falsexreal[i]
                 << "  " << N_gtxreal[i]
                 << "  " << IDSWxreal[i]
                 << "  " << N_trxreal[i]
                 << "\n";
    }

    cv::destroyAllWindows();
    delete detector_mtcnn; // previously leaked (harmless at exit, but explicit is better)
    delete detector;
    return 0;
}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值