opencv-dnn部署yolov5检测模型

最新推荐文章于 2024-05-09 16:58:31 发布
秋意为谁白
最新推荐文章于 2024-05-09 16:58:31 发布
阅读量340
点赞数
文章标签： opencv dnn YOLO
本文链接：https://blog.csdn.net/qq_36801705/article/details/133963479
版权
1. 注意事项
2. C++代码
3. 运行结果
1. 注意事项

pt->onnx 一定要选择静态尺寸，dnn只支持静态尺寸，否则会报错。
parser.add_argument('--dynamic', default=True, help='ONNX/TF/TensorRT: dynamic axes')
opencv version: 3.4.15
2. C++代码

#include <iostream>
#include<opencv2//opencv.hpp>
#include<math.h>
#include<time.h>

using namespace std;
using namespace cv;
using namespace dnn;


// reference https://github.com/UNeedCryDear/yolov5-opencv-dnn-cpp/blob/main/yolo.cpp

std::vector<std::string> extensions{ "jpg", "bmp", "png", "jpeg" };
static const string kWinName = "YOLOV5 object detection in OpenCV";

static vector<string> loadLabels(const string& filename)
{
    vector<string> labels;
    ifstream ifs(filename.c_str());
    string line;
    while (getline(ifs, line))
    {
        labels.push_back(line);
    }
    return labels;
}

void GetImgFilenames(std::string path, std::vector<std::string>& imgFilenames)
{
    // imgFilenames.clear();
    if (path.find(".") != std::string::npos)
    {
        imgFilenames.push_back(path);
    }
    else
    {
        std::string fpath = path.append("*.*");
        std::vector<cv::String> allfiles;  //cv::String
        cv::glob(fpath, allfiles);
        for (int i = 0; i < allfiles.size(); i++)
        {
            size_t iPos = allfiles[i].rfind('.');
            std::string fileEx = allfiles[i].substr(iPos + 1, allfiles[i].length());
            if (std::find(extensions.begin(), extensions.end(), fileEx) != extensions.end())
            {
                imgFilenames.push_back(allfiles[i]);
            }
        }
    }
}

std::vector<cv::Scalar> colormap(int numClass)
{
    std::vector<cv::Scalar> colors;

    for (int i = 0; i < numClass; i++) {
        int b = rand() % 256;
        int g = rand() % 256;
        int r = rand() % 256;
        colors.push_back(Scalar(b, g, r));
    }
    return colors;
}

#define isP6 false //是否使用P6模型

struct OutputDet {
    int cls;             // 类别id
    float confidence;   //置信度
    cv::Rect bndbox;       //矩形框
};

class YoloDet {
public:
    
    bool modelInit(cv::dnn::Net& net, std::string& netPath, std::string classFile, bool isCuda);
    bool Detect(cv::Mat srcImg, cv::dnn::Net& net, std::vector<OutputDet>& output);
    void Visvalize(cv::Mat img, std::vector<OutputDet> predOuts);
    void letterbox(cv::Mat srcImg, cv::Mat& pendImg);

private:
#if(defined isP6 && isP6==true)
    const float netAnchors[4][6] = { { 19,27, 44,40, 38,94 },{ 96,68, 86,152, 180,137 },{ 140,301, 303,264, 238,542 },{ 436,615, 739,380, 925,792 } };

    const int inpWidth = 1280;  
    const int inpHeight = 1280; 

    const int strideSize = 4;  //stride size
#else
    const float netAnchors[3][6] = { { 10,13, 16,30, 33,23 },{ 30,61, 62,45, 59,119 },{ 116,90, 156,198, 373,326 } };

    const int inpWidth = 640;   
    const int inpHeight = 640;  

    const int strideSize = 3;   //stride size
#endif // isP6

    const float netStride[4] = { 8, 16.0,32,64 };

    float objThreshold = 0.25;
    float confThreshold = 0.25;

    float nmsThreshold = 0.45;
    float nmsScoreThreshold = objThreshold * confThreshold;

    bool keep_ratio = true;
    cv::Vec4d param;
    vector<string> classNames;
    int numClasses;
    std::vector<cv::Scalar> colors;

    // std::vector<std::string> classNames = { "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
    //  "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
    //  "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
    //  "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
    //  "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
    //  "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
    //  "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
    //  "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
    //  "hair drier", "toothbrush" };
};

bool YoloDet::modelInit(Net& net, string& netPath, std::string classFile, bool isCuda = false) {
    try {
        net = readNet(netPath);
    }
    catch (const std::exception&) {
        return false;
    }
    
    net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
    net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
    
    ifstream ifs(classFile);
    string line;
    while (getline(ifs, line)) this->classNames.push_back(line);
    
    if (classNames.empty()) {
        cout << "Failed to load labels: " << classFile << endl;

    }
    this->numClasses = classNames.size();
    this->colors = colormap(numClasses);
    
    return true;
}

void YoloDet::letterbox(cv::Mat srcImg, cv::Mat& pendImg)
{
    // int sh = srcImg.rows, sw = srcImg.cols;
    // int nh = this->inpHeight;
    // int nw = this->inpWidth;
    // //float ratioh = 1, ratiow = 1;
    // int left = 0, top = 0;
    // //Mat pendImg;
    // if (this->keep_ratio && sh != sw) {
    //  float hw_scale = (float)sh / sw;
    //  if (hw_scale > 1) {
    //      nh = this->inpHeight;
    //      nw = int(this->inpWidth / hw_scale);
    //      cv::resize(srcImg, pendImg, Size(nw, nh), INTER_AREA);
    //      left = int((this->inpWidth - nw) * 0.5);
    //      cv::copyMakeBorder(pendImg, pendImg, 0, 0, left, this->inpWidth - nw - left, BORDER_CONSTANT, 114);
    //  }
    //  else {
    //      nh = (int)this->inpHeight * hw_scale;
    //      nw = this->inpWidth;
    //      cv::resize(srcImg, pendImg, Size(nw, nh), INTER_AREA);
    //      top = (int)(this->inpHeight - nh) * 0.5;
    //      cv::copyMakeBorder(pendImg, pendImg, top, this->inpHeight - nh - top, 0, 0, BORDER_CONSTANT, 114);
    //  }   
    // }
    // else {
    //  cv::resize(srcImg, pendImg, Size(nw, nh), INTER_AREA);
    // }
    // float ratioh = (float)sh / nh, ratiow = (float)sw / nw;
    // cv::Vec4d p(ratioh, ratiow, top, left);
    // this-> param = p;

    int col = srcImg.cols;
    int row = srcImg.rows;

    pendImg = srcImg.clone();
    int maxSize = col>=row?col:row;
    // int maxSize = MAX(col, row);

    if (maxSize > 1.2 * col || maxSize > 1.2 * row) {
        pendImg = cv::Mat::zeros(maxSize, maxSize, CV_8UC3);
        srcImg.copyTo(pendImg(Rect(0, 0, col, row)));
    }
    float ratioh = (float)pendImg.rows / inpHeight;
    float ratiow = (float)pendImg.cols / inpWidth;

    cv::Vec4d p(ratioh, ratiow, 0, 0);
    this-> param = p;

}

bool YoloDet::Detect(cv::Mat srcImg, cv::dnn::Net& net, vector<OutputDet>& output) {
    

    cv::Mat netInputImg;
    this->letterbox(srcImg.clone(), netInputImg);
    cv::Mat blob;
    cv::dnn::blobFromImage(netInputImg, blob, 1 / 255.0, cv::Size(inpWidth, inpHeight), cv::Scalar(0, 0, 0), true, false);
    //blobFromImage(netInputImg, blob, 1 / 255.0, cv::Size(inpWidth, inpHeight), cv::Scalar(104, 117, 123), true, false);
    //blobFromImage(netInputImg, blob, 1 / 255.0, cv::Size(inpWidth, inpHeight), cv::Scalar(114, 114,114), true, false);
    net.setInput(blob);
    std::vector<cv::Mat> netOutputImg;
    net.forward(netOutputImg, net.getUnconnectedOutLayersNames());

#if CV_VERSION_MAJOR==4&&CV_VERSION_MINOR==6
    std::sort(netOutputImg.begin(), netOutputImg.end(), [](Mat& A, Mat& B) {return A.size[1] > B.size[1]; });//opencv 4.6
#endif

    
    int netWidth = numClasses + 5;  
    float* rawOutput = (float*)netOutputImg[0].data;

    std::vector<int> clsIDs;
    std::vector<float> confidences;
    std::vector<cv::Rect> boxes;
    //vector<BoxInfo> boxes;
    for (int n = 0; n < this-> strideSize; n++) 
    {    
        const float stride = pow(2, n + 3);
        int grid_x = (int)ceil((this->inpWidth / stride));
        int grid_y = (int)ceil((this->inpHeight / stride));
        for (int anchor = 0; anchor < 3; anchor++) 
        {   //anchors
            for (int i = 0; i < grid_y; ++i) 
            {
                for (int j = 0; j < grid_x; ++j) 
                {
                    float boxScore = rawOutput[4]; ;//bndbox-confidence
                    if (boxScore >= objThreshold) 
                    {
                        cv::Mat confs(1, classNames.size(), CV_32FC1, rawOutput + 5);
                        Point clsID;
                        double maxConf;
                        minMaxLoc(confs, 0, &maxConf, 0, &clsID);
                        maxConf = (float)maxConf;
                        if (maxConf >= confThreshold) 
                        {
                            float x = rawOutput[0];  //x
                            float y = rawOutput[1];  //y
                            float w = rawOutput[2];  //w
                            float h = rawOutput[3];  //h
                    
                            // float left = (x - param[3] - 0.5 * w) * param[1];
                            // float top = (y - param[2] - 0.5 * h) * param[0];
                            int left = (x - 0.5 * w) * param[1];
                            int top = (y - 0.5 * h) * param[0];
                        
                            w =  w * param[1];
                            h =  h * param[0];
                            
                            clsIDs.push_back(clsID.x);
                            confidences.push_back(maxConf * boxScore);
                            boxes.push_back(cv::Rect(int(left), int(top), int(w), int(h)));
                        }
                    }
                    rawOutput += netWidth;
                }
            }
        }
    }

    cv::Mat img = srcImg.clone();
    vector<int> resultNMS;
    NMSBoxes(boxes, confidences, nmsScoreThreshold, nmsThreshold, resultNMS);
    Rect refineRect(0, 0, img.cols, img.rows);
    // std::vector<OutputDet> tempOutput;
    for (int i = 0; i < resultNMS.size(); ++i) {

        int ix = resultNMS[i];
        OutputDet predOut;
        predOut.cls = clsIDs[ix];
        predOut.confidence = confidences[ix];
        predOut.bndbox = boxes[ix] & refineRect;
        output.push_back(predOut);
    }
    if (output.size())
        return true;
    else
        return false;
}

void YoloDet::Visvalize(cv::Mat img, vector<OutputDet> predOuts) {

    for (int i = 0; i < predOuts.size(); i++) {
        int left, top;
        left = predOuts[i].bndbox.x;
        top = predOuts[i].bndbox.y;
        rectangle(img, predOuts[i].bndbox, colors[predOuts[i].cls], 2, 8);
        
        string label = classNames[predOuts[i].cls] + ":" + to_string(predOuts[i].confidence);

        int baseLine;
        Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
        top = max(top, labelSize.height);
        putText(img, label, Point(left, top), FONT_HERSHEY_SIMPLEX, 1, colors[predOuts[i].cls], 2);
        
    }

    // cv::namedWindow(kWinName, WINDOW_NORMAL);
    cv::imshow(kWinName, img);
    cv::waitKey(0);
    cv::destroyAllWindows();

}

//  不支持动态尺寸
int runner(std::string modelPath, std::string imgPath, std::string classFile){
    
    YoloDet detector;
    cv::dnn::Net net;
    if (detector.modelInit(net, modelPath, classFile, true)) {
        cout << "model loaded successfully!" << endl;
    }
    else {
        cout << "model loaded failed!" << endl;
        return -1;
    }

    std::vector<std::string> imgFiles;
    GetImgFilenames(imgPath, imgFiles);
    cout << imgFiles.size() << endl;
    for (int i = 0; i < imgFiles.size(); i++)
    {
        vector<OutputDet> predOuts;
        Mat img = imread(imgFiles[i]);
        if (detector.Detect(img, net, predOuts)) {
            detector.Visvalize(img, predOuts);

        }
        else {
            cout << "Detect Failed!" << endl;
        }

    }
    return 0;
    
}

int main() {
    std::string imgPath = "images/";
    std::string modelPath = "yolov5s.onnx";
    std::string classFile = "class.names";
    runner(modelPath, imgPath, classFile); 
    return 0;
}