【Yolo 视频和摄像头推理】C++ TensorRT

Z-ChuaN

已于 2023-11-09 18:27:57 修改

阅读量1.4k

点赞数 6

文章标签： YOLO 音视频 c++

于 2023-10-08 16:42:00 首次发布

本文链接：https://blog.csdn.net/qq_38276941/article/details/133681589

版权

文章目录

概要

上篇介绍了使用C++ TensorRT模块进行yoloV5的图片推理，接下来进一步介绍处理视频和摄像头的细节。实现方式有很多种，可选取有用部分进行参考。
系统：linux Ubuntu20.04
编译：Vscode + Cmake
完整项目代码已上传至https://github.com/ZhengChuan-1/YoloV5-TensorRT-inference
更多细节基础部分请参考上篇写的C++ TensorRT图片推理

整体方案

opencv视频和摄像头的使用方法

使用 cv::VideoCapture 打开输入源：处理视频时传入视频文件的路径；使用摄像头时传入摄像头的设备路径（我的是 /dev/video0），有些环境下直接传入设备索引 0 即可。

// Input source: path of a local video file.
string video_path = "/home/zc/C++_TensorRT_inference/sample_video/video/1.mp4"; 
// Alternative input source: a camera device node (enable this line instead).
//string video_path = "/dev/video0";
// Open the chosen source; frames are read from `cap` afterwards.
cv::VideoCapture cap(video_path);

opencv读取和写入视频

创建一个cv::VideoCapture类对象cap读取视频，cv::VideoWriter类对象write写入处理后视频；使用read方法读取视频并把每一帧图像放进frame，write方法把处理后的每帧图像frame写入；
由以下代码可以看出，我们只需把推理代码放入读取和写入之间，便完成整个视频的目标检测。

// Skeleton of the read -> process -> write loop; the inference code goes
// between cap.read() and write.write().
string video_path = "视频地址";
string save_path = "保存地址";
cv::VideoCapture cap(video_path);
// Query the source properties so the output file matches the input.
int frame_width = cap.get(cv::CAP_PROP_FRAME_WIDTH);
int frame_height = cap.get(cv::CAP_PROP_FRAME_HEIGHT);
int frame_count = cap.get(cv::CAP_PROP_FRAME_COUNT);
double fps = cap.get(cv::CAP_PROP_FPS);
// Writer arguments: output path, fourcc codec, fps, frame size, is-color flag.
cv::VideoWriter write(save_path, cv::VideoWriter::fourcc('m', 'p', '4', 'v'), fps, cv::Size(frame_width, frame_height), true);
cv::Mat frame;
while(true){
        int ret = cap.read(frame);  // frame holds the current video frame
        if(!ret){break;}            // stop when no frame could be read


//------------------------------------------------------

//------------- run the YOLO prediction for each frame here -----------------

//------------------------------------------------------


        write.write(frame);          // append the processed frame to the output video
        cv::imshow("frame", frame);  // show the processed frame in a window

        int c = cv::waitKey(1);
		if (c == 27) {break;}    // leave the loop when ESC (key code 27) is pressed

    }

技术细节

在while循环中添加推理代码与图片推理有一些不同，如果把模型初始化定义和开辟空间的部分一同放进去，会造成一直循环重复多余工作导致运行速度下降，严重情况会一直开辟不同的空间造成电脑内存资源爆满。

因此我把①模型初始化部分：读取trt信息，定义engine引擎的一些对象 ②开辟空间部分：new，malloc，cudaMalloc等
放在while循环之外，只做一次这样的工作。
只需把处理图像的context->enqueueV2相关部分和后处理程序放进循环里。

完整代码如下所示：

#include<fstream>  
#include<iostream> 
#include<opencv2/opencv.hpp>
#include <opencv2/core/core.hpp>
#include <opencv2/imgproc.hpp>
#include "processing.hpp"

#include "NvInfer.h"
#include "logging.h"
using namespace nvinfer1;
using namespace std;


// Logger handed to the TensorRT runtime. A message is written to std::cerr
// when its severity value is less than or equal to the threshold chosen at
// construction (kWARNING by default); every other message is dropped.
// NOTE(review): in TensorRT's Severity enum lower values are presumably the
// more severe ones - confirm against the ILogger documentation.
class MyLogger : public nvinfer1::ILogger {
 public:
  explicit MyLogger(nvinfer1::ILogger::Severity severity =
                        nvinfer1::ILogger::Severity::kWARNING)
      : severity_(severity) {}

  // Callback invoked by TensorRT for every log message.
  void log(nvinfer1::ILogger::Severity severity,
           const char *msg) noexcept override {
    const bool above_threshold = severity > severity_;
    if (above_threshold) {
      return;
    }
    std::cerr << msg << std::endl;
  }

  // Threshold compared against each incoming message's severity.
  nvinfer1::ILogger::Severity severity_;
};

int main(int argc, char* argv[])
{
    string video_path = argv[1];
    string save_path = argv[2];
//-------------------------- 一、定义推理模型------------------------------------------
    MyLogger logger;
    //读取trt信息
    const std::string engine_file_path = "/home/zc/C++_TensorRT_inference/sample_video/yolov5s.trt";  //填写自己trt文件路径(需要绝对路径)
    std::stringstream engine_file_stream;
    engine_file_stream.seekg(0, engine_file_stream.beg);  //从起始位置偏移0个字节，指针移动到文件流的开头
    std::ifstream ifs(engine_file_path);
    engine_file_stream << ifs.rdbuf();
    ifs.close();

    engine_file_stream.seekg(0, std::ios::end);         //先把文件输入流指针定位到文档末尾来获取文档的长度
    const int model_size = engine_file_stream.tellg();  //获取文件流的总长度
    engine_file_stream.seekg(0, std::ios::beg);
    void *model_mem = malloc(model_size);               //开辟一样长的空间
    engine_file_stream.read(static_cast<char *>(model_mem), model_size);    //将内容读取到model_mem中

    nvinfer1::IRuntime *runtime = nvinfer1::createInferRuntime(logger);
    nvinfer1::ICudaEngine *engine = runtime->deserializeCudaEngine(model_mem, model_size);

    free(model_mem);

    nvinfer1::IExecutionContext *context = engine->createExecutionContext();

    void *buffers[2];
    // 获取模型输入尺寸并分配GPU内存
    nvinfer1::Dims input_dim = engine->getBindingDimensions(0);
    int input_size = 1;
    for (int j = 0; j < input_dim.nbDims; ++j) {
        input_size *= input_dim.d[j];
    }
    cudaMalloc(&buffers[0], input_size * sizeof(float));

    // 获取模型输出尺寸并分配GPU内存
    nvinfer1::Dims output_dim = engine->getBindingDimensions(1);
    int output_size = 1;
    for (int j = 0; j < output_dim.nbDims; ++j) {
        output_size *= output_dim.d[j];
    }
    cudaMalloc(&buffers[1], output_size * sizeof(float));

    // 给模型输出数据分配相应的CPU内存
    float *output_buffer = new float[output_size];

    cudaStream_t stream;
    cudaStreamCreate(&stream);
//-----------------------------------------------------------------------

    const int model_width = 640;
    const int model_height = 640;
    float* input_blob = new float[model_height * model_width * 3];

    cv::VideoCapture cap(video_path);

    int frame_width = cap.get(cv::CAP_PROP_FRAME_WIDTH);
	int frame_height = cap.get(cv::CAP_PROP_FRAME_HEIGHT);
	int frame_count = cap.get(cv::CAP_PROP_FRAME_COUNT);
	double fps = cap.get(cv::CAP_PROP_FPS);
    //第1个参数 视频文件路径；第2个参数 视频编码方式（我们可以通过VideoCapture::get(CAP_PROP_FOURCC)获得）；第3个参数 fps；第4个参数 size；第5个参数 是否为彩色
    //cv::VideoWriter write(save_path, cap.get(cv::CAP_PROP_FOURCC), fps, cv::Size(frame_width, frame_height), true);
    cv::VideoWriter write(save_path, cv::VideoWriter::fourcc('m', 'p', '4', 'v'), fps, cv::Size(frame_width, frame_height), true);
    cv::Mat frame;
    cv::Mat resize_image;
    while(true){
        int ret = cap.read(frame);  //frame就是每一帧图片
        if(!ret){break;}
        
//----------------- 二、图像预处理  --------------------------------------
        const float ratio = std::min(model_width / (frame.cols * 1.0f),
                                model_height / (frame.rows * 1.0f));
        // 等比例缩放
        const int border_width = frame.cols * ratio;
        const int border_height = frame.rows * ratio;
        // 计算偏移值
        const int x_offset = (model_width - border_width) / 2;
        const int y_offset = (model_height - border_height) / 2;

        //将输入图像缩放至resize_image
        cv::resize(frame, resize_image, cv::Size(border_width, border_height));
        //复制图像并且制作边界
        cv::copyMakeBorder(resize_image, resize_image, y_offset, y_offset, x_offset,
                            x_offset, cv::BORDER_CONSTANT, cv::Scalar(114, 114, 114));
        // 转换为RGB格式
        cv::cvtColor(resize_image, resize_image, cv::COLOR_BGR2RGB);
        //归一化
        normalization(resize_image, input_blob);
//---------------------------------------------------------------------

//-------------三、往engine引擎投入预处理后图像----------------------------
        // 拷贝输入数据至GPU
        cudaMemcpyAsync(buffers[0], input_blob, input_size * sizeof(float),
                        cudaMemcpyHostToDevice, stream);
        // 执行推理
        if(!context->enqueueV2(buffers, stream, nullptr))
        {
            cout << "enqueueV2执行推理失败" << endl;
            return false;
        }
        // 拷贝输出数据至CPU
        cudaMemcpyAsync(output_buffer, buffers[1],output_size * sizeof(float),
                        cudaMemcpyDeviceToHost, stream);
        // 使同步
        cudaStreamSynchronize(stream);
//---------------------------------------------------------------------

//---------------四、预测结果(output_buffer)后处理------------------------
        postprocessing(output_buffer, frame, x_offset, y_offset, ratio);
//---------------------------------------------------------------------

        write.write(frame);
        cv::imshow("frame", frame);

        int c = cv::waitKey(1);
		if (c == 27) {break;}    //判断若按ESC键退出循环

    }

    delete context;
    delete engine;
    delete runtime;
    delete[] input_blob;

    cap.release();
    write.release();

    return 0;
}

附上头文件 "processing.hpp"：

#include <iostream>
#include <vector>
#include <list>
#include<opencv2/opencv.hpp>
#include <opencv2/core/core.hpp>
#include <opencv2/imgproc.hpp>
#include <iomanip>  //保留小数
using namespace std;

// Class names indexed by label id (80 entries); used to build the text drawn
// next to each detection box. Entries must not contain extra quote characters,
// otherwise the quotes are rendered on the image as part of the label.
// NOTE: this is a definition inside a header, so the header should be included
// from a single translation unit only.
std::string names[] = {"person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
        "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
        "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
        "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
        "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
        "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
        "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
        "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
        "hair drier", "toothbrush"};

// Converts an 8-bit interleaved image into a planar float blob.
// For every channel ch, row y and column x the value
// resize_image.at<cv::Vec3b>(y, x)[ch] / 255 is stored at
// input_blob[ch * cols * rows + y * cols + x].
// The caller must provide room for channels * rows * cols floats.
void normalization(cv::Mat &resize_image, float* input_blob)
{
    const int rows = resize_image.rows;
    const int cols = resize_image.cols;
    const int channel_count = resize_image.channels();

    for (int ch = 0; ch < channel_count; ++ch) {
        // Start of the output plane that belongs to this channel.
        float *plane = input_blob + ch * cols * rows;
        for (int y = 0; y < rows; ++y) {
            float *dst_row = plane + y * cols;
            for (int x = 0; x < cols; ++x) {
                const cv::Vec3b &pixel = resize_image.at<cv::Vec3b>(y, x);
                dst_row[x] = pixel[ch] / 255.0f;
            }
        }
    }
}

// Axis-aligned rectangle. postprocessing() fills x/y with the top-left corner
// and width/height with the box size, all in source-image pixels.
struct BOX
{
    float x;
    float y;
    float width;
    float height;
};

// One detection: its rectangle, class index and confidence score.
struct Object
{
    BOX box;
    int label;
    float confidence;
};

// Ordering predicate: true when obj1 is more confident than obj2, so sorting
// with it yields descending confidence. Takes const references so it can also
// be used on const objects.
inline bool cmp(const Object &obj1, const Object &obj2){
    return obj1.confidence > obj2.confidence;
}

// Per-class non-maximum suppression.
//
// objs       detections of any class, in any order.
// iou_thres  a box is dropped when its IoU with an already kept, more
//            confident box of the same class is greater than this value.
//
// Returns one list per class that has at least one detection, ordered by
// ascending class label; each list is sorted by descending confidence.
// Detections whose label is outside [0, 80) are ignored. Boxes that do not
// overlap a kept box are always retained.
inline std::vector<std::list<Object>> NMS(std::vector<Object> objs, float iou_thres = 0.45){
    const int kNumClasses = 80;

    // Step 1: group the detections by class label.
    std::vector<std::list<Object>> per_class(kNumClasses);
    for (const Object &obj : objs) {
        if (obj.label >= 0 && obj.label < kNumClasses) {
            per_class[obj.label].push_back(obj);
        }
    }

    std::vector<std::list<Object>> NMS_List;
    for (std::list<Object> &candidates : per_class) {
        if (candidates.empty()) {
            continue;
        }
        // Sort once per class, most confident first (list::sort is stable).
        candidates.sort(cmp);

        // Step 2: every surviving box suppresses the less confident boxes
        // after it that overlap it too much.
        for (std::list<Object>::iterator keep = candidates.begin();
             keep != candidates.end(); ++keep) {
            const float x1 = keep->box.x;
            const float y1 = keep->box.y;
            const float x1_right = x1 + keep->box.width;
            const float y1_bottom = y1 + keep->box.height;
            const float area1 = (x1_right - x1) * (y1_bottom - y1);

            std::list<Object>::iterator other = keep;
            ++other;
            while (other != candidates.end()) {
                const float x2 = other->box.x;
                const float y2 = other->box.y;
                const float x2_right = x2 + other->box.width;
                const float y2_bottom = y2 + other->box.height;

                // Intersection rectangle: max of the top-left corners,
                // min of the bottom-right corners.
                const float inter_w = std::min(x1_right, x2_right) - std::max(x1, x2);
                const float inter_h = std::min(y1_bottom, y2_bottom) - std::max(y1, y2);

                // Disjoint boxes have an IoU of 0 and must be kept.
                float iou = 0.0f;
                if (inter_w > 0 && inter_h > 0) {
                    const float area2 = (x2_right - x2) * (y2_bottom - y2);
                    const float inter_area = inter_w * inter_h;
                    iou = inter_area / (area1 + area2 - inter_area);
                }

                if (iou > iou_thres) {
                    other = candidates.erase(other);  // erase returns the next element
                } else {
                    ++other;
                }
            }
        }
        NMS_List.push_back(std::move(candidates));
    }

    return NMS_List;
}

void postprocessing(float* output_buffer, cv::Mat input_image, int x_offset,int y_offset, float ratio)
{
    //1.输出结果output_buffer，放入objs  xywh为中心点坐标 和宽高
    float *ptr = output_buffer;
    std::vector<Object> objs;
    for (int i = 0; i < 25200; ++i) {
        const float objectness = ptr[4];
        if (objectness >= 0.45f) {
            const int label = std::max_element(ptr + 5, ptr + 85) - (ptr + 5);  //std::max_element返回范围内的最大元素
            const float confidence = ptr[5 + label] * objectness;
            if (confidence >= 0.25f) {
                const float bx = ptr[0];
                const float by = ptr[1];
                const float bw = ptr[2];
                const float bh = ptr[3];

                Object obj;
                // 还原图像尺寸中box的尺寸比例，这里要减掉偏移值，并把box中心点坐标xy转成左上角坐标xy
                obj.box.x = (bx - bw * 0.5f - x_offset) / ratio;
                obj.box.y = (by - bh * 0.5f - y_offset) / ratio;
                obj.box.width = bw / ratio;
                obj.box.height = bh / ratio;
                obj.label = label;
                obj.confidence = confidence;
                objs.push_back(std::move(obj));
                }
        }
        ptr += 85;
    }  // i loop

// 2.NMS非极大值抑制
    vector<list<Object>> finalll = NMS(objs);

// 3.画框
    int row = finalll.size();
    for(int i = 0; i < row; i++){
        list<Object>::iterator it = finalll[i].begin();
        while(it != finalll[i].end()){
            cv::Point topLeft(it->box.x, it->box.y);
            cv::Point bottomRight(it->box.x + it->box.width, it->box.y + it->box.height);
            cv::rectangle(input_image, topLeft, bottomRight, cv::Scalar(0, 0, 255), 2);
            std::stringstream buff;
            buff.precision(2);  //覆盖默认精度,置信度保留2位小数
            buff.setf(std::ios::fixed);
            buff << it->confidence;
            string text =names[it->label] + " " + buff.str();
            cv::putText(input_image, text, topLeft, 0, 1, cv::Scalar(0, 255, 0), 2);
            it++;
        }
    }
}

编译使用

由于我用的是main(int argc, char* argv[])命令行输入参数的方法，所以编译完成后，运行可执行文件需要在后面添加源视频地址、保存地址。
使用视频命令行执行：
在这里插入图片描述
使用摄像头命令行执行：
输出结果：

可能出现的问题

执行过程中，可能出现如下问题：
在这里插入图片描述
这是因为定义 cv::VideoWriter write 时第二个参数（视频编码方式）不合适导致的，把它替换成如下参数即可。

// Variant that reuses the codec reported by the input (the one being replaced):
//cv::VideoWriter write(save_path, cap.get(cv::CAP_PROP_FOURCC), fps, cv::Size(frame_width, frame_height), true);
// Replacement: request the 'mp4v' codec explicitly.
cv::VideoWriter write(save_path, cv::VideoWriter::fourcc('m', 'p', '4', 'v'), fps, cv::Size(frame_width, frame_height), true);

小结

本文介绍了使用yolov5进行C++视频和摄像头的使用，难点在于循环处理每帧图片时尽量把不相关的部分放在循环外避免资源使用过多。

Z-ChuaN

关注

6
点赞
踩
27

收藏

觉得还不错? 一键收藏
4
评论
【Yolo 视频和摄像头推理】C++ TensorRT

在while循环中添加推理代码与图片推理有一些不同，如果把模型初始化定义和开辟空间的部分一同放进去，会造成一直循环重复多余工作导致运行速度下降，严重情况会一直开辟不同的空间造成电脑内存资源爆满。使用cv::VideoCapture的方法，使用视频则放入视频的地址；本文介绍了使用yolov5进行C++视频和摄像头的使用，难点在于循环处理每帧图片时尽量把不相关的部分放在循环外避免资源使用过多。由以下代码可以看出，我们只需把推理代码放入读取和写入之间，便完成整个视频的目标检测。更多细节基础部分请参考上篇写的。
复制链接

扫一扫