TensorRT Deployment of YOLOv5 -- Dynamic Batch

For the full from-scratch tutorial on TensorRT-based YOLO deployment, see http://t.csdn.cn/HUn4T.

This article builds on that example and walks through it in detail from the multi-batch angle.

1. Multi-batch settings when building the TensorRT engine

Making full use of multi-batch inference can greatly speed up detection. For example, when running object detection on several video streams, we can grab a frame from each stream and send the images through the network together in a single batch.
In the code that builds the TensorRT engine, we add the following:

// Set up an optimization profile -- this is specific to dynamic batch
auto profile = builder->createOptimizationProfile();
auto input_tensor = network->getInput(0);
auto input_dims = input_tensor->getDimensions();

// Configure the minimum (kMIN), optimal (kOPT) and maximum (kMAX) ranges -- here these refer to the batch size
input_dims.d[0] = 1;  // set both the minimum and the optimal batch to 1
profile->setDimensions(input_tensor->getName(), nvinfer1::OptProfileSelector::kMIN, input_dims);
profile->setDimensions(input_tensor->getName(), nvinfer1::OptProfileSelector::kOPT, input_dims);
// set the maximum batch according to your own requirements
input_dims.d[0] = maxBatchSize;
profile->setDimensions(input_tensor->getName(), nvinfer1::OptProfileSelector::kMAX, input_dims);
// add this profile to the builder config
config->addOptimizationProfile(profile);
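
Note that optimization profiles only apply to a network created with the explicit-batch flag, and the batch dimension of the ONNX export must be dynamic (-1) for the profile to have any effect. As a minimal sketch of the network creation this snippet assumes (the full listing below passes the raw value 1, which sets the same bit):

// dynamic shapes require an explicit-batch network
auto explicitBatch = 1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
auto network = builder->createNetworkV2(explicitBatch);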

2. Inference-time code

At inference time, the hard part of multi-batch is allocating host and device memory and mapping each input image onto its designated slice of those buffers.
Setting the batch size:

int input_batch = 2;  // the batch size used for this inference
int input_channel = 3;
int input_height = 640;
int input_width = 640;
int input_numel = input_batch * input_channel * input_height * input_width;

float* input_data_host = nullptr;
float* input_data_device = nullptr;
// allocate matching buffers in pinned host memory and on the GPU
checkRuntime(cudaMallocHost(&input_data_host, input_numel * sizeof(float)));
checkRuntime(cudaMalloc(&input_data_device, input_numel * sizeof(float)));
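
One step that is easy to forget with a dynamic-batch engine: before enqueueing, the execution context must be told the concrete input shape for this run, otherwise enqueueV2 will fail. A short sketch, assuming the engine and execution_context objects created in the full listing below:

// replace the dynamic batch dimension (-1) with the actual batch for this inference
auto runtime_dims = engine->getBindingDimensions(0);   // e.g. [-1, 3, 640, 640]
runtime_dims.d[0] = input_batch;
execution_context->setBindingDimensions(0, runtime_dims);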

Copying the image data into the host buffer (the copy to device memory happens in the inference step below):

// grab one frame from each video stream
cap >> image;
cap1 >> image1;
// warpAffine performs the letterbox resize
cv::warpAffine(image,input_image,m2x3_i2d,input_image.size(),cv::INTER_LINEAR, cv::BORDER_CONSTANT, cv::Scalar::all(114)); // invertible translation + scaling of the image
cv::warpAffine(image1,input_image1,m2x3_i2d1,input_image1.size(),cv::INTER_LINEAR, cv::BORDER_CONSTANT, cv::Scalar::all(114));


int image_area = input_image.cols * input_image.rows;
int image_area1 = input_image1.cols * input_image1.rows;
// pointer to the pixel data of the first stream's frame
unsigned char *pimage = input_image.data;

float *phost_b = input_data_host + image_area * 0;
float *phost_g = input_data_host + image_area * 1;
float *phost_r = input_data_host + image_area * 2;
// copy the first frame into its slot of the input tensor
for (int i = 0; i < image_area; ++i, pimage += 3)
{
      // note: the channel order is swapped here (interleaved BGR -> planar RGB)
      *phost_r++ = pimage[0] / 255.0f;
      *phost_g++ = pimage[1] / 255.0f;
      *phost_b++ = pimage[2] / 255.0f;
}
// pointer to the pixel data of the second stream's frame
unsigned char *pimage1 = input_image1.data;
// once the first frame is in place, the second frame goes into the next slot, offset by one full image
auto input_data_host1 = input_data_host + image_area * 3;
float *phost_b1 = input_data_host1 + image_area1 * 0;
float *phost_g1 = input_data_host1 + image_area1 * 1;
float *phost_r1 = input_data_host1 + image_area1 * 2;
// copy the second frame into its slot of the input tensor
for (int i = 0; i < image_area1; ++i, pimage1 += 3)
{
      // note: the channel order is swapped here (interleaved BGR -> planar RGB)
      *phost_r1++ = pimage1[0] / 255.0f;
      *phost_g1++ = pimage1[1] / 255.0f;
      *phost_b1++ = pimage1[2] / 255.0f;
}
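
The two loops are identical apart from the destination offset, so for more than two streams it is cleaner to factor them into a helper. A sketch under the assumption that every frame has already been letterboxed to the same network input size (the helper name is ours, not from the original):

// copy one letterboxed BGR frame into the CHW slot for batch index b,
// converting interleaved BGR to planar RGB scaled to [0, 1]
static void bgr_to_planar_rgb(const cv::Mat& img, float* input_data_host, int b){
    int area = img.cols * img.rows;
    float* base    = input_data_host + (size_t)b * area * 3;
    float* plane_r = base + area * 0;
    float* plane_g = base + area * 1;
    float* plane_b = base + area * 2;
    const unsigned char* p = img.data;
    for(int i = 0; i < area; ++i, p += 3){
        *plane_r++ = p[2] / 255.0f;  // OpenCV stores BGR, so p[2] is R
        *plane_g++ = p[1] / 255.0f;
        *plane_b++ = p[0] / 255.0f;
    }
}
// usage: bgr_to_planar_rgb(input_image, input_data_host, 0);
//        bgr_to_planar_rgb(input_image1, input_data_host, 1);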

Allocating host and device buffers for the output:

int output_numel = input_batch * output_numbox * output_numprob;    // the output size scales with the batch size
float* output_data_host = nullptr;
float* output_data_device = nullptr;
checkRuntime(cudaMallocHost(&output_data_host, output_numel * sizeof(float)));
checkRuntime(cudaMalloc(&output_data_device, output_numel * sizeof(float)));
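
output_numbox and output_numprob come straight from the engine's output binding; for a standard 640x640 YOLOv5s export they are typically 25200 and 85 (4 box coordinates + 1 objectness + 80 class scores). The full listing reads them like this:

auto output_dims = engine->getBindingDimensions(1); // e.g. [-1, 25200, 85]
int output_numbox = output_dims.d[1];   // candidate boxes per image
int output_numprob = output_dims.d[2];  // 5 + num_classes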

Running inference:

checkRuntime(cudaMemcpyAsync(input_data_device, input_data_host, input_numel * sizeof(float), cudaMemcpyHostToDevice, stream));
float *bindings[] = {input_data_device, output_data_device};
bool success = execution_context->enqueueV2((void **)bindings, stream, nullptr);
checkRuntime(cudaMemcpyAsync(output_data_host, output_data_device, sizeof(float) * output_numel, cudaMemcpyDeviceToHost, stream));
checkRuntime(cudaStreamSynchronize(stream));
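
Because all three calls are issued on the same CUDA stream, they execute in order; the final cudaStreamSynchronize is what makes it safe for the CPU to read output_data_host.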

Handling the multi-batch network output:

float *ptr = output_data_host  + i_num * output_numbox * output_numprob + i * output_numprob;   // i_num indexes the batch dimension, i indexes the box
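
In other words, the output tensor is laid out as [batch, box, prob], so decoding image i_num just means offsetting the base pointer by i_num * output_numbox * output_numprob. A sketch of the resulting loop structure (the full listing below unrolls it for two streams):

for (int i_num = 0; i_num < input_batch; ++i_num)            // batch dimension
{
    for (int i = 0; i < output_numbox; ++i)                  // boxes of image i_num
    {
        float *ptr = output_data_host
                   + i_num * output_numbox * output_numprob  // this image's block
                   + i * output_numprob;                     // this box's row
        // decode ptr[0..3] = cx, cy, w, h; ptr[4] = objectness; ptr[5..] = class scores
    }
}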

The complete dynamic-batch code


// tensorRT include
// headers needed to build the engine
#include <NvInfer.h>

// ONNX parser header
//#include <onnx-tensorrt/NvOnnxParser.h>
#include <NvOnnxParser.h>
// runtime header used for inference
#include <NvInferRuntime.h>

// cuda include
#include <cuda_runtime.h>

// system include
#include <stdio.h>
#include <math.h>

#include <iostream>
#include <fstream>
#include <vector>
#include <memory>
#include <functional>
#include <unistd.h>

#include <opencv2/opencv.hpp>


using namespace std;

#define checkRuntime(op)  __check_cuda_runtime((op), #op, __FILE__, __LINE__)

bool __check_cuda_runtime(cudaError_t code, const char* op, const char* file, int line){
    if(code != cudaSuccess){
        const char* err_name = cudaGetErrorName(code);
        const char* err_message = cudaGetErrorString(code);
        printf("runtime error %s:%d  %s failed. \n  code = %s, message = %s\n", file, line, op, err_name, err_message);
        return false;
    }
    return true;
}

inline const char* severity_string(nvinfer1::ILogger::Severity t){
    switch(t){
    case nvinfer1::ILogger::Severity::kINTERNAL_ERROR: return "internal_error";
    case nvinfer1::ILogger::Severity::kERROR:   return "error";
    case nvinfer1::ILogger::Severity::kWARNING: return "warning";
    case nvinfer1::ILogger::Severity::kINFO:    return "info";
    case nvinfer1::ILogger::Severity::kVERBOSE: return "verbose";
    default: return "unknow";
    }
}

// labels of the COCO dataset; about COCO: https://cocodataset.org/#home
static const char* cocolabels[] = {
    "person", "bicycle", "car", "motorcycle", "airplane",
    "bus", "train", "truck", "boat", "traffic light", "fire hydrant",
    "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse",
    "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack",
    "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis",
    "snowboard", "sports ball", "kite", "baseball bat", "baseball glove",
    "skateboard", "surfboard", "tennis racket", "bottle", "wine glass",
    "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich",
    "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake",
    "chair", "couch", "potted plant", "bed", "dining table", "toilet", "tv",
    "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave",
    "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase",
    "scissors", "teddy bear", "hair drier", "toothbrush"
};

// HSV to BGR conversion
static std::tuple<uint8_t, uint8_t, uint8_t> hsv2bgr(float h, float s, float v){
    const int h_i = static_cast<int>(h * 6);
    const float f = h * 6 - h_i;
    const float p = v * (1 - s);
    const float q = v * (1 - f*s);
    const float t = v * (1 - (1 - f) * s);
    float r, g, b;
    switch (h_i) {
    case 0:r = v; g = t; b = p;break;
    case 1:r = q; g = v; b = p;break;
    case 2:r = p; g = v; b = t;break;
    case 3:r = p; g = q; b = v;break;
    case 4:r = t; g = p; b = v;break;
    case 5:r = v; g = p; b = q;break;
    default:r = 1; g = 1; b = 1;break;}
    return make_tuple(static_cast<uint8_t>(b * 255), static_cast<uint8_t>(g * 255), static_cast<uint8_t>(r * 255));
}

static std::tuple<uint8_t, uint8_t, uint8_t> random_color(int id){
    float h_plane = ((((unsigned int)id << 2) ^ 0x937151) % 100) / 100.0f;
    float s_plane = ((((unsigned int)id << 3) ^ 0x315793) % 100) / 100.0f;
    return hsv2bgr(h_plane, s_plane, 1);
}

class TRTLogger : public nvinfer1::ILogger{
public:
    virtual void log(Severity severity, nvinfer1::AsciiChar const* msg) noexcept override{
        if(severity <= Severity::kWARNING){
            // Print colored text with ANSI escape codes, for example:
            //   printf("\033[47;33mtext\033[0m");
            // \033[ starts the escape sequence, 47 is the background color,
            // ; is the separator, 33 is the foreground color, and m ends
            // the sequence; \033[0m resets the style. The background or
            // foreground color code may be omitted.
            // Some color codes: https://blog.csdn.net/ericbar/article/details/79652086
            if(severity == Severity::kWARNING){
                printf("\033[33m%s: %s\033[0m\n", severity_string(severity), msg);
            }
            else if(severity <= Severity::kERROR){
                printf("\033[31m%s: %s\033[0m\n", severity_string(severity), msg);
            }
            else{
                printf("%s: %s\n", severity_string(severity), msg);
            }
        }
    }
} logger;

// Manage the raw pointers returned by TensorRT through shared_ptr,
// so the memory is released automatically and cannot leak
template<typename _T>
shared_ptr<_T> make_nvshared(_T* ptr){
    return shared_ptr<_T>(ptr, [](_T* p){p->destroy();});
}

bool exists(const string& path){

#ifdef _WIN32
    return ::PathFileExistsA(path.c_str());
#else
    return access(path.c_str(), R_OK) == 0;
#endif
}

typedef std::function<void(
        int current, int count, const std::vector<std::string>& files,
        nvinfer1::Dims dims, float* ptensor
        )> Int8Process;

class Int8EntropyCalibrator : public nvinfer1::IInt8EntropyCalibrator2
{
public:
    Int8EntropyCalibrator(const vector<string>& imagefiles, nvinfer1::Dims dims, const Int8Process& preprocess) {
        assert(preprocess != nullptr);
        this->dims_ = dims;
        this->allimgs_ = imagefiles;
        this->preprocess_ = preprocess;
        this->fromCalibratorData_ = false;

        files_.resize(dims.d[0]);
    }

    // This constructor loads the calibration result from cached data,
    // so the calibration images do not need to be read and preprocessed again
    Int8EntropyCalibrator(const vector<uint8_t>& entropyCalibratorData, nvinfer1::Dims dims, const Int8Process& preprocess) {

        assert(preprocess != nullptr);
        this->dims_ = dims;
        this->entropyCalibratorData_ = entropyCalibratorData;
        this->preprocess_ = preprocess;
        this->fromCalibratorData_ = true;
        files_.resize(dims.d[0]);
    }

    virtual ~Int8EntropyCalibrator(){
        if(tensor_host_ != nullptr){
            checkRuntime(cudaFreeHost(tensor_host_));
            checkRuntime(cudaFree(tensor_device_));
            tensor_host_ = nullptr;
            tensor_device_ = nullptr;
        }
    }

    // the batch size to use during calibration
    int getBatchSize() const noexcept {
        return dims_.d[0];
    }

    bool next() {
        int batch_size = dims_.d[0];
        if (cursor_ + batch_size > allimgs_.size())
            return false;

        for(int i = 0; i < batch_size; ++i)
            files_[i] = allimgs_[cursor_++];

        if(tensor_host_ == nullptr){
            size_t volume = 1;
            for(int i = 0; i < dims_.nbDims; ++i)
                volume *= dims_.d[i];

            bytes_ = volume * sizeof(float);
            checkRuntime(cudaMallocHost(&tensor_host_, bytes_));
            checkRuntime(cudaMalloc(&tensor_device_, bytes_));
        }

        preprocess_(cursor_, allimgs_.size(), files_, dims_, tensor_host_);
        checkRuntime(cudaMemcpy(tensor_device_, tensor_host_, bytes_, cudaMemcpyHostToDevice));
        return true;
    }

    bool getBatch(void* bindings[], const char* names[], int nbBindings) noexcept {
        if (!next()) return false;
        bindings[0] = tensor_device_;
        return true;
    }

    const vector<uint8_t>& getEntropyCalibratorData() {
        return entropyCalibratorData_;
    }

    const void* readCalibrationCache(size_t& length) noexcept {
        if (fromCalibratorData_) {
            length = this->entropyCalibratorData_.size();
            return this->entropyCalibratorData_.data();
        }

        length = 0;
        return nullptr;
    }

    virtual void writeCalibrationCache(const void* cache, size_t length) noexcept {
        entropyCalibratorData_.assign((uint8_t*)cache, (uint8_t*)cache + length);
    }

private:
    Int8Process preprocess_;
    vector<string> allimgs_;
    size_t batchCudaSize_ = 0;
    int cursor_ = 0;
    size_t bytes_ = 0;
    nvinfer1::Dims dims_;
    vector<string> files_;
    float* tensor_host_ = nullptr;
    float* tensor_device_ = nullptr;
    vector<uint8_t> entropyCalibratorData_;
    bool fromCalibratorData_ = false;
};


// build code carried over from the base tutorial
bool build_model(){

    if(exists("yolov5s.trtmodel")){
        printf("yolov5s.trtmodel has exists.\n");
        return true;
    }

    // create the logger
    TRTLogger logger;

    // These are the basic components required.
    // The Builder is the entry point for model building: both the network's internal
    // TensorRT representation and the executable engine are produced by its member functions.
    auto builder = make_nvshared(nvinfer1::createInferBuilder(logger));
    // The BuilderConfig sets build parameters such as FP16 or INT8 mode; it works on top of the Builder.
    auto config = make_nvshared(builder->createBuilderConfig());
    // The Network is the computation graph, the most central module.
    // The argument 1 sets bit 0, i.e. NetworkDefinitionCreationFlag::kEXPLICIT_BATCH,
    // which dynamic shapes require.
    auto network = make_nvshared(builder->createNetworkV2(1));
    auto parser = make_nvshared(nvonnxparser::createParser(*network,logger));
    if(!parser->parseFromFile("yolov5s.onnx",1))   // the parser populates the network from the ONNX file; the second argument is the log verbosity level
    {
        printf("Failed to parse yolov5s.onnx\n");

        // note: several pointers have not been released on this path, so it leaks memory; a cleaner fix is left for later
        return false;
    }

    // multi-batch inference
    int maxBatchSize = 10;
    config->setMaxWorkspaceSize(1<<28);

    // if the model has multiple inputs, every input must be configured in the profile
    auto profile = builder->createOptimizationProfile();
    auto input_tensor = network->getInput(0);
    auto input_dims = input_tensor->getDimensions();


    config->setFlag(nvinfer1::BuilderFlag::kFP16);

    // configure the minimum, optimal and maximum batch range
    input_dims.d[0] = 1;
    profile->setDimensions(input_tensor->getName(), nvinfer1::OptProfileSelector::kMIN, input_dims);
    profile->setDimensions(input_tensor->getName(), nvinfer1::OptProfileSelector::kOPT, input_dims);
    input_dims.d[0] = maxBatchSize;
    profile->setDimensions(input_tensor->getName(), nvinfer1::OptProfileSelector::kMAX, input_dims);
    config->addOptimizationProfile(profile);

    auto engine = make_nvshared(builder->buildEngineWithConfig(*network, *config));
    if (engine == nullptr)
    {
        printf("Build engine failed.\n");
        return false;
    }

    // serialize the engine and save it to a file
    auto model_data = make_nvshared(engine->serialize());
    FILE *f = fopen("yolov5s.trtmodel", "wb");
    fwrite(model_data->data(), 1, model_data->size(), f);
    fclose(f);


    return true;
}

///

vector<unsigned char> load_file(const string& file){
    ifstream in(file, ios::in | ios::binary);
    if (!in.is_open())
        return {};

    in.seekg(0, ios::end);
    size_t length = in.tellg();

    std::vector<uint8_t> data;
    if (length > 0){
        in.seekg(0, ios::beg);
        data.resize(length);

        in.read((char*)&data[0], length);
    }
    in.close();
    return data;
}

void inference(){
    TRTLogger logger;
    auto engine_data = load_file("yolov5s.trtmodel");
    auto runtime = make_nvshared(nvinfer1::createInferRuntime(logger));
    // deserialize the engine
    auto engine = make_nvshared(runtime->deserializeCudaEngine(engine_data.data(),engine_data.size()));
    if(engine == nullptr)
    {
        printf("Deserialize cuda engine failed.\n");
        //        runtime->destroy();
        return;
    }
    if(engine->getNbBindings()!=2)
    {
        printf("Your ONNX export is wrong: it must have exactly 1 input and 1 output, but this engine has %d outputs.\n", engine->getNbBindings() - 1);
        return;
    }
    cudaStream_t stream = nullptr;
    checkRuntime(cudaStreamCreate(&stream));
    auto execution_context = make_nvshared(engine->createExecutionContext());
    int input_batch = 2;
    int input_channel = 3;
    int input_height = 640;
    int input_width = 640;
    int input_numel = input_batch * input_channel * input_height * input_width;

    float* input_data_host = nullptr;
    float* input_data_device = nullptr;
    // allocate matching buffers in pinned host memory and on the GPU
    checkRuntime(cudaMallocHost(&input_data_host,input_numel * sizeof(float)));
    checkRuntime(cudaMalloc(&input_data_device,input_numel * sizeof(float)));

    auto output_dims = engine->getBindingDimensions(1);
    int output_numbox = output_dims.d[1];   // number of candidate boxes (25200 for a 640x640 YOLOv5 input)
    int output_numprob = output_dims.d[2];  // 5 + num_classes (85 for COCO)
    int num_classes = output_numprob - 5;
    int output_numel = input_batch * output_numbox * output_numprob;
    float* output_data_host = nullptr;
    float* output_data_device = nullptr;
    checkRuntime(cudaMallocHost(&output_data_host, output_numel * sizeof(float)));
    checkRuntime(cudaMalloc(&output_data_device, output_numel * sizeof(float)));


    // specify the concrete input size used for the current inference
    auto input_dims = engine->getBindingDimensions(0);
    input_dims.d[0] = input_batch;

    execution_context->setBindingDimensions(0, input_dims);

    cv::VideoCapture cap("/media/***/晴.mp4");
    cv::VideoCapture cap1("/media/***/极弯场景.mp4");
    cv::Mat image;
    cv::Mat image1;
    cap>>image;
    cap1>>image1;
    // letterbox resize via bilinear interpolation: first compute the scale
    float scale_x = input_width / (float)image.cols;
    float scale_y = input_height / (float)image.rows;
    float scale = std::min(scale_x,scale_y);
    float i2d[6],d2i[6];
    // resize so that the geometric centers of the source and destination images align
    i2d[0] = scale;
    i2d[1] = 0;
    i2d[2] = (-scale * image.cols + input_width + scale - 1) * 0.5;
    i2d[3] = 0;
    i2d[4] = scale;
    i2d[5] = (-scale * image.rows + input_height + scale - 1) * 0.5;

    cv::Mat m2x3_i2d(2, 3, CV_32F, i2d);           // image to dst(network), 2x3 matrix
    cv::Mat m2x3_d2i(2, 3, CV_32F, d2i);           // dst to image, 2x3 matrix
    cv::invertAffineTransform(m2x3_i2d, m2x3_d2i); // compute the inverse affine transform
    cv::Mat input_image(input_height, input_width, CV_8UC3);
    cv::Mat input_image1(input_height, input_width, CV_8UC3);

    float scale_x1 = input_width / (float)image1.cols;
    float scale_y1 = input_height / (float)image1.rows;
    float scale1 = std::min(scale_x1,scale_y1);
    float i2d1[6],d2i1[6];
    // resize so that the geometric centers of the source and destination images align
    i2d1[0] = scale1;
    i2d1[1] = 0;
    i2d1[2] = (-scale1 * image1.cols + input_width + scale1 - 1) * 0.5;
    i2d1[3] = 0;
    i2d1[4] = scale1;
    i2d1[5] = (-scale1 * image1.rows + input_height + scale1 - 1) * 0.5;

    cv::Mat m2x3_i2d1(2, 3, CV_32F, i2d1);           // image to dst(network), 2x3 matrix
    cv::Mat m2x3_d2i1(2, 3, CV_32F, d2i1);           // dst to image, 2x3 matrix
    cv::invertAffineTransform(m2x3_i2d1, m2x3_d2i1); // compute the inverse affine transform

    while(1)
    {
        clock_t startTime, endTime;
        startTime = clock();
        cap >> image;
        cap1 >> image1;
        cv::warpAffine(image,input_image,m2x3_i2d,input_image.size(),cv::INTER_LINEAR, cv::BORDER_CONSTANT, cv::Scalar::all(114)); // letterbox: invertible translation + scaling
        cv::warpAffine(image1,input_image1,m2x3_i2d1,input_image1.size(),cv::INTER_LINEAR, cv::BORDER_CONSTANT, cv::Scalar::all(114));


        int image_area = input_image.cols * input_image.rows;
        int image_area1 = input_image1.cols * input_image1.rows;

        unsigned char *pimage = input_image.data;


        float *phost_b = input_data_host + image_area * 0;
        float *phost_g = input_data_host + image_area * 1;
        float *phost_r = input_data_host + image_area * 2;
        for (int i = 0; i < image_area; ++i, pimage += 3)
        {
            // note: the channel order is swapped here (interleaved BGR -> planar RGB)
            *phost_r++ = pimage[0] / 255.0f;
            *phost_g++ = pimage[1] / 255.0f;
            *phost_b++ = pimage[2] / 255.0f;
        }

        unsigned char *pimage1 = input_image1.data;
        auto input_data_host1 = input_data_host + image_area * 3;
        float *phost_b1 = input_data_host1 + image_area1 * 0;
        float *phost_g1 = input_data_host1 + image_area1 * 1;
        float *phost_r1 = input_data_host1 + image_area1 * 2;
        for (int i = 0; i < image_area1; ++i, pimage1 += 3)
        {
            // note: the channel order is swapped here (interleaved BGR -> planar RGB)
            *phost_r1++ = pimage1[0] / 255.0f;
            *phost_g1++ = pimage1[1] / 255.0f;
            *phost_b1++ = pimage1[2] / 255.0f;
        }
        ///
        std::cout << "预处理时间: " << (double)(clock() - startTime) / CLOCKS_PER_SEC << "s" << std::endl;

        auto startTime1 = clock();
        checkRuntime(cudaMemcpyAsync(input_data_device, input_data_host, input_numel * sizeof(float), cudaMemcpyHostToDevice, stream));
        float *bindings[] = {input_data_device, output_data_device};
        bool success = execution_context->enqueueV2((void **)bindings, stream, nullptr);
        checkRuntime(cudaMemcpyAsync(output_data_host, output_data_device, sizeof(float) * output_numel, cudaMemcpyDeviceToHost, stream));
        checkRuntime(cudaStreamSynchronize(stream));
        std::cout << "推理时间: " << (double)(clock() - startTime1) / CLOCKS_PER_SEC << "s" << std::endl;
        startTime1 = clock();
        // decode boxes: map the predictions back onto the original input image (box coordinates, class probabilities, confidence)
        vector<vector<float>> bboxes;
        float confidence_threshold = 0.25;
        float nms_threshold = 0.5;

        for (int i = 0; i < output_numbox; ++i)
        {
            // batch index 0: boxes for the first stream's frame
            float *ptr = output_data_host  + i * output_numprob;
            float objness = ptr[4];
            if (objness < confidence_threshold)
                continue;

            float *pclass = ptr + 5;
            int label = std::max_element(pclass, pclass + num_classes) - pclass;
            float prob = pclass[label];
            float confidence = prob * objness;
            if (confidence < confidence_threshold)
                continue;

            // center point, width, height
            float cx = ptr[0];
            float cy = ptr[1];
            float width = ptr[2];
            float height = ptr[3];

            // convert center/size to corner coordinates
            float left = cx - width * 0.5;
            float top = cy - height * 0.5;
            float right = cx + width * 0.5;
            float bottom = cy + height * 0.5;

            // map back onto the original image with the inverse transform of stream 0
            float image_base_left = d2i[0] * left + d2i[2];
            float image_base_right = d2i[0] * right + d2i[2];
            float image_base_top = d2i[0] * top + d2i[5];
            float image_base_bottom = d2i[0] * bottom + d2i[5];
            bboxes.push_back({image_base_left, image_base_top, image_base_right, image_base_bottom, (float)label, confidence});
        }
        //        printf("decoded bboxes.size = %d\n", bboxes.size());
        // NMS (non-maximum suppression)
        std::sort(bboxes.begin(), bboxes.end(), [](vector<float> &a, vector<float> &b)
        { return a[5] > b[5]; });
        std::vector<bool> remove_flags1(bboxes.size());
        std::vector<vector<float>> box_result1;
        box_result1.reserve(bboxes.size());

        auto iou1 = [](const vector<float> &a, const vector<float> &b)
        {
            float cross_left = std::max(a[0], b[0]);
            float cross_top = std::max(a[1], b[1]);
            float cross_right = std::min(a[2], b[2]);
            float cross_bottom = std::min(a[3], b[3]);

            float cross_area = std::max(0.0f, cross_right - cross_left) * std::max(0.0f, cross_bottom - cross_top);
            float union_area = std::max(0.0f, a[2] - a[0]) * std::max(0.0f, a[3] - a[1]) + std::max(0.0f, b[2] - b[0]) * std::max(0.0f, b[3] - b[1]) - cross_area;
            if (cross_area == 0 || union_area == 0)
                return 0.0f;
            return cross_area / union_area;
        };

        for (int i = 0; i < bboxes.size(); ++i)
        {
            if (remove_flags1[i])
                continue;

            auto &ibox = bboxes[i];
            box_result1.emplace_back(ibox);
            for (int j = i + 1; j < bboxes.size(); ++j)
            {
                if (remove_flags1[j])
                    continue;

                auto &jbox = bboxes[j];
                if (ibox[4] == jbox[4])
                {
                    // class matched
                    if (iou1(ibox, jbox) >= nms_threshold)
                        remove_flags1[j] = true;
                }
            }
        }
        //        printf("box_result.size = %d\n", box_result.size());

        for (int i = 0; i < box_result1.size(); ++i)
        {
            auto &ibox = box_result1[i];
            float left = ibox[0];
            float top = ibox[1];
            float right = ibox[2];
            float bottom = ibox[3];
            int class_label = ibox[4];
            float confidence = ibox[5];
            cv::Scalar color;
            tie(color[0], color[1], color[2]) = random_color(class_label);
            cv::rectangle(image, cv::Point(left, top), cv::Point(right, bottom), color, 3);

            auto name = cocolabels[class_label];
            auto caption = cv::format("%s %.2f", name, confidence);
            int text_width = cv::getTextSize(caption, 0, 1, 2, nullptr).width + 10;
            cv::rectangle(image, cv::Point(left - 3, top - 33), cv::Point(left + text_width, top), color, -1);
            cv::putText(image, caption, cv::Point(left, top - 5), 0, 1, cv::Scalar::all(0), 2, 16);
        }
        cv::namedWindow("图像坐标系0",0);
        cv::imshow("图像坐标系0", image);
        cv::waitKey(1);
        bboxes.clear();
        for (int i = 0; i < output_numbox; ++i)
        {
            // batch index 1: offset by 1 * output_numbox * output_numprob to reach the second image's block
            float *ptr = output_data_host  + 1 * output_numbox * output_numprob + i * output_numprob;
            float objness = ptr[4];
            if (objness < confidence_threshold)
                continue;

            float *pclass = ptr + 5;
            int label = std::max_element(pclass, pclass + num_classes) - pclass;
            float prob = pclass[label];
            float confidence = prob * objness;
            if (confidence < confidence_threshold)
                continue;

            // center point, width, height
            float cx = ptr[0];
            float cy = ptr[1];
            float width = ptr[2];
            float height = ptr[3];

            // convert center/size to corner coordinates
            float left = cx - width * 0.5;
            float top = cy - height * 0.5;
            float right = cx + width * 0.5;
            float bottom = cy + height * 0.5;

            // map back onto the original image with the inverse transform of stream 1
            float image_base_left = d2i1[0] * left + d2i1[2];
            float image_base_right = d2i1[0] * right + d2i1[2];
            float image_base_top = d2i1[0] * top + d2i1[5];
            float image_base_bottom = d2i1[0] * bottom + d2i1[5];
            bboxes.push_back({image_base_left, image_base_top, image_base_right, image_base_bottom, (float)label, confidence});
        }
        //        printf("decoded bboxes.size = %d\n", bboxes.size());

        // NMS (non-maximum suppression)
        std::sort(bboxes.begin(), bboxes.end(), [](vector<float> &a, vector<float> &b)
        { return a[5] > b[5]; });
        std::vector<bool> remove_flags(bboxes.size());
        std::vector<vector<float>> box_result;
        box_result.reserve(bboxes.size());

        auto iou = [](const vector<float> &a, const vector<float> &b)
        {
            float cross_left = std::max(a[0], b[0]);
            float cross_top = std::max(a[1], b[1]);
            float cross_right = std::min(a[2], b[2]);
            float cross_bottom = std::min(a[3], b[3]);

            float cross_area = std::max(0.0f, cross_right - cross_left) * std::max(0.0f, cross_bottom - cross_top);
            float union_area = std::max(0.0f, a[2] - a[0]) * std::max(0.0f, a[3] - a[1]) + std::max(0.0f, b[2] - b[0]) * std::max(0.0f, b[3] - b[1]) - cross_area;
            if (cross_area == 0 || union_area == 0)
                return 0.0f;
            return cross_area / union_area;
        };

        for (int i = 0; i < bboxes.size(); ++i)
        {
            if (remove_flags[i])
                continue;

            auto &ibox = bboxes[i];
            box_result.emplace_back(ibox);
            for (int j = i + 1; j < bboxes.size(); ++j)
            {
                if (remove_flags[j])
                    continue;

                auto &jbox = bboxes[j];
                if (ibox[4] == jbox[4])
                {
                    // class matched
                    if (iou(ibox, jbox) >= nms_threshold)
                        remove_flags[j] = true;
                }
            }
        }
        //        printf("box_result.size = %d\n", box_result.size());

        for (int i = 0; i < box_result.size(); ++i)
        {
            auto &ibox = box_result[i];
            float left = ibox[0];
            float top = ibox[1];
            float right = ibox[2];
            float bottom = ibox[3];
            int class_label = ibox[4];
            float confidence = ibox[5];
            cv::Scalar color;
            tie(color[0], color[1], color[2]) = random_color(class_label);
            cv::rectangle(image1, cv::Point(left, top), cv::Point(right, bottom), color, 3);

            auto name = cocolabels[class_label];
            auto caption = cv::format("%s %.2f", name, confidence);
            int text_width = cv::getTextSize(caption, 0, 1, 2, nullptr).width + 10;
            cv::rectangle(image1, cv::Point(left - 3, top - 33), cv::Point(left + text_width, top), color, -1);
            cv::putText(image1, caption, cv::Point(left, top - 5), 0, 1, cv::Scalar::all(0), 2, 16);
        }
        cv::namedWindow("图像坐标系1",0);
        cv::imshow("图像坐标系1", image1);
        cv::waitKey(1);
        std::cout << "后处理时间: " << (double)(clock() - startTime1) / CLOCKS_PER_SEC << "s" << std::endl;
        endTime = clock(); //计时结束
        std::cout << "预处理+推理+后处理的总时间: " << (double)(endTime - startTime) / CLOCKS_PER_SEC << "s" << std::endl;
    }
    // cv::imwrite("image-draw.jpg", image);
    checkRuntime(cudaStreamDestroy(stream));
    checkRuntime(cudaFreeHost(input_data_host));
    checkRuntime(cudaFreeHost(output_data_host));
    checkRuntime(cudaFree(input_data_device));
    checkRuntime(cudaFree(output_data_device));




}

int main(){
    if(!build_model()){
        return -1;
    }
    inference();
    return 0;
}

