Cpp DenseNet Tensorrt部署主体

Cpp DenseNet Tensorrt辅助函数

Python版本导引

Cpp OpenVINO版本导引

CMake工程示例

测试性质的工程结构如下:

Project:

DenseGradeWrapper

肉眼可见的,Trt版本的执行代码,就会比OpenVINO复杂很多

  1. 需要创建运行时,手动实现Logger
  2. 模型读取方式需要手动使用io类
  3. 手动读取模型输入输出,并分配输入的host和device以及输出的host和device
  4. 推理时需要手动完成数据传输,触发函数后需要手动完成数据拷贝回主机的操作

相对的,Trt给了更多的可操作空间,执行速度也是同水平最好的一档,请各位开发者按照自己的需求进行选择、二次开发

#include "DenseGradeWrapper.h"

// Constructs the wrapper: deserializes a TensorRT engine from `enginePath`
// and allocates the host/device buffers needed for inference.
//
// NOTE(review): member initialization follows the *declaration* order in the
// class header, not the order of this init list. `runtime` is built from
// `this->logger` — confirm `logger` is declared before `runtime` in the
// header, otherwise an uninitialized logger is handed to TensorRT.
DenseGradeWrapper::DenseGradeWrapper(QString enginePath, QObject *parent)
    : QObject{parent}, runtime(nvinfer1::createInferRuntime(this->logger)),
    logger(nvinfer1::ILogger::Severity::kVERBOSE)
{
    // De-serialize engine from file
    std::ifstream engineFile(enginePath.toStdString(), std::ios::binary);
    if (engineFile.fail())
    {
        // Fail loudly instead of silently leaving `engine` null.
        qCritical() << "ERROR: failed to open engine file" << enginePath;
        return;
    }

    engineFile.seekg(0, std::ifstream::end);
    const auto fsize = engineFile.tellg();
    engineFile.seekg(0, std::ifstream::beg);

    std::vector<char> engineData(static_cast<size_t>(fsize));
    engineFile.read(engineData.data(), fsize);
    if (!engineFile)
    {
        qCritical() << "ERROR: failed to read engine file" << enginePath;
        return;
    }

    // load to engine — assert() vanishes in release builds, so also report
    // the failure explicitly.
    this->engine.reset(runtime->deserializeCudaEngine(engineData.data(), fsize));
    if (this->engine == nullptr)
    {
        qCritical() << "ERROR: engine deserialization failed";
        return;
    }

    // allocate host and device memory
    this->allocateBuffers();
}

// Runs the full pipeline for one image: preprocess -> network forward pass
// -> softmax + argmax post-processing. Logs the CPU-clock cost of each stage.
DenseInferResult DenseGradeWrapper::infer(QString imageSrc){
    double stageStart = (double) clock();
    cv::Mat transDim = this->preprocess(imageSrc);
    double stageEnd = (double) clock();
    qDebug() << "Preprocess Done, cost " << (stageEnd - stageStart) / (CLOCKS_PER_SEC) << " s";

    stageStart = (double) clock();
    QVector<float> buffer = this->_infer(transDim);
    stageEnd = (double) clock();
    qDebug() << "Infer Done, cost " << (stageEnd - stageStart) / (CLOCKS_PER_SEC) << " s";

    stageStart = (double) clock();
    this->softmax(buffer);
    DenseInferResult res = this->postProcess(buffer);
    stageEnd = (double) clock();
    qDebug() << "Postprocess Done, cost " << (stageEnd - stageStart) / (CLOCKS_PER_SEC) << " s";

    return res;
}

// Executes one forward pass on the GPU.
// `transDim` is the float blob produced by preprocess(); returns the raw
// 5-class logits, or a single-element {0} vector on any CUDA/TensorRT error.
QVector<float> DenseGradeWrapper::_infer(cv::Mat transDim){
    // Number of output classes produced by the grading head.
    constexpr int kNumClasses = 5;

    // copy host data to device (the original comment had the direction reversed)
    if(cudaMemcpyAsync(inputs[0], transDim.data, transDim.total() * transDim.elemSize(), cudaMemcpyHostToDevice, this->stream) != cudaSuccess){
        qCritical() << "copy cv data to device failed";
        return {0};
    }

    // bind every IO tensor address to the execution context
    for(int32_t i = 0; i < this->engine->getNbIOTensors(); i++){
        this->context->setTensorAddress(this->engine->getIOTensorName(i), bindings[i]);
    }

    // exec — enqueueV3 signals failure through its return value
    if(!this->context->enqueueV3(this->stream)){
        qCritical() << "enqueueV3 failed";
        return {0};
    }

    // copy result back to host (same stream, so it is ordered after the kernel)
    float outputBuffer[kNumClasses];
    if(cudaMemcpyAsync(outputBuffer, this->outputs[0], kNumClasses * sizeof(float), cudaMemcpyDeviceToHost, this->stream) != cudaSuccess){
        qCritical() << "copy device data to host failed";
        return {0};
    }

    // wait for all async work before reading outputBuffer on the host
    cudaStreamSynchronize(this->stream);
    return QVector<float>(outputBuffer, (outputBuffer + kNumClasses));
}


// In-place softmax over `buffer`.
// Subtracts the max logit before exponentiating: without it, std::exp of a
// large logit overflows to inf and the division yields NaN probabilities.
// The shift cancels in the ratio, so results are mathematically unchanged.
void DenseGradeWrapper::softmax(QVector<float> &buffer){
    if(buffer.isEmpty()){
        return;  // avoid 0/0 on an empty vector
    }

    const float maxLogit = *std::max_element(buffer.begin(), buffer.end());

    float denominator = 0.0f;
    for(int i = 0; i < buffer.size(); i++){   // int index: QVector::size() is signed
        buffer[i] = std::exp(buffer[i] - maxLogit);
        denominator += buffer[i];
    }

    for(int i = 0; i < buffer.size(); i++){
        buffer[i] /= denominator;
    }
}

// Converts the probability vector into the result struct:
// `index` = argmax class, `pie` = the per-class probabilities, copied verbatim.
DenseInferResult DenseGradeWrapper::postProcess(const QVector<float> &buffer){
    // std::distance gives the argmax position directly — avoids the second
    // linear scan (and float-equality lookup) that indexOf() performed.
    const auto maxIt = std::max_element(buffer.begin(), buffer.end());
    DenseInferResult result;
    result.index = static_cast<int>(std::distance(buffer.begin(), maxIt));
    // NOTE(review): assumes buffer.size() does not exceed the capacity of
    // result.pie (5 floats elsewhere in this file) — confirm for other models.
    std::copy(buffer.begin(), buffer.end(), result.pie);
    return result;
}

// Builds the network input blob: circle-crop the image, wrap it as a
// single-image batch, scale pixels by 1/255 and swap channel order (RB swap).
cv::Mat DenseGradeWrapper::preprocess(QString imageSrc){
    std::vector<cv::Mat> batch{ this->circleCrop(imageSrc) };
    const double pixelScale = 1.0 / 255.0;
    return cv::dnn::blobFromImages(batch, pixelScale, cv::Size(),
                                   cv::Scalar(), true);
}

// Crops the region of interest via the mask-based tool, then applies the
// Gaussian-based processing step on top of that crop.
cv::Mat DenseGradeWrapper::circleCrop(QString imageSrc){
    return NetworkTool::ToolCropWithGaussian(
        NetworkTool::ToolCropImageFromMask(imageSrc));
}



void DenseGradeWrapper::allocateBuffers(){
    // create stream
    if(cudaStreamCreate(&this->stream) != cudaSuccess){
        qCritical() << "ERROR: cuda stream allocation failed";
        return ;
    }

    // create context
    context = std::unique_ptr<nvinfer1::IExecutionContext>(this->engine->createExecutionContext());

    // create memory
    for(size_t i = 0; i < this->engine->getNbIOTensors(); i++){
        // name
        const nvinfer1::AsciiChar *tensorName = this->engine->getIOTensorName(i);

        // shape
        nvinfer1::Dims shape = this->engine->getTensorShape(tensorName);

        // size
        size_t memSize = this->getMemorySize(shape, sizeof(float));

        // cuda ptr
        void *memPtr{nullptr};
        if (cudaMalloc(&memPtr, memSize) != cudaSuccess)
        {
            qCritical() << "ERROR: input cuda memory allocation failed, size = " << memSize << " bytes";
            return ;
        }

        // save
        if(this->engine->getTensorIOMode(tensorName) == nvinfer1::TensorIOMode::kINPUT){
            this->inputs.push_back(memPtr);
        }else{
            this->outputs.push_back(memPtr);
        }
        this->bindings.push_back(memPtr);
    }
}

// Returns the byte size of a tensor with dimensions `dims` whose elements
// are `elem_size` bytes each.
size_t DenseGradeWrapper::getMemorySize(const nvinfer1::Dims& dims, const int32_t elem_size){
    // Seed accumulate with int64_t{1}: std::accumulate's accumulation type is
    // the type of the initial value, so a plain `1` (int) would truncate the
    // running product for large tensors despite std::multiplies<int64_t>.
    const int64_t count = std::accumulate(dims.d, dims.d + dims.nbDims,
                                          int64_t{1}, std::multiplies<int64_t>());
    return static_cast<size_t>(count) * elem_size;
}

// Releases every device buffer allocated in allocateBuffers(), then the CUDA
// stream. The original version leaked the stream created by cudaStreamCreate().
DenseGradeWrapper::~DenseGradeWrapper(){
    for(size_t i = 0; i < this->bindings.size(); i++){
        cudaFree(this->bindings[i]);
    }
    // Pair with cudaStreamCreate(); if creation failed this returns an error
    // code, which is safe to ignore during teardown.
    cudaStreamDestroy(this->stream);
}

main

同样的,给出封装类的执行方式

#include <QCoreApplication>

#include "Network/DenseGradeWrapper.h"

int main(int argc, char *argv[])
{
    QString model_path = "../tensorrt-gpu-win-msvc2019/Script/export_dense121_gpu.engine";
    QString image_path = "../tensorrt-gpu-win-msvc2019/Script/1.jpg";

    DenseGradeWrapper denseWrapper(model_path);
    DenseInferResult res = denseWrapper.infer(image_path);

    qDebug() << "Level is: " << res.index;
    qDebug() << "Each possible is: ";
    for(int i = 0; i < 5; i++){
        qDebug() << "\t" << res.pie[i];
    }
}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

tacom_

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值