tensorrt自定义层总览

最新推荐文章于 2023-07-23 21:43:02 发布

小涵涵

最新推荐文章于 2023-07-23 21:43:02 发布

阅读量214

点赞数

分类专栏： tensorrt 文章标签： tensorrt

本文链接：https://blog.csdn.net/qq_34929889/article/details/106563242

版权

tensorrt 专栏收录该内容

7 篇文章 0 订阅

订阅专栏

tensorrt学习二

创建序列化模型文件
反序列化模型
初始化引擎
数据预处理
inference
int8实现

创建序列化模型文件

BuildEngineWithCaffe(const std::string& prototxt, 
                        const std::string& caffeModel,
                        const std::string& engineFile,
                        const std::vector<std::string>& outputBlobName,
                        const std::vector<std::vector<float>>& calibratorData,
                        int maxBatchSize)

bool Trt::BuildEngineWithCaffe(const std::string& prototxt, 
                        const std::string& caffeModel,
                        const std::string& engineFile,
                        const std::vector<std::string>& outputBlobName,
                        const std::vector<std::vector<float>>& calibratorData,
                        int maxBatchSize) {
    mBatchSize = maxBatchSize;
    nvinfer1::IBuilder* builder = nvinfer1::createInferBuilder(mLogger);
    nvinfer1::INetworkDefinition* network = builder->createNetworkV2(0);
    assert(network != nullptr);
    nvcaffeparser1::ICaffeParser* parser = nvcaffeparser1::createCaffeParser();
    if(mPluginFactory != nullptr) {
        parser->setPluginFactoryV2(mPluginFactory);
    }
    nvinfer1::DataType type = mRunMode==1 ? nvinfer1::DataType::kHALF : nvinfer1::DataType::kFLOAT;
    const nvcaffeparser1::IBlobNameToTensor* blobNameToTensor = parser->parse(prototxt.c_str(),caffeModel.c_str(),
                                                                            *network,type);
    
    for(auto& s : outputBlobName) {
        network->markOutput(*blobNameToTensor->find(s.c_str()));
    }
    spdlog::info("Number of network layers: {}",network->getNbLayers());
    spdlog::info("Number of input: ", network->getNbInputs());
    BuildEngine(builder, network, calibratorData, maxBatchSize, mRunMode);
    SaveEngine(engineFile);
    builder->destroy();
    network->destroy();
    parser->destroy();
    return true;
}

BuildEngine(nvinfer1::IBuilder* builder,
                      nvinfer1::INetworkDefinition* network,
                      const std::vector<std::vector<float>>& calibratorData,
                      int maxBatchSize,
                      int mode)

/**
 * Configure the builder according to mRunMode and build mEngine.
 *
 * mRunMode == 2 enables INT8 (with an entropy calibrator when calibration
 * data is supplied), mRunMode == 1 enables FP16, anything else leaves the
 * default precision.
 *
 * @param builder         builder that owns the network
 * @param network         populated network definition
 * @param calibratorData  calibration samples; only used in INT8 mode
 * @param maxBatchSize    batch size handed to the calibrator
 * @param mode            currently unused: the function reads mRunMode instead.
 *                        NOTE(review): callers in this file pass mRunMode, so
 *                        the two agree; confirm before switching to `mode`.
 */
void Trt::BuildEngine(nvinfer1::IBuilder* builder,
                      nvinfer1::INetworkDefinition* network,
                      const std::vector<std::vector<float>>& calibratorData,
                      int maxBatchSize,
                      int mode) {
    nvinfer1::IBuilderConfig* config = builder->createBuilderConfig();
    if (config == nullptr) {
        spdlog::error("create builder config failed");
        mEngine = nullptr;
        return;
    }

    // Owned here; must stay alive until buildEngineWithConfig() has returned.
    std::unique_ptr<Int8EntropyCalibrator> calibrator;
    if (mRunMode == 2)
    {
        if (!builder->platformHasFastInt8()) {
            spdlog::warn("Warning: current platform doesn't support int8 inference");
        }
        if (!calibratorData.empty()) {
            const std::string calibratorName = "calibrator";
            std::cout << "create calibrator,Named:" << calibratorName << std::endl;
            calibrator.reset(new Int8EntropyCalibrator(maxBatchSize,calibratorData,calibratorName,false));
        } else {
            spdlog::warn("int8 mode requested without calibration data, no calibrator is set");
        }
        // BuilderFlag values: kFP16, kINT8, kDEBUG, kGPU_FALLBACK, kSTRICT_TYPES, kREFIT.
        config->setFlag(nvinfer1::BuilderFlag::kINT8);
        config->setInt8Calibrator(calibrator.get());
    }

    if (mRunMode == 1)
    {
        spdlog::info("setFp16Mode");
        if (!builder->platformHasFastFp16()) {
            spdlog::warn("the platform do not has fast for fp16");
        }
        config->setFlag(nvinfer1::BuilderFlag::kFP16);
    }
    builder->setMaxBatchSize(mBatchSize);
    // Workspace limit passed to the builder: 10 << 20 bytes (10 MiB).
    config->setMaxWorkspaceSize(10 << 20);
    mEngine = builder -> buildEngineWithConfig(*network, *config);
    assert(mEngine != nullptr);
    config->destroy();
    // `calibrator` is released automatically when it goes out of scope.
}

SaveEngine(const std::string& fileName)

void Trt::SaveEngine(const std::string& fileName) {
    if(fileName == "") {
        spdlog::warn("empty engine file name, skip save");
        return;
    }
    if(mEngine != nullptr) {
        spdlog::info("save engine to {}...",fileName);
        nvinfer1::IHostMemory* data = mEngine->serialize();
        std::ofstream file;
        file.open(fileName,std::ios::binary | std::ios::out);
        if(!file.is_open()) {
            spdlog::error("read create engine file {} failed",fileName);
            return;
        }
        file.write((const char*)data->data(), data->size());
        file.close();
        data->destroy();
    } else {
        spdlog::error("engine is empty, save engine failed");
    }
}

反序列化模型

DeserializeEngine(const std::string& engineFile)

bool Trt::DeserializeEngine(const std::string& engineFile) {
    std::ifstream in(engineFile.c_str(), std::ifstream::binary);
    if(in.is_open()) {
        spdlog::info("deserialize engine from {}",engineFile);
        auto const start_pos = in.tellg();
        in.ignore(std::numeric_limits<std::streamsize>::max());
        size_t bufCount = in.gcount();
        in.seekg(start_pos);
        std::unique_ptr<char[]> engineBuf(new char[bufCount]);
        in.read(engineBuf.get(), bufCount);
        initLibNvInferPlugins(&mLogger, "");
        mRuntime = nvinfer1::createInferRuntime(mLogger);
        mEngine = mRuntime->deserializeCudaEngine((void*)engineBuf.get(), bufCount, nullptr);
        assert(mEngine != nullptr);
        mBatchSize = mEngine->getMaxBatchSize();
        spdlog::info("max batch size of deserialized engine: {}",mEngine->getMaxBatchSize());
        mRuntime->destroy();
        return true;
    }
    return false;
}

初始化引擎

InitEngine()

void Trt::InitEngine() {
    
    mContext = mEngine->createExecutionContext();
    assert(mContext != nullptr);
    int nbBindings = mEngine->getNbBindings();
    mBinding.resize(nbBindings);
    mBindingSize.resize(nbBindings);
    mBindingName.resize(nbBindings);
    mBindingDims.resize(nbBindings);
    mBindingDataType.resize(nbBindings);
    for(int i=0; i< nbBindings; i++) {
        nvinfer1::Dims dims = mEngine->getBindingDimensions(i);
        nvinfer1::DataType dtype = mEngine->getBindingDataType(i);
        const char* name = mEngine->getBindingName(i);
        int64_t totalSize = volume(dims) * mBatchSize * getElementSize(dtype);
        mBindingSize[i] = totalSize;
        mBindingName[i] = name;
        mBindingDims[i] = dims;
        mBindingDataType[i] = dtype;
        if(mEngine->bindingIsInput(i)) {
            spdlog::info("input: ");
        } else {
            spdlog::info("output: ");
        }
        mBinding[i] = safeCudaMalloc(totalSize);
        if(mEngine->bindingIsInput(i)) {
            mInputSize++;
        }
    }
}

数据预处理

void Trt::test_img(const std::string& engineFile,cv::Mat img)
{
   
    nvinfer1::Dims in_DIms=mBindingDims[0];
    nvinfer1::Dims out_Dims=mBindingDims[1];
    // int inlen=mBindingDims[0].d[0]*mBindingDims[0].d[1]*mBindingDims[0].d[2];
    // int outlen=mBindingDims[1].d[0]*mBindingDims[1].d[1]*mBindingDims[1].d[2];
    int channel=in_DIms.d[0];
    int heigh=in_DIms.d[1];
    int width=in_DIms.d[2];
    int inlen=channel*heigh*width;
    int outlen=out_Dims.d[0]*out_Dims.d[1]*out_Dims.d[2];
    std::vector<float> data_in(inlen,0);
    std::vector<float> data_out(outlen,0);
    float* data=data_in.data();
    cv::Mat sample_resized;
    cv::resize(img, sample_resized, cv::Size(width, heigh));
    cv::Mat sample=sample_resized;
//    cv::cvtColor(sample_resized, sample, CV_BGR2RGB);
    cv::Mat sample_float;
    if (channel == 3)
        sample.convertTo(sample_float, CV_32FC3);
    else
        sample.convertTo(sample_float, CV_32FC1);
    cv::Mat sample_normalized;
    cv::subtract(sample_float, cv::Scalar(104, 117, 123), sample_normalized);
    // cv::divide(sample_float, cv::Scalar(255,255,255), sample_normalized);
    // cv::subtract(sample_normalized, cv::Scalar(0.46309134, 0.46395576, 0.36762613), sample_normalized);
    // cv::divide(sample_normalized, cv::Scalar(0.26067975, 0.24779406, 0.24456058), sample_normalized);
    vector<cv::Mat> input_channels;
    for(int ii = 0; ii < channel; ii ++){
        cv::Mat new_img(width, heigh, CV_32FC1, data);
        input_channels.push_back(new_img);
        data +=width*heigh;
    }
    cv::split(sample_normalized, input_channels);  
}

inference

  	CopyFromHostToDevice(data_in,0);    // upload the host vector data_in into device binding 0
    Forward();                          // run the execution context on the bound device buffers
    CopyFromDeviceToHost(data_out,1);   // presumably downloads binding 1 into data_out; its definition is not shown here

数据处理

void Trt::CopyFromHostToDevice(const std::vector<float>& input, int bindIndex) {
    CUDA_CHECK(cudaMemcpy(mBinding[bindIndex], input.data(), mBindingSize[bindIndex], cudaMemcpyHostToDevice));
}
void Trt::CopyFromHostToDevice(const std::vector<float>& input, int bindIndex, const cudaStream_t& stream) {
    CUDA_CHECK(cudaMemcpyAsync(mBinding[bindIndex], input.data(), mBindingSize[bindIndex], cudaMemcpyHostToDevice, stream));
}

Forward();

void Trt::Forward() {
    cudaEvent_t start,stop;
    float elapsedTime;
    cudaEventCreate(&start);
    cudaEventCreate(&stop);
    cudaEventRecord(start, 0);
    mContext->execute(mBatchSize, &mBinding[0]);
    cudaEventRecord(stop, 0);
	cudaEventSynchronize(stop);
	cudaEventElapsedTime(&elapsedTime, start, stop);
    spdlog::info("net forward takes {} ms", elapsedTime);
}

BuildEngineWithCaffe(const std::string& prototxt, 
                        const std::string& caffeModel,
                        const std::string& engineFile,
                        const std::vector<std::string>& outputBlobName,
                        const std::vector<std::vector<float>>& calibratorData,
                        int maxBatchSize)


      bool Trt::BuildEngineWithCaffe(const std::string& prototxt, 
                        const std::string& caffeModel,
                        const std::string& engineFile,
                        const std::vector<std::string>& outputBlobName,
                        const std::vector<std::vector<float>>& calibratorData,
                        int maxBatchSize) {
    mBatchSize = maxBatchSize;
    nvinfer1::IBuilder* builder = nvinfer1::createInferBuilder(mLogger);
    nvinfer1::INetworkDefinition* network = builder->createNetworkV2(0);
    assert(network != nullptr);
    nvcaffeparser1::ICaffeParser* parser = nvcaffeparser1::createCaffeParser();
    if(mPluginFactory != nullptr) {
        parser->setPluginFactoryV2(mPluginFactory);
    }
    nvinfer1::DataType type = mRunMode==1 ? nvinfer1::DataType::kHALF : nvinfer1::DataType::kFLOAT;
    const nvcaffeparser1::IBlobNameToTensor* blobNameToTensor = parser->parse(prototxt.c_str(),caffeModel.c_str(),
                                                                            *network,type);
    
    for(auto& s : outputBlobName) {
        network->markOutput(*blobNameToTensor->find(s.c_str()));
    }
    spdlog::info("Number of network layers: {}",network->getNbLayers());
    spdlog::info("Number of input: ", network->getNbInputs());
    BuildEngine(builder, network, calibratorData, maxBatchSize, mRunMode);
    SaveEngine(engineFile);
    builder->destroy();
    network->destroy();
    parser->destroy();
    return true;
}

bool Trt::BuildEngineWithCaffe(const std::string& prototxt, 
                        const std::string& caffeModel,
                        const std::string& engineFile,
                        const std::vector<std::string>& outputBlobName,
                        const std::vector<std::vector<float>>& calibratorData,
                        int maxBatchSize) {
    mBatchSize = maxBatchSize;
    spdlog::info("build caffe engine with {} and {}", prototxt, caffeModel);
    nvinfer1::IBuilder* builder = nvinfer1::createInferBuilder(mLogger);
    assert(builder != nullptr);
    // NetworkDefinitionCreationFlag::kEXPLICIT_BATCH 
    nvinfer1::INetworkDefinition* network = builder->createNetworkV2(0);
    assert(network != nullptr);
    nvcaffeparser1::ICaffeParser* parser = nvcaffeparser1::createCaffeParser();
    if(mPluginFactory != nullptr) {
        parser->setPluginFactoryV2(mPluginFactory);
    }
    // Notice: change here to costom data type
    nvinfer1::DataType type = mRunMode==1 ? nvinfer1::DataType::kHALF : nvinfer1::DataType::kFLOAT;
    const nvcaffeparser1::IBlobNameToTensor* blobNameToTensor = parser->parse(prototxt.c_str(),caffeModel.c_str(),
                                                                            *network,type);
    
    for(auto& s : outputBlobName) {
        network->markOutput(*blobNameToTensor->find(s.c_str()));
    }
    spdlog::info("Number of network layers: {}",network->getNbLayers());
    spdlog::info("Number of input: ", network->getNbInputs());
    std::cout << "Input layer: " << std::endl;
    for(int i = 0; i < network->getNbInputs(); i++) {
        std::cout << network->getInput(i)->getName() << " : ";
        Dims dims = network->getInput(i)->getDimensions();
        for(int j = 0; j < dims.nbDims; j++) {
            std::cout << dims.d[j] << "x"; 
        }
        std::cout << "\b "  << std::endl;
    }
    spdlog::info("Number of output: {}",network->getNbOutputs());
    std::cout << "Output layer: " << std::endl;
    for(int i = 0; i < network->getNbOutputs(); i++) {
        std::cout << network->getOutput(i)->getName() << " : ";
        Dims dims = network->getOutput(i)->getDimensions();
        for(int j = 0; j < dims.nbDims; j++) {
            std::cout << dims.d[j] << "x"; 
        }
        std::cout << "\b " << std::endl;
    }
    spdlog::info("parse network done");

    BuildEngine(builder, network, calibratorData, maxBatchSize, mRunMode);

    spdlog::info("serialize engine to {}", engineFile);
    SaveEngine(engineFile);
    
    builder->destroy();
    network->destroy();
    parser->destroy();
    return true;
}

int8实现

INT8 的实现可以参考官方的 sample，主要涉及 BatchStream.h 和 EntropyCalibrator.h 两个头文件，其核心是实现一个按批次向校准器提供数据的迭代器。

/*
 * @Description: int8 entropy calibrator 2
 * @Author: zengren
 * @Date: 2019-08-21 16:52:06
 * @LastEditTime: 2019-08-22 17:04:49
 * @LastEditors: Please set LastEditors
 */
#include "Int8EntropyCalibrator.h"
#include <fstream>
#include <iterator>
#include <cassert>
#include <string.h>
#include <algorithm>


/**
 * Copy the calibration samples and allocate the host and device staging
 * buffers for one batch.
 *
 * @param BatchSize      number of samples delivered per getBatch() call
 * @param data           calibration samples; the first sample's length
 *                       defines the per-sample element count
 * @param CalibDataName  base name of the "<name>.calib" cache file
 * @param readCache      whether readCalibrationCache() may load that file
 *
 * With empty `data` or a non-positive `BatchSize` the buffers are sized to
 * zero elements instead of indexing data[0] or allocating a bogus size.
 */
Int8EntropyCalibrator::Int8EntropyCalibrator(int BatchSize,const std::vector<std::vector<float>>& data,
                                        const std::string& CalibDataName /*= ""*/,bool readCache /*= true*/)
    : mCalibDataName(CalibDataName),
      mDatas(data),
      mBatchSize(BatchSize),
      mCurBatchIdx(0),
      mInputCount((data.empty() || BatchSize <= 0)
                      ? static_cast<size_t>(0)
                      : static_cast<size_t>(BatchSize) * data[0].size()),
      mReadCache(readCache)
{
    mCurBatchData = new float[mInputCount];
    CUDA_CHECK(cudaMalloc(&mDeviceInput, mInputCount * sizeof(float)));
}


/** Free the device staging buffer, then the host staging buffer. */
Int8EntropyCalibrator::~Int8EntropyCalibrator()
{
    CUDA_CHECK(cudaFree(mDeviceInput));
    // delete[] on a null pointer is a no-op, so no explicit test is needed.
    delete[] mCurBatchData;
}


/**
 * Stage the next mBatchSize samples on the device and expose them through
 * bindings[0].
 *
 * @param bindings    output array; bindings[0] receives the device pointer
 * @param names       binding names; names[0] is only logged
 * @param nbBindings  number of entries in `bindings` / `names`
 * @return true when a full batch was staged; false when no bindings were
 *         passed, the batch size is not positive, fewer than mBatchSize
 *         samples remain, or a sample does not have the expected length.
 */
bool Int8EntropyCalibrator::getBatch(void* bindings[], const char* names[], int nbBindings)
{
    // Both checks protect accesses below: names[0] / bindings[0] and the
    // division by mBatchSize.
    if (nbBindings < 1 || mBatchSize <= 0)
        return false;

    std::cout << "name: " << names[0] << "nbBindings: " << nbBindings << std::endl;
    std::cout << "size: " << mDatas.size() << ":mCurBatchIdx " << mCurBatchIdx<<"batch"<< mBatchSize<< std::endl;
    if (mCurBatchIdx + mBatchSize > int(mDatas.size())) 
            return false;

    const size_t imgSize = mInputCount / mBatchSize;
    float* dst = mCurBatchData;
    for (int i = mCurBatchIdx; i < mCurBatchIdx + mBatchSize; ++i) {
        const std::vector<float>& sample = mDatas[i];
        // Checked at runtime: an assert alone vanishes under NDEBUG and a short
        // sample would then be read past its end.
        if (sample.size() != imgSize) {
            std::cerr << "calibration sample " << i << " has " << sample.size()
                      << " elements, expected " << imgSize << std::endl;
            return false;
        }
        memcpy(dst, sample.data(), imgSize*sizeof(float));
        dst += imgSize;
    }

    CUDA_CHECK(cudaMemcpy(mDeviceInput, mCurBatchData, mInputCount * sizeof(float), cudaMemcpyHostToDevice));
    bindings[0] = mDeviceInput;

    std::cout << "load batch " << mCurBatchIdx << " to " << mCurBatchIdx + mBatchSize - 1 << std::endl;        
    mCurBatchIdx += mBatchSize;
    return true;
}

/**
 * Load "<mCalibDataName>.calib" into mCalibrationCache when cache reading is
 * enabled and the file opened successfully.
 *
 * @param length  set to the number of cached bytes (0 when nothing was read)
 * @return pointer to the cached bytes, or nullptr when the cache is empty
 */
const void* Int8EntropyCalibrator::readCalibrationCache(size_t& length)
{
    mCalibrationCache.clear();

    std::ifstream cacheFile(mCalibDataName+".calib", std::ios::binary);
    if (mReadCache && cacheFile.good()) {
        // Stream-buffer iterators read every byte, whitespace included.
        mCalibrationCache.assign(std::istreambuf_iterator<char>(cacheFile),
                                 std::istreambuf_iterator<char>());
    }

    length = mCalibrationCache.size();
    if (length == 0) {
        return nullptr;
    }
    return mCalibrationCache.data();
}

/**
 * Write `length` bytes starting at `cache` to "<mCalibDataName>.calib".
 *
 * @param cache   start of the calibration table bytes
 * @param length  number of bytes to write
 */
void Int8EntropyCalibrator::writeCalibrationCache(const void* cache, size_t length)
{
    const std::string cachePath = mCalibDataName+".calib";
    const char* bytes = static_cast<const char*>(cache);

    std::ofstream cacheFile(cachePath, std::ios::binary);
    cacheFile.write(bytes, length);
}

/*
 * @Description: In User Settings Edit
 * @Author: your name
 * @Date: 2019-08-21 16:48:34
 * @LastEditTime: 2019-08-22 17:06:20
 * @LastEditors: Please set LastEditors
 */
#ifndef _ENTROY_CALIBRATOR_H
#define _ENTROY_CALIBRATOR_H

#include <cudnn.h>
#include <string>
#include <vector>
#include "NvInfer.h"
#include "utils.h"


class Int8EntropyCalibrator : public nvinfer1::IInt8EntropyCalibrator2 {
public:
	Int8EntropyCalibrator(int BatchSize,const std::vector<std::vector<float>>& data,const std::string& CalibDataName = "",bool readCache = true);

	virtual ~Int8EntropyCalibrator();

	int getBatchSize() const override {
		std::cout << "getbatchSize: " << mBatchSize << std::endl;
		return mBatchSize; 
	}

	bool getBatch(void* bindings[], const char* names[], int nbBindings) override;

	const void* readCalibrationCache(size_t& length) override;

	void writeCalibrationCache(const void* cache, size_t length) override;

private:
	std::string mCalibDataName;
	std::vector<std::vector<float>> mDatas;
	int mBatchSize;

	int mCurBatchIdx;
	float* mCurBatchData{ nullptr };
	
	size_t mInputCount;
	bool mReadCache;
	void* mDeviceInput{ nullptr };

	std::vector<char> mCalibrationCache;
};


#endif //_ENTROY_CALIBRATOR_H

/**
 * Read a text file into `lists`, one entry per line.
 *
 * Blank lines are kept as empty entries. A file that cannot be opened yields
 * an empty list (the old eof() loop never terminated in that case), and the
 * last line is kept whether or not the file ends with a newline.
 *
 * @param listf  path of the list file
 * @param lists  cleared, then filled with the lines of the file
 * @return number of entries stored in `lists`
 */
static int file2list(const std::string &listf, std::vector<std::string> &lists){
    lists.clear();
    std::ifstream file(listf);
    std::string line;
    // getline() as the loop condition stops on EOF and on any stream failure.
    while (std::getline(file, line)) {
        lists.push_back(line);
    }
    return static_cast<int>(lists.size());
}
/**
 * Load and preprocess every image named in a list file into planar float
 * vectors (used to build INT8 calibration data).
 *
 * Each image is resized to the input width/height with cubic interpolation,
 * converted to the requested channel count, converted to float, has the mean
 * (104, 117, 123) subtracted and is split channel by channel into one vector.
 * Images that cannot be read, or whose channel count cannot be matched, are
 * skipped with a message.
 *
 * @param file_list  text file with one image path per line
 * @param input_dim  network input dimensions, read as d[0]=channels,
 *                   d[1]=height, d[2]=width
 * @param out_data   replaced by one preprocessed vector per usable image
 */
void Trt::img_process(string file_list,Dims3 input_dim,vector<vector<float> >&out_data)
{
        const int channel=input_dim.d[0];
        const int heigh=input_dim.d[1];
        const int width=input_dim.d[2];
        const int inlen=channel*heigh*width;

        std::vector<std::string> image_list;
        const int num_imgs=file2list(file_list,image_list);

        std::vector<std::vector<float>> temp;
        for(int i=0;i<num_imgs;i++)
        {
            std::cout<<image_list[i]<<std::endl;
            cv::Mat img_in=cv::imread(image_list[i]);
            if (img_in.empty()) {
                // cv::resize would throw on an empty image.
                std::cout<<"skip unreadable image: "<<image_list[i]<<std::endl;
                continue;
            }

            cv::Mat sample_resized;
            cv::resize(img_in, sample_resized, cv::Size(width, heigh), 0, 0, cv::INTER_CUBIC);

            // Match the channel count expected by the network.
            cv::Mat sample;
            if (sample_resized.channels() == 3 && channel == 1)
                cv::cvtColor(sample_resized, sample, CV_BGR2GRAY);
            else if (sample_resized.channels() == 4 && channel == 1)
                cv::cvtColor(sample_resized, sample, CV_BGRA2GRAY);
            else if (sample_resized.channels() == 4 && channel == 3)
                cv::cvtColor(sample_resized, sample, CV_BGRA2BGR);
            else if (sample_resized.channels() == 1 && channel == 3)
                cv::cvtColor(sample_resized, sample, CV_GRAY2BGR);
            else
                sample = sample_resized;

            // Convert the colour-converted image; the previous code converted
            // sample_resized and so silently discarded the cvtColor result.
            cv::Mat sample_float;
            if (channel == 3)
                sample.convertTo(sample_float, CV_32FC3);
            else
                sample.convertTo(sample_float, CV_32FC1);

            cv::Mat sample_normalized;
            cv::Scalar mean(104.0f, 117.0f, 123.0f);
            cv::subtract(sample_float, mean, sample_normalized);

            std::cout<<sample_normalized.size()<<std::endl;

            if (sample_normalized.channels() != channel) {
                // cv::split would allocate new matrices instead of filling data_in.
                std::cout<<"skip image with unexpected channel count: "<<image_list[i]<<std::endl;
                continue;
            }

            // Wrap each plane of data_in in a Mat header. cv::Mat takes
            // (rows, cols), i.e. (height, width).
            std::vector<float> data_in(inlen,0);
            float* data=data_in.data();
            std::vector<cv::Mat> input_channels;
            for(int ii = 0; ii < channel; ii ++){
                input_channels.push_back(cv::Mat(heigh, width, CV_32FC1, data));
                data +=width*heigh;
            }
            cv::split(sample_normalized, input_channels);

            temp.push_back(data_in);
        }
        std::cout<<"temp size "<<temp.size()<<std::endl;
        out_data.swap(temp);
}

小涵涵

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
打赏
0
评论
tensorrt自定义层总览

tensorrt学习二创建序列化模型文件反序列化模型初始化引擎数据预处理inference创建序列化模型文件BuildEngine(nvinfer1::IBuilder* builder, nvinfer1::INetworkDefinition* network, const std::vector<std::vector<float>>& calibratorData,
复制链接

扫一扫