创建序列化模型文件
BuildEngineWithCaffe(const std::string& prototxt,
const std::string& caffeModel,
const std::string& engineFile,
const std::vector<std::string>& outputBlobName,
const std::vector<std::vector<float>>& calibratorData,
int maxBatchSize)
bool Trt::BuildEngineWithCaffe(const std::string& prototxt,
const std::string& caffeModel,
const std::string& engineFile,
const std::vector<std::string>& outputBlobName,
const std::vector<std::vector<float>>& calibratorData,
int maxBatchSize) {
mBatchSize = maxBatchSize;
nvinfer1::IBuilder* builder = nvinfer1::createInferBuilder(mLogger);
nvinfer1::INetworkDefinition* network = builder->createNetworkV2(0);
assert(network != nullptr);
nvcaffeparser1::ICaffeParser* parser = nvcaffeparser1::createCaffeParser();
if(mPluginFactory != nullptr) {
parser->setPluginFactoryV2(mPluginFactory);
}
nvinfer1::DataType type = mRunMode==1 ? nvinfer1::DataType::kHALF : nvinfer1::DataType::kFLOAT;
const nvcaffeparser1::IBlobNameToTensor* blobNameToTensor = parser->parse(prototxt.c_str(),caffeModel.c_str(),
*network,type);
for(auto& s : outputBlobName) {
network->markOutput(*blobNameToTensor->find(s.c_str()));
}
spdlog::info("Number of network layers: {}",network->getNbLayers());
spdlog::info("Number of input: ", network->getNbInputs());
BuildEngine(builder, network, calibratorData, maxBatchSize, mRunMode);
SaveEngine(engineFile);
builder->destroy();
network->destroy();
parser->destroy();
return true;
}
BuildEngine(nvinfer1::IBuilder* builder,
nvinfer1::INetworkDefinition* network,
const std::vector<std::vector<float>>& calibratorData,
int maxBatchSize,
int mode)
/**
 * @brief Configure the builder for the current run mode and build mEngine.
 *
 * mRunMode == 2 enables the INT8 builder flag (with an entropy calibrator when
 * calibration data is supplied); mRunMode == 1 enables the FP16 builder flag.
 *
 * @param builder         Builder used to create the config and the engine.
 * @param network         Populated network definition to build.
 * @param calibratorData  Calibration samples; a calibrator is only created when non-empty.
 * @param maxBatchSize    Batch size handed to the calibrator.
 * @param mode            Not read by this function; the member mRunMode is used instead.
 */
void Trt::BuildEngine(nvinfer1::IBuilder* builder,
                      nvinfer1::INetworkDefinition* network,
                      const std::vector<std::vector<float>>& calibratorData,
                      int maxBatchSize,
                      int mode) {
    (void)mode;  // kept for interface compatibility
    nvinfer1::IBuilderConfig* config = builder->createBuilderConfig();

    // Owned here so the calibrator is released on every exit path; it must stay
    // alive until buildEngineWithConfig() has returned.
    std::unique_ptr<Int8EntropyCalibrator> calibrator;

    if (mRunMode == 2) {
        if (!builder->platformHasFastInt8()) {
            spdlog::warn("Warning: current platform doesn't support int8 inference");
        }
        if (!calibratorData.empty()) {
            const std::string calibratorName = "calibrator";
            std::cout << "create calibrator,Named:" << calibratorName << std::endl;
            calibrator.reset(
                new Int8EntropyCalibrator(maxBatchSize, calibratorData, calibratorName, false));
        }
        config->setFlag(nvinfer1::BuilderFlag::kINT8);
        // May be nullptr when no calibration data was supplied (same as before).
        config->setInt8Calibrator(calibrator.get());
    }

    if (mRunMode == 1) {
        spdlog::info("setFp16Mode");
        if (!builder->platformHasFastFp16()) {
            spdlog::warn("the platform do not has fast for fp16");
        }
        config->setFlag(nvinfer1::BuilderFlag::kFP16);
    }

    builder->setMaxBatchSize(mBatchSize);
    // Upper bound on the scratch memory the builder may use: 10 << 20 bytes.
    config->setMaxWorkspaceSize(10 << 20);

    mEngine = builder->buildEngineWithConfig(*network, *config);
    if (mEngine == nullptr) {
        // assert() disappears in release builds, so report the failure explicitly too.
        spdlog::error("build engine failed");
    }
    assert(mEngine != nullptr);

    config->destroy();
}
SaveEngine(const std::string& fileName)
void Trt::SaveEngine(const std::string& fileName) {
if(fileName == "") {
spdlog::warn("empty engine file name, skip save");
return;
}
if(mEngine != nullptr) {
spdlog::info("save engine to {}...",fileName);
nvinfer1::IHostMemory* data = mEngine->serialize();
std::ofstream file;
file.open(fileName,std::ios::binary | std::ios::out);
if(!file.is_open()) {
spdlog::error("read create engine file {} failed",fileName);
return;
}
file.write((const char*)data->data(), data->size());
file.close();
data->destroy();
} else {
spdlog::error("engine is empty, save engine failed");
}
}
反序列化模型
DeserializeEngine(const std::string& engineFile)
bool Trt::DeserializeEngine(const std::string& engineFile) {
std::ifstream in(engineFile.c_str(), std::ifstream::binary);
if(in.is_open()) {
spdlog::info("deserialize engine from {}",engineFile);
auto const start_pos = in.tellg();
in.ignore(std::numeric_limits<std::streamsize>::max());
size_t bufCount = in.gcount();
in.seekg(start_pos);
std::unique_ptr<char[]> engineBuf(new char[bufCount]);
in.read(engineBuf.get(), bufCount);
initLibNvInferPlugins(&mLogger, "");
mRuntime = nvinfer1::createInferRuntime(mLogger);
mEngine = mRuntime->deserializeCudaEngine((void*)engineBuf.get(), bufCount, nullptr);
assert(mEngine != nullptr);
mBatchSize = mEngine->getMaxBatchSize();
spdlog::info("max batch size of deserialized engine: {}",mEngine->getMaxBatchSize());
mRuntime->destroy();
return true;
}
return false;
}
初始化引擎
InitEngine()
void Trt::InitEngine() {
mContext = mEngine->createExecutionContext();
assert(mContext != nullptr);
int nbBindings = mEngine->getNbBindings();
mBinding.resize(nbBindings);
mBindingSize.resize(nbBindings);
mBindingName.resize(nbBindings);
mBindingDims.resize(nbBindings);
mBindingDataType.resize(nbBindings);
for(int i=0; i< nbBindings; i++) {
nvinfer1::Dims dims = mEngine->getBindingDimensions(i);
nvinfer1::DataType dtype = mEngine->getBindingDataType(i);
const char* name = mEngine->getBindingName(i);
int64_t totalSize = volume(dims) * mBatchSize * getElementSize(dtype);
mBindingSize[i] = totalSize;
mBindingName[i] = name;
mBindingDims[i] = dims;
mBindingDataType[i] = dtype;
if(mEngine->bindingIsInput(i)) {
spdlog::info("input: ");
} else {
spdlog::info("output: ");
}
mBinding[i] = safeCudaMalloc(totalSize);
if(mEngine->bindingIsInput(i)) {
mInputSize++;
}
}
}
数据预处理
void Trt::test_img(const std::string& engineFile,cv::Mat img)
{
nvinfer1::Dims in_DIms=mBindingDims[0];
nvinfer1::Dims out_Dims=mBindingDims[1];
// int inlen=mBindingDims[0].d[0]*mBindingDims[0].d[1]*mBindingDims[0].d[2];
// int outlen=mBindingDims[1].d[0]*mBindingDims[1].d[1]*mBindingDims[1].d[2];
int channel=in_DIms.d[0];
int heigh=in_DIms.d[1];
int width=in_DIms.d[2];
int inlen=channel*heigh*width;
int outlen=out_Dims.d[0]*out_Dims.d[1]*out_Dims.d[2];
std::vector<float> data_in(inlen,0);
std::vector<float> data_out(outlen,0);
float* data=data_in.data();
cv::Mat sample_resized;
cv::resize(img, sample_resized, cv::Size(width, heigh));
cv::Mat sample=sample_resized;
// cv::cvtColor(sample_resized, sample, CV_BGR2RGB);
cv::Mat sample_float;
if (channel == 3)
sample.convertTo(sample_float, CV_32FC3);
else
sample.convertTo(sample_float, CV_32FC1);
cv::Mat sample_normalized;
cv::subtract(sample_float, cv::Scalar(104, 117, 123), sample_normalized);
// cv::divide(sample_float, cv::Scalar(255,255,255), sample_normalized);
// cv::subtract(sample_normalized, cv::Scalar(0.46309134, 0.46395576, 0.36762613), sample_normalized);
// cv::divide(sample_normalized, cv::Scalar(0.26067975, 0.24779406, 0.24456058), sample_normalized);
vector<cv::Mat> input_channels;
for(int ii = 0; ii < channel; ii ++){
cv::Mat new_img(width, heigh, CV_32FC1, data);
input_channels.push_back(new_img);
data +=width*heigh;
}
cv::split(sample_normalized, input_channels);
}
inference
// Inference sequence: upload the preprocessed input (data_in) to binding 0,
// run the network, then fetch binding 1 into data_out (CopyFromDeviceToHost is
// not shown here; presumably the device-to-host counterpart of the upload).
CopyFromHostToDevice(data_in,0);
Forward();
CopyFromDeviceToHost(data_out,1);
数据处理
/**
 * @brief Blocking host-to-device copy into one binding buffer.
 *
 * The number of bytes copied is mBindingSize[bindIndex]; it is not derived
 * from input.size().
 *
 * @param input     Host data to upload.
 * @param bindIndex Index of the destination binding.
 */
void Trt::CopyFromHostToDevice(const std::vector<float>& input, int bindIndex) {
    auto deviceDst = mBinding[bindIndex];
    const float* hostSrc = input.data();
    const auto byteCount = mBindingSize[bindIndex];
    CUDA_CHECK(cudaMemcpy(deviceDst, hostSrc, byteCount, cudaMemcpyHostToDevice));
}
/**
 * @brief Host-to-device copy into one binding buffer, issued on a CUDA stream.
 *
 * The number of bytes copied is mBindingSize[bindIndex]; it is not derived
 * from input.size().
 *
 * @param input     Host data to upload.
 * @param bindIndex Index of the destination binding.
 * @param stream    Stream the copy is enqueued on.
 */
void Trt::CopyFromHostToDevice(const std::vector<float>& input, int bindIndex, const cudaStream_t& stream) {
    auto deviceDst = mBinding[bindIndex];
    const float* hostSrc = input.data();
    const auto byteCount = mBindingSize[bindIndex];
    CUDA_CHECK(cudaMemcpyAsync(deviceDst, hostSrc, byteCount, cudaMemcpyHostToDevice, stream));
}
Forward();
void Trt::Forward() {
cudaEvent_t start,stop;
float elapsedTime;
cudaEventCreate(&start);
cudaEventCreate(&stop);
cudaEventRecord(start, 0);
mContext->execute(mBatchSize, &mBinding[0]);
cudaEventRecord(stop, 0);
cudaEventSynchronize(stop);
cudaEventElapsedTime(&elapsedTime, start, stop);
spdlog::info("net forward takes {} ms", elapsedTime);
}
BuildEngineWithCaffe(const std::string& prototxt,
const std::string& caffeModel,
const std::string& engineFile,
const std::vector<std::string>& outputBlobName,
const std::vector<std::vector<float>>& calibratorData,
int maxBatchSize)
bool Trt::BuildEngineWithCaffe(const std::string& prototxt,
const std::string& caffeModel,
const std::string& engineFile,
const std::vector<std::string>& outputBlobName,
const std::vector<std::vector<float>>& calibratorData,
int maxBatchSize) {
mBatchSize = maxBatchSize;
nvinfer1::IBuilder* builder = nvinfer1::createInferBuilder(mLogger);
nvinfer1::INetworkDefinition* network = builder->createNetworkV2(0);
assert(network != nullptr);
nvcaffeparser1::ICaffeParser* parser = nvcaffeparser1::createCaffeParser();
if(mPluginFactory != nullptr) {
parser->setPluginFactoryV2(mPluginFactory);
}
nvinfer1::DataType type = mRunMode==1 ? nvinfer1::DataType::kHALF : nvinfer1::DataType::kFLOAT;
const nvcaffeparser1::IBlobNameToTensor* blobNameToTensor = parser->parse(prototxt.c_str(),caffeModel.c_str(),
*network,type);
for(auto& s : outputBlobName) {
network->markOutput(*blobNameToTensor->find(s.c_str()));
}
spdlog::info("Number of network layers: {}",network->getNbLayers());
spdlog::info("Number of input: ", network->getNbInputs());
BuildEngine(builder, network, calibratorData, maxBatchSize, mRunMode);
SaveEngine(engineFile);
builder->destroy();
network->destroy();
parser->destroy();
return true;
}
bool Trt::BuildEngineWithCaffe(const std::string& prototxt,
const std::string& caffeModel,
const std::string& engineFile,
const std::vector<std::string>& outputBlobName,
const std::vector<std::vector<float>>& calibratorData,
int maxBatchSize) {
mBatchSize = maxBatchSize;
spdlog::info("build caffe engine with {} and {}", prototxt, caffeModel);
nvinfer1::IBuilder* builder = nvinfer1::createInferBuilder(mLogger);
assert(builder != nullptr);
// NetworkDefinitionCreationFlag::kEXPLICIT_BATCH
nvinfer1::INetworkDefinition* network = builder->createNetworkV2(0);
assert(network != nullptr);
nvcaffeparser1::ICaffeParser* parser = nvcaffeparser1::createCaffeParser();
if(mPluginFactory != nullptr) {
parser->setPluginFactoryV2(mPluginFactory);
}
// Notice: change here to costom data type
nvinfer1::DataType type = mRunMode==1 ? nvinfer1::DataType::kHALF : nvinfer1::DataType::kFLOAT;
const nvcaffeparser1::IBlobNameToTensor* blobNameToTensor = parser->parse(prototxt.c_str(),caffeModel.c_str(),
*network,type);
for(auto& s : outputBlobName) {
network->markOutput(*blobNameToTensor->find(s.c_str()));
}
spdlog::info("Number of network layers: {}",network->getNbLayers());
spdlog::info("Number of input: ", network->getNbInputs());
std::cout << "Input layer: " << std::endl;
for(int i = 0; i < network->getNbInputs(); i++) {
std::cout << network->getInput(i)->getName() << " : ";
Dims dims = network->getInput(i)->getDimensions();
for(int j = 0; j < dims.nbDims; j++) {
std::cout << dims.d[j] << "x";
}
std::cout << "\b " << std::endl;
}
spdlog::info("Number of output: {}",network->getNbOutputs());
std::cout << "Output layer: " << std::endl;
for(int i = 0; i < network->getNbOutputs(); i++) {
std::cout << network->getOutput(i)->getName() << " : ";
Dims dims = network->getOutput(i)->getDimensions();
for(int j = 0; j < dims.nbDims; j++) {
std::cout << dims.d[j] << "x";
}
std::cout << "\b " << std::endl;
}
spdlog::info("parse network done");
BuildEngine(builder, network, calibratorData, maxBatchSize, mRunMode);
spdlog::info("serialize engine to {}", engineFile);
SaveEngine(engineFile);
builder->destroy();
network->destroy();
parser->destroy();
return true;
}
int8实现
这个可以参考官方的sample,主要有BatchStream.h和EntropyCalibrator.h两个头文件,主要是实现一个迭代器的功能.
/*
* @Description: int8 entropy calibrator 2
* @Author: zengren
* @Date: 2019-08-21 16:52:06
* @LastEditTime: 2019-08-22 17:04:49
* @LastEditors: Please set LastEditors
*/
#include "Int8EntropyCalibrator.h"
#include <fstream>
#include <iterator>
#include <cassert>
#include <string.h>
#include <algorithm>
/**
 * @brief Copy the calibration samples and allocate the host/device batch buffers.
 *
 * Both buffers hold BatchSize * data[0].size() floats (zero when data is empty).
 *
 * @param BatchSize      Number of samples delivered per getBatch() call.
 * @param data           Calibration samples, one float vector per sample.
 * @param CalibDataName  Base name of the calibration cache file (".calib" is appended).
 * @param readCache      Whether readCalibrationCache() may load an existing cache.
 */
Int8EntropyCalibrator::Int8EntropyCalibrator(int BatchSize,const std::vector<std::vector<float>>& data,
                                             const std::string& CalibDataName /*= ""*/,bool readCache /*= true*/)
    : mCalibDataName(CalibDataName),
      mDatas(data),
      mBatchSize(BatchSize),
      mCurBatchIdx(0),
      mReadCache(readCache)
{
    // data[0] on an empty vector is undefined behaviour; treat "no samples" as size 0.
    const size_t sampleSize = data.empty() ? 0 : data[0].size();
    mInputCount = BatchSize * sampleSize;
    mCurBatchData = new float[mInputCount];
    CUDA_CHECK(cudaMalloc(&mDeviceInput, mInputCount * sizeof(float)));
}
/**
 * @brief Free the device input buffer, then the host staging buffer.
 */
Int8EntropyCalibrator::~Int8EntropyCalibrator()
{
    CUDA_CHECK(cudaFree(mDeviceInput));
    // delete[] on a null pointer is a no-op, so no explicit null test is needed.
    delete[] mCurBatchData;
}
/**
 * @brief Stage the next mBatchSize samples and expose them through bindings[0].
 *
 * The samples are packed back to back into the host buffer, copied to the
 * device buffer, and the read position advances by mBatchSize.
 *
 * @param bindings    Output array; only element 0 is written.
 * @param names       Binding names; only names[0] is read, for logging.
 * @param nbBindings  Number of bindings; only logged.
 * @return false when fewer than mBatchSize samples remain, true otherwise.
 */
bool Int8EntropyCalibrator::getBatch(void* bindings[], const char* names[], int nbBindings)
{
    std::cout << "name: " << names[0] << "nbBindings: " << nbBindings << std::endl;
    std::cout << "size: " << mDatas.size() << ":mCurBatchIdx " << mCurBatchIdx<<"batch"<< mBatchSize<< std::endl;

    const int batchEnd = mCurBatchIdx + mBatchSize;
    if (batchEnd > static_cast<int>(mDatas.size())) {
        return false;
    }

    const size_t imgSize = mInputCount / mBatchSize;
    float* dst = mCurBatchData;
    for (int sampleIdx = mCurBatchIdx; sampleIdx < batchEnd; ++sampleIdx) {
        const std::vector<float>& sample = mDatas[sampleIdx];
        assert(imgSize == sample.size());
        memcpy(dst, sample.data(), imgSize * sizeof(float));
        dst += imgSize;
    }

    CUDA_CHECK(cudaMemcpy(mDeviceInput, mCurBatchData, mInputCount * sizeof(float), cudaMemcpyHostToDevice));
    bindings[0] = mDeviceInput;

    std::cout << "load batch " << mCurBatchIdx << " to " << batchEnd - 1 << std::endl;
    mCurBatchIdx = batchEnd;
    return true;
}
/**
 * @brief Load "<mCalibDataName>.calib" into mCalibrationCache when allowed.
 *
 * The cache is only read when mReadCache is set and the file stream is good.
 *
 * @param length Receives the number of cached bytes (0 when nothing was loaded).
 * @return Pointer to the cached bytes, or nullptr when length is 0.
 */
const void* Int8EntropyCalibrator::readCalibrationCache(size_t& length)
{
    mCalibrationCache.clear();

    std::ifstream cacheFile(mCalibDataName + ".calib", std::ios::binary);
    // Keep whitespace bytes: the cache is copied character by character.
    cacheFile >> std::noskipws;

    const bool mayLoad = mReadCache && cacheFile.good();
    if (mayLoad) {
        std::istream_iterator<char> first(cacheFile);
        std::istream_iterator<char> last;
        std::copy(first, last, std::back_inserter(mCalibrationCache));
    }

    length = mCalibrationCache.size();
    if (length == 0) {
        return nullptr;
    }
    return mCalibrationCache.data();
}
/**
 * @brief Write the calibration cache to "<mCalibDataName>.calib".
 *
 * @param cache  Start of the cache bytes.
 * @param length Number of bytes to write.
 */
void Int8EntropyCalibrator::writeCalibrationCache(const void* cache, size_t length)
{
    const std::string cachePath = mCalibDataName + ".calib";
    const char* bytes = static_cast<const char*>(cache);

    std::ofstream cacheFile(cachePath, std::ios::binary);
    cacheFile.write(bytes, length);
}
/*
* @Description: In User Settings Edit
* @Author: your name
* @Date: 2019-08-21 16:48:34
* @LastEditTime: 2019-08-22 17:06:20
* @LastEditors: Please set LastEditors
*/
#ifndef _ENTROY_CALIBRATOR_H
#define _ENTROY_CALIBRATOR_H
#include <cudnn.h>
#include <string>
#include <vector>
#include "NvInfer.h"
#include "utils.h"
class Int8EntropyCalibrator : public nvinfer1::IInt8EntropyCalibrator2 {
public:
Int8EntropyCalibrator(int BatchSize,const std::vector<std::vector<float>>& data,const std::string& CalibDataName = "",bool readCache = true);
virtual ~Int8EntropyCalibrator();
int getBatchSize() const override {
std::cout << "getbatchSize: " << mBatchSize << std::endl;
return mBatchSize;
}
bool getBatch(void* bindings[], const char* names[], int nbBindings) override;
const void* readCalibrationCache(size_t& length) override;
void writeCalibrationCache(const void* cache, size_t length) override;
private:
std::string mCalibDataName;
std::vector<std::vector<float>> mDatas;
int mBatchSize;
int mCurBatchIdx;
float* mCurBatchData{ nullptr };
size_t mInputCount;
bool mReadCache;
void* mDeviceInput{ nullptr };
std::vector<char> mCalibrationCache;
};
#endif //_ENTROY_CALIBRATOR_H
/**
 * @brief Read a text file line by line into a vector of strings.
 *
 * Every line is stored as-is (empty lines included). A missing newline after
 * the last line does not drop that line, and lines of any length are accepted.
 *
 * @param listf Path of the list file.
 * @param lists Receives the lines; cleared first.
 * @return Number of lines stored in lists; 0 when the file cannot be opened.
 */
static int file2list(const std::string &listf, std::vector<std::string> &lists){
    lists.clear();

    std::ifstream file(listf);
    if (!file.is_open()) {
        // The old eof()-driven loop never terminated for an unopened file.
        return 0;
    }

    std::string line;
    while (std::getline(file, line)) {
        lists.push_back(line);
    }
    return static_cast<int>(lists.size());
}
/**
 * @brief Load and preprocess every image named in a list file.
 *
 * Each image is resized to the input width/height, converted to the requested
 * channel count, converted to float, has a fixed per-channel value subtracted,
 * and is stored as one planar float vector.
 *
 * @param file_list Path of a text file with one image path per line.
 * @param input_dim Input dimensions read as d[0]=channels, d[1]=height, d[2]=width.
 * @param out_data  Receives one preprocessed vector per image.
 *
 * NOTE(review): an unreadable image path is not handled here; cv::imread would
 * return an empty matrix and the following OpenCV calls are left to deal with it.
 */
void Trt::img_process(string file_list,Dims3 input_dim,vector<vector<float> >&out_data)
{
    const int channel = input_dim.d[0];
    const int heigh = input_dim.d[1];
    const int width = input_dim.d[2];
    const int inlen = channel * heigh * width;

    std::vector<std::string> image_list;
    const int num_imgs = file2list(file_list, image_list);

    std::vector<std::vector<float> > temp;
    for (int i = 0; i < num_imgs; i++)
    {
        // Build the sample directly inside temp; the planes below alias its storage.
        temp.emplace_back(inlen, 0.0f);
        float* data = temp.back().data();

        std::cout << image_list[i] << std::endl;
        cv::Mat img_in = cv::imread(image_list[i]);

        const cv::Size size(width, heigh);
        cv::Mat sample_resized;
        cv::resize(img_in, sample_resized, size, 0, 0, cv::INTER_CUBIC);

        // Match the image's channel count to the one requested by input_dim.
        cv::Mat sample;
        if (sample_resized.channels() == 3 && channel == 1)
            cv::cvtColor(sample_resized, sample, CV_BGR2GRAY);
        else if (sample_resized.channels() == 4 && channel == 1)
            cv::cvtColor(sample_resized, sample, CV_BGRA2GRAY);
        else if (sample_resized.channels() == 4 && channel == 3)
            cv::cvtColor(sample_resized, sample, CV_BGRA2BGR);
        else if (sample_resized.channels() == 1 && channel == 3)
            cv::cvtColor(sample_resized, sample, CV_GRAY2BGR);
        else
            sample = sample_resized;

        // Convert the channel-adjusted image. The old code converted
        // sample_resized here, silently discarding the cvtColor result above.
        cv::Mat sample_float;
        if (channel == 3)
            sample.convertTo(sample_float, CV_32FC3);
        else
            sample.convertTo(sample_float, CV_32FC1);

        // Presumably per-channel mean values in BGR order (confirm against training setup).
        cv::Mat sample_normalized;
        const cv::Scalar mean(104.0f, 117.0f, 123.0f);
        cv::subtract(sample_float, mean, sample_normalized);
        std::cout << sample_normalized.size() << std::endl;

        // Wrap each plane of the sample buffer so cv::split writes into it.
        // cv::Mat takes (rows, cols, ...), i.e. (height, width): with the
        // arguments swapped, a non-square input makes cv::split allocate fresh
        // matrices and the buffer stays zero-filled.
        std::vector<cv::Mat> input_channels;
        for (int ii = 0; ii < channel; ii++) {
            cv::Mat plane(heigh, width, CV_32FC1, data);
            input_channels.push_back(plane);
            data += width * heigh;
        }
        cv::split(sample_normalized, input_channels);
    }

    std::cout << "temp size " << temp.size() << std::endl;
    out_data.swap(temp);
}