The sample demonstrates how to use the API to explicitly specify reformat-free I/O at Float16 and INT8 precision with one of the following formats:
- TensorFormat::kLINEAR
- TensorFormat::kCHW2
- TensorFormat::kHWC8
Implementation:
ITensor::setAllowedFormats: specifies which formats the I/O tensors are expected to support, so that no reformatting operations are needed to convert them to or from FP32.
BuilderFlag::kSTRICT_TYPES: set on the builder configuration so the builder chooses a reformat-free path rather than the fastest path (see the sketch after this list).
- If no reformat-free path is available, the fastest path (with reformatting) is chosen instead, and the following warning message is printed:
- Warning: no implementation obeys reformatting-free rules,…
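A minimal sketch of this pattern, assuming a parsed network and a builder config already exist as in the listing below (the FP16/kHWC8 combination is just one of the formats listed above):
nvinfer1::ITensor* in = network->getInput(0);
nvinfer1::ITensor* out = network->getOutput(0);
// Request FP16 tensors in kHWC8 layout at the network boundary, so no FP32
// reformatting kernels are inserted around the I/O.
in->setType(nvinfer1::DataType::kHALF);
out->setType(nvinfer1::DataType::kHALF);
nvinfer1::TensorFormats formats = 1U << static_cast<uint32_t>(nvinfer1::TensorFormat::kHWC8);
in->setAllowedFormats(formats);
out->setAllowedFormats(formats);
// Make the builder honor the requested types/formats instead of silently falling
// back to the fastest (reformatting) path when a reformat-free one exists.
config->setFlag(nvinfer1::BuilderFlag::kSTRICT_TYPES);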
7.7.2 code
#include "common/BatchStream.h"
#include "common/EntropyCalibrator.h"
#include "common/argsParser.h"
#include "common/buffers.h"
#include "common/common.h"
#include "common/logger.h"
#include "common/logging.h"
#include "common/parserOnnxConfig.h"
#include "NvInfer.h"
#include <cuda_runtime_api.h>
#include <algorithm>
#include <cmath>
#include <fstream>
#include <iomanip>
#include <random>
const std::string gSampleName = "TensorRT.sample_dynamic_reshape";
class SampleDynamicReshape
{
template <typename T>
using SampleUniquePtr = std::unique_ptr<T,samplesCommon::InferDeleter>;
public:
SampleDynamicReshape(const samplesCommon::OnnxSampleParams& params)
:mParams(params)
{
}
bool build();
bool prepare();
bool infer();
private:
bool buildPreprocessorEngine(const SampleUniquePtr<nvinfer1::IBuilder>& builder);
bool buildPredictionEngine(const SampleUniquePtr<nvinfer1::IBuilder>& builder);
Dims loadPGMFile(const std::string& fileName);
bool validateOutput(int digit);
samplesCommon::OnnxSampleParams mParams;
nvinfer1::Dims mPredictionInputDims;
nvinfer1::Dims mPredictionOutputDims;
SampleUniquePtr<nvinfer1::ICudaEngine> mPreprocessorEngine{nullptr},mPredictionEngine{nullptr};
SampleUniquePtr<nvinfer1::IExecutionContext> mPreprocessorContext{nullptr},mPredictionContext{nullptr};
samplesCommon::ManagedBuffer mInput{};
samplesCommon::DeviceBuffer mPredictionInput{};
samplesCommon::ManagedBuffer mOutput{};
template <typename T>
SampleUniquePtr<T> makeUnique(T* t)
{
return SampleUniquePtr<T>{t};
}
};
bool SampleDynamicReshape::build()
{
auto builder = makeUnique(nvinfer1::createInferBuilder(sample::gLogger.getTRTLogger()));
if(!builder)
{
sample::gLogError <<"Create inference builder failed."<<std::endl;
return false;
}
return buildPredictionEngine(builder) && buildPreprocessorEngine(builder);
}
bool SampleDynamicReshape::buildPreprocessorEngine(const SampleUniquePtr<nvinfer1::IBuilder> &builder)
{
auto preprocessorNetwork = makeUnique(
builder->createNetworkV2(1U << static_cast<uint32_t>(NetworkDefinitionCreationFlag::kEXPLICIT_BATCH)));
if(!preprocessorNetwork)
{
sample::gLogError <<"Create network failed. "<<std::endl;
return false;
}
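// The preprocessor input uses wildcard (-1) dimensions for batch, height, and width,
// so it accepts digit images of any size; the resize layer below rescales them to the
// fixed shape the prediction engine expects (mPredictionInputDims).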
auto input = preprocessorNetwork->addInput("input",nvinfer1::DataType::kFLOAT,Dims4{-1,1,-1,-1});
auto resizeLayer = preprocessorNetwork->addResize(*input);
resizeLayer->setOutputDimensions(mPredictionInputDims);
preprocessorNetwork->markOutput(*resizeLayer->getOutput(0));
auto preprocessorConfig = makeUnique(builder->createBuilderConfig());
if(!preprocessorConfig)
{
sample::gLogError<<"Create builder config failed."<<std::endl;
return false;
}
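// The optimization profile bounds the dynamic dimensions: any input between 1x1x1x1
// and 1x1x56x56 is legal at runtime, and 1x1x28x28 is the shape TensorRT optimizes for.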
auto profile = builder->createOptimizationProfile();
profile->setDimensions(input->getName(),OptProfileSelector::kMIN,Dims4{1,1,1,1});
profile->setDimensions(input->getName(),OptProfileSelector::kOPT,Dims4{1,1,28,28});
profile->setDimensions(input->getName(),OptProfileSelector::kMAX,Dims4{1,1,56,56});
preprocessorConfig->addOptimizationProfile(profile);
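// INT8 calibration runs with fixed shapes, so a separate calibration profile pins all
// dimensions to the calibration batch size.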
auto profileCalib = builder->createOptimizationProfile();
const int calibBatchSize{256};
profileCalib->setDimensions(input->getName(),OptProfileSelector::kMIN,Dims4{calibBatchSize,1,28,28});
profileCalib->setDimensions(input->getName(),OptProfileSelector::kOPT,Dims4{calibBatchSize,1,28,28});
profileCalib->setDimensions(input->getName(),OptProfileSelector::kMAX,Dims4{calibBatchSize,1,28,28});
preprocessorConfig->setCalibrationProfile(profileCalib);
std::unique_ptr<IInt8Calibrator> calibrator;
if(mParams.int8)
{
preprocessorConfig->setFlag(BuilderFlag::kINT8);
const int nCalibBatches{10};
MNISTBatchStream calibrationStream(
calibBatchSize, nCalibBatches, "train-images-idx3-ubyte", "train-labels-idx1-ubyte", mParams.dataDirs);
calibrator.reset(
new Int8EntropyCalibrator2<MNISTBatchStream>(calibrationStream, 0, "MNISTPreprocessor", "input"));
preprocessorConfig->setInt8Calibrator(calibrator.get());
}
mPreprocessorEngine = makeUnique(builder->buildEngineWithConfig(*preprocessorNetwork,*preprocessorConfig));
if(!mPreprocessorEngine)
{
sample::gLogError << "Preprocessor engine build failed."<<std::endl;
return false;
}
sample::gLogInfo << "Profile dimensions in preprocessor engine:: "<<std::endl;
sample::gLogInfo <<" Mninmum = "<<mPreprocessorEngine->getProfileDimensions(0,0,OptProfileSelector::kMIN)
<<std::endl;
sample::gLogInfo <<" Optimum = "<<mPreprocessorEngine->getProfileDimensions(0,0,OptProfileSelector::kOPT)
<<std::endl;
sample::gLogInfo <<" Maximum = "<<mPreprocessorEngine->getProfileDimensions(0,0,OptProfileSelector::kMAX)
<<std::endl;
return true;
}
bool SampleDynamicReshape::buildPredictionEngine(const SampleUniquePtr<nvinfer1::IBuilder> &builder)
{
const auto explicitBatch = 1U << static_cast<uint32_t>(NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
auto network = makeUnique(builder->createNetworkV2(explicitBatch));
if(!network)
{
sample::gLogError<<"Creat network failed."<<std::endl;
return false;
}
auto parser = samplesCommon::infer_object(nvonnxparser::createParser(*network,sample::gLogger.getTRTLogger()));
bool parsingSuccess = parser->parseFromFile(locateFile(mParams.onnxFileName,mParams.dataDirs).c_str(),
static_cast<int>(sample::gLogger.getReportableSeverity()));
if(!parsingSuccess)
{
sample::gLogError<<"Failed to parse model."<<std::endl;
return false;
}
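// The ONNX MNIST model ends in raw logits; append a Softmax over the class axis
// (axis 1, hence the bitmask 1<<1) and re-mark the output so the engine returns
// per-class probabilities.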
auto softmax = network->addSoftMax(*network->getOutput(0));
softmax->setAxes(1<<1);
network->unmarkOutput(*network->getOutput(0));
network->markOutput(*softmax->getOutput(0));
mPredictionInputDims = network->getInput(0)->getDimensions();
mPredictionOutputDims = network->getOutput(0)->getDimensions();
auto config = makeUnique(builder->createBuilderConfig());
if(!config)
{
sample::gLogError<<"Create builder config failed."<<std::endl;
return false;
}
config->setMaxWorkspaceSize(16_MiB);
if(mParams.fp16)
{
config->setFlag(BuilderFlag::kFP16);
}
auto profileCalib = builder->createOptimizationProfile();
const auto inputName = mParams.inputTensorNames[0].c_str();
const int calibBatchSize{1};
profileCalib->setDimensions(inputName, OptProfileSelector::kMIN, Dims4{calibBatchSize, 1, 28, 28});
profileCalib->setDimensions(inputName, OptProfileSelector::kOPT, Dims4{calibBatchSize, 1, 28, 28});
profileCalib->setDimensions(inputName, OptProfileSelector::kMAX, Dims4{calibBatchSize, 1, 28, 28});
config->setCalibrationProfile(profileCalib);
std::unique_ptr<IInt8Calibrator> calibrator;
if (mParams.int8)
{
config->setFlag(BuilderFlag::kINT8);
int nCalibBatches{10};
MNISTBatchStream calibrationStream(
calibBatchSize, nCalibBatches, "train-images-idx3-ubyte", "train-labels-idx1-ubyte", mParams.dataDirs);
calibrator.reset(
new Int8EntropyCalibrator2<MNISTBatchStream>(calibrationStream, 0, "MNISTPrediction", inputName));
config->setInt8Calibrator(calibrator.get());
}
mPredictionEngine = makeUnique(builder->buildEngineWithConfig(*network, *config));
if (!mPredictionEngine)
{
sample::gLogError << "Prediction engine build failed." << std::endl;
return false;
}
return true;
}
bool SampleDynamicReshape::prepare()
{
mPreprocessorContext = makeUnique(mPreprocessorEngine->createExecutionContext());
if(!mPreprocessorContext)
{
sample::gLogError<<"Preprocessor context build failed."<<std::endl;
return false;
}
mPredictionContext = makeUnique(mPredictionEngine->createExecutionContext());
if(!mPredictionContext)
{
sample::gLogError<<"Prediction contect build failed."<<std::endl;
return false;
}
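// The prediction engine has static I/O shapes, so its input and output buffers can be
// allocated once up front; the preprocessor input buffer is resized per image in infer().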
mPredictionInput.resize(mPredictionInputDims);
mOutput.hostBuffer.resize(mPredictionOutputDims);
mOutput.deviceBuffer.resize(mPredictionOutputDims);
return true;
}
bool SampleDynamicReshape::infer()
{
std::random_device rd{};
std::default_random_engine generator{rd()};
std::uniform_int_distribution<int> digitDistribution{0,9};
int digit = digitDistribution(generator);
Dims inputDims = loadPGMFile(locateFile(std::to_string(digit)+".pgm",mParams.dataDirs));
mInput.deviceBuffer.resize(inputDims);
CHECK(cudaMemcpy(
mInput.deviceBuffer.data(),mInput.hostBuffer.data(),mInput.hostBuffer.nbBytes(),cudaMemcpyHostToDevice));
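// The preprocessor engine has dynamic input dimensions, so the actual shape of this
// image must be set on the execution context before inference can run.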
CHECK_RETURN_W_MSG(mPreprocessorContext->setBindingDimensions(0,inputDims),false,"Invalid binding dimensions.");
// We can only run inference once all dynamic input shapes have been specified.
if (!mPreprocessorContext->allInputDimensionsSpecified())
{
return false;
}
// Run the preprocessor to resize the input to the correct shape
std::vector<void*> preprocessorBindings = {mInput.deviceBuffer.data(), mPredictionInput.data()};
// For engines using full dims, we can use executeV2, which does not include a separate batch size parameter.
bool status = mPreprocessorContext->executeV2(preprocessorBindings.data());
if (!status)
{
return false;
}
std::vector<void*> predictionBindings = {mPredictionInput.data(),mOutput.deviceBuffer.data()};
status = mPredictionContext->executeV2(predictionBindings.data());
if(!status)
{
return false;
}
CHECK(cudaMemcpy(mOutput.hostBuffer.data(),mOutput.deviceBuffer.data(),mOutput.deviceBuffer.nbBytes(),cudaMemcpyDeviceToHost));
return validateOutput(digit);
}
Dims SampleDynamicReshape::loadPGMFile(const std::string& fileName)
{
std::ifstream infile(fileName,std::ifstream::binary);
assert(infile.is_open() && "Attempting to read from a file that is not open.");
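// PGM header: magic number, image dimensions, and the maximum pixel value, followed by
// a single whitespace byte and then the raw pixel data.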
std::string magic;
int h,w,max;
infile>>magic>>h>>w>>max;
infile.seekg(1,infile.cur);
Dims4 inputDims{1,1,h,w};
size_t vol = samplesCommon::volume(inputDims);
std::vector<uint8_t> fileData(vol);
infile.read(reinterpret_cast<char*>(fileData.data()),vol);
sample::gLogInfo <<"Input: \n";
for(size_t i=0;i<vol;i++)
{
sample::gLogInfo << (" .:-=+*#%@"[fileData[i] / 26]) << (((i + 1) % w) ? "" : "\n");
}
sample::gLogInfo<<std::endl;
mInput.hostBuffer.resize(inputDims);
float* hostDataBuffer = static_cast<float*>(mInput.hostBuffer.data());
std::transform(fileData.begin(),fileData.end(),hostDataBuffer,
[](uint8_t x){return 1.0-static_cast<float>(x/255.0);});
return inputDims;
}
bool SampleDynamicReshape::validateOutput(int digit)
{
const float* buffRaw = static_cast<const float*>(mOutput.hostBuffer.data());
std::vector<float> prob(buffRaw,buffRaw+mOutput.hostBuffer.size());
int curIndex{0};
for(const auto&elem : prob)
{
sample::gLogInfo <<"Prob "<<curIndex<<" "<<std::fixed<<std::setw(5)<<std::setprecision(4)
<<" "
<<"Class "<<curIndex<<": "<<std::string(int(std::floor(elem*10 + 0.5f)),'*')
<<std::endl;
++curIndex;
}
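// The predicted digit is the class with the highest probability.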
int predictedDigit = std::max_element(prob.begin(),prob.end())-prob.begin();
return digit == predictedDigit;
}
samplesCommon::OnnxSampleParams initializeSampleParams(const samplesCommon::Args & args)
{
samplesCommon::OnnxSampleParams params;
if (args.dataDirs.empty()) //!< Use default directories if user hasn't provided directory paths
{
params.dataDirs.push_back("data/mnist/");
params.dataDirs.push_back("data/samples/mnist/");
}
else //!< Use the data directory provided by the user
{
params.dataDirs = args.dataDirs;
}
params.onnxFileName = "mnist.onnx";
params.inputTensorNames.push_back("Input3");
params.outputTensorNames.push_back("Plus214_Output_0");
params.int8 = args.runInInt8;
params.fp16 = args.runInFp16;
return params;
}
void printHelpInfo()
{
std::cout << "Usage: ./sample_dynamic_reshape [-h or --help] [-d or --datadir=<path to data directory>]"
<< std::endl;
std::cout << "--help, -h Display help information" << std::endl;
std::cout << "--datadir Specify path to a data directory, overriding the default. This option can be used "
"multiple times to add multiple directories. If no data directories are given, the default is to use "
"(data/samples/mnist/, data/mnist/)"
<< std::endl;
std::cout << "--int8 Run in Int8 mode." << std::endl;
std::cout << "--fp16 Run in FP16 mode." << std::endl;
}
int main(int argc,char** argv)
{
samplesCommon::Args args;
bool argsOK = samplesCommon::parseArgs(args, argc, argv);
if (!argsOK)
{
sample::gLogError << "Invalid arguments" << std::endl;
printHelpInfo();
return EXIT_FAILURE;
}
if (args.help)
{
printHelpInfo();
return EXIT_SUCCESS;
}
auto sampleTest = sample::gLogger.defineTest(gSampleName, argc, argv);
sample::gLogger.reportTestStart(sampleTest);
SampleDynamicReshape sample{initializeSampleParams(args)};
if (!sample.build())
{
return sample::gLogger.reportFail(sampleTest);
}
if (!sample.prepare())
{
return sample::gLogger.reportFail(sampleTest);
}
if (!sample.infer())
{
return sample::gLogger.reportFail(sampleTest);
}
return sample::gLogger.reportPass(sampleTest);
}