1 sampleUffSSD_plugin
config.py 定义了通过修改 "op" 字段自定义的 TensorRT 插件，供 "convert-to-uff" 命令使用。插件参数的名称应该与 TensorRT 所期望的插件参数完全匹配。比如 "GridAnchor" 插件，在 config.py 中应该具有以下内容:
PriorBox = gs.create_plugin_node(name="GridAnchor",
op="GridAnchor_TRT",
numLayers=6,
minSize=0.2,
maxSize=0.95,
aspectRatios=[1.0, 2.0, 0.5, 3.0, 0.33],
variance=[0.1,0.1,0.2,0.2],
featureMapShapes=[19, 10, 5, 3, 2, 1])
其中 GridAnchor_TRT 与注册的插件名相同，参数名称和大小与插件中相同。如果正确定义了 config.py，nvUffParser 就可以调用合适的插件来解析网络。
GridAnchorGeneration plugin:为不同的feature map 计算每个网格cell 的bounding box
- [19x19] feature map: 3 boxes (19x19x3x4(co-ordinates/box))
- [10x10] feature map: 6 boxes (10x10x6x4)
- [5x5] feature map: 6 boxes (5x5x6x4)
- [3x3] feature map: 6 boxes (3x3x6x4)
- [2x2] feature map: 6 boxes (2x2x6x4)
- [1x1] feature map: 6 boxes (1x1x6x4)
NMS plugin
2 API and ops
- activation layer: kRELU
- concatenation layer
- Padding layer
- plugin layer
- pooling layer
- scale layer
- shuffle layer
3 run
- convert-to-uff data/frozen_inference_graph.pb -O NMS -p config.py
- mkdir build && cd build && cmake .. && make -j8 && ./perception [--int8]
4 coding
config.py
#
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import graphsurgeon as gs
import tensorflow as tf
# Placeholder input node: batch 1, CHW 3x300x300 -- must match the dims
# registered with the UFF parser in sampleUffSSD.cpp (DimsCHW(3, 300, 300)).
Input = gs.create_node("Input",
    op="Placeholder",
    dtype=tf.float32,
    shape=[1, 3, 300, 300])
# GridAnchor_TRT plugin: generates prior boxes for the 6 SSD feature maps
# (19x19, 10x10, 5x5, 3x3, 2x2, 1x1). Field names and element counts must
# exactly match what the registered TensorRT plugin expects.
PriorBox = gs.create_plugin_node(name="GridAnchor", op="GridAnchor_TRT",
    numLayers=6,
    minSize=0.2,
    maxSize=0.95,
    aspectRatios=[1.0, 2.0, 0.5, 3.0, 0.33],
    variance=[0.1,0.1,0.2,0.2],
    featureMapShapes=[19, 10, 5, 3, 2, 1])
# NMS_TRT plugin: decodes boxes and performs non-maximum suppression.
# inputOrder=[0, 2, 1] remaps the plugin inputs (loc, conf, priorbox) to the
# order the frozen TF graph provides them in -- assumed to match this
# particular model; verify if the model changes.
NMS = gs.create_plugin_node(name="NMS", op="NMS_TRT",
    shareLocation=1,
    varianceEncodedInTarget=0,
    backgroundLabelId=0,
    confidenceThreshold=1e-8,
    nmsThreshold=0.6,
    topK=100,
    keepTopK=100,
    numClasses=91,
    inputOrder=[0, 2, 1],
    confSigmoid=1,
    isNormalized=1)
# Concatenation nodes feeding the NMS plugin inputs.
concat_priorbox = gs.create_node(name="concat_priorbox", op="ConcatV2", dtype=tf.float32, axis=2)
concat_box_loc = gs.create_plugin_node("concat_box_loc", op="FlattenConcat_TRT", dtype=tf.float32, axis=1,
    ignoreBatch=0)
concat_box_conf = gs.create_plugin_node("concat_box_conf", op="FlattenConcat_TRT", dtype=tf.float32, axis=1,
    ignoreBatch=0)
# Map TF namespaces/nodes to the plugin nodes that replace them when the
# graph is collapsed by preprocess() below.
namespace_plugin_map = {
    "MultipleGridAnchorGenerator": PriorBox,
    "Postprocessor": NMS,
    "Preprocessor": Input,
    "ToFloat": Input,
    "image_tensor": Input,
    "MultipleGridAnchorGenerator/Concatenate": concat_priorbox,
    "MultipleGridAnchorGenerator/Identity": concat_priorbox,
    "concat": concat_box_loc,
    "concat_1": concat_box_conf
}
def preprocess(dynamic_graph):
    """Entry point invoked by convert-to-uff (-p config.py).

    Collapses the mapped TF namespaces into the TensorRT plugin nodes defined
    above, then trims the graph outputs so NMS is the single output node.
    """
    # Now create a new graph by collapsing namespaces
    dynamic_graph.collapse_namespaces(namespace_plugin_map)
    # Remove the outputs, so we just have a single output node (NMS).
    dynamic_graph.remove(dynamic_graph.graph_outputs, remove_exclusive_dependencies=False)
sampleUffSSD.cpp
#include "common/BatchStream.h"
#include "common/EntropyCalibrator.h"
#include "common/argsParser.h"
#include"common/buffers.h"
#include"common/common.h"
#include"common/logger.h"
#include "NvInfer.h"
#include "NvUffParser.h"
#include <cuda_runtime_api.h>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <sstream>
//! Test name reported to the sample logger.
const std::string gSampleName = "TensorRT.sample_uff_ssd";
//! Input images (located via the data directories); the batch size is derived from this list.
const std::vector<std::string> gImgFnames = {"dog.ppm", "bus.ppm"};
//!
//! \brief Parameters specific to the UFF SSD sample, extending the common sample params.
//!
struct SampleUffSSDParams : public samplesCommon::SampleParams
{
    std::string uffFileName;    //!< Filename of the serialized UFF model
    std::string labelsFileName; //!< Filename of the COCO class labels file
    int32_t outputClsSize;      //!< Number of output classes
    int32_t calBatchSize;       //!< Batch size used during INT8 calibration
    int32_t nbCalBatches;       //!< Number of batches to feed the calibrator
    int32_t keepTopK;           //!< Detections kept per image after NMS
    float visualThreshold;      //!< Minimum confidence for a detection to count
};
//!
//! \brief Implements the UFF SSD sample: builds a TensorRT engine from a UFF
//!        model, runs inference on the sample images, and verifies detections.
//!
class SampleUffSSD
{
    template<typename T >
    using SampleUniquePtr = std::unique_ptr<T, samplesCommon::InferDeleter>;
public:
    SampleUffSSD(const SampleUffSSDParams& params)
        : mParams(params)
        ,mEngine(nullptr)
    {
    }
    //! \brief Parses the UFF model and builds the TensorRT engine.
    bool build();
    //! \brief Runs one inference pass over the sample images.
    bool infer();
    //! \brief Shuts down libraries used by the sample.
    bool teardown();
private:
    SampleUffSSDParams mParams;                          //!< Parameters for the sample
    nvinfer1::Dims mInputDims;                           //!< Dimensions of the network input (CHW)
    std::vector<samplesCommon::PPM<3, 300, 300>> mPPMs;  //!< Decoded test images
    std::shared_ptr<nvinfer1::ICudaEngine> mEngine;      //!< The built TensorRT engine
    //! \brief Registers I/O, parses the UFF file, and configures the builder.
    bool constructNetwork(SampleUniquePtr<nvinfer1::IBuilder>& builder,
        SampleUniquePtr<nvinfer1::INetworkDefinition>& network,
        SampleUniquePtr<nvinfer1::IBuilderConfig>& config,
        SampleUniquePtr<nvuffparser::IUffParser>& parser);
    //! \brief Copies the PPM images into the input host buffer (planar CHW, scaled to [-1, 1]).
    bool processInput(const samplesCommon::BufferManager& buffers);
    //! \brief Checks the detections against the expected classes per image.
    bool verifyOutput(const samplesCommon::BufferManager& buffers);
};
bool SampleUffSSD::build()
{
initLibNvInferPlugins(&sample::gLogger.getTRTLogger(), "");
auto builder = SampleUniquePtr<nvinfer1::IBuilder>(nvinfer1::createInferBuilder(sample::gLogger.getTRTLogger()));
if(!builder)
{
return false;
}
auto network = SampleUniquePtr<nvinfer1::INetworkDefinition>(builder->createNetwork());
if(!network)
{
return false;
}
auto config = SampleUniquePtr<nvinfer1::IBuilderConfig>(builder->createBuilderConfig());
if(!config)
{
return false;
}
auto parser = SampleUniquePtr<nvuffparser::IUffParser>(nvuffparser::createUffParser());
if(!parser)
{
return false;
}
auto constructed = constructNetwork(builder, network, config, parser);
if(!constructed)
{
return false;
}
assert(network->getNbInputs() == 1);
mInputDims = network->getInput(0)->getDimensions();
assert(mInputDims.nbDims == 3);
assert(network->getNbOutputs() == 2);
return true;
}
//!
//! \brief Registers the network I/O, parses the UFF model, and configures the
//!        builder (precision flags, DLA, calibration) before building the engine.
//!
//! \return true if the UFF model was parsed and the engine was built.
//!
bool SampleUffSSD::constructNetwork(SampleUniquePtr<nvinfer1::IBuilder>& builder,
    SampleUniquePtr<nvinfer1::INetworkDefinition>& network, SampleUniquePtr<nvinfer1::IBuilderConfig>& config,
    SampleUniquePtr<nvuffparser::IUffParser>& parser)
{
    // Register the single CHW 3x300x300 input and the NMS output. Check the
    // return codes -- the original ignored registration failures.
    if (!parser->registerInput(mParams.inputTensorNames[0].c_str(), DimsCHW(3, 300, 300), nvuffparser::UffInputOrder::kNCHW))
    {
        sample::gLogError << "Failed to register input " << mParams.inputTensorNames[0] << std::endl;
        return false;
    }
    if (!parser->registerOutput(mParams.outputTensorNames[0].c_str()))
    {
        sample::gLogError << "Failed to register output " << mParams.outputTensorNames[0] << std::endl;
        return false;
    }
    if (!parser->parse(locateFile(mParams.uffFileName, mParams.dataDirs).c_str(), *network, DataType::kFLOAT))
    {
        return false;
    }

    builder->setMaxBatchSize(mParams.batchSize);
    config->setMaxWorkspaceSize(1_GiB);
    if (mParams.fp16)
    {
        config->setFlag(BuilderFlag::kFP16);
    }

    // Calibrator lifetime note: it must outlive buildEngineWithConfig(),
    // which holds only a raw pointer; both live in this scope, so this is safe.
    std::unique_ptr<IInt8Calibrator> calibrator;
    if (mParams.int8)
    {
        sample::gLogInfo << "Using Entropy Calibrator 2" << std::endl;
        const std::string listFileName = "list.txt";
        const int32_t imageC = 3;
        const int32_t imageH = 300;
        const int32_t imageW = 300;
        nvinfer1::DimsNCHW imageDims{};
        imageDims = nvinfer1::DimsNCHW{mParams.calBatchSize, imageC, imageH, imageW};
        BatchStream calibrationStream(mParams.calBatchSize, mParams.nbCalBatches,
            imageDims, listFileName, mParams.dataDirs);
        calibrator.reset(new Int8EntropyCalibrator2<BatchStream>(
            calibrationStream, 0, "UffSSD", mParams.inputTensorNames[0].c_str()));
        config->setFlag(BuilderFlag::kINT8);
        config->setInt8Calibrator(calibrator.get());
    }

    // Honor --useDLACore: mParams.dlaCore was parsed from the command line
    // but never applied in the original (enableDLA is a no-op for dlaCore < 0).
    samplesCommon::enableDLA(builder.get(), config.get(), mParams.dlaCore);

    mEngine = std::shared_ptr<nvinfer1::ICudaEngine>(
        builder->buildEngineWithConfig(*network, *config), samplesCommon::InferDeleter());
    return mEngine != nullptr;
}
bool SampleUffSSD::infer()
{
samplesCommon::BufferManager buffers(mEngine, mParams.batchSize);
auto context = SampleUniquePtr<nvinfer1::IExecutionContext>(mEngine->createExecutionContext());
if(!context)
{
return false;
}
assert(mParams.inputTensorNames.size() == 1);
if (!processInput(buffers))
{
return false;
}
buffers.copyInputToDevice();
const bool status = context->execute(mParams.batchSize, buffers.getDeviceBindings().data());
if (!status)
{
return false;
}
buffers.copyOutputToHost();
if (!verifyOutput(buffers))
{
return false;
}
return true;
}
//!
//! \brief Cleans up library state after the sample has finished.
//!
//! \note No other part of the protocol buffers library may be used after
//!       shutdownProtobufLibrary() has been called.
//!
bool SampleUffSSD::teardown()
{
    nvuffparser::shutdownProtobufLibrary();
    return true;
}
//!
//! \brief Loads the sample PPM images and writes them into the input host
//!        buffer as planar CHW floats scaled to [-1, 1].
//!
//! \param buffers Buffer manager that owns the host-side input binding.
//! \return true (file reads are assumed to succeed, as in the original).
//!
bool SampleUffSSD::processInput(const samplesCommon::BufferManager& buffers)
{
    const int32_t channels = mInputDims.d[0];
    const int32_t height = mInputDims.d[1];
    const int32_t width = mInputDims.d[2];
    const int32_t batchSize = mParams.batchSize;

    // One PPM per batch slot; batchSize was initialized from gImgFnames.size().
    mPPMs.resize(batchSize);
    assert(mPPMs.size() == gImgFnames.size());
    for (int32_t img = 0; img < batchSize; ++img)
    {
        readPPMFile(locateFile(gImgFnames[img], mParams.dataDirs), mPPMs[img]);
    }

    float* hostDataBuffer = static_cast<float*>(buffers.getHostBuffer(mParams.inputTensorNames[0]));
    const int32_t volChl = height * width;
    const int32_t volImg = channels * volChl;

    // Convert interleaved HWC bytes to planar CHW floats, mapping the pixel
    // range [0, 255] onto [-1, 1]. The color image to input should be in BGR order.
    for (int32_t img = 0; img < mParams.batchSize; ++img)
    {
        for (int32_t c = 0; c < channels; ++c)
        {
            for (int32_t px = 0; px < volChl; ++px)
            {
                hostDataBuffer[img * volImg + c * volChl + px]
                    = (2.0 / 255.0) * float(mPPMs[img].buffer[px * channels + c]) - 1.0;
            }
        }
    }
    return true;
}
//!
//! \brief Checks the NMS detections for each image, logs them, and writes
//!        annotated PPMs.
//!
//! \param buffers Buffer manager holding the host-side output bindings.
//! \return true if every image has at least one detection above the visual
//!         threshold and at least one detection of the expected class
//!         (image 0: dog; image 1: truck or car).
//!
bool SampleUffSSD::verifyOutput(const samplesCommon::BufferManager& buffers)
{
    const int32_t inputH = mInputDims.d[1];
    const int32_t inputW = mInputDims.d[2];
    const int32_t batchSize = mParams.batchSize;
    const int32_t keepTopK = mParams.keepTopK;
    const float visualThreshold = mParams.visualThreshold;
    const int32_t outputClsSize = mParams.outputClsSize;

    // Output binding 0: detections; binding 1: number of kept detections per image.
    const float* detectionOut = static_cast<const float*>(buffers.getHostBuffer(mParams.outputTensorNames[0]));
    const int32_t* keepCount = static_cast<const int32_t*>(buffers.getHostBuffer(mParams.outputTensorNames[1]));

    // Read COCO class labels from file. The loop is bounded by outputClsSize:
    // the original wrote past the end of `classes` when the label file
    // contained more lines than outputClsSize.
    std::vector<std::string> classes(outputClsSize);
    {
        std::ifstream labelFile(locateFile(mParams.labelsFileName, mParams.dataDirs));
        std::string line;
        int32_t id = 0;
        while (id < outputClsSize && getline(labelFile, line))
        {
            classes[id++] = line;
        }
    }

    bool pass = true;
    for (int32_t bi = 0; bi < batchSize; ++bi)
    {
        int32_t numDetections = 0;
        bool correctDetection = false;
        for (int32_t i = 0; i < keepCount[bi]; ++i)
        {
            // Each detection is 7 floats, stored in the below order:
            // [image_id, label, confidence, xmin, ymin, xmax, ymax]
            const float* det = &detectionOut[0] + (bi * keepTopK + i) * 7;
            if (det[2] < visualThreshold)
            {
                continue;
            }
            const int32_t detection = det[1];
            assert(detection < outputClsSize);
            const std::string outFname = classes[detection] + "-" + std::to_string(det[2]) + ".ppm";
            numDetections++;
            if ((bi == 0 && classes[detection] == "dog")
                || (bi == 1 && (classes[detection] == "truck" || classes[detection] == "car")))
            {
                correctDetection = true;
            }
            sample::gLogInfo << "Detected " << classes[detection].c_str() << " in image "
                << static_cast<int32_t>(det[0]) << " (" << mPPMs[bi].fileName.c_str() << ")"
                << " with confidence " << det[2] * 100.f << " and coordinates (" << det[3] * inputW << ", "
                << det[4] * inputH << ")"
                << ", (" << det[5] * inputW << ", " << det[6] * inputH << ")." << std::endl;
            sample::gLogInfo << "Result stored in: " << outFname.c_str() << std::endl;
            samplesCommon::writePPMFileWithBBox(
                outFname, mPPMs[bi], {det[3] * inputW, det[4] * inputH, det[5] * inputW, det[6] * inputH});
        }
        pass &= correctDetection;
        pass &= numDetections >= 1;
    }
    return pass;
}
//!
//! \brief Builds the sample parameter struct from the parsed command-line args.
//!
//! \param args Common sample arguments parsed from the command line.
//! \return Fully populated SampleUffSSDParams for this run.
//!
SampleUffSSDParams initializeSampleParams(const samplesCommon::Args& args)
{
    SampleUffSSDParams params;

    // Use the user-supplied data directories, or fall back to the default.
    params.dataDirs = args.dataDirs.empty() ? std::vector<std::string>{"data/"} : args.dataDirs;

    // Model and label files plus the tensor bindings registered with the parser.
    params.uffFileName = "frozen_inference_graph.uff";
    params.labelsFileName = "ssd_coco_labels.txt";
    params.inputTensorNames.push_back("Input");
    params.outputTensorNames.push_back("NMS");
    params.outputTensorNames.push_back("NMS_1");

    // One batch slot per sample image.
    params.batchSize = gImgFnames.size();

    // Precision / device options forwarded from the command line.
    params.dlaCore = args.useDLACore;
    params.int8 = args.runInInt8;
    params.fp16 = args.runInFp16;

    // Post-processing configuration.
    params.outputClsSize = 91;
    params.calBatchSize = 10;
    params.nbCalBatches = 10;
    params.keepTopK = 100;
    params.visualThreshold = 0.5;

    return params;
}
//!
//! \brief Prints the usage/help information for this sample.
//!
//! The default-directory text is kept in sync with initializeSampleParams(),
//! which only searches "data/" (the original help claimed directories the
//! code never uses), and the usage line now lists the supported --fp16/--int8
//! flags documented below.
//!
void printHelpInfo()
{
    std::cout << "Usage: ./sample_uff_ssd [-h or --help] [-d or --datadir=<path to data directory>] [--useDLACore=<N>] "
                 "[--fp16] [--int8]"
              << std::endl;
    std::cout << "--help Display help information" << std::endl;
    std::cout << "--datadir Specify path to a data directory, overriding the default. This option can be used "
                 "multiple times to add multiple directories. If no data directories are given, the default is to use "
                 "data/"
              << std::endl;
    std::cout << "--useDLACore Specify a DLA engine for layers that support DLA. Value can range from 0 to N-1, "
                 "where N is the number of DLA engines on the platform."
              << std::endl;
    std::cout << "--fp16 Specify to run in fp16 mode." << std::endl;
    std::cout << "--int8 Specify to run in int8 mode." << std::endl;
}
//!
//! \brief Sample entry point: parses arguments, then builds, runs, and tears
//!        down the SSD sample, reporting pass/fail through the sample logger.
//!
int32_t main(int32_t argc, char** argv)
{
    samplesCommon::Args args;
    if (!samplesCommon::parseArgs(args, argc, argv))
    {
        sample::gLogError << "Invalid arguments" << std::endl;
        printHelpInfo();
        return EXIT_FAILURE;
    }
    if (args.help)
    {
        printHelpInfo();
        return EXIT_SUCCESS;
    }

    auto sampleTest = sample::gLogger.defineTest(gSampleName, argc, argv);
    sample::gLogger.reportTestStart(sampleTest);

    SampleUffSSD sample(initializeSampleParams(args));

    // Build, run, and tear down; the first failing stage fails the test.
    sample::gLogInfo << "Building inference engine for SSD" << std::endl;
    if (!sample.build())
    {
        return sample::gLogger.reportFail(sampleTest);
    }

    sample::gLogInfo << "Running inference engine for SSD" << std::endl;
    if (!sample.infer())
    {
        return sample::gLogger.reportFail(sampleTest);
    }

    if (!sample.teardown())
    {
        return sample::gLogger.reportFail(sampleTest);
    }
    return sample::gLogger.reportPass(sampleTest);
}