tensorflow_ssd

1 sampleUffSSD_plugin

config.py defines the custom TensorRT plugin nodes by overriding each node's op attribute; it is consumed by the convert-to-uff command. The plugin parameter names must exactly match the names the TensorRT plugin expects. For the GridAnchor plugin, for example, config.py should contain the following:

PriorBox = gs.create_plugin_node(name="GridAnchor",
    op="GridAnchor_TRT",
    numLayers=6,
    minSize=0.2,
    maxSize=0.95,
    aspectRatios=[1.0, 2.0, 0.5, 3.0, 0.33],
    variance=[0.1, 0.1, 0.2, 0.2],
    featureMapShapes=[19, 10, 5, 3, 2, 1])

Here GridAnchor_TRT is the name under which the plugin is registered with TensorRT, and the parameter names and sizes are identical to those declared in the plugin. If config.py is defined correctly, nvUffParser can invoke the appropriate plugins while parsing the network.
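
To sanity-check that a name used in config.py is actually registered, you can query the TensorRT plugin registry. A minimal sketch, assuming the built-in plugins register under version "1" (verify against your TensorRT release):

#include "NvInfer.h"
#include "NvInferPlugin.h"
#include <iostream>

int main()
{
    // Register TensorRT's built-in plugins with the global plugin registry.
    initLibNvInferPlugins(nullptr, "");
    // Look up the creator by the same name/version pair the parser will use.
    auto* creator = getPluginRegistry()->getPluginCreator("GridAnchor_TRT", "1");
    std::cout << "GridAnchor_TRT registered: " << (creator != nullptr ? "yes" : "no") << std::endl;
    return 0;
}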

GridAnchorGeneration plugin: computes the prior (anchor) bounding boxes for each grid cell of every feature map; the per-layer counts are listed below, with the total worked out after the list.

  • [19x19] feature map: 3 boxes (19x19x3x4(co-ordinates/box))
  • [10x10] feature map: 6 boxes (10x10x6x4)
  • [5x5] feature map: 6 boxes (5x5x6x4)
  • [3x3] feature map: 6 boxes (3x3x6x4)
  • [2x2] feature map: 6 boxes (2x2x6x4)
  • [1x1] feature map: 6 boxes (1x1x6x4)
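
In total that is 19×19×3 + 10×10×6 + 5×5×6 + 3×3×6 + 2×2×6 + 1×1×6 = 1083 + 600 + 150 + 54 + 24 + 6 = 1917 prior boxes, each described by 4 coordinates.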

NMS plugin: decodes the box predictions against these priors and applies non-maximum suppression, producing the final detections in the order [image_id, label, confidence, xmin, ymin, xmax, ymax] (see verifyOutput in the code below).

2 API and ops

  • Activation layer: kRELU
  • Concatenation layer
  • Padding layer
  • Plugin layer
  • Pooling layer
  • Scale layer
  • Shuffle layer
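
These are the layer types the UFF parser instantiates while building the network. For reference, a hedged sketch of how the same layer kinds are created by hand through the TensorRT 7 INetworkDefinition C++ API; the window sizes, padding amounts, and identity Scale weights below are placeholders, not values taken from this sample:

#include "NvInfer.h"
using namespace nvinfer1;

// Sketch only: `network` and `input` are assumed to come from an
// implicit-batch network definition, as used in this sample.
void addLayerExamples(INetworkDefinition& network, ITensor& input)
{
    // Activation layer with kRELU
    IActivationLayer* relu = network.addActivation(input, ActivationType::kRELU);
    // Pooling layer: 2x2 max pooling
    IPoolingLayer* pool = network.addPooling(*relu->getOutput(0), PoolingType::kMAX, DimsHW{2, 2});
    // Padding layer: one pixel of zero padding on every border
    IPaddingLayer* pad = network.addPadding(*pool->getOutput(0), DimsHW{1, 1}, DimsHW{1, 1});
    // Shuffle layer: reshape/transpose; here a bare identity shuffle
    IShuffleLayer* shuffle = network.addShuffle(*pad->getOutput(0));
    // Scale layer: empty weights (count 0) fall back to the identity transform
    Weights empty{DataType::kFLOAT, nullptr, 0};
    IScaleLayer* scale = network.addScale(*shuffle->getOutput(0), ScaleMode::kUNIFORM, empty, empty, empty);
    // Concatenation layer: joins tensors along the channel axis by default
    ITensor* inputs[] = {scale->getOutput(0), scale->getOutput(0)};
    IConcatenationLayer* concat = network.addConcatenation(inputs, 2);
    // Plugin layer: custom ops such as GridAnchor_TRT or NMS_TRT would be added
    // via network.addPluginV2(...); the UFF parser does this automatically here.
    (void) concat;
}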

3 run

  1. convert-to-uff data/frozen_inference_graph.pb -O NMS -p config.py
  2. mkdir build && cd build && cmake .. && make -j8 && ./perception [--int8]

4 coding

config.py

#
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import graphsurgeon as gs
import tensorflow as tf

Input = gs.create_node("Input",
    op="Placeholder",
    dtype=tf.float32,
    shape=[1, 3, 300, 300])
PriorBox = gs.create_plugin_node(name="GridAnchor", op="GridAnchor_TRT",
    numLayers=6,
    minSize=0.2,
    maxSize=0.95,
    aspectRatios=[1.0, 2.0, 0.5, 3.0, 0.33],
    variance=[0.1,0.1,0.2,0.2],
    featureMapShapes=[19, 10, 5, 3, 2, 1])
NMS = gs.create_plugin_node(name="NMS", op="NMS_TRT",
    shareLocation=1,
    varianceEncodedInTarget=0,
    backgroundLabelId=0,
    confidenceThreshold=1e-8,
    nmsThreshold=0.6,
    topK=100,
    keepTopK=100,
    numClasses=91,
    inputOrder=[0, 2, 1],
    confSigmoid=1,
    isNormalized=1)
concat_priorbox = gs.create_node(name="concat_priorbox", op="ConcatV2", dtype=tf.float32, axis=2)
concat_box_loc = gs.create_plugin_node("concat_box_loc", op="FlattenConcat_TRT", dtype=tf.float32, axis=1,
                                       ignoreBatch=0)
concat_box_conf = gs.create_plugin_node("concat_box_conf", op="FlattenConcat_TRT", dtype=tf.float32, axis=1,
                                        ignoreBatch=0)

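# Map namespaces/nodes of the frozen TensorFlow graph to the plugin nodes defined
# above; preprocess() collapses each matched namespace into its replacement node.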
namespace_plugin_map = {
    "MultipleGridAnchorGenerator": PriorBox,
    "Postprocessor": NMS,
    "Preprocessor": Input,
    "ToFloat": Input,
    "image_tensor": Input,
    "MultipleGridAnchorGenerator/Concatenate": concat_priorbox,
    "MultipleGridAnchorGenerator/Identity": concat_priorbox,
    "concat": concat_box_loc,
    "concat_1": concat_box_conf
}

def preprocess(dynamic_graph):
    # Now create a new graph by collapsing namespaces
    dynamic_graph.collapse_namespaces(namespace_plugin_map)
    # Remove the outputs, so we just have a single output node (NMS).
    dynamic_graph.remove(dynamic_graph.graph_outputs, remove_exclusive_dependencies=False)

sampleUffSSD.cpp

#include "common/BatchStream.h"
#include "common/EntropyCalibrator.h"
#include "common/argsParser.h"
#include"common/buffers.h"
#include"common/common.h"
#include"common/logger.h"
#include "NvInfer.h"
#include "NvUffParser.h"
#include <cuda_runtime_api.h>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <sstream>
const std::string gSampleName = "TensorRT.sample_uff_ssd";
const std::vector<std::string> gImgFnames = {"dog.ppm", "bus.ppm"};
struct SampleUffSSDParams : public samplesCommon::SampleParams
{
    std::string uffFileName;
    std::string labelsFileName;
    int32_t outputClsSize;
    int32_t calBatchSize;
    int32_t nbCalBatches;
    int32_t keepTopK;
    float visualThreshold;
};
class SampleUffSSD
{
    template <typename T>
    using SampleUniquePtr = std::unique_ptr<T, samplesCommon::InferDeleter>;
public:
    SampleUffSSD(const SampleUffSSDParams& params)
        : mParams(params)
        , mEngine(nullptr)
    {
    }
    bool build();
    bool infer();
    bool teardown();
private:
    SampleUffSSDParams mParams;
    nvinfer1::Dims mInputDims;
    std::vector<samplesCommon::PPM<3, 300, 300>> mPPMs;
    std::shared_ptr<nvinfer1::ICudaEngine> mEngine;
    bool constructNetwork(SampleUniquePtr<nvinfer1::IBuilder>& builder,
                          SampleUniquePtr<nvinfer1::INetworkDefinition>& network,
                          SampleUniquePtr<nvinfer1::IBuilderConfig>& config,
                          SampleUniquePtr<nvuffparser::IUffParser>& parser);
    bool processInput(const samplesCommon::BufferManager& buffers);
    bool verifyOutput(const samplesCommon::BufferManager& buffers);

};
bool SampleUffSSD::build()
{
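    // initLibNvInferPlugins registers TensorRT's built-in plugins
    // (GridAnchor_TRT, NMS_TRT, FlattenConcat_TRT, ...) with the plugin registry.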
    initLibNvInferPlugins(&sample::gLogger.getTRTLogger(), "");
    auto builder = SampleUniquePtr<nvinfer1::IBuilder>(nvinfer1::createInferBuilder(sample::gLogger.getTRTLogger()));
    if (!builder)
    {
        return false;
    }
    auto network = SampleUniquePtr<nvinfer1::INetworkDefinition>(builder->createNetwork());
    if (!network)
    {
        return false;
    }
    auto config = SampleUniquePtr<nvinfer1::IBuilderConfig>(builder->createBuilderConfig());
    if (!config)
    {
        return false;
    }
    auto parser = SampleUniquePtr<nvuffparser::IUffParser>(nvuffparser::createUffParser());
    if (!parser)
    {
        return false;
    }
    auto constructed = constructNetwork(builder, network, config, parser);
    if (!constructed)
    {
        return false;
    }
    assert(network->getNbInputs() == 1);
    mInputDims = network->getInput(0)->getDimensions();
    assert(mInputDims.nbDims == 3);
    assert(network->getNbOutputs() == 2);
    return true;
}
bool SampleUffSSD::constructNetwork(SampleUniquePtr<nvinfer1::IBuilder>& builder,
    SampleUniquePtr<nvinfer1::INetworkDefinition>& network, SampleUniquePtr<nvinfer1::IBuilderConfig>& config,
    SampleUniquePtr<nvuffparser::IUffParser>& parser)
{
    parser->registerInput(mParams.inputTensorNames[0].c_str(), DimsCHW(3,300,300), nvuffparser::UffInputOrder::kNCHW);
    parser->registerOutput(mParams.outputTensorNames[0].c_str());
    auto parsed = parser->parse(locateFile(mParams.uffFileName, mParams.dataDirs).c_str(), *network, DataType::kFLOAT);
    if (!parsed)
    {
        return false;
    }
    builder->setMaxBatchSize(mParams.batchSize);
    config->setMaxWorkspaceSize(1_GiB);
    if (mParams.fp16)
    {
        config->setFlag(BuilderFlag::kFP16);
    }
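    // INT8 mode needs a calibrator: calibration batches are streamed through the
    // network so that per-tensor dynamic ranges can be computed.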
    std::unique_ptr<IInt8Calibrator> calibrator;
    if (mParams.int8)
    {
        sample::gLogInfo << "Using Entropy Calibrator 2" << std::endl;
        const std::string listFileName = "list.txt";
        const int32_t imageC = 3;
        const int32_t imageH = 300;
        const int32_t imageW = 300;
        nvinfer1::DimsNCHW imageDims{};
        imageDims = nvinfer1::DimsNCHW{mParams.calBatchSize, imageC, imageH, imageW};
        BatchStream calibrationStream(mParams.calBatchSize, mParams.nbCalBatches,
                                      imageDims, listFileName, mParams.dataDirs);
        calibrator.reset(new Int8EntropyCalibrator2<BatchStream>(
                             calibrationStream, 0, "UffSSD", mParams.inputTensorNames[0].c_str()));
        config->setFlag(BuilderFlag::kINT8);
        config->setInt8Calibrator(calibrator.get());
    }
    mEngine = std::shared_ptr<nvinfer1::ICudaEngine>(
        builder->buildEngineWithConfig(*network, *config), samplesCommon::InferDeleter());
    if (!mEngine)
    {
        return false;
    }
    return true;
}

bool SampleUffSSD::infer()
{
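    // Allocate host and device buffers for every engine binding.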
    samplesCommon::BufferManager buffers(mEngine, mParams.batchSize);
    auto context = SampleUniquePtr<nvinfer1::IExecutionContext>(mEngine->createExecutionContext());
    if (!context)
    {
        return false;
    }
    assert(mParams.inputTensorNames.size() == 1);
    if (!processInput(buffers))
    {
        return false;
    }
    buffers.copyInputToDevice();

    const bool status = context->execute(mParams.batchSize, buffers.getDeviceBindings().data());
    if (!status)
    {
        return false;
    }
    buffers.copyOutputToHost();
    if (!verifyOutput(buffers))
    {
        return false;
    }

    return true;
}
bool SampleUffSSD::teardown()
{
    nvuffparser::shutdownProtobufLibrary();
    return true;
}
bool SampleUffSSD::processInput(const samplesCommon::BufferManager& buffers)
{
    const int32_t inputC = mInputDims.d[0];
    const int32_t inputH = mInputDims.d[1];
    const int32_t inputW = mInputDims.d[2];
    const int32_t batchSize = mParams.batchSize;

    mPPMs.resize(batchSize);
    assert(mPPMs.size() == gImgFnames.size());
    for (int32_t i = 0; i < batchSize; ++i)
    {
        readPPMFile(locateFile(gImgFnames[i], mParams.dataDirs), mPPMs[i]);
    }

    float* hostDataBuffer = static_cast<float*>(buffers.getHostBuffer(mParams.inputTensorNames[0]));
    // Host memory for input buffer
    for (int32_t i = 0, volImg = inputC * inputH * inputW; i < mParams.batchSize; ++i)
    {
        for (int32_t c = 0; c < inputC; ++c)
        {
            // The color image to input should be in BGR order
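            // Normalize each pixel from [0, 255] to [-1, 1]: x * (2/255) - 1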
            for (uint32_t j = 0, volChl = inputH * inputW; j < volChl; ++j)
            {
                hostDataBuffer[i * volImg + c * volChl + j]
                    = (2.0 / 255.0) * float(mPPMs[i].buffer[j * inputC + c]) - 1.0;
            }
        }
    }

    return true;
}
bool SampleUffSSD::verifyOutput(const samplesCommon::BufferManager& buffers)
{
    const int32_t inputH = mInputDims.d[1];
    const int32_t inputW = mInputDims.d[2];
    const int32_t batchSize = mParams.batchSize;
    const int32_t keepTopK = mParams.keepTopK;
    const float visualThreshold = mParams.visualThreshold;
    const int32_t outputClsSize = mParams.outputClsSize;

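    // The NMS plugin produces two outputs: "NMS" holds the detections and
    // "NMS_1" the per-image keep count.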
    const float* detectionOut = static_cast<const float*>(buffers.getHostBuffer(mParams.outputTensorNames[0]));
    const int32_t* keepCount = static_cast<const int32_t*>(buffers.getHostBuffer(mParams.outputTensorNames[1]));

    // Read COCO class labels from file
    std::vector<std::string> classes(outputClsSize);
    {
        std::ifstream labelFile(locateFile(mParams.labelsFileName, mParams.dataDirs));
        std::string line;
        int32_t id = 0;
        while (getline(labelFile, line))
        {
            classes[id++] = line;
        }
    }

    bool pass = true;

    for (int32_t bi = 0; bi < batchSize; ++bi)
    {
        int32_t numDetections = 0;
        bool correctDetection = false;

        for (int32_t i = 0; i < keepCount[bi]; ++i)
        {
            const float* det = &detectionOut[0] + (bi * keepTopK + i) * 7;
            if (det[2] < visualThreshold)
            {
                continue;
            }

            // Output format for each detection is stored in the below order
            // [image_id, label, confidence, xmin, ymin, xmax, ymax]
            const int32_t detection = det[1];
            assert(detection < outputClsSize);
            const std::string outFname = classes[detection] + "-" + std::to_string(det[2]) + ".ppm";

            numDetections++;

            if ((bi == 0 && classes[detection] == "dog")
                || (bi == 1 && (classes[detection] == "truck" || classes[detection] == "car")))
            {
                correctDetection = true;
            }

            sample::gLogInfo << "Detected " << classes[detection].c_str() << " in image "
                             << static_cast<int32_t>(det[0]) << " (" << mPPMs[bi].fileName.c_str() << ")"
                             << " with confidence " << det[2] * 100.f << " and coordinates (" << det[3] * inputW << ", "
                             << det[4] * inputH << ")"
                             << ", (" << det[5] * inputW << ", " << det[6] * inputH << ")." << std::endl;

            sample::gLogInfo << "Result stored in: " << outFname.c_str() << std::endl;

            samplesCommon::writePPMFileWithBBox(
                outFname, mPPMs[bi], {det[3] * inputW, det[4] * inputH, det[5] * inputW, det[6] * inputH});
        }

        pass &= correctDetection;
        pass &= numDetections >= 1;
    }

    return pass;
}
SampleUffSSDParams initializeSampleParams(const samplesCommon::Args& args)
{
    SampleUffSSDParams params;
    if (args.dataDirs.empty()) //!< Use default directories if user hasn't provided directory paths
    {
        params.dataDirs.push_back("data/");
    }
    else //!< Use the data directory provided by the user
    {
        params.dataDirs = args.dataDirs;
    }

    params.uffFileName = "frozen_inference_graph.uff";
    params.labelsFileName = "ssd_coco_labels.txt";
    params.inputTensorNames.push_back("Input");
    params.batchSize = gImgFnames.size();
    params.outputTensorNames.push_back("NMS");
    params.outputTensorNames.push_back("NMS_1");
    params.dlaCore = args.useDLACore;
    params.int8 = args.runInInt8;
    params.fp16 = args.runInFp16;

    params.outputClsSize = 91;
    params.calBatchSize = 10;
    params.nbCalBatches = 10;
    params.keepTopK = 100;
    params.visualThreshold = 0.5;

    return params;
}
void printHelpInfo()
{
    std::cout << "Usage: ./sample_uff_ssd [-h or --help] [-d or --datadir=<path to data directory>] [--useDLACore=<N>]"
              << std::endl;
    std::cout << "--help          Display help information" << std::endl;
    std::cout << "--datadir       Specify path to a data directory, overriding the default. This option can be used "
                 "multiple times to add multiple directories. If no data directories are given, the default is to use "
                 "data/samples/ssd/ and data/ssd/"
              << std::endl;
    std::cout << "--useDLACore    Specify a DLA engine for layers that support DLA. Value can range from 0 to N-1, "
                 "where N is the number of DLA engines on the platform."
              << std::endl;
    std::cout << "--fp16          Specify to run in fp16 mode." << std::endl;
    std::cout << "--int8          Specify to run in int8 mode." << std::endl;
}
int32_t main(int32_t argc, char** argv)
{
    samplesCommon::Args args;
    const bool argsOK = samplesCommon::parseArgs(args, argc, argv);

    if (!argsOK)
    {
        sample::gLogError << "Invalid arguments" << std::endl;
        printHelpInfo();
        return EXIT_FAILURE;
    }

    if (args.help)
    {
        printHelpInfo();
        return EXIT_SUCCESS;
    }

    auto sampleTest = sample::gLogger.defineTest(gSampleName, argc, argv);

    sample::gLogger.reportTestStart(sampleTest);

    SampleUffSSD sample(initializeSampleParams(args));

    sample::gLogInfo << "Building inference engine for SSD" << std::endl;
    if (!sample.build())
    {
        return sample::gLogger.reportFail(sampleTest);
    }

    sample::gLogInfo << "Running inference engine for SSD" << std::endl;
    if (!sample.infer())
    {
        return sample::gLogger.reportFail(sampleTest);
    }

    if (!sample.teardown())
    {
        return sample::gLogger.reportFail(sampleTest);
    }

    return sample::gLogger.reportPass(sampleTest);
}
