1 Batch data preparation
INT8 mode needs calibration batches, which are generated with the script that ships with the sample:
$TRT_SOURCE/samples/opensource/sampleSSD/PrepareINT8CalibrationBatches.sh
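A minimal sketch of running it (this assumes $TRT_SOURCE points at your TensorRT OSS checkout; the generated batch files need to end up under <TensorRT root directory>/data/ssd/batches/ so that the "batches/batch_calibration" prefix used in the code below resolves):
bash $TRT_SOURCE/samples/opensource/sampleSSD/PrepareINT8CalibrationBatches.sh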
2 Model preparation
1. Download models_VGGNet_VOC0712_SSD_300x300.tar.gz.
2. Check that its MD5 hash is 9a795fc161fff2e8f3aed07f4d488faf:
md5sum models_VGGNet_VOC0712_SSD_300x300.tar.gz
3. Extract the archive and copy the model files to the TensorRT data directory:
tar xvf models_VGGNet_VOC0712_SSD_300x300.tar.gz
cp models/VGGNet/VOC0712/SSD_300x300/VGG_VOC0712_SSD_300x300_iter_120000.caffemodel <TensorRT root directory>/data/ssd
cp models/VGGNet/VOC0712/SSD_300x300/deploy.prototxt <TensorRT root directory>/data/ssd/ssd.prototxt
4. Edit ssd.prototxt.
In ssd.prototxt, change every Flatten layer to a Reshape operation (i.e., type: "Reshape"), because TensorRT implements Flatten through Reshape, and add a reshape_param block to each of those layers, as shown below:
reshape_param {
shape {
dim: 0
dim: -1
dim: 1
dim: 1
}
}
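For example, a converted Flatten layer might look like this (layer and tensor names follow the stock SSD_300x300 deploy.prototxt; only the type and the added reshape_param differ from the original layer):
layer {
name: "conv4_3_norm_mbox_loc_flat"
type: "Reshape"
bottom: "conv4_3_norm_mbox_loc_perm"
top: "conv4_3_norm_mbox_loc_flat"
reshape_param {
shape {
dim: 0
dim: -1
dim: 1
dim: 1
}
}
}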
Also add top: "keep_count" to the detection_out layer in ssd.prototxt, as the TensorRT DetectionOutput plugin requires this output.
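A sketch of the modified detection_out layer (the bottom names follow the stock deploy.prototxt; the detection_output_param block is unchanged and omitted here):
layer {
name: "detection_out"
type: "DetectionOutput"
bottom: "mbox_loc"
bottom: "mbox_conf_flatten"
bottom: "mbox_priorbox"
top: "detection_out"
top: "keep_count"
# detection_output_param { ... } unchanged
}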
3 Code
#include "common/BatchStream.h"
#include "common/EntropyCalibrator.h"
#include "common/argsParser.h"
#include "common/buffers.h"
#include "common/common.h"
#include "common/logger.h"
#include "NvCaffeParser.h"
#include "NvInfer.h"
#include <cuda_runtime_api.h>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <sstream>
const std::string gSampleName = "TensorRT.sample_ssd";
struct SampleSSDParams : public samplesCommon::CaffeSampleParams
{
int outputClsSize; //!< The number of output classes
int keepTopK; //!< The maximum number of detections kept post-NMS
int nbCalBatches; //!< The number of batches for calibration
float visualThreshold; //!< The minimum score threshold to consider a detection
std::string calibrationBatches; //!< The path to calibration batches
};
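//!
//! \brief The SampleSSD class implements the SSD sample.
//!
//! It creates the network from the Caffe model and builds the engine used to run inference.
//!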
class SampleSSD
{
template <typename T>
using SampleUniquePtr = std::unique_ptr<T, samplesCommon::InferDeleter>;
public:
SampleSSD(const SampleSSDParams& params)
: mParams(params)
, mEngine(nullptr)
{
}
bool build();
bool infer();
bool teardown();
private:
SampleSSDParams mParams;
nvinfer1::Dims mInputDims;
std::vector<samplesCommon::PPM<3, 300, 300>> mPPMs;
std::shared_ptr<nvinfer1::ICudaEngine> mEngine;
bool constructNetwork(SampleUniquePtr<nvinfer1::IBuilder>& builder,
SampleUniquePtr<nvinfer1::INetworkDefinition>& network, SampleUniquePtr<nvinfer1::IBuilderConfig>& config,
SampleUniquePtr<nvcaffeparser1::ICaffeParser>& parser);
bool processInput(const samplesCommon::BufferManager& buffers);
bool verifyOutput(const samplesCommon::BufferManager& buffers);
};
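//!
//! \brief Creates the network, configures the builder, and creates the network engine.
//!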
bool SampleSSD::build()
{
initLibNvInferPlugins(&sample::gLogger.getTRTLogger(), "");
auto builder = SampleUniquePtr<nvinfer1::IBuilder>(nvinfer1::createInferBuilder(sample::gLogger.getTRTLogger()));
if (!builder)
{
return false;
}
auto network = SampleUniquePtr<nvinfer1::INetworkDefinition>(builder->createNetwork());
if (!network)
{
return false;
}
auto config = SampleUniquePtr<nvinfer1::IBuilderConfig>(builder->createBuilderConfig());
if (!config)
{
return false;
}
auto parser = SampleUniquePtr<nvcaffeparser1::ICaffeParser>(nvcaffeparser1::createCaffeParser());
if (!parser)
{
return false;
}
auto constructed = constructNetwork(builder, network, config, parser);
if (!constructed)
{
return false;
}
assert(network->getNbInputs() == 1);
mInputDims = network->getInput(0)->getDimensions();
assert(mInputDims.nbDims == 3);
return true;
}
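//!
//! \brief Parses the Caffe model, marks the output tensors, and configures FP16, INT8 calibration, and DLA before building the engine.
//!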
bool SampleSSD::constructNetwork(SampleUniquePtr<nvinfer1::IBuilder>& builder,
SampleUniquePtr<nvinfer1::INetworkDefinition>& network, SampleUniquePtr<nvinfer1::IBuilderConfig>& config,
SampleUniquePtr<nvcaffeparser1::ICaffeParser>& parser)
{
const nvcaffeparser1::IBlobNameToTensor* blobNameToTensor
= parser->parse(locateFile(mParams.prototxtFileName, mParams.dataDirs).c_str(),
locateFile(mParams.weightsFileName, mParams.dataDirs).c_str(), *network, DataType::kFLOAT);
for (auto& s : mParams.outputTensorNames)
{
network->markOutput(*blobNameToTensor->find(s.c_str()));
}
builder->setMaxBatchSize(mParams.batchSize);
config->setMaxWorkspaceSize(36_MiB);
if (mParams.fp16)
{
config->setFlag(BuilderFlag::kFP16);
}
// Calibrator life time needs to last until after the engine is built.
std::unique_ptr<IInt8Calibrator> calibrator;
if (mParams.int8)
{
sample::gLogInfo << "Using Entropy Calibrator 2" << std::endl;
BatchStream calibrationStream(
mParams.batchSize, mParams.nbCalBatches, mParams.calibrationBatches, mParams.dataDirs);
calibrator.reset(
new Int8EntropyCalibrator2<BatchStream>(calibrationStream, 0, "SSD", mParams.inputTensorNames[0].c_str()));
config->setFlag(BuilderFlag::kINT8);
config->setInt8Calibrator(calibrator.get());
}
samplesCommon::enableDLA(builder.get(), config.get(), mParams.dlaCore);
mEngine = std::shared_ptr<nvinfer1::ICudaEngine>(
builder->buildEngineWithConfig(*network, *config), samplesCommon::InferDeleter());
if (!mEngine)
{
return false;
}
return true;
}
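//!
//! \brief Runs the TensorRT inference engine: stages the input on the device, executes the context, and post-processes the output.
//!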
bool SampleSSD::infer()
{
// Create RAII buffer manager object
samplesCommon::BufferManager buffers(mEngine, mParams.batchSize);
auto context = SampleUniquePtr<nvinfer1::IExecutionContext>(mEngine->createExecutionContext());
if (!context)
{
return false;
}
// Read the input data into the managed buffers
assert(mParams.inputTensorNames.size() == 1);
if (!processInput(buffers))
{
return false;
}
// Memcpy from host input buffers to device input buffers
buffers.copyInputToDevice();
bool status = context->execute(mParams.batchSize, buffers.getDeviceBindings().data());
if (!status)
{
return false;
}
// Memcpy from device output buffers to host output buffers
buffers.copyOutputToHost();
// Post-process detections and verify results
if (!verifyOutput(buffers))
{
return false;
}
return true;
}
bool SampleSSD::teardown()
{
//! Clean up the libprotobuf files as the parsing is complete
//! \note It is not safe to use any other part of the protocol buffers library after
//! ShutdownProtobufLibrary() has been called.
nvcaffeparser1::shutdownProtobufLibrary();
return true;
}
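//!
//! \brief Reads the input image, converts it to the network's channel layout, and subtracts the per-channel mean into the managed host buffer.
//!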
bool SampleSSD::processInput(const samplesCommon::BufferManager& buffers)
{
const int inputC = mInputDims.d[0];
const int inputH = mInputDims.d[1];
const int inputW = mInputDims.d[2];
const int batchSize = mParams.batchSize;
// Available images
std::vector<std::string> imageList = {"bus.ppm"};
mPPMs.resize(batchSize);
assert(mPPMs.size() <= imageList.size());
for (int i = 0; i < batchSize; ++i)
{
readPPMFile(locateFile(imageList[i], mParams.dataDirs), mPPMs[i]);
}
// Fill data buffer
float* hostDataBuffer = static_cast<float*>(buffers.getHostBuffer("data"));
float pixelMean[3]{104.0f, 117.0f, 123.0f}; // In BGR order
// Host memory for input buffer
for (int i = 0, volImg = inputC * inputH * inputW; i < mParams.batchSize; ++i)
{
for (int c = 0; c < inputC; ++c)
{
// The color image to input should be in BGR order
for (unsigned j = 0, volChl = inputH * inputW; j < volChl; ++j)
{
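// The PPM buffer holds interleaved RGB; indexing with (2 - c) reverses the
// channel order to BGR before the per-channel mean is subtracted.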
hostDataBuffer[i * volImg + c * volChl + j] = float(mPPMs[i].buffer[j * inputC + 2 - c]) - pixelMean[c];
}
}
}
return true;
}
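//!
//! \brief Filters detections by the score threshold, logs each kept detection, and writes the boxes back to PPM files.
//!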
bool SampleSSD::verifyOutput(const samplesCommon::BufferManager& buffers)
{
const int inputH = mInputDims.d[1];
const int inputW = mInputDims.d[2];
const int batchSize = mParams.batchSize;
const int keepTopK = mParams.keepTopK;
const float visualThreshold = mParams.visualThreshold;
const int outputClsSize = mParams.outputClsSize;
const float* detectionOut = static_cast<const float*>(buffers.getHostBuffer("detection_out"));
const int* keepCount = static_cast<const int*>(buffers.getHostBuffer("keep_count"));
const std::vector<std::string> classes{"background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car",
"cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa",
"train", "tvmonitor"}; // List of class labels
bool pass = true;
for (int p = 0; p < batchSize; ++p)
{
int numDetections = 0;
// is there at least one correct detection?
bool correctDetection = false;
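// The DetectionOutput plugin emits 7 floats per detection:
// [imageId, label, confidence, xmin, ymin, xmax, ymax], with box
// coordinates normalized to [0, 1] of the input width/height.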
for (int i = 0; i < keepCount[p]; ++i)
{
const float* det = detectionOut + (p * keepTopK + i) * 7;
if (det[2] < visualThreshold)
{
continue;
}
assert((int) det[1] < outputClsSize);
std::string storeName = classes[(int) det[1]] + "-" + std::to_string(det[2]) + ".ppm";
numDetections++;
if (classes[(int) det[1]] == "car")
{
correctDetection = true;
}
sample::gLogInfo << " Image name:" << mPPMs[p].fileName.c_str()
<< ", Label: " << classes[(int) det[1]].c_str() << ","
<< " confidence: " << det[2] * 100.f << " xmin: " << det[3] * inputW
<< " ymin: " << det[4] * inputH << " xmax: " << det[5] * inputW
<< " ymax: " << det[6] * inputH << std::endl;
samplesCommon::writePPMFileWithBBox(
storeName, mPPMs[p], {det[3] * inputW, det[4] * inputH, det[5] * inputW, det[6] * inputH});
}
pass &= numDetections >= 1;
pass &= correctDetection;
}
return pass;
}
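//!
//! \brief Initializes members of the params struct using the command-line args.
//!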
SampleSSDParams initializeSampleParams(const samplesCommon::Args& args)
{
SampleSSDParams params;
if (args.dataDirs.empty()) //!< Use default directories if user hasn't provided directory paths
{
params.dataDirs.push_back("data/");
}
else //!< Use the data directory provided by the user
{
params.dataDirs = args.dataDirs;
}
params.prototxtFileName = "ssd.prototxt";
params.weightsFileName = "VGG_VOC0712_SSD_300x300_iter_120000.caffemodel";
params.inputTensorNames.push_back("data");
params.batchSize = 1;
params.outputTensorNames.push_back("detection_out");
params.outputTensorNames.push_back("keep_count");
params.dlaCore = args.useDLACore;
params.int8 = args.runInInt8;
params.fp16 = args.runInFp16;
params.outputClsSize = 21;
params.keepTopK = 200; // Number of total bboxes to be kept per image after NMS step. It is same as
// detection_output_param.keep_top_k in prototxt file
params.nbCalBatches = 50;
params.visualThreshold = 0.6f;
params.calibrationBatches = "batches/batch_calibration";
return params;
}
void printHelpInfo()
{
std::cout << "Usage: ./sample_ssd [-h or --help] [-d or --datadir=<path to data directory>] [--useDLACore=<int>]"
<< std::endl;
std::cout << "--help Display help information" << std::endl;
std::cout << "--datadir Specify path to a data directory, overriding the default. This option can be used "
"multiple times to add multiple directories. If no data directories are given, the default is to use "
"data/samples/ssd/ and data/ssd/"
<< std::endl;
std::cout << "--useDLACore=N Specify a DLA engine for layers that support DLA. Value can range from 0 to n-1, "
"where n is the number of DLA engines on the platform."
<< std::endl;
std::cout << "--fp16 Specify to run in fp16 mode." << std::endl;
std::cout << "--int8 Specify to run in int8 mode." << std::endl;
}
int main(int argc, char** argv)
{
samplesCommon::Args args;
bool argsOK = samplesCommon::parseArgs(args, argc, argv);
if (!argsOK)
{
sample::gLogError << "Invalid arguments" << std::endl;
printHelpInfo();
return EXIT_FAILURE;
}
if (args.help)
{
printHelpInfo();
return EXIT_SUCCESS;
}
auto sampleTest = sample::gLogger.defineTest(gSampleName, argc, argv);
sample::gLogger.reportTestStart(sampleTest);
SampleSSD sample(initializeSampleParams(args));
sample::gLogInfo << "Building and running a GPU inference engine for SSD" << std::endl;
if (!sample.build())
{
return sample::gLogger.reportFail(sampleTest);
}
if (!sample.infer())
{
return sample::gLogger.reportFail(sampleTest);
}
if (!sample.teardown())
{
return sample::gLogger.reportFail(sampleTest);
}
return sample::gLogger.reportPass(sampleTest);
}
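With the model files and calibration batches in place, a typical build-and-run session might look like this (the paths are assumptions based on the usual TensorRT sample layout; the flags match printHelpInfo above):
cd <TensorRT root directory>/samples/sampleSSD
make
../../bin/sample_ssd # FP32 (default)
../../bin/sample_ssd --fp16 # FP16 mode
../../bin/sample_ssd --int8 # INT8 mode, consuming the calibration batches from section 1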