如何调用一个具有动态维度的tensorrt engine

最新推荐文章于 2024-05-26 18:36:03 发布

Arnold-FY-Chen

最新推荐文章于 2024-05-26 18:36:03 发布

阅读量9.2k

点赞数 12

分类专栏：深度学习 NVIDIA 文章标签：深度学习人工智能 TensorRT nvidia

本文链接：https://blog.csdn.net/XCCCCZ/article/details/123009816

版权

深度学习同时被 2 个专栏收录

38 篇文章 13 订阅

订阅专栏

NVIDIA

15 篇文章 5 订阅

订阅专栏

调用具有动态维度的模型engine时如果没有指定维度，会导致报类似这样的错误：

[TRT] Parameter check failed at: engine.cpp::resolveslots::1227, condition: allInputDimensionsSpecified(routine)

在python代码里，在调用engine推理前做这样的设置即可:

context.set_binding_shape(0, (BATCH, 3, INPUT_H, INPUT_W))

在C++代码里该如何设置，很少有文章提及，关于如何调用有动态维度的模型，一般都是举的python代码的例子，我查了一下TensorRT的头文件NvInferRuntime.h里的代码才知道，C++代码里应该调用IExecutionContext类型的实例的setBindingDimensions(int bindingIndex, Dims dimensions)方法。

总体思路是：拿到一个对维度未知的模型engine文件后，首先读入文件内容并做deserialize获得engine:

// Constructs an ARNet from a serialized TensorRT engine file.
//
// Stores the input/output tensor names and the precision flags (int8 off,
// fp16 on) in mParams. If engine_file is readable, its bytes are loaded and
// deserialized into mEngine; when deserialization fails the constructor falls
// back to build() and returns.
//
// engine_file: path of the serialized engine to load.
// shape_file:  not referenced in the visible part of this constructor.
// input_name:  name of the input tensor, recorded in mParams.
// output_name: name of the output tensor, recorded in mParams.
ARNet::ARNet(std::string engine_file,
                         std::string shape_file, std::string input_name,
                         std::string output_name)
    : mEngine(nullptr) {
  samplesCommon::OnnxSampleParams params;
  params.inputTensorNames.push_back(input_name.c_str());
  params.outputTensorNames.push_back(output_name.c_str());
  params.int8 = false;
  params.fp16 = true;
  mParams = params;
  std::string se_path = engine_file;  //"arnet_b1_fp16.engine";
  // Mode 4 asks access() whether the file is readable.
  if (access(se_path.c_str(), 4) != -1) {
    // NOTE(review): runtime is not released in the visible part of this
    // snippet - confirm it is cleaned up in the elided code.
    nvinfer1::IRuntime* runtime = nvinfer1::createInferRuntime(gLogger);
    // A serialized engine is raw binary data, so open the file in binary mode
    // to rule out any text-mode translation of its bytes.
    std::ifstream fin(se_path, std::ios::binary);
    // Streaming rdbuf() copies the whole file in a single pass, so no
    // peek()/EOF loop is required. An unreadable or empty file simply yields
    // an empty string, which then fails deserialization below.
    std::stringstream buffer;
    buffer << fin.rdbuf();
    fin.close();
    const std::string cached_engine = buffer.str();
    mEngine = std::shared_ptr<nvinfer1::ICudaEngine>(
        runtime->deserializeCudaEngine(cached_engine.data(),
                                       cached_engine.size(), nullptr),
        samplesCommon::InferDeleter());
    if(!mEngine){
      std::cout <<"Deserialize from "<< se_path <<" failed!!! rebuild enigne ..." << std::endl;
      build();
      return;
    }
...

然后调用getBindingDimensions()查看engine的输入输出维度(如果已经知道维度，可以跳过这一步):

// Print the dimensions of every binding (inputs and outputs) of the engine,
// one line per binding, e.g. "index 0, dims: (-1,3,224,224)".
// getBindingDimensions() reports -1 for any dimension that varies within the
// optimization profile.
for (int i = 0; i < mEngine->getNbBindings(); i++)
{
     const nvinfer1::Dims dims = mEngine->getBindingDimensions(i);
     // The format string consumes exactly one int: the binding index.
     printf("index %d, dims: (", i);
     for (int d = 0; d < dims.nbDims; d++)
     {
         // Comma-separate every entry except the last one.
         if (d < dims.nbDims - 1)
             printf("%d,", dims.d[d]);
         else
             printf("%d", dims.d[d]);
     }
     printf(")\n");
}

在调用context->executeV2()做推理前把维度值为-1的动态维度值替换成具体的维度并调用context->setBindingDimensions()设置具体维度，然后在数据填入input buffer准备好后调用context->executeV2()做推理即可:

samplesCommon::BufferManager buffers(mEngine);
  ...

  auto context = SampleUniquePtr<nvinfer1::IExecutionContext>(
      mEngine->createExecutionContext());
  if (!context) {
    return -1;
  }

  context->setOptimizationProfile(0);

  nvinfer1::Dims dims5; 
  dims5.d[0] = 1;    // replace dynamic batch size with 1
  dims5.d[1] = mInputDims.d[1];
  dims5.d[2] = mInputDims.d[2];
  dims5.d[3] = mInputDims.d[3];
  dims5.d[4] = mInputDims.d[4];
  dims5.nbDims = 5;
  context->setBindingDimensions(0, dims5);

  ...

}


// Runs one synchronous inference pass on the given execution context.
//
// context:  execution context on which executeV2() is invoked.
// buffers:  buffer manager that supplies the device bindings and performs the
//           host<->device copies.
// p_cvImgs: input images, forwarded to processInput().
// nClasses: forwarded to processOutput().
//
// Returns -2 when processInput() fails and -3 when executeV2() fails; any
// other return values live in the parts of the function elided from this
// snippet.
int ARNet::infer(nvinfer1::IExecutionContext* context, samplesCommon::BufferManager& buffers, vector<cv::Mat>* p_cvImgs, int nClasses) {
  
  ...

  // Presumably fills the input buffers from the images; abort if it fails.
  if (!processInput(buffers, p_cvImgs)) {
    return -2;
  }
  // Upload the prepared input before launching inference.
  buffers.copyInputToDevice();

  // executeV2() takes the array of device binding pointers; for an engine with
  // dynamic inputs the binding dimensions must already have been set on the
  // context before this call.
  bool status = context->executeV2(buffers.getDeviceBindings().data());
  if (!status) {
    return -3;
  }

  // Download the results and hand them to the post-processing step.
  buffers.copyOutputToHost();
  vector<float> result = processOutput(buffers,nClasses);
  ...

}

关于getBindingDimensions()和setBindingDimensions()等重要的API的说明参见/usr/include/aarch64-linux-gnu/NvInferRuntime.h里的相关代码:

class ICudaEngine
{
public:
    //!
    //! \brief Get the number of binding indices.
    //!
    //! There are separate binding indices for each optimization profile.
    //! This method returns the total over all profiles.
    //! If the engine has been built for K profiles, the first getNbBindings() / K bindings are used by profile
    //! number 0, the following getNbBindings() / K bindings are used by profile number 1 etc.
    //!
    //! \see getBindingIndex();
    //!
    virtual int getNbBindings() const noexcept = 0;

    //!
    //! \brief Retrieve the binding index for a named tensor.
    //!
    //! IExecutionContext::enqueue() and IExecutionContext::execute() require an array of buffers.
    //!
    //! Engine bindings map from tensor names to indices in this array.
    //! Binding indices are assigned at engine build time, and take values in the range [0 ... n-1] where n is the total number of inputs and outputs.
    //!
    //! To get the binding index of the name in an optimization profile with index k > 0,
    //! mangle the name by appending " [profile k]", as described for method getBindingName().
    //!
    //! \param name The tensor name.
    //! \return The binding index for the named tensor, or -1 if the name is not found.
    //!
    //! \see getNbBindings() getBindingName()
    //!
    virtual int getBindingIndex(const char* name) const noexcept = 0;

    //!
    //! \brief Retrieve the name corresponding to a binding index.
    //!
    //! This is the reverse mapping to that provided by getBindingIndex().
    //!
    //! For optimization profiles with an index k > 0, the name is mangled by appending
    //! " [profile k]", with k written in decimal.  For example, if the tensor in the
    //! INetworkDefinition had the name "foo", and bindingIndex refers to that tensor in the
    //! optimization profile with index 3, getBindingName returns "foo [profile 3]".
    //!
    //! \param bindingIndex The binding index.
    //! \return The name corresponding to the index, or nullptr if the index is out of range.
    //!
    //! \see getBindingIndex()
    //!
    virtual const char* getBindingName(int bindingIndex) const noexcept = 0;
    //!
    //! \brief Determine whether a binding is an input binding.
    //!
    //! \param bindingIndex The binding index.
    //! \return True if the index corresponds to an input binding and the index is in range.
    //!
    //! \see getBindingIndex()
    //!
    virtual bool bindingIsInput(int bindingIndex) const noexcept = 0;

    //!
    //! \brief Get the dimensions of a binding.
    //!
    //! \param bindingIndex The binding index.
    //! \return The dimensions of the binding if the index is in range, otherwise Dims().
    //!         Has -1 for any dimension that varies within the optimization profile.
    //!
    //! For example, suppose an INetworkDefinition has an input with shape [-1,-1]
    //! that becomes a binding b in the engine.  If the associated optimization profile
    //! specifies that b has minimum dimensions as [6,9] and maximum dimensions [7,9],
    //! getBindingDimensions(b) returns [-1,9], despite the second dimension being
    //! dynamic in the INetworkDefinition.
    //!
    //! Because each optimization profile has separate bindings, the returned value can
    //! differ across profiles. Consider another binding b' for the same network input,
    //! but for another optimization profile.  If that other profile specifies minimum
    //! dimensions [5,8] and maximum dimensions [5,9], getBindingDimensions(b') returns [5,-1].
    //!
    //! \see getBindingIndex()
    //!
    virtual Dims getBindingDimensions(int bindingIndex) const noexcept = 0;

    //!
    //! \brief Determine the required data type for a buffer from its binding index.
    //!
    //! \param bindingIndex The binding index.
    //! \return The type of the data in the buffer.
    //!
    //! \see getBindingIndex()
    //!
    virtual DataType getBindingDataType(int bindingIndex) const noexcept = 0;
...

class IExecutionContext
{
public:
    //!
    //! \brief Synchronously execute inference on a batch.
    //!
    //! This method requires an array of input and output buffers. The mapping from tensor names to indices can be queried using ICudaEngine::getBindingIndex()
    //! \param batchSize The batch size. This is at most the value supplied when the engine was built.
    //! \param bindings An array of pointers to input and output buffers for the network.
    //!
    //! \return True if execution succeeded.
    //!
    //! \see ICudaEngine::getBindingIndex() ICudaEngine::getMaxBatchSize()
    //!
    virtual bool execute(int batchSize, void** bindings) noexcept = 0;

    //!
    //! \brief Asynchronously execute inference on a batch.
    //!
    //! This method requires an array of input and output buffers. The mapping from tensor names to indices can be queried using ICudaEngine::getBindingIndex()
    //! \param batchSize The batch size. This is at most the value supplied when the engine was built.
    //! \param bindings An array of pointers to input and output buffers for the network.
    //! \param stream A cuda stream on which the inference kernels will be enqueued
    //! \param inputConsumed An optional event which will be signaled when the input buffers can be refilled with new data
    //!
    //! \return True if the kernels were enqueued successfully.
    //!
    //! \see ICudaEngine::getBindingIndex() ICudaEngine::getMaxBatchSize()
    //!
    virtual bool enqueue(int batchSize, void** bindings, cudaStream_t stream, cudaEvent_t* inputConsumed) noexcept = 0;
...
    //!
    //! \brief Select an optimization profile for the current context.
    //!
    //! \param profileIndex Index of the profile. It must lie between 0 and
    //!        getEngine().getNbOptimizationProfiles() - 1
    //!
    //! The selected profile will be used in subsequent calls to execute() or enqueue().
    //!
    //! If the associated CUDA engine has dynamic inputs, this method must be called at least once
    //! with a unique profileIndex before calling execute or enqueue (i.e. the profile index
    //! may not be in use by another execution context that has not been destroyed yet).
    //! For the first execution context that is created for an engine, setOptimizationProfile(0)
    //! is called implicitly.
    //!
    //! If the associated CUDA engine does not have inputs with dynamic shapes, this method need not be
    //! called, in which case the default profile index of 0 will be used (this is particularly
    //! the case for all safe engines).
    //!
    //! setOptimizationProfile() must be called before calling setBindingDimensions() and
    //! setInputShapeBinding() for all dynamic input tensors or input shape tensors, which in
    //! turn must be called before either execute() or enqueue().
    //!
    //! \return true if the call succeeded, else false (e.g. input out of range)
    //!
    //! \see ICudaEngine::getNbOptimizationProfiles()
    virtual bool setOptimizationProfile(int profileIndex) noexcept = 0;
...
    //!
    //! \brief Set the dynamic dimensions of a binding
    //!
    //! Requires the engine to be built without an implicit batch dimension.
    //! The binding must be an input tensor, and all dimensions must be compatible with
    //! the network definition (i.e. only the wildcard dimension -1 can be replaced with a
    //! new dimension > 0). Furthermore, the dimensions must be in the valid range for the
    //! currently selected optimization profile, and the corresponding engine must not be
    //! safety-certified.
    //!
    //! This method will fail unless a valid optimization profile is defined for the current
    //! execution context (getOptimizationProfile() must not be -1).
    //!
    //! For all dynamic non-output bindings (which have at least one wildcard dimension of -1),
    //! this method needs to be called before either enqueue() or execute() may be called.
    //! This can be checked using the method allInputDimensionsSpecified().
    //!
    //! \return false if an error occurs (e.g. index out of range), else true
    //!
    //! \see ICudaEngine::getBindingIndex
    //!
    virtual bool setBindingDimensions(int bindingIndex, Dims dimensions) noexcept = 0;
...

Arnold-FY-Chen

关注

12
点赞
踩
20

收藏

觉得还不错? 一键收藏
打赏
5
评论
如何调用一个具有动态维度的tensorrt engine

调用具有动态维度的模型engine时如果没有指定维度，会导致报类似这样的错误：[TRT] Parameter check failed at: engine.cpp::resolveslots::1227, condition: allInputDimensionsSpecified(routine)在python代码里，在调用engine推理前做这样的设置即可:context.set_binding_shape(0, (BATCH, 3, INPUT_H, INPUT_W))在C++代码里
复制链接

扫一扫