使用C++ API添加自定义插件层
添加插件层基本流程
//step1:创建插件creator
auto creator = getPluginRegistry()->getPluginCreator(pluginName, pluginVersion);
//step2:获取插件字段名
const PluginFieldCollection* pluginFC = creator->getFieldNames();
//step3:获取插件元数据
PluginFieldCollection *pluginData = parseAndFillFields(pluginFC, layerFields);
//step4:创建插件
IPluginV2 *pluginObj = creator->createPlugin(layerName, pluginData);
//step5:将插件层添加到网络
auto layer = network.addPluginV2(&inputs[0], int(inputs.size()), pluginObj);
… (build rest of the network and serialize engine)
//释放资源
pluginObj->destroy() //销毁插件对象
… (destroy network, engine, builder)
… (free allocated pluginData)
以下示例代码添加了一个名为 FooPlugin 的新插件:
// Plugin class: derive from IPluginV2IOExt and implement its interface.
class FooPlugin : public IPluginV2IOExt
{
    // ... override all pure virtual methods of IPluginV2IOExt with
    // definitions for your plugin; do not override the TRT_DEPRECATED methods.
    // (Fixed: placeholder text was bare prose inside the class body.)
};
// Plugin factory class (legacy parser integration path).
class MyPluginFactory : public nvinfer1::IPluginFactory
{
    // ... implement all factory methods for your plugin.
    // (Fixed: placeholder text was bare prose inside the class body.)
};
示例:使用 C++ 创建 Caffe 中不支持的自定义层
官方示例
https://github.com/NVIDIA/TensorRT/tree/master/samples/opensource/samplePlugin
https://github.com/NVIDIA/TensorRT/tree/master/samples/opensource/sampleFasterRCNN
// Plugin class: derive from IPluginV2IOExt and implement its interface.
class FooPlugin : public IPluginV2IOExt
{
    // ... implement all class methods for your plugin.
};
// Plugin factory for the Caffe parser: maps unsupported layer names to plugins.
class FooPluginFactory : public nvcaffeparser1::IPluginFactoryV2
{
    // Create and return a plugin object of type FooPlugin.
    virtual nvinfer1::IPluginV2* createPlugin(...)
    {
        // ... create and return a plugin object of type FooPlugin
    }
    // Check whether the given layer name corresponds to this plugin.
    bool isPlugin(const char* name)
    {
        // ... check if the layer name corresponds to the plugin
    }
}; // fixed: a class definition must end with a semicolon
// Plugin creator class: lets the registry instantiate FooPlugin by name.
class FooPluginCreator : public IPluginCreator
{
    // ... implement all creator methods here.
};
// Register the creator with the global TensorRT plugin registry.
REGISTER_TENSORRT_PLUGIN(FooPluginCreator);
示例:使用 C++ 添加 UFF 中不支持的自定义层
官方示例
https://github.com/NVIDIA/TensorRT/tree/master/samples/opensource/sampleUffSSD
https://docs.nvidia.com/deeplearning/tensorrt/sample-support-guide/index.html#uff_custom_plugin
https://docs.nvidia.com/deeplearning/tensorrt/sample-support-guide/index.html#uff_ssd
首先调用 UFF 转换器,通过 -p 指定配置脚本,将 TensorFlow 模型转换为 UFF 模型;生成的模型文件中插件节点已完成替换
convert-to-uff frozen_inference_graph.pb -p config.py -t
config.py 的内容如下,用于预处理运算图
# Example: implement the Relu6 op via the Clip_TRT plugin.
# Import graphsurgeon, the TF graph-editing helper used by convert-to-uff.
import graphsurgeon as gs
# Create the plugin node.
# Fixed: curly quotes replaced with ASCII quotes (the original was a syntax error).
my_relu6 = gs.create_plugin_node(name="MyRelu6", op="Clip_TRT", clipMin=0.0, clipMax=6.0)
# Map the tf_relu6 node to the plugin node.
# Fixed: name casing — preprocess() below refers to namespace_plugin_map,
# but the original defined Namespace_plugin_map (NameError at runtime).
namespace_plugin_map = {"tf_relu6": my_relu6}


# Preprocess hook called by convert-to-uff: collapse the mapped namespaces
# so the plugin node replaces the original subgraph.
def preprocess(dynamic_graph):
    dynamic_graph.collapse_namespaces(namespace_plugin_map)
示例:使用 C++ 创建具有动态形状支持的自定义层
支持动态形状的插件必须派生自 IPluginV2DynamicExt
受动态形状影响的四种方法是
getOutputDimensions
supportsFormatCombination
configurePlugin
enqueue
// Plugin class: derive from IPluginV2DynamicExt for dynamic-shape support.
class FooPlugin : public IPluginV2DynamicExt
{
    // ... implement all class methods for your plugin.
};
// Plugin factory for the Caffe parser.
class FooPluginFactory : public nvcaffeparser1::IPluginFactoryV2
{
    // Create and return a plugin object of type FooPlugin.
    virtual nvinfer1::IPluginV2* createPlugin(...)
    {
        // ... create and return a plugin object of type FooPlugin
    }
    // Check whether the given layer name corresponds to this plugin.
    bool isPlugin(const char* name)
    {
        // ... check if the layer name corresponds to the plugin
    }
}; // fixed: a class definition must end with a semicolon
// Plugin creator class: lets the registry instantiate FooPlugin by name.
class FooPluginCreator : public IPluginCreator
{
    // ... implement all creator methods here.
};
// Register the creator with the global TensorRT plugin registry.
REGISTER_TENSORRT_PLUGIN(FooPluginCreator);
// --------------------------------------------- dynamic-shape-specific code below
// BarPlugin: example plugin with dynamic shape support (2 inputs, 2 outputs).
class BarPlugin : public IPluginV2DynamicExt
{
    // ... override the virtual methods inherited from IPluginV2DynamicExt.
};
// getOutputDimensions: describe each output's shape symbolically via IExprBuilder.
DimsExprs BarPlugin::getOutputDimensions(int outputIndex, const DimsExprs* inputs, int nbInputs, IExprBuilder& exprBuilder)
{
    switch (outputIndex)
    {
    case 0:
    {
        // First dimension of output 0 is the sum of the inputs' first dimensions.
        DimsExprs output(inputs[0]);
        // Fixed: IExprBuilder::operation takes IDimensionExpr references, so the
        // IDimensionExpr* stored in d[0] must be dereferenced.
        output.d[0] = exprBuilder.operation(DimensionOperation::kSUM,
                                            *inputs[0].d[0], *inputs[1].d[0]);
        return output;
    }
    case 1:
        // Output 1 mirrors input 0's dimensions.
        return inputs[0];
    default:
        // Fixed: curly quotes replaced with ASCII quotes.
        throw std::invalid_argument("invalid output");
    }
} // fixed: the function's closing brace was missing
// supportsFormatCombination: report which type/format combinations are supported
// at each I/O position (positions 0-1 are inputs, 2-3 are outputs).
// Fixed: 'override' removed — it is not allowed on an out-of-class definition.
bool BarPlugin::supportsFormatCombination(int pos, const PluginTensorDesc* inOut, int nbInputs, int nbOutputs)
{
    assert(0 <= pos && pos < 4); // 2 inputs + 2 outputs
    const auto* in = inOut;
    const auto* out = inOut + nbInputs;
    switch (pos)
    {
    case 0:
        // Fixed: the original dropped 'return', making case 0 a no-op that
        // fell through into case 1.
        return in[0].format == TensorFormat::kLINEAR;
    case 1:
        return in[1].type == in[0].type &&
               in[0].format == TensorFormat::kLINEAR;
    case 2:
        return out[0].type == in[0].type &&
               out[0].format == TensorFormat::kLINEAR;
    case 3:
        return out[1].type == in[0].type &&
               out[1].format == TensorFormat::kLINEAR;
    }
    // Fixed: curly quotes replaced with ASCII quotes.
    throw std::invalid_argument("invalid connection number");
}
// configurePlugin: receives the ranges of dimensions the plugin may see at runtime.
// Fixed: 'override' removed — it is not allowed on an out-of-class definition.
void BarPlugin::configurePlugin(const DynamicPluginTensorDesc* in, int nbInputs,
    const DynamicPluginTensorDesc* out, int nbOutputs)
{
    // Nothing to configure in this example.
}
示例:使用 C++ 添加具有 INT8 I/O 支持的自定义层
为了支持 INT8 I/O,您的插件可以派生自 IPluginV2IOExt
或 IPluginV2DynamicExt
受INT8影响的主要方法
supportsFormatCombination
configurePlugin
enqueue
// Plugin class: derive from IPluginV2DynamicExt (or IPluginV2IOExt) for INT8 I/O.
class FooPlugin : public IPluginV2DynamicExt
{
    // ... implement all class methods for your plugin.
};
// Plugin factory for the Caffe parser.
class FooPluginFactory : public nvcaffeparser1::IPluginFactoryV2
{
    // Create and return a plugin object of type FooPlugin.
    virtual nvinfer1::IPluginV2* createPlugin(...)
    {
        // ... create and return a plugin object of type FooPlugin
    }
    // Check whether the given layer name corresponds to this plugin.
    bool isPlugin(const char* name)
    {
        // ... check if the layer name corresponds to the plugin
    }
}; // fixed: a class definition must end with a semicolon
// Plugin creator class: lets the registry instantiate FooPlugin by name.
class FooPluginCreator : public IPluginCreator
{
    // ... implement all creator methods here.
};
// Register the creator with the global TensorRT plugin registry.
REGISTER_TENSORRT_PLUGIN(FooPluginCreator);
// --------------------------------------------- INT8 I/O-specific code below
// UffPoolPluginV2: demonstrates a custom pooling layer extended with INT8 I/O.
class UffPoolPluginV2 : public IPluginV2IOExt
{
    // ... override the virtual methods inherited from IPluginV2IOExt.
};
// supportsFormatCombination: accept linear layout, any non-INT32 type, and
// require every I/O tensor to use the same type as tensor 0 (so FP32/FP16/INT8
// are all supported, but mixed-type I/O is not).
// Fixed: 'override' removed — it is not allowed on an out-of-class definition.
bool UffPoolPluginV2::supportsFormatCombination(int pos, const PluginTensorDesc* inOut, int nbInputs, int nbOutputs) const
{
    assert(nbInputs == 1 && nbOutputs == 1 && pos < nbInputs + nbOutputs);
    bool condition = inOut[pos].format == TensorFormat::kLINEAR;
    condition &= inOut[pos].type != DataType::kINT32;
    condition &= inOut[pos].type == inOut[0].type;
    return condition;
}
// configurePlugin: cache the pooling dimensions and I/O quantization scales.
void UffPoolPluginV2::configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput)
{
...
// Cache input channel/height/width and output height/width.
// NOTE(review): assumes mInputDims/mOutputDims are CHW — TODO confirm.
mPoolingParams.mC = mInputDims.d[0];
mPoolingParams.mH = mInputDims.d[1];
mPoolingParams.mW = mInputDims.d[2];
mPoolingParams.mP = mOutputDims.d[1];
mPoolingParams.mQ = mOutputDims.d[2];
// Keep the INT8 scales when valid; -1.0f marks "no scale available".
mInHostScale = in[0].scale >= 0.0f ? in[0].scale : -1.0f;
mOutHostScale = out[0].scale >= 0.0f ? out[0].scale : -1.0f;
}
// enqueue: run the pooling forward pass via cuDNN on the given CUDA stream.
int UffPoolPluginV2::enqueue(int batchSize, const void* const* inputs, void** outputs, void* workspace, cudaStream_t stream)
{
...
// Forward pooling; kONE/kZERO are the alpha/beta blending constants.
CHECK(cudnnPoolingForward(mCudnn, mPoolingDesc, &kONE, mSrcDescriptor, input, &kZERO, mDstDescriptor, output));
...
// 0 signals success to TensorRT.
return 0;
}
示例:使用 C++ API 实现GELU算子
// Build GELU out of elementwise layers:
//   GELU(x) ~= 0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x^3)))
// Prepare the constant values.
// NOTE(review): addConstant keeps a pointer to these weights — they must stay
// alive until the engine is built; confirm the enclosing scope guarantees it.
const float f3 = 3.0f;
const float x3Coeff = 0.044715f;
const float sqrt2OverPi = 0.7978846f;
const float f1 = 1.0f;
const float f05 = 0.5f;
// Implement the GELU operation; 'x' is the upstream layer producing the input.
auto dim = nvinfer1::Dims3{1, 1, 1};
// y = x ^ 3
auto c3 = network->addConstant(dim, Weights{DataType::kFLOAT, &f3, 1});
auto pow1 = network->addElementWise(*x->getOutput(0), *c3->getOutput(0), ElementWiseOperation::kPOW);// power
// y = y * 0.044715f
auto cX3Coeff = network->addConstant(dim, Weights{DataType::kFLOAT, &x3Coeff, 1});
auto mul1 = network->addElementWise(*pow1->getOutput(0), *cX3Coeff->getOutput(0), ElementWiseOperation::kPROD);// multiply
// y = y + x
auto add1 = network->addElementWise(*mul1->getOutput(0), *x->getOutput(0), ElementWiseOperation::kSUM);// add
// y = y * 0.7978846f  (sqrt(2/pi))
auto cSqrt2OverPi = network->addConstant(dim, Weights{DataType::kFLOAT, &sqrt2OverPi, 1});
auto mul2 = network->addElementWise(*add1->getOutput(0), *cSqrt2OverPi->getOutput(0), ElementWiseOperation::kPROD);// multiply
// y = tanh(y)
auto tanh1 = network->addActivation(*mul2->getOutput(0), ActivationType::kTANH);// tanh
// y = y + 1
auto c1 = network->addConstant(dim, Weights{DataType::kFLOAT, &f1, 1});
auto add2 = network->addElementWise(*tanh1->getOutput(0), *c1->getOutput(0), ElementWiseOperation::kSUM);// add
// y = y * 0.5
auto c05 = network->addConstant(dim, Weights{DataType::kFLOAT, &f05, 1});
auto mul3 = network->addElementWise(*add2->getOutput(0), *c05->getOutput(0), ElementWiseOperation::kPROD);// multiply
// y = y * x
auto y = network->addElementWise(*mul3->getOutput(0), *x->getOutput(0), ElementWiseOperation::kPROD);// multiply
示例:创建持久化LSTM插件
//创建creator
auto creator = getPluginRegistry()->getPluginCreator("CgPersistentLSTMPlugin_TRT", "1")
//创建插件
IPluginV2* cgPersistentLSTMPlugin = creator->createPlugin("CgPersistentLSTMPlugin_TRT", &fc);
/*fc 是一个 插件字段元数据,由 4 个参数组成:
hiddenSize: 这是一个 INT32 参数,用于指定 LSTM 的隐藏大小
numLayers: 这是一个 INT32 参数,用于指定 LSTM 中的层数。
bidirectionFactor: 这是一个 INT32 参数,表示 LSTM 是否是双向的。 如果 LSTM 是双向的,则该值应为 设置为 2,否则,该值设置为 1。
setInitialStates: 这是一个 INT32 参数,表示 LSTM 是否有初始状态和单元格值作为输入。 如果设置为 0, 初始状态和单元格值将为零。 为了更好性能,建议使用 此标志而不是提供零状态和单元格值作为输入。
*/
//将插件层添加到网络
auto lstmLayer = network->addPluginV2(&inputs[0], 6, *cgPersistentLSTMPlugin);
/*
input:这些是 LSTM 的输入序列
seqLenTensor: 每个序列的有效长度
weight:
bias:
initial hidden state:
initial cell state:
*/