以lenet5网络为例(2个卷积 + 3个全连接)。
1.首先要生成builder,再生成network,用network搭建网络结构。
static Logger gLogger;
IBuilder* builder = createInferBuilder(gLogger);
// NetworkDefinitionCreationFlag values (bit mask passed to createNetworkV2):
//   kEXPLICIT_BATCH = 0,     //!< Mark the network to be an explicit batch network.
//                            //!  With dynamic shapes, any of the input dimensions can vary at run-time.
//   kEXPLICIT_PRECISION = 1, //!< Mark the network to be an explicit precision network.
//! Setting the network to be an explicit precision network has the following implications:
//! 1) Precision of all input tensors to the network have to be specified with the ITensor::setType() function.
//! 2) Precision of all layer output tensors in the network have to be specified using the ILayer::setOutputType() function.
//! 3) The builder will not quantize the weights of any layer including those running in lower precision (INT8). It will
//!    simply cast the weights into the required precision.
//! 4) Dynamic ranges must not be provided to run the network in int8 mode. Dynamic ranges of each tensor in the explicit
//!    precision network is [-127,127].
//! 5) Quantizing and dequantizing activation values between higher (FP32) and lower (INT8) precision
//!    will be performed using explicit Scale layers with input/output precision set appropriately.
// Passing 0U (no flags) creates an implicit batch network.
INetworkDefinition* network = builder->createNetworkV2(0U);
2.输入层 addInput
// const char* INPUT_BLOB_NAME = "data";
// DataType dt = DataType::kFLOAT
// Create input tensor of shape { 1, 32, 32 } with name INPUT_BLOB_NAME
//! \param name The name of the tensor.
//! \param type The type of the data held in the tensor.
//! \param dimensions The dimensions of the tensor.
//! \return The new tensor or nullptr if there is an error.
ITensor* data = network->addInput(INPUT_BLOB_NAME, dt, Dims3{1, INPUT_H, INPUT_W});
assert(data);
// 或者
// auto data = network->addInput(INPUT_BLOB_NAME, dt, Dims4{-1, 1, INPUT_H, INPUT_W}); // 动态batch: 四个维度需要Dims4(而非Dims3), 且network必须用kEXPLICIT_BATCH标志创建
3.创建卷积层 addConvolutionNd
// Add convolution layer with 6 outputs and a 5x5 filter.
// virtual IConvolutionLayer* addConvolutionNd(ITensor& input, int32_t nbOutputMaps, Dims kernelSize, Weights kernelWeights, Weights biasWeights) TRTNOEXCEPT = 0;
//! \param input The input tensor to the convolution.
//! \param nbOutputMaps The number of output feature maps for the convolution.
//! \param kernelSize The multi-dimensions of the convolution kernel.
//! \param kernelWeights The kernel weights for the convolution.
//! \param biasWeights The optional bias weights for the convolution.
//! \return The new convolution layer, or nullptr if it could not be created.
// 输入tensor是*data, 输出6通道,kernel大小是5x5,后面是权重和偏置的值。
IConvolutionLayer* conv1 = network->addConvolutionNd(*data, 6, DimsHW{5, 5}, weightMap["conv1.weight"], weightMap["conv1.bias"]);
assert(conv1);
conv1->setStrideNd(DimsHW{1, 1});
// 或者
auto conv1 = network->addConvolution(*data, 6, DimsHW{5, 5}, weightMap["conv1.weight"], weightMap["conv1.bias"]); // 旧版2D接口; 注意输入是ITensor, 直接传*data即可(ITensor没有getOutput方法, getOutput(0)只能用在ILayer上)
4.激活层 addActivation
// Add activation layer using the ReLU algorithm.
//! \param input The input tensor to the layer.
//! \param type The type of activation function to apply.
IActivationLayer* relu1 = network->addActivation(*conv1->getOutput(0), ActivationType::kRELU);
assert(relu1);
5.池化层addPoolingNd
// Add average pooling layer with stride of 2x2 and kernel size of 2x2 (LeNet-5 uses average pooling, hence PoolingType::kAVERAGE below).
IPoolingLayer* pool1 = network->addPoolingNd(*relu1->getOutput(0), PoolingType::kAVERAGE, DimsHW{2, 2});
assert(pool1);
pool1->setStrideNd(DimsHW{2, 2});
6.全连接层 addFullyConnected
IFullyConnectedLayer* fc1 = network->addFullyConnected(*pool2->getOutput(0), 120, weightMap["fc1.weight"], weightMap["fc1.bias"]);
assert(fc1);
7.Softmax函数
// Add softmax layer to determine the probability.
ISoftMaxLayer* prob = network->addSoftMax(*fc3->getOutput(0));
assert(prob);
prob->getOutput(0)->setName(OUTPUT_BLOB_NAME); // Set the tensor name.
network->markOutput(*prob->getOutput(0)); // Mark a tensor as a network output.
lenet5完整网络结构代码
// Create the engine using only the API and not any parser.
//
// Builds the LeNet-5 network (2 convolution + 3 fully-connected layers)
// layer by layer and compiles it into a TensorRT engine.
//
// \param maxBatchSize The maximum batch size the engine must support.
// \param builder      TensorRT builder used to create the network and engine.
// \param config       Builder configuration (workspace size, precision flags, ...).
// \param dt           Element type of the input tensor (e.g. DataType::kFLOAT).
// \return The built engine, or nullptr if engine construction failed.
ICudaEngine* createLenetEngine(unsigned int maxBatchSize, IBuilder* builder, IBuilderConfig* config, DataType dt)
{
    // 0U = no creation flags, i.e. an *implicit* batch network; the batch
    // dimension is therefore supplied via setMaxBatchSize() below instead of
    // being part of the input dimensions.
    INetworkDefinition* network = builder->createNetworkV2(0U);

    // Create input tensor of shape { 1, 32, 32 } (CHW) with name INPUT_BLOB_NAME.
    ITensor* data = network->addInput(INPUT_BLOB_NAME, dt, Dims3{1, INPUT_H, INPUT_W});
    assert(data);

    // Host-side weights; they must stay alive until the engine is built.
    std::map<std::string, Weights> weightMap = loadWeights("../lenet5.wts");

    // conv1: 6 output feature maps, 5x5 kernel, stride 1.
    IConvolutionLayer* conv1 = network->addConvolutionNd(*data, 6, DimsHW{5, 5}, weightMap["conv1.weight"], weightMap["conv1.bias"]);
    assert(conv1);
    conv1->setStrideNd(DimsHW{1, 1});
    // ReLU activation after conv1.
    IActivationLayer* relu1 = network->addActivation(*conv1->getOutput(0), ActivationType::kRELU);
    assert(relu1);
    // pool1: average pooling (as in the original LeNet-5), 2x2 window, stride 2.
    IPoolingLayer* pool1 = network->addPoolingNd(*relu1->getOutput(0), PoolingType::kAVERAGE, DimsHW{2, 2});
    assert(pool1);
    pool1->setStrideNd(DimsHW{2, 2});

    // conv2: 16 output feature maps, 5x5 kernel, stride 1.
    IConvolutionLayer* conv2 = network->addConvolutionNd(*pool1->getOutput(0), 16, DimsHW{5, 5}, weightMap["conv2.weight"], weightMap["conv2.bias"]);
    assert(conv2);
    conv2->setStrideNd(DimsHW{1, 1});
    // ReLU activation after conv2.
    IActivationLayer* relu2 = network->addActivation(*conv2->getOutput(0), ActivationType::kRELU);
    assert(relu2);
    // pool2: average pooling, 2x2 window, stride 2.
    IPoolingLayer* pool2 = network->addPoolingNd(*relu2->getOutput(0), PoolingType::kAVERAGE, DimsHW{2, 2});
    assert(pool2);
    pool2->setStrideNd(DimsHW{2, 2});

    // fc1: 120 outputs, followed by ReLU.
    IFullyConnectedLayer* fc1 = network->addFullyConnected(*pool2->getOutput(0), 120, weightMap["fc1.weight"], weightMap["fc1.bias"]);
    assert(fc1);
    IActivationLayer* relu3 = network->addActivation(*fc1->getOutput(0), ActivationType::kRELU);
    assert(relu3);

    // fc2: 84 outputs, followed by ReLU.
    IFullyConnectedLayer* fc2 = network->addFullyConnected(*relu3->getOutput(0), 84, weightMap["fc2.weight"], weightMap["fc2.bias"]);
    assert(fc2);
    IActivationLayer* relu4 = network->addActivation(*fc2->getOutput(0), ActivationType::kRELU);
    assert(relu4);

    // fc3: OUTPUT_SIZE class scores.
    IFullyConnectedLayer* fc3 = network->addFullyConnected(*relu4->getOutput(0), OUTPUT_SIZE, weightMap["fc3.weight"], weightMap["fc3.bias"]);
    assert(fc3);

    // Softmax converts the class scores into probabilities; its output is the
    // network output tensor.
    ISoftMaxLayer* prob = network->addSoftMax(*fc3->getOutput(0));
    assert(prob);
    prob->getOutput(0)->setName(OUTPUT_BLOB_NAME); // Set the tensor name.
    network->markOutput(*prob->getOutput(0));      // Mark the tensor as a network output.

    // Build the engine.
    builder->setMaxBatchSize(maxBatchSize);
    config->setMaxWorkspaceSize(1 << 20); // 1 MiB of scratch space for tactic selection.
    ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config);
    assert(engine); // buildEngineWithConfig returns nullptr on failure.

    // Don't need the network any more once the engine is built.
    network->destroy();

    // Release host memory for the weights (the engine holds its own copy now).
    for (auto& mem : weightMap)
    {
        free((void*) (mem.second.values));
    }
    return engine;
}