TensorRTX Engine-Building Notes: AlexNet


These are notes on https://github.com/wang-xinyu/tensorrtx/tree/master/alexnet.
When every operation in the model is supported by the TensorRT API, there is no need to hand-write CUDA plugins; otherwise, custom plugins are required.

Includes

NvInfer.h
The API required for constructing and using an engine; documented in the official C++ API reference.

cuda_runtime_api.h
The CUDA runtime API, required for CUDA operations such as memory management and stream execution.

logging.h
As the name suggests, this is the logging component, used to manage log messages from the builder, engine, and runtime.
Typically, a logger instance is passed as a required argument to the factory functions for the builder, runtime, and parser.
This class gives TensorRT tools and samples a common interface for logging information to the console, and it supports two kinds of messages:

  • Debug messages with an associated severity (info, warning, error, or internal error/fatal)
  • Test pass/fail messages

Compared with writing directly to stdout/stderr, routing all sample logging through this class centralizes the logic that controls the verbosity and format of sample output in one place. In the future, the class could be extended to dump test results to a file in some standard format (e.g., JUnit XML) and to provide additional metadata (e.g., timing the duration of a test run).
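To make the later snippets self-contained, here is a minimal sketch of what a logging.h Logger can look like, assuming TensorRT 8's ILogger interface; the actual logging.h shipped with tensorrtx is more elaborate:

#include "NvInfer.h"
#include <iostream>

class Logger : public nvinfer1::ILogger
{
public:
    void log(Severity severity, const char* msg) noexcept override
    {
        // Drop kINFO/kVERBOSE chatter; print warnings and anything worse.
        if (severity <= Severity::kWARNING)
            std::cout << msg << std::endl;
    }
};

static Logger gLogger;  // the instance later passed to createInferBuilder / createInferRuntime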

AlexNet (PyTorch)

import torch.nn as nn

class alexnet(nn.Module):
    
    def __init__(self, num_classes=1000):
        super(alexnet, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, stride=1, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2)
        )
        self.avgpool = nn.AdaptiveAvgPool2d((6,6))
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(6*6*256, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes)
        )
    
    def forward(self, x):
        x = self.features(x)
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        out = self.classifier(x)
        return out 

Building the Engine

Official API build steps

(Figure: the official build workflow — create a builder and a builder config, define the network, build the engine from the network and config, then serialize the engine to a plan file.)

Code

part1 Main

 size_t size{0};

 if (std::string(argv[1]) == "-s") {
     IHostMemory* modelStream{nullptr};
     APIToModel(1, &modelStream);
     assert(modelStream != nullptr);

     std::ofstream p("alexnet.engine", std::ios::binary);
     if (!p)
     {
         std::cerr << "could not open plan output file" << std::endl;
         return -1;
     }
     p.write(reinterpret_cast<const char*>(modelStream->data()), modelStream->size());
     modelStream->destroy();
     return 1;
 }
 // the "-d" branch, which deserializes the engine for inference, is omitted here

size_t size{0};

size_t is defined by the C/C++ standard in stddef.h; it is an unsigned integer type large enough to hold the size of any object in C.

IHostMemory* modelStream{nullptr};

https://docs.nvidia.com/deeplearning/tensorrt/api/c_api/classnvinfer1_1_1_i_host_memory.html
https://docs.nvidia.com/deeplearning/tensorrt/api/python_api/infer/FoundationalTypes/HostMemory.html
Allocates host memory that will hold the serialized engine stream.

std::ofstream p("alexnet.engine", std::ios::binary);

Opens alexnet.engine for writing in binary mode.

reinterpret_cast<const char*>(modelStream->data())

Casts the raw data pointer to const char* so that it can be written into the binary file alexnet.engine.

p.write(reinterpret_cast<const char*>(modelStream->data()), modelStream->size());

The signature is ostream& write(const char* buffer, streamsize len): buffer points to a block of memory, and len is the number of bytes to write.
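For completeness, here is a sketch of the matching "-d" branch from the original sample, which reads alexnet.engine back and deserializes it (assuming TensorRT 8's two-argument deserializeCudaEngine and the gLogger from logging.h):

std::ifstream file("alexnet.engine", std::ios::binary);
assert(file.good());
file.seekg(0, file.end);
size = file.tellg();              // the size_t declared at the top of main
file.seekg(0, file.beg);
char* trtModelStream = new char[size];
file.read(trtModelStream, size);
file.close();

IRuntime* runtime = createInferRuntime(gLogger);
ICudaEngine* engine = runtime->deserializeCudaEngine(trtModelStream, size);
IExecutionContext* context = engine->createExecutionContext();
delete[] trtModelStream;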

part2 APIToModel(1, &modelStream);

void APIToModel(unsigned int maxBatchSize, IHostMemory** modelStream)
{
    // Create builder
    IBuilder* builder = createInferBuilder(gLogger);
    IBuilderConfig* config = builder->createBuilderConfig();

    // Create model to populate the network, then set the outputs and create an engine
    ICudaEngine* engine = createEngine(maxBatchSize, builder, config, DataType::kFLOAT);
    assert(engine != nullptr);

    // Serialize the engine
    (*modelStream) = engine->serialize();

    // Close everything down
    engine->destroy();
    builder->destroy();
}

IBuilder* builder = createInferBuilder(gLogger);

Required: createInferBuilder is the entry point of TensorRT's build phase. It returns the IBuilder factory that creates the network definition, the builder config, and ultimately the engine; the logger is its only argument, so build-time messages have somewhere to go.

IBuilderConfig* config = builder->createBuilderConfig();

Required: IBuilderConfig holds the build-time options (maximum workspace size, precision flags, and so on) that the builder consults while optimizing the network.
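For illustration, a hedged sketch of common config settings; this sample only sets the workspace size (later, inside createEngine), and the FP16 flag below is an assumption, not part of the original code:

config->setMaxWorkspaceSize(1 << 20);        // 1 MiB of scratch GPU memory for kernel selection
if (builder->platformHasFastFp16())
    config->setFlag(BuilderFlag::kFP16);     // not in the original: allow FP16 kernels if the GPU supports them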

(*modelStream) = engine->serialize();

modelStream is passed in as a double pointer (IHostMemory**), so it is dereferenced once before the assignment; serialize() returns an IHostMemory* that owns the serialized engine bytes.

part3 CreateEngine

ICudaEngine* createEngine(unsigned int maxBatchSize, IBuilder* builder, IBuilderConfig* config, DataType dt)
{
    INetworkDefinition* network = builder->createNetworkV2(0U);

    // Create input tensor of shape { 3, INPUT_H, INPUT_W } with name INPUT_BLOB_NAME
    ITensor* data = network->addInput(INPUT_BLOB_NAME, dt, Dims3{3, INPUT_H, INPUT_W});
    assert(data);

    std::map<std::string, Weights> weightMap = loadWeights("../alexnet.wts");
    Weights emptywts{DataType::kFLOAT, nullptr, 0};

    IConvolutionLayer* conv1 = network->addConvolutionNd(*data, 64, DimsHW{11, 11}, weightMap["features.0.weight"], weightMap["features.0.bias"]);
    assert(conv1);
    conv1->setStrideNd(DimsHW{4, 4});
    conv1->setPaddingNd(DimsHW{2, 2});

    // Add activation layer using the ReLU algorithm.
    IActivationLayer* relu1 = network->addActivation(*conv1->getOutput(0), ActivationType::kRELU);
    assert(relu1);

    // Add max pooling layer with kernel size of 3x3 and stride of 2x2.
    IPoolingLayer* pool1 = network->addPoolingNd(*relu1->getOutput(0), PoolingType::kMAX, DimsHW{3, 3});
    assert(pool1);
    pool1->setStrideNd(DimsHW{2, 2});

    IConvolutionLayer* conv2 = network->addConvolutionNd(*pool1->getOutput(0), 192, DimsHW{5, 5}, weightMap["features.3.weight"], weightMap["features.3.bias"]);
    assert(conv2);
    conv2->setPaddingNd(DimsHW{2, 2});
    IActivationLayer* relu2 = network->addActivation(*conv2->getOutput(0), ActivationType::kRELU);
    assert(relu2);
    IPoolingLayer* pool2 = network->addPoolingNd(*relu2->getOutput(0), PoolingType::kMAX, DimsHW{3, 3});
    assert(pool2);
    pool2->setStrideNd(DimsHW{2, 2});

    IConvolutionLayer* conv3 = network->addConvolutionNd(*pool2->getOutput(0), 384, DimsHW{3, 3}, weightMap["features.6.weight"], weightMap["features.6.bias"]);
    assert(conv3);
    conv3->setPaddingNd(DimsHW{1, 1});
    IActivationLayer* relu3 = network->addActivation(*conv3->getOutput(0), ActivationType::kRELU);
    assert(relu3);

    IConvolutionLayer* conv4 = network->addConvolutionNd(*relu3->getOutput(0), 256, DimsHW{3, 3}, weightMap["features.8.weight"], weightMap["features.8.bias"]);
    assert(conv4);
    conv4->setPaddingNd(DimsHW{1, 1});
    IActivationLayer* relu4 = network->addActivation(*conv4->getOutput(0), ActivationType::kRELU);
    assert(relu4);

    IConvolutionLayer* conv5 = network->addConvolutionNd(*relu4->getOutput(0), 256, DimsHW{3, 3}, weightMap["features.10.weight"], weightMap["features.10.bias"]);
    assert(conv5);
    conv5->setPaddingNd(DimsHW{1, 1});
    IActivationLayer* relu5 = network->addActivation(*conv5->getOutput(0), ActivationType::kRELU);
    assert(relu5);
    IPoolingLayer* pool3 = network->addPoolingNd(*relu5->getOutput(0), PoolingType::kMAX, DimsHW{3, 3});
    assert(pool3);
    pool3->setStrideNd(DimsHW{2, 2});

    IFullyConnectedLayer* fc1 = network->addFullyConnected(*pool3->getOutput(0), 4096, weightMap["classifier.1.weight"], weightMap["classifier.1.bias"]);
    assert(fc1);

    IActivationLayer* relu6 = network->addActivation(*fc1->getOutput(0), ActivationType::kRELU);
    assert(relu6);

    IFullyConnectedLayer* fc2 = network->addFullyConnected(*relu6->getOutput(0), 4096, weightMap["classifier.4.weight"], weightMap["classifier.4.bias"]);
    assert(fc2);

    IActivationLayer* relu7 = network->addActivation(*fc2->getOutput(0), ActivationType::kRELU);
    assert(relu7);

    IFullyConnectedLayer* fc3 = network->addFullyConnected(*relu7->getOutput(0), 1000, weightMap["classifier.6.weight"], weightMap["classifier.6.bias"]);
    assert(fc3);

    fc3->getOutput(0)->setName(OUTPUT_BLOB_NAME);
    std::cout << "set name out" << std::endl;
    network->markOutput(*fc3->getOutput(0));

    // Build engine
    builder->setMaxBatchSize(maxBatchSize);
    config->setMaxWorkspaceSize(1 << 20);
    ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config);
    std::cout << "build out" << std::endl;

    // Don't need the network any more
    network->destroy();

    // Release host memory
    for (auto& mem : weightMap)
    {
        free((void*) (mem.second.values));
    }

    return engine;
}

INetworkDefinition* network = builder->createNetworkV2(0U);

INetworkDefinition is the network-structure builder, similar in spirit to a PyTorch Sequential: layers are added one after another and wired together by their output tensors. The 0U flag creates an implicit-batch network.
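For reference, a sketch of the explicit-batch alternative that newer TensorRT code prefers (not used in this sample):

const uint32_t flag = 1U << static_cast<uint32_t>(NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
INetworkDefinition* network = builder->createNetworkV2(flag);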

The rest of createEngine reads the .wts file into a name-to-Weights map and adds the layers to the INetworkDefinition one by one, mirroring the PyTorch definition above.
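loadWeights itself is not shown above; the following is a sketch of what it does, assuming the standard tensorrtx .wts format (first line: the number of entries; each following line: a tensor name, a value count, then that many 32-bit floats as hex bit patterns):

std::map<std::string, Weights> loadWeights(const std::string& file)
{
    std::map<std::string, Weights> weightMap;
    std::ifstream input(file);
    assert(input.is_open() && "Unable to load weight file.");

    int32_t count;
    input >> count;
    while (count--)
    {
        Weights wt{DataType::kFLOAT, nullptr, 0};
        std::string name;
        uint32_t size;
        input >> name >> std::dec >> size;

        // Each value is stored as the hex bit pattern of a 32-bit float.
        uint32_t* val = reinterpret_cast<uint32_t*>(malloc(sizeof(uint32_t) * size));
        for (uint32_t i = 0; i < size; ++i)
            input >> std::hex >> val[i];

        wt.values = val;
        wt.count = size;
        weightMap[name] = wt;
    }
    return weightMap;
}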

// Build engine
    builder->setMaxBatchSize(maxBatchSize);
    config->setMaxWorkspaceSize(1 << 20);
    ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config);
    std::cout << "build out" << std::endl;

    // Don't need the network any more
    network->destroy();

    // Release host memory
    for (auto& mem : weightMap)
    {
        free((void*) (mem.second.values));
    }

setMaxBatchSize declares the largest batch size the engine must support; setMaxWorkspaceSize(1 << 20) gives the optimizer 1 MiB of scratch GPU memory when choosing kernels; buildEngineWithConfig then runs the optimizer and produces the engine. Once the engine is built, the network definition and the host-side weight buffers (malloc'd in loadWeights) are no longer needed and are released.

References

TensorRT 8.4.1 official API documentation (C++)
TensorRT 8.4.1 official API documentation (Python)
Basic concepts in TensorRT
wang-xinyu TensorRT-AlexNet
