wts-engine

使用wts构建engine
  • step1 : 构建gLogger

    example:

    class Logger : public ILogger           
    {
        void log(Severity severity, const char* msg) noexcept override
        {
            // suppress info-level messages
            if (severity <= Severity::kWARNING)
                std::cout << msg << std::endl;
        }
    } logger;
    

    Logger用于捕获所有的警告信息

  • step2 构建builder:创建构建器实例

  • step3 构建config,config用于指导构建器如何去优化

  • step4 创建一个空白网络,后面可以使用onnx的内置函数处理onnx文件完成网络的填充,也可以使用wts文件加载权重+自己构建网络的方法进行构建

  • step5 创建一个输入层,约定输入层的大小,输入数据的类型

  • step6 加载权重文件,构建网络、填充网络(onnx方法和wts方法)

  • step7 设置最大的batchSize,builder->setMaxBatchSize(maxBatchSize)

  • step8 设置工作空间大小

  • step9 创建engine文件

  • step10 序列化engine文件,并且保存序列化文件

  • step 11 关闭所有

// 注意,这个函数并不能够直接使用,这个代码只是为了理清楚逻辑而写,并不能够直接运行
// NOTE: skeleton code that sketches the wts->engine build flow; it relies on
// project-level helpers (gLogger, loadWeights, the layer builders) declared elsewhere.
//
// Builds a TensorRT engine step by step:
//   builder -> config -> empty network -> input tensor -> weights/layers
//   -> batch/workspace settings -> engine -> serialize -> teardown.
// Returns 0 on success.
int creatEngine(){
    // Maximum batch size the engine will be optimized for.
    const int maxBatchSize = 1;

    // Create the builder; gLogger must already exist (captures warnings and errors).
    IBuilder* builder = createInferBuilder(gLogger);
    // The config tells TensorRT how to optimize (precision, workspace, ...).
    IBuilderConfig* config = builder->createBuilderConfig();
    // Create an empty network definition; it will be populated layer by layer below.
    INetworkDefinition* network = builder->createNetworkV2(0U);
    // Declare the input tensor: float data of shape {1, 3, 224, 224}.
    ITensor* data = network->addInput("data", DataType::kFLOAT, Dims3{ 1, 3, 224, 224 });
    assert(data);

    // Load the .wts weight file into a name -> Weights map.
    std::map<std::string, Weights> weightMap = loadWeights("weights.wts");
    /* Populate the network */
    /*-------------yolov5 backbone--------- */
    // example: building a layer with the helper functions
    /* example
        auto focus0 = focus(network, weightMap, *data, 3, get_width(64, gw), 3, "model.0");
        ILayer* focus(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input, int inch, int outch, int ksize, std::string lname) {
            ISliceLayer *s1 = network->addSlice(input, Dims3{ 0, 0, 0 }, Dims3{ inch, Yolo::INPUT_H / 2, Yolo::INPUT_W / 2 }, Dims3{ 1, 2, 2 });
            ISliceLayer *s2 = network->addSlice(input, Dims3{ 0, 1, 0 }, Dims3{ inch, Yolo::INPUT_H / 2, Yolo::INPUT_W / 2 }, Dims3{ 1, 2, 2 });
            ISliceLayer *s3 = network->addSlice(input, Dims3{ 0, 0, 1 }, Dims3{ inch, Yolo::INPUT_H / 2, Yolo::INPUT_W / 2 }, Dims3{ 1, 2, 2 });
            ISliceLayer *s4 = network->addSlice(input, Dims3{ 0, 1, 1 }, Dims3{ inch, Yolo::INPUT_H / 2, Yolo::INPUT_W / 2 }, Dims3{ 1, 2, 2 });
            ITensor* inputTensors[] = { s1->getOutput(0), s2->getOutput(0), s3->getOutput(0), s4->getOutput(0) };
            auto cat = network->addConcatenation(inputTensors, 4);
            auto conv = convBlock(network, weightMap, *cat->getOutput(0), outch, ksize, 1, 1, lname + ".conv");
            return conv;
        }
     */
    /*-------------yolov5 head------------- */

    /*-------------yolov5 detect----------- */

    /* build engine */
    // Maximum batch size the optimizer will target.
    builder->setMaxBatchSize(maxBatchSize);
    // Scratch memory TensorRT may use while optimizing: 16 MB.
    config->setMaxWorkspaceSize(16 * (1 << 20));
    config->setFlag(BuilderFlag::kFP16);
    // Build the engine from the network definition under the given config.
    ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config);
    assert(engine != nullptr);
    std::cout << "Build engine successfully" << std::endl;

    // The network definition is no longer needed once the engine exists.
    network->destroy();

    // Serialize the engine into host memory.
    // (Was: `IHostMemory** modelStream;` then `(*modleStream) = ...` — a
    // dereference of an uninitialized pointer plus a misspelled identifier.)
    IHostMemory* modelStream = engine->serialize();
    assert(modelStream != nullptr);

    // Close everything down; destroy config before the builder that created it.
    engine->destroy();
    config->destroy();
    builder->destroy();

    // Write modelStream->data() (modelStream->size() bytes) to a file with std::ofstream.
    modelStream->destroy();

    return 0;
}

完整可运行的代码如下

int create_engine_nof16() {
    cudaSetDevice(0); // 设置默认的GPU

    std::string wts_name = "D:\\VisualStdioProgram\\yolov5_program\\yolov5-5.0_tensorrt\\model\\yolov5s.wts"; // 权重文件路径
    std::string engine_name = "yolov5s.engine";  // 保存的engine文件名
    bool is_p6 = false;  // 关闭p6
    
    // 如果输入模型是s, gd = 0.33; gw = 0.50;
    float gd = 0.33,   // model depth multiple
    float gw = 0.50;   // layer channel multiple
    int maxBatchSize = 1; // 设置最大batch size

    // Create builder
    IBuilder* builder = createInferBuilder(gLogger);   
    IBuilderConfig* config = builder->createBuilderConfig(); 

    // Create model to populate the network, then set the outputs and create an engine
    ICudaEngine* engine = nullptr;
    INetworkDefinition* network = builder->createNetworkV2(0U); // 创建一个空白的Network

    // Create input tensor of shape {3, INPUT_H, INPUT_W} with name INPUT_BLOB_NAME
    ITensor* data = network->addInput(INPUT_BLOB_NAME, DataType::kFLOAT, Dims3{ 3, INPUT_H, INPUT_W }); // 创建一个Tensor,输入类型为Dims3{}
    assert(data);

    std::map<std::string, Weights> weightMap = loadWeights(wts_name);  // 加载wts文件,权重文件

    /* ------ yolov5 backbone------ */
    auto focus0 = focus(network, weightMap, *data, 3, get_width(64, gw), 3, "model.0");
    auto conv1 = convBlock(network, weightMap, *focus0->getOutput(0), get_width(128, gw), 3, 2, 1, "model.1");
    auto bottleneck_CSP2 = C3(network, weightMap, *conv1->getOutput(0), get_width(128, gw), get_width(128, gw), get_depth(3, gd), true, 1, 0.5, "model.2");
    auto conv3 = convBlock(network, weightMap, *bottleneck_CSP2->getOutput(0), get_width(256, gw), 3, 2, 1, "model.3");
    auto bottleneck_csp4 = C3(network, weightMap, *conv3->getOutput(0), get_width(256, gw), get_width(256, gw), get_depth(9, gd), true, 1, 0.5, "model.4");
    auto conv5 = convBlock(network, weightMap, *bottleneck_csp4->getOutput(0), get_width(512, gw), 3, 2, 1, "model.5");
    auto bottleneck_csp6 = C3(network, weightMap, *conv5->getOutput(0), get_width(512, gw), get_width(512, gw), get_depth(9, gd), true, 1, 0.5, "model.6");
    auto conv7 = convBlock(network, weightMap, *bottleneck_csp6->getOutput(0), get_width(1024, gw), 3, 2, 1, "model.7");
    auto spp8 = SPP(network, weightMap, *conv7->getOutput(0), get_width(1024, gw), get_width(1024, gw), 5, 9, 13, "model.8");

    /* ------ yolov5 head ------ */
    auto bottleneck_csp9 = C3(network, weightMap, *spp8->getOutput(0), get_width(1024, gw), get_width(1024, gw), get_depth(3, gd), false, 1, 0.5, "model.9");
    auto conv10 = convBlock(network, weightMap, *bottleneck_csp9->getOutput(0), get_width(512, gw), 1, 1, 1, "model.10");

    auto upsample11 = network->addResize(*conv10->getOutput(0));
    assert(upsample11);
    upsample11->setResizeMode(ResizeMode::kNEAREST);
    upsample11->setOutputDimensions(bottleneck_csp6->getOutput(0)->getDimensions());

    ITensor* inputTensors12[] = { upsample11->getOutput(0), bottleneck_csp6->getOutput(0) };
    auto cat12 = network->addConcatenation(inputTensors12, 2);
    auto bottleneck_csp13 = C3(network, weightMap, *cat12->getOutput(0), get_width(1024, gw), get_width(512, gw), get_depth(3, gd), false, 1, 0.5, "model.13");
    auto conv14 = convBlock(network, weightMap, *bottleneck_csp13->getOutput(0), get_width(256, gw), 1, 1, 1, "model.14");

    auto upsample15 = network->addResize(*conv14->getOutput(0));
    assert(upsample15);
    upsample15->setResizeMode(ResizeMode::kNEAREST);
    upsample15->setOutputDimensions(bottleneck_csp4->getOutput(0)->getDimensions());

    ITensor* inputTensors16[] = { upsample15->getOutput(0), bottleneck_csp4->getOutput(0) };
    auto cat16 = network->addConcatenation(inputTensors16, 2);

    auto bottleneck_csp17 = C3(network, weightMap, *cat16->getOutput(0), get_width(512, gw), get_width(256, gw), get_depth(3, gd), false, 1, 0.5, "model.17");

    /* ------ detect ------ */
    IConvolutionLayer* det0 = network->addConvolutionNd(*bottleneck_csp17->getOutput(0), 3 * (Yolo::CLASS_NUM + 5), DimsHW{ 1, 1 }, weightMap["model.24.m.0.weight"], weightMap["model.24.m.0.bias"]);
    auto conv18 = convBlock(network, weightMap, *bottleneck_csp17->getOutput(0), get_width(256, gw), 3, 2, 1, "model.18");
    ITensor* inputTensors19[] = { conv18->getOutput(0), conv14->getOutput(0) };
    auto cat19 = network->addConcatenation(inputTensors19, 2);
    auto bottleneck_csp20 = C3(network, weightMap, *cat19->getOutput(0), get_width(512, gw), get_width(512, gw), get_depth(3, gd), false, 1, 0.5, "model.20");
    IConvolutionLayer* det1 = network->addConvolutionNd(*bottleneck_csp20->getOutput(0), 3 * (Yolo::CLASS_NUM + 5), DimsHW{ 1, 1 }, weightMap["model.24.m.1.weight"], weightMap["model.24.m.1.bias"]);
    auto conv21 = convBlock(network, weightMap, *bottleneck_csp20->getOutput(0), get_width(512, gw), 3, 2, 1, "model.21");
    ITensor* inputTensors22[] = { conv21->getOutput(0), conv10->getOutput(0) };
    auto cat22 = network->addConcatenation(inputTensors22, 2);
    auto bottleneck_csp23 = C3(network, weightMap, *cat22->getOutput(0), get_width(1024, gw), get_width(1024, gw), get_depth(3, gd), false, 1, 0.5, "model.23");
    IConvolutionLayer* det2 = network->addConvolutionNd(*bottleneck_csp23->getOutput(0), 3 * (Yolo::CLASS_NUM + 5), DimsHW{ 1, 1 }, weightMap["model.24.m.2.weight"], weightMap["model.24.m.2.bias"]);

    auto yolo = addYoLoLayer(network, weightMap, "model.24", std::vector<IConvolutionLayer*>{det0, det1, det2});
    yolo->getOutput(0)->setName(OUTPUT_BLOB_NAME);
    network->markOutput(*yolo->getOutput(0));

    // Build engine
    builder->setMaxBatchSize(maxBatchSize);
    config->setMaxWorkspaceSize(16 * (1 << 20));  // 16MB
    config->setFlag(BuilderFlag::kFP16);

    std::cout << "Building engine, please wait for a while..." << std::endl;
    engine = builder->buildEngineWithConfig(*network, *config);
    assert(engine);
    std::cout << "Build engine successfully!" << std::endl;

    // Don't need the network any more
    network->destroy();

    // Release host memory
    for (auto& mem : weightMap) {
        free((void*)(mem.second.values));
    }
    assert(engine != nullptr);

    // Serialize the engine
    IHostMemory* modelStream{ nullptr }; // 定义一个Host的内存块, 存放序列化后的模型
    (modelStream) = engine->serialize(); 
    assert(modelStream != nullptr);

    // Close everything down
    engine->destroy();
    builder->destroy();
    config->destroy();

    // 将modelStream写入文件
    std::ofstream p(engine_name, std::ios::binary); // 
    if (!p) {
        std::cerr << "could not open plan output file" << std::endl;
        return -1;
    }
    p.write(reinterpret_cast<const char*>(modelStream->data()), modelStream->size());
    modelStream->destroy();
    std::cout << "in !wts_name.empty" << std::endl;

    return 0;
}
  • 4
    点赞
  • 6
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值