使用wts构建engine
-
step1 : 构建gLogger
example:
class Logger : public ILogger { void log(Severity severity, const char* msg) noexcept override { // suppress info-level messages if (severity <= Severity::kWARNING) std::cout << msg << std::endl; } } logger;
Logger用于捕获所有的警告信息
-
step2 构建builder:创建构建器实例
-
step3 构建config,config用于指导构建器如何去优化
-
step4 创建一个空白网络,后面可以使用onnx的内置函数处理onnx文件完成网络的填充,也可以使用wts文件加载权重+自己构建网络的方法进行构建
-
step5 创建一个输入层,约定输入层的大小,输入数据的类型
-
step6 加载权重文件,构建网络、填充网络(onnx方法和wts方法)
-
step7 设置最大的batchsize
builder->setMaxBatchSize(maxBatchSize)
-
step8 设置工作空间大小
-
step9 创建engine文件
-
step10 序列化engine文件,并且保存序列化文件
-
step 11 关闭所有
// 注意,这个函数并不能够直接使用,这个代码只是为了理清楚逻辑而写,并不能够直接运行
int creatEngine(){
// 创建 builder, 在这个之前需要将gLogger创建好
IBuilder* builder = createInferBuilder(gLogger);
// 创建 config,指定TensorRT的运行模式,怎么进行优化
IBuilderConfig* config = builder->createBuilderConfig();
// 创建网络,这里是空白的
INetworkDefinition* network = builder->createNetworkV2(0U); // 创建一个空白的Network
// 创建输入层,这里是3通道224x224的图片
ITensor* data = network->addInput("data", DataType::kFLOAT, Dims3{ 1, 3, 224, 224 });
assert(data);
// 加载权重文件
std::map<std::string, Weights> weightMap = loadWeights("weights.wts"); // 加载权重文件
/* 构建网络 */
/*-------------yolov5 backbone--------- */
// exmaple : 使用内部自带的库进行构建
/* example
auto focus0 = focus(network, weightMap, *data, 3, get_width(64, gw), 3, "model.0");
ILayer* focus(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input, int inch, int outch, int ksize, std::string lname) {
ISliceLayer *s1 = network->addSlice(input, Dims3{ 0, 0, 0 }, Dims3{ inch, Yolo::INPUT_H / 2, Yolo::INPUT_W / 2 }, Dims3{ 1, 2, 2 });
ISliceLayer *s2 = network->addSlice(input, Dims3{ 0, 1, 0 }, Dims3{ inch, Yolo::INPUT_H / 2, Yolo::INPUT_W / 2 }, Dims3{ 1, 2, 2 });
ISliceLayer *s3 = network->addSlice(input, Dims3{ 0, 0, 1 }, Dims3{ inch, Yolo::INPUT_H / 2, Yolo::INPUT_W / 2 }, Dims3{ 1, 2, 2 });
ISliceLayer *s4 = network->addSlice(input, Dims3{ 0, 1, 1 }, Dims3{ inch, Yolo::INPUT_H / 2, Yolo::INPUT_W / 2 }, Dims3{ 1, 2, 2 });
ITensor* inputTensors[] = { s1->getOutput(0), s2->getOutput(0), s3->getOutput(0), s4->getOutput(0) };
auto cat = network->addConcatenation(inputTensors, 4);
auto conv = convBlock(network, weightMap, *cat->getOutput(0), outch, ksize, 1, 1, lname + ".conv");
return conv;
}
*/
/*-------------yolov5 head------------- */
/*-------------yolov5 detect----------- */
/* build engine */
// 设置最大batch size
builder->setMaxBatchSize(maxBatchSize);
// 设置工作空间大小
config->setMaxWorkspaceSize(16 * (1 << 20)); // 设置最大工作空间大小为16MB
config->setFlag(BuilderFlag::kFP16);
// 构建engine, 这里的config也只是配置了空间大小??
ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config);
assert(engine != nullptr);
std::cout << "Build engine successfully" << std::endl;
network->destroy();
// 序列化模型
IHostMemory** modelStream; // 存放序列化后的模型
(*modleStream) = engine->serialize(); // 序列化模型
assert((*modelStream) != nullptr);
// Close everything down
engine->destroy();
builder->destroy();
config->destroy();
// 使用ifstream,将modleStream写入文件中
return 0;
}
完整可运行的代码如下
int create_engine_nof16() {
cudaSetDevice(0); // 设置默认的GPU
std::string wts_name = "D:\\VisualStdioProgram\\yolov5_program\\yolov5-5.0_tensorrt\\model\\yolov5s.wts"; // 权重文件路径
std::string engine_name = "yolov5s.engine"; // 保存的engine文件名
bool is_p6 = false; // 关闭p6
// 如果输入模型是s, gd = 0.33; gw = 0.50;
float gd = 0.33, // model depth multiple
float gw = 0.50; // layer channel multiple
int maxBatchSize = 1; // 设置最大batch size
// Create builder
IBuilder* builder = createInferBuilder(gLogger);
IBuilderConfig* config = builder->createBuilderConfig();
// Create model to populate the network, then set the outputs and create an engine
ICudaEngine* engine = nullptr;
INetworkDefinition* network = builder->createNetworkV2(0U); // 创建一个空白的Network
// Create input tensor of shape {3, INPUT_H, INPUT_W} with name INPUT_BLOB_NAME
ITensor* data = network->addInput(INPUT_BLOB_NAME, DataType::kFLOAT, Dims3{ 3, INPUT_H, INPUT_W }); // 创建一个Tensor,输入类型为Dims3{}
assert(data);
std::map<std::string, Weights> weightMap = loadWeights(wts_name); // 加载wts文件,权重文件
/* ------ yolov5 backbone------ */
auto focus0 = focus(network, weightMap, *data, 3, get_width(64, gw), 3, "model.0");
auto conv1 = convBlock(network, weightMap, *focus0->getOutput(0), get_width(128, gw), 3, 2, 1, "model.1");
auto bottleneck_CSP2 = C3(network, weightMap, *conv1->getOutput(0), get_width(128, gw), get_width(128, gw), get_depth(3, gd), true, 1, 0.5, "model.2");
auto conv3 = convBlock(network, weightMap, *bottleneck_CSP2->getOutput(0), get_width(256, gw), 3, 2, 1, "model.3");
auto bottleneck_csp4 = C3(network, weightMap, *conv3->getOutput(0), get_width(256, gw), get_width(256, gw), get_depth(9, gd), true, 1, 0.5, "model.4");
auto conv5 = convBlock(network, weightMap, *bottleneck_csp4->getOutput(0), get_width(512, gw), 3, 2, 1, "model.5");
auto bottleneck_csp6 = C3(network, weightMap, *conv5->getOutput(0), get_width(512, gw), get_width(512, gw), get_depth(9, gd), true, 1, 0.5, "model.6");
auto conv7 = convBlock(network, weightMap, *bottleneck_csp6->getOutput(0), get_width(1024, gw), 3, 2, 1, "model.7");
auto spp8 = SPP(network, weightMap, *conv7->getOutput(0), get_width(1024, gw), get_width(1024, gw), 5, 9, 13, "model.8");
/* ------ yolov5 head ------ */
auto bottleneck_csp9 = C3(network, weightMap, *spp8->getOutput(0), get_width(1024, gw), get_width(1024, gw), get_depth(3, gd), false, 1, 0.5, "model.9");
auto conv10 = convBlock(network, weightMap, *bottleneck_csp9->getOutput(0), get_width(512, gw), 1, 1, 1, "model.10");
auto upsample11 = network->addResize(*conv10->getOutput(0));
assert(upsample11);
upsample11->setResizeMode(ResizeMode::kNEAREST);
upsample11->setOutputDimensions(bottleneck_csp6->getOutput(0)->getDimensions());
ITensor* inputTensors12[] = { upsample11->getOutput(0), bottleneck_csp6->getOutput(0) };
auto cat12 = network->addConcatenation(inputTensors12, 2);
auto bottleneck_csp13 = C3(network, weightMap, *cat12->getOutput(0), get_width(1024, gw), get_width(512, gw), get_depth(3, gd), false, 1, 0.5, "model.13");
auto conv14 = convBlock(network, weightMap, *bottleneck_csp13->getOutput(0), get_width(256, gw), 1, 1, 1, "model.14");
auto upsample15 = network->addResize(*conv14->getOutput(0));
assert(upsample15);
upsample15->setResizeMode(ResizeMode::kNEAREST);
upsample15->setOutputDimensions(bottleneck_csp4->getOutput(0)->getDimensions());
ITensor* inputTensors16[] = { upsample15->getOutput(0), bottleneck_csp4->getOutput(0) };
auto cat16 = network->addConcatenation(inputTensors16, 2);
auto bottleneck_csp17 = C3(network, weightMap, *cat16->getOutput(0), get_width(512, gw), get_width(256, gw), get_depth(3, gd), false, 1, 0.5, "model.17");
/* ------ detect ------ */
IConvolutionLayer* det0 = network->addConvolutionNd(*bottleneck_csp17->getOutput(0), 3 * (Yolo::CLASS_NUM + 5), DimsHW{ 1, 1 }, weightMap["model.24.m.0.weight"], weightMap["model.24.m.0.bias"]);
auto conv18 = convBlock(network, weightMap, *bottleneck_csp17->getOutput(0), get_width(256, gw), 3, 2, 1, "model.18");
ITensor* inputTensors19[] = { conv18->getOutput(0), conv14->getOutput(0) };
auto cat19 = network->addConcatenation(inputTensors19, 2);
auto bottleneck_csp20 = C3(network, weightMap, *cat19->getOutput(0), get_width(512, gw), get_width(512, gw), get_depth(3, gd), false, 1, 0.5, "model.20");
IConvolutionLayer* det1 = network->addConvolutionNd(*bottleneck_csp20->getOutput(0), 3 * (Yolo::CLASS_NUM + 5), DimsHW{ 1, 1 }, weightMap["model.24.m.1.weight"], weightMap["model.24.m.1.bias"]);
auto conv21 = convBlock(network, weightMap, *bottleneck_csp20->getOutput(0), get_width(512, gw), 3, 2, 1, "model.21");
ITensor* inputTensors22[] = { conv21->getOutput(0), conv10->getOutput(0) };
auto cat22 = network->addConcatenation(inputTensors22, 2);
auto bottleneck_csp23 = C3(network, weightMap, *cat22->getOutput(0), get_width(1024, gw), get_width(1024, gw), get_depth(3, gd), false, 1, 0.5, "model.23");
IConvolutionLayer* det2 = network->addConvolutionNd(*bottleneck_csp23->getOutput(0), 3 * (Yolo::CLASS_NUM + 5), DimsHW{ 1, 1 }, weightMap["model.24.m.2.weight"], weightMap["model.24.m.2.bias"]);
auto yolo = addYoLoLayer(network, weightMap, "model.24", std::vector<IConvolutionLayer*>{det0, det1, det2});
yolo->getOutput(0)->setName(OUTPUT_BLOB_NAME);
network->markOutput(*yolo->getOutput(0));
// Build engine
builder->setMaxBatchSize(maxBatchSize);
config->setMaxWorkspaceSize(16 * (1 << 20)); // 16MB
config->setFlag(BuilderFlag::kFP16);
std::cout << "Building engine, please wait for a while..." << std::endl;
engine = builder->buildEngineWithConfig(*network, *config);
assert(engine);
std::cout << "Build engine successfully!" << std::endl;
// Don't need the network any more
network->destroy();
// Release host memory
for (auto& mem : weightMap) {
free((void*)(mem.second.values));
}
assert(engine != nullptr);
// Serialize the engine
IHostMemory* modelStream{ nullptr }; // 定义一个Host的内存块, 存放序列化后的模型
(modelStream) = engine->serialize();
assert(modelStream != nullptr);
// Close everything down
engine->destroy();
builder->destroy();
config->destroy();
// 将modelStream写入文件
std::ofstream p(engine_name, std::ios::binary); //
if (!p) {
std::cerr << "could not open plan output file" << std::endl;
return -1;
}
p.write(reinterpret_cast<const char*>(modelStream->data()), modelStream->size());
modelStream->destroy();
std::cout << "in !wts_name.empty" << std::endl;
return 0;
}