使用wts构建engine
-
step1 : 构建gLogger
example:
class Logger : public ILogger { void log(Severity severity, const char* msg) noexcept override { // suppress info-level messages if (severity <= Severity::kWARNING) std::cout << msg << std::endl; } } logger;
Logger用于捕获所有的警告信息
-
step2 构建builder:创建构建器实例
-
step3 构建config,config用于指导构建器如何去优化
-
step4 创建一个空白网络,后面可以使用onnx的内置函数处理onnx文件完成网络的填充,也可以使用wts文件加载权重+自己构建网络的方法进行构建
-
step5 创建一个输入层,约定输入层的大小,输入数据的类型
-
step6 加载权重文件,构建网络、填充网络(onnx方法和wts方法)
-
step7 设置最大的batchsize
builder->setMaxBatchSize(maxBatchSize)
-
step8 设置工作空间大小
-
step9 创建engine文件
-
step10 序列化engine文件,并且保存序列化文件
-
step 11 关闭所有
// 注意,这个函数并不能够直接使用,这个代码只是为了理清楚逻辑而写,并不能够直接运行
int creatEngine(){
// 创建 builder, 在这个之前需要将gLogger创建好
IBuilder* builder = createInferBuilder(gLogger);
// 创建 config,指定TensorRT的运行模式,怎么进行优化
IBuilderConfig* config = builder->createBuilderConfig();
// 创建网络,这里是空白的
INetworkDefinition* network = builder->createNetworkV2(0U); // 创建一个空白的Network
// 创建输入层,这里是3通道224x224的图片
ITensor* data = network->addInput("data", DataType::kFLOAT, Dims3{ 1, 3, 224, 224 });
assert(data);
// 加载权重文件
std::map<std::string, Weights> weightMap = loadWeights("weights.wts"); // 加载权重文件
/* 构建网络 */
/*-------------yolov5 backbone--------- */
// exmaple : 使用内部自带的库进行构建
/* example
auto focus0 = focus(network, weightMap, *data, 3, get_width(64, gw), 3, "model.0");
ILayer* focus(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input, int inch, int outch, int ksize, std::string lname) {
ISliceLayer *s1 = network->addSlice(input, Dims3{ 0, 0, 0 }, Dims3{ inch, Yolo::INPUT_H / 2, Yolo::INPUT_W / 2 }, Dims3{ 1, 2, 2 });
ISliceLayer *s2 = network->addSlice(input, Dims3{ 0, 1, 0 }, Dims3{ inch, Yolo::INPUT_H / 2, Yolo::INPUT_W / 2 }, Dims3{ 1, 2, 2 });
ISliceLayer *s3 = network->addSlice(input, Dims3{ 0, 0, 1 }, Dims3{ inch, Yolo::INPUT_H / 2, Yolo::INPUT_W / 2 }, Dims3{ 1, 2, 2 });
ISliceLayer *s4 = network->addSlice(input, Dims3{ 0, 1, 1 }, Dims3{ inch, Yolo::INPUT_H / 2, Yolo::INPUT_W / 2 }, Dims3{ 1, 2, 2 });
ITensor* inputTensors[] = { s1->getOutput(0), s2->getOutput(0), s3->getOutput(0), s4->getOutput(0) };
auto cat = network->addConcatenation(inputTensors, 4);
auto conv = convBlock(network, weightMap, *cat->getOutput(0), outch, ksize, 1, 1, lname + ".conv");
return conv;
}
*/
/*-------------yolov5 head------------- */
/*-------------yolov5 detect----------- */
/* build engine */
// 设置最大batch size
builder->setMaxBatchSize(maxBatchSize);
// 设置工作空间大小
config->setMaxWorkspaceSize(16 * (1 << 20)); // 设置最大工作空间大小为16MB
config->setFlag(BuilderFlag::kFP16);
// 构建engine, 这里的config也只是配置了空间大小??
ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config);
assert(engine != nullptr);
std::cout << "Build engine successfully" << std::endl;
network->destroy();
// 序列化模型
IHostMemory** modelStream; // 存放序列化后的模型
(*modleStream) = engine->serialize(); // 序列化模型
assert((*modelStream) != nullptr);
// Close everything down
engine->destroy();
builder->destroy();
config->destroy();
// 使用ifstream,将modleStream写入文件中
return 0;
}
完整可运行的代码如下
int create_engine_nof16() {
cudaSetDevice(0); // 设置默认的GPU
std::string wts_name = "D:\\VisualStdioProgram\\yolov5_program\\yolov5-5.0_tensorrt\\model\\yolov5s.wts"; // 权重文件路径
std::string engine_name = "yolov5s.engine"; // 保存的engine文件名
bool is_p6 = false; // 关闭p6
// 如果输入模型是s, gd = 0.33; gw = 0.50;
float gd = 0.33, // model depth multiple
float gw = 0.50; // layer channel multiple
int maxBatchSize = 1; // 设置最大batch size
// Create builder
IBuilder* builder = createInferBuilder(gLogger);
IBuilderConfig* config = builder->createBuilderConfig();
// Create model to populate the network, then set the outputs and create an engine
ICudaEngine* engine = nullptr;
INetworkDefinition* network = builder->createNetworkV2(0U); // 创建一个空白的Network
// Create input tensor of shape {3, INPUT_H, INPUT_W} with name INPUT_BLOB_NAME
ITensor* data = network->addInput(INPUT_BLOB_NAME, DataType::kFLOAT, Dims3{ 3, INPUT_H, INPUT_W }); // 创建一个Tensor,输入类型为Dims3{}
assert(data);
std::map<std::string, Weights> weightMap = loadWeights(wts_name); // 加载wts文件,权重文件
/* ------ yolov5 backbone------ */
auto focus0 = focus(network, weightMap, *data, 3, get_width(64, gw), 3, "model.0");
auto conv1 = convBlock(network, weightMap, *focus0->getOutput(0), get_width(128, gw), 3, 2, 1, "model.1");
auto bottleneck_CSP2 = C3(network, weightMap, *conv1->getOutput(0), get_width(128, gw), get_width(128, gw), get_depth(3, gd), true, 1, 0.5, "model.2");
auto conv3 = convBlock(network, weightMap, *bottleneck_CSP2->getOutput(0), get_width(256, gw), 3, 2, 1, "model.3");
auto bottleneck_csp4 = C3(network, weightMap, *conv3->getOutput(0), get_width(256, gw), get_width(256, gw), get_depth(9, gd), true, 1, 0.5, "model.4");
auto conv5 = convBlock(network, weightMap, *bottleneck_csp4->getOutput(0), get_width(512, gw), 3, 2, 1, "model.5");
auto bottleneck_csp6 = C3(network, weightMap, *conv5->getOutput(0), get_width(512, gw), get_width(512, gw), get_depth(9, gd), true, 1, 0.5, "model.6");
auto conv7 = convBlock(network, weightMap, *bottleneck_csp6->getOutput(0), get_width(1024, gw), 3, 2, 1, "model.7");
auto spp8 = SPP(network, weightMap, *conv7->getOutput(0), get_width(1024, gw), get_width(1024, gw), 5, 9, 13, "model.8");
/* ------ yolov5 head ------ */
auto bottleneck_csp9 = C3(network, weightMap, *spp8->getOutput(0), get_width(1024, gw), get_width(1024, gw), get_depth(3, gd), false, 1, 0.5, "model.9");
auto conv10 = convBlock(network, weightMap, *bottleneck_csp9->getOutput(0), get_width(512, gw), 1, 1, 1, "model.10");
auto upsample11 = network->addResize(*conv10->getOutput(0));
assert(upsample11);
upsample11->setResizeMode(ResizeMode::kNEAREST);
upsample11->setOutputDimensions(bottleneck_csp6->getOutput(0)->getDimensions());
ITensor* inputTensors12[] = { upsample11->getOutput(0), bottleneck_csp6->getOutput(0) };
auto cat12 = network->addConcatenation(inputTensors12, 2);
auto bottleneck_csp13 = C3(network, weightMap, *cat12->getOutput(0), get_width(1024, gw), get_width(512, gw), get_depth(3, gd), false, 1, 0.5, "model.13");
auto conv14 = convBlock(network, weightMap, *bottleneck_csp13->getOutput(0), get_width(256, gw), 1, 1, 1, "model.14");
auto upsample15 = network->addResize(*conv14->getOutput(0));
assert(upsample15);
upsample15->setResizeMode(ResizeMode::kNEAREST);
upsample15->setOutputDimensions(bottleneck_csp4->getOutput(0)->getDimensions());
ITensor* inputTensors16[] = { upsample15->getOutput(0), bottleneck_csp4->getOutput(0) };
auto cat16 = network->addConcatenation(inputTensors16, 2);
auto bottleneck_csp17 = C3(network, weightMap, *cat16->getOutput(0), get_width(512, gw), get_width(256, gw), get_depth(3, gd), false, 1, 0.5, "model.17");
/* ------ detect ------ */
IConvolutionLayer* det0 = network->addConvolutionNd(*bottleneck_csp17->getOutput(0), 3 * (Yolo::CLASS_NUM + 5), DimsHW{ 1, 1 }, weightMap["model.24.m.0.weight"], weightMap["model.24.m.0.bias"]);
auto conv18 = convBlock(network, weightMap, *bottleneck_csp17->getOutput(0), get_width(256, gw), 3, 2, 1, "model.18");
ITensor* inputTensors19[] = { conv18->getOutput(0), conv14->getOutput(0) };
auto cat19 = network->addConcatenation(inputTensors19, 2);
auto bottleneck_csp20 = C3(network, weightMap, *cat19->getOutput(0), get_width(512, gw), get_width(512, gw), get_depth(3, gd), false, 1, 0.5, "model.20");
IConvolutionLayer* det1 = network->addConvolutionNd(*bottleneck_csp20->getOutput(0), 3 * (Yolo::CLASS_NUM + 5), DimsHW{ 1, 1 }, weightMap["model.24.m.1.weight"], weightMap["model.24.m.1.bias"]);
auto conv21 = convBlock(network, weightMap, *bottleneck_csp20->getOutput(0), get_width(512, gw), 3, 2, 1, "model.21");
ITensor* inputTensors22[] = { conv21->getOutput(0), conv10->getOutput(0) };
auto cat22 = network->addConcatenation(inputTensors22, 2);
auto bottleneck_csp23 = C3(network, weightMap, *cat22->getOutput(0), get_width(1024, gw), get_width(1024, gw), get_depth(3, gd), false, 1, 0.5, "model.23");
IConvolutionLayer* det2 = network->addConvolutionNd(*bottleneck_csp23->getOutput(0), 3 * (Yolo::CLASS_NUM + 5), DimsHW{ 1, 1 }, weightMap["model.24.m.2.weight"], weightMap["model.24.m.2.bias"]);
auto yolo = addYoLoLayer(network, weightMap, "model.24", std::vector<IConvolutionLayer*>{det0, det1, det2});
yolo->getOutput(0)->setName(OUTPUT_BLOB_NAME);
network->markOutput(*yolo->getOutput(0));
// Build engine
builder->setMaxBatchSize(maxBatchSize);
config->setMaxWorkspaceSize(16 * (1 << 20)); // 16MB
config->setFlag(BuilderFlag::kFP16);
std::cout << "Building engine, please wait for a while..." << std::endl;
engine = builder->buildEngineWithConfig(*network, *config);
assert(engine);
std::cout << "Build engine successfully!" << std::endl;
// Don't need the network any more
network->destroy();
// Release host memory
for (auto& mem : weightMap) {
free((void*)(mem.second.values));
}
assert(engine != nullptr);
// Serialize the engine
IHostMemory* modelStream{ nullptr }; // 定义一个Host的内存块, 存放序列化后的模型
(modelStream) = engine->serialize();
assert(modelStream != nullptr);
// Close everything down
engine->destroy();
builder->destroy();
config->destroy();
// 将modelStream写入文件
std::ofstream p(engine_name, std::ios::binary); //
if (!p) {
std::cerr << "could not open plan output file" << std::endl;
return -1;
}
p.write(reinterpret_cast<const char*>(modelStream->data()), modelStream->size());
modelStream->destroy();
std::cout << "in !wts_name.empty" << std::endl;
return 0;
}