TensorRT Programming - A Single-Layer Perceptron
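The example below builds a tiny network by hand with the TensorRT C++ API: a 3-element input, one fully connected layer with 2 outputs, and a sigmoid activation. build.cpp constructs the network, builds the engine, and serializes it to ./model/mlp.engine; runtime.cu deserializes that file and runs inference on the GPU.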
build.cpp
#include <iostream>
#include <fstream>
#include <vector>
#include <string>
#include <cassert>
#include <NvInfer.h>

// Logger required by the TensorRT builder/runtime APIs.
class TRTLogger : public nvinfer1::ILogger
{
public:
    void log(Severity severity, const char *msg) noexcept override
    {
        // Print messages of severity kINFO and above (warnings, errors).
        if (severity <= Severity::kINFO)
        {
            std::cout << msg << std::endl;
        }
    }
} gLogger;
// Write a weight blob to disk: [int32 count][count floats].
void saveWeights(const std::string &filename, const float *data, int size)
{
    std::ofstream outfile(filename, std::ios::binary);
    assert(outfile.is_open() && "save weights failed");
    outfile.write((char *)&size, sizeof(int));
    outfile.write((char *)data, size * sizeof(float));
    outfile.close();
}

// Read a weight blob written by saveWeights back into a vector.
std::vector<float> loadWeights(const std::string &filename)
{
    std::ifstream infile(filename, std::ios::binary);
    assert(infile.is_open() && "load weights failed");
    int size = 0;
    infile.read((char *)&size, sizeof(int));
    std::vector<float> data(size);
    infile.read((char *)data.data(), size * sizeof(float));
    infile.close();
    return data;
}
int main()
{
    TRTLogger logger;

    // ========== 1. Create the builder and an explicit-batch network ==========
    nvinfer1::IBuilder *builder = nvinfer1::createInferBuilder(logger);
    // Flag 1 == 1U << kEXPLICIT_BATCH: the batch dimension is part of the tensor shape.
    nvinfer1::INetworkDefinition *network = builder->createNetworkV2(1);

    // ========== 2. Define the network: input -> fully connected -> sigmoid ==========
    const int input_size = 3;
    nvinfer1::ITensor *input = network->addInput("data", nvinfer1::DataType::kFLOAT, nvinfer1::Dims4(1, input_size, 1, 1));

    // Hand-written weights for the fully connected layer (2 x 3) and its bias (2).
    const float *fc1_weight_data = new float[input_size * 2]{0.1, 0.2, 0.3, 0.4, 0.5, 0.6};
    const float *fc1_bias_data = new float[2]{0.1, 0.5};

    // Save the weights to disk, then load them back (the ./model directory must exist).
    saveWeights("./model/fc1.wts", fc1_weight_data, 6);
    saveWeights("./model/fc1.bias", fc1_bias_data, 2);
    delete[] fc1_weight_data;
    delete[] fc1_bias_data;
    auto fc1_weights_vec = loadWeights("./model/fc1.wts");
    auto fc1_bias_vec = loadWeights("./model/fc1.bias");

    // Wrap the loaded buffers in nvinfer1::Weights for the layer API.
    nvinfer1::Weights fc1_weight{nvinfer1::DataType::kFLOAT, fc1_weights_vec.data(), (int64_t)fc1_weights_vec.size()};
    nvinfer1::Weights fc1_bias{nvinfer1::DataType::kFLOAT, fc1_bias_vec.data(), (int64_t)fc1_bias_vec.size()};
    // Fully connected layer: 3 inputs -> 2 outputs, followed by a sigmoid activation.
    const int output_size = 2;
    nvinfer1::IFullyConnectedLayer *fc1 = network->addFullyConnected(*input, output_size, fc1_weight, fc1_bias);
    nvinfer1::IActivationLayer *sigmoid = network->addActivation(*fc1->getOutput(0), nvinfer1::ActivationType::kSIGMOID);

    // Mark the sigmoid output as the network output.
    sigmoid->getOutput(0)->setName("output");
    network->markOutput(*sigmoid->getOutput(0));

    // ========== 3. Configure and build the engine ==========
    builder->setMaxBatchSize(1);
    nvinfer1::IBuilderConfig *config = builder->createBuilderConfig();
    config->setMaxWorkspaceSize(1 << 28);   // 256 MiB of builder workspace
    nvinfer1::ICudaEngine *engine = builder->buildEngineWithConfig(*network, *config);
    if (!engine)
    {
        std::cout << "Failed to create engine!" << std::endl;
        return -1;
    }

    // ========== 4. Serialize the engine and write it to disk ==========
    nvinfer1::IHostMemory *serialized_engine = engine->serialize();
    std::ofstream outfile("./model/mlp.engine", std::ios::binary);
    assert(outfile.is_open() && "Failed to open file for writing");
    outfile.write((char *)serialized_engine->data(), serialized_engine->size());
    outfile.close();

    // Release resources (TensorRT 8+ allows plain delete on these objects).
    delete serialized_engine;
    delete engine;
    delete config;
    delete network;
    delete builder;

    std::cout << "engine file generated successfully" << std::endl;
    return 0;
}
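A minimal sketch of how the two programs might be compiled, assuming TensorRT and CUDA are installed in the usual system locations (adjust the -I/-L paths for your setup) and that the ./model directory exists before build is run:

g++ build.cpp -o build -I/usr/local/cuda/include -L/usr/local/cuda/lib64 -lnvinfer -lcudart
nvcc runtime.cu -o runtime -lnvinfer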
runtime.cu
#include <iostream>
#include <vector>
#include <cassert>
#include <fstream>
#include <NvInfer.h>
#include <cuda_runtime.h>

// Same logger as in build.cpp.
class TRTLogger : public nvinfer1::ILogger
{
public:
    void log(Severity severity, const char *msg) noexcept override
    {
        if (severity <= Severity::kINFO)
        {
            std::cout << msg << std::endl;
        }
    }
} gLogger;
// Read the serialized engine file into memory.
std::vector<unsigned char> loadEngineModel(const std::string &filename)
{
    std::ifstream infile(filename, std::ios::binary);
    assert(infile.is_open() && "load engine failed");

    // Seek to the end to get the file size, then rewind and read the whole file.
    infile.seekg(0, std::ios::end);
    size_t size = infile.tellg();
    std::vector<unsigned char> data(size);
    infile.seekg(0, std::ios::beg);
    infile.read((char *)data.data(), size);
    infile.close();
    return data;
}
int main()
{
    TRTLogger logger;

    // ========== 1. Create the runtime and deserialize the engine ==========
    nvinfer1::IRuntime *runtime = nvinfer1::createInferRuntime(logger);
    auto engineModel = loadEngineModel("./model/mlp.engine");
    nvinfer1::ICudaEngine *engine = runtime->deserializeCudaEngine(engineModel.data(), engineModel.size());
    if (!engine)
    {
        std::cout << "deserialize engine failed" << std::endl;
        return -1;
    }

    // ========== 2. Create an execution context and a CUDA stream ==========
    nvinfer1::IExecutionContext *context = engine->createExecutionContext();
    cudaStream_t stream = nullptr;
    cudaStreamCreate(&stream);
    // ========== 3. Prepare host/device buffers ==========
    // Input: 3 floats; output: 2 floats (matching the network built in build.cpp).
    float *host_input_data = new float[3]{2, 4, 8};
    int input_data_size = 3 * sizeof(float);
    float *device_input_data = nullptr;
    float *host_output_data = new float[2]{0, 0};
    int output_data_size = 2 * sizeof(float);
    float *device_output_data = nullptr;
    cudaMalloc((void **)&device_input_data, input_data_size);
    cudaMalloc((void **)&device_output_data, output_data_size);

    // ========== 4. Copy input to the device, run inference, copy output back ==========
    cudaMemcpyAsync(device_input_data, host_input_data, input_data_size, cudaMemcpyHostToDevice, stream);
    // Bindings must follow the engine's binding order: input ("data"), then output ("output").
    float *bindings[] = {device_input_data, device_output_data};
    bool success = context->enqueueV2((void **)bindings, stream, nullptr);
    cudaMemcpyAsync(host_output_data, device_output_data, output_data_size, cudaMemcpyDeviceToHost, stream);
    cudaStreamSynchronize(stream);
    if (!success)
    {
        std::cout << "enqueue failed" << std::endl;
        return -1;
    }
std :: cout << "输出结果: " << host_output_data[0] << " " << host_output_data[1] << std :: endl;
cudaStreamDestroy(stream);
cudaFree(device_input_data);
cudaFree(device_output_data);
delete host_input_data;
delete host_output_data;
delete context;
delete engine;
delete runtime;
return 0;
}
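As a sanity check on the result: IFullyConnectedLayer stores its kernel as a row-major output x input matrix, so with input {2, 4, 8} the two pre-activation values are 0.1*2 + 0.2*4 + 0.3*8 + 0.1 = 3.5 and 0.4*2 + 0.5*4 + 0.6*8 + 0.5 = 8.1. After the sigmoid, the program should therefore print roughly 0.9707 and 0.9997.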