C++ TensorRT / ncnn ResNet 推理示例
使用 C++ 和 TensorRT 进行 ResNet 推理的步骤：
- 先将 ResNet 模型导出为 ONNX 格式
- 再用 trtexec 工具将 ONNX 转换为 TensorRT engine 格式
- 最后编译并运行本程序即可
#include <chrono>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>

#include <cuda_runtime_api.h>
#include <NvInfer.h>
#include <NvInferRuntimeCommon.h>
#include <opencv2/opencv.hpp>

#include "logging.h"
using namespace std;
using namespace nvinfer1;
// CHECK: abort the program when a CUDA runtime call fails.
// `status` is evaluated exactly once; any nonzero result is treated as fatal.
// Improvement: report the failing file/line so errors are traceable.
#define CHECK(status) \
do \
{ \
    auto ret = (status); \
    if (ret != 0) \
    { \
        std::cerr << "Cuda failure: " << ret << " at " << __FILE__ \
                  << ":" << __LINE__ << std::endl; \
        abort(); \
    } \
} while (0)
// Load the image at imgPath, resize it and return a heap-allocated normalized
// CHW float buffer of 3*INPUT_H*INPUT_W floats; caller frees with delete[].
// NOTE(review): main() passes (height, width) for (INPUT_W, INPUT_H) — only
// harmless while both are 224; confirm the intended argument order.
float *preProcess(const char *imgPath, int INPUT_W, int INPUT_H);
// Read an entire binary file into a byte vector; returns an empty vector
// when the file cannot be opened or is empty.
vector<unsigned char> load_file(const string &file);
int main()
{
Logger logger;
char *trtModelStream;
auto engine_data = load_file("./resnet.engine");
const char *imgPath = "./input.jpg";
const int batchSize = 1;
const int inputChannel = 3;
const int inputHeight = 224;
const int inputWidth = 224;
const int outputSize = 1000;
IRuntime *runtime = createInferRuntime(logger);
ICudaEngine *engine = runtime->deserializeCudaEngine(engine_data.data(), engine_data.size());
IExecutionContext *context = engine->createExecutionContext();
void *buffers[2];
const char *INPUT_BLOB_NAME = "input";
const char *OUTPUT_BLOB_NAME = "output";
const int inputIndex = engine->getBindingIndex(INPUT_BLOB_NAME);
const int outputIndex = engine->getBindingIndex(OUTPUT_BLOB_NAME);
CHECK(cudaMalloc(&buffers[inputIndex], batchSize * inputChannel * inputHeight * inputWidth * sizeof(float)));
CHECK(cudaMalloc(&buffers[outputIndex], batchSize * outputSize * sizeof(float)));
vector<float> outputData(batchSize * outputSize);
float *inputData = preProcess(imgPath, inputHeight, inputWidth);
cudaStream_t stream;
CHECK(cudaStreamCreate(&stream));
CHECK(cudaMemcpyAsync(buffers[inputIndex], inputData, batchSize * inputChannel * inputHeight * inputWidth * sizeof(float), cudaMemcpyHostToDevice, stream));
auto start = chrono::system_clock::now();
for (int i = 0; i < 1000; i++)
{
context->enqueue(batchSize, buffers, stream, nullptr);
}
auto end = chrono::system_clock::now();
std::cout << chrono::duration_cast<chrono::milliseconds>(end - start).count() << "ms" << std::endl;
CHECK(cudaMemcpyAsync(outputData.data(), buffers[outputIndex], batchSize * outputSize * sizeof(float), cudaMemcpyDeviceToHost, stream));
float maxVal = 0.0f;
int maxIndex = -1;
for (int i = 0; i < 1000; i++)
{
if (outputData[i] > maxVal)
{
maxVal = outputData[i];
maxIndex = i;
}
}
std::cout << "Top 5 Predictions:" << std::endl;
for (int i = 0; i < 5; i++)
{
maxVal = 0.0f;
maxIndex = -1;
for (int j = 0; j < 1000; j++)
{
if (outputData[j] > maxVal)
{
maxVal = outputData[j];
maxIndex = j;
}
}
std::cout << "Class " << maxIndex << ": " << maxVal << std::endl;
outputData[maxIndex] = -1.0f;
}
cudaFree(buffers[inputIndex]);
cudaFree(buffers[outputIndex]);
context->destroy();
engine->destroy();
delete inputData;
inputData = nullptr;
return 0;
}
// Slurp an entire file in binary mode.
// Returns the file's bytes, or an empty vector when the file cannot be
// opened or has zero length.
std::vector<unsigned char> load_file(const std::string &file)
{
    std::ifstream in(file, std::ios::in | std::ios::binary);
    if (!in.is_open())
        return {};

    in.seekg(0, std::ios::end);
    const std::streamoff length = in.tellg();

    std::vector<unsigned char> buffer;
    if (length > 0)
    {
        buffer.resize(static_cast<size_t>(length));
        in.seekg(0, std::ios::beg);
        in.read(reinterpret_cast<char *>(buffer.data()), length);
    }
    // stream closes automatically when `in` goes out of scope
    return buffer;
}
float *preProcess(const char *imgPath, int INPUT_W, int INPUT_H)
{
cv::Mat img = cv::imread(imgPath);
float *data = new float[3 * INPUT_H * INPUT_W];
vector<float> mean_value{0.406, 0.456, 0.485};
vector<float> std_value{0.225, 0.224, 0.229};
cv::Mat src_img;
cv::resize(img, src_img, cv::Size(INPUT_W, INPUT_H));
int count = 0;
for (int i = 0; i < INPUT_H; i++)
{
uchar *uc_pixel = src_img.data + i * src_img.step;
for (int j = 0; j < INPUT_W; j++)
{
data[count] = (uc_pixel[0] / 255. - mean_value[0]) / std_value[0];
data[count + src_img.rows * src_img.cols] = (uc_pixel[1] / 255. - mean_value[1]) / std_value[1];
data[count + 2 * src_img.rows * src_img.cols] = (uc_pixel[2] / 255. - mean_value[2]) / std_value[2];
uc_pixel += 3;
count++;
}
}
return data;
}
C++ ncnn 推理（第二个示例程序）：
#include <algorithm>
#include <iostream>
#include <fstream>
#include <vector>
#include <opencv2/opencv.hpp>
#include "net.h"
#include <numeric>
using namespace std;
// Run ResNet classification with ncnn (resnet.param / resnet.bin) on
// "input.jpg": normalize the input, run inference 1000 times as a rough
// benchmark, then print the top-5 class indices and scores.
// NOTE(review): this translation unit uses std::chrono but its include
// block lacks <chrono>; add it if the build fails.
int main(int argc, char **argv)
{
    const int inputHeight = 224;
    const int inputWidth = 224;

    ncnn::Net net;
    net.opt.use_vulkan_compute = true;
    // load_param/load_model return 0 on success; the original ignored both.
    if (net.load_param("resnet.param") != 0 || net.load_model("resnet.bin") != 0)
    {
        std::cerr << "Failed to load resnet.param / resnet.bin" << std::endl;
        return -1;
    }

    cv::Mat img = cv::imread("input.jpg");
    if (img.empty())
    {
        // cv::imread fails silently; without this check resize would crash.
        std::cerr << "Failed to read input.jpg" << std::endl;
        return -1;
    }
    cv::resize(img, img, cv::Size(inputWidth, inputHeight));

    ncnn::Mat in = ncnn::Mat::from_pixels(img.data, ncnn::Mat::PIXEL_BGR, img.cols, img.rows);
    // NOTE(review): this normalizes as (x - 127.5) / 127.5, which differs
    // from the ImageNet mean/std used by the TensorRT example above —
    // confirm which preprocessing the exported model actually expects.
    const float mean_vals[3] = {127.5f, 127.5f, 127.5f};
    const float norm_vals[3] = {1.0f / 127.5f, 1.0f / 127.5f, 1.0f / 127.5f};
    in.substract_mean_normalize(mean_vals, norm_vals);

    // Benchmark: 1000 runs; only the last `out` is kept for ranking.
    ncnn::Mat out;
    auto start = chrono::system_clock::now();
    for (int i = 0; i < 1000; i++)
    {
        ncnn::Extractor ex = net.create_extractor();
        ex.input("input", in);
        ex.extract("output", out);
    }
    auto end = chrono::system_clock::now();
    std::cout << chrono::duration_cast<chrono::milliseconds>(end - start).count() << "ms" << std::endl;

    // Copy scores out of the ncnn Mat and rank class indices by score.
    std::vector<float> scores(out.w);
    for (int i = 0; i < out.w; i++)
        scores[i] = out[i];
    std::vector<int> indices(scores.size());
    std::iota(indices.begin(), indices.end(), 0);
    const int topk = std::min(5, static_cast<int>(indices.size()));
    // Only the top-k prefix is needed, so partial_sort beats a full sort.
    std::partial_sort(indices.begin(), indices.begin() + topk, indices.end(),
                      [&scores](int lhs, int rhs)
                      { return scores[lhs] > scores[rhs]; });

    std::cout << "Top-5 predictions:" << std::endl;
    for (int i = 0; i < topk; i++)
    {
        // Bug fix: the original printed `"Class " << ": " << index`, losing
        // the class id before the colon and omitting the score entirely.
        std::cout << "Class " << indices[i] << ": " << scores[indices[i]] << std::endl;
    }

    net.clear();
    return 0;
}