ONNX(一)-ResNet分类
基于参考代码,记录基于ONNX的ResNet分类问题的单图推理、多图(batch)推理和动态推理。完整的代码请移步:https://github.com/Bruce-WangGF/ONNXRuntime-CPP?tab=readme-ov-file 。如果我的博文对你有帮助,麻烦在GitHub给个⭐⭐,谢谢。
ONNX杂记:ONNX单图推理
首先补充相关的.ONNX文件导出的代码。
如果是使用官方的1000类分类,就直接运行代码即可,如果需要导出自己的分类模型,则将第10行的pretrained关闭,改成自己的类别数量,将21行注释的代码打开换上自己的权重路径。
import torch
import torch.onnx
from torchvision import datasets, models, transforms
import torch.nn as nn
import cv2
import numpy as np
from PIL import Image
import nets

# Build the ResNet-18 classifier. For a custom model: set pretrained=False,
# change num_classes, and load your own weights via load_state_dict (below).
model_ft = nets.resnet.resnet18(pretrained=True, num_classes=1000)
device = torch.device("cuda:0")
model_ft = model_ft.to(device)
# Load custom weights (uncomment and point at your .pth) BEFORE eval():
# model_ft.load_state_dict(torch.load(r'E:\Pycharm_project\classification-pytorch-main\logs\ep070-loss0.007-val_loss0.917.pth'))
model_ft.eval()

# ImageNet normalization constants.
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]
inference_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
])

# Dummy input used only to trace the graph for ONNX export.
# FIX: cv2.cvtColor with COLOR_BGR2RGB requires a 3-channel image; the
# original single-channel (224, 224) array raises an OpenCV error, so the
# dummy image is created with an explicit channel dimension.
dummy_input = np.ones((224, 224, 3), dtype=np.uint8)
dummy_input = Image.fromarray(cv2.cvtColor(dummy_input, cv2.COLOR_BGR2RGB))
img_tensor = inference_transform(dummy_input).unsqueeze_(0).cuda()
torch.onnx.export(model_ft, img_tensor, './resnet18.onnx', opset_version=14)
将环境配置好之后,直接运行CPP代码应该是可以直接跑通的,如果出现缺失dll文件,则自行下载,这里给出一个例子:zlibwapi.dll缺失,转自:https://blog.csdn.net/qq_40280673/article/details/132229908。
链接:https://pan.baidu.com/s/1v0E0Q3kpHv6ovZ7ZNIy0wg
提取码:irjk
下载好包后解压,并将各个文件添加到指定路径,如下:
zlibwapi.lib文件放到C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.5\lib
zlibwapi.dll文件放到C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.5\bin
注:如果要测试自己的类别,则需要对应改一些参数,如类别,如果类别不足5类,那对应的显示代码需要修改,如下,可以将5改成1,即只显示输出最终预测结果。
// show Top5
// Print the five highest-scoring classes. indexValuePairs is assumed to be
// sorted by score in descending order; reduce the loop bound (e.g. to 1)
// when the model has fewer than five classes.
for (size_t i = 0; i < 5; ++i) {
const auto& result = indexValuePairs[i];  // (class index, score)
std::cout << i + 1 << ": " << labels[result.first] << " " << result.second << std::endl;
}
ONNX杂记:ONNX多图推理
当需要进行多图推理时,需要将上面生成onnx文件的代码简单修改下
import torch
import torch.onnx
from torchvision import datasets, models, transforms
import torch.nn as nn
import cv2
import numpy as np
from PIL import Image
import nets
def inference_transform(image, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
    """Convert a PIL image to a normalized CHW float tensor.

    Args:
        image: RGB PIL image.
        mean: per-channel means (ImageNet defaults; these match the
            module-level ``mean`` so existing calls behave identically).
        std: per-channel standard deviations (ImageNet defaults).

    Returns:
        A ``torch.Tensor`` of shape (3, H, W), normalized channel-wise.
    """
    # The original read module-level mean/std that are defined AFTER this
    # function; defaults make the function self-contained and reusable.
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(list(mean), list(std)),
    ])
    return transform(image)
# Build the ResNet-50 classifier (for a custom model, use pretrained=False
# plus load_state_dict, as in the single-image export).
model_ft = nets.resnet.resnet50(pretrained=True, num_classes=1000)
device = torch.device("cuda:0")
model_ft = model_ft.to(device)
model_ft.eval()

# ImageNet normalization constants (read by inference_transform).
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

# Trace the model with a fixed batch of 2 inputs so the exported ONNX
# graph has a static batch dimension of 2.
batch_size = 2
dummy_inputs = []
for i in range(batch_size):
    # FIX: cv2.cvtColor with COLOR_BGR2RGB requires a 3-channel image; the
    # original single-channel (224, 224) array raises an OpenCV error.
    dummy_input = np.ones((224, 224, 3), dtype=np.uint8) * (i + 1)  # distinct value per input
    dummy_input = Image.fromarray(cv2.cvtColor(dummy_input, cv2.COLOR_BGR2RGB))
    dummy_inputs.append(inference_transform(dummy_input).unsqueeze_(0))
img_tensor_batch = torch.cat(dummy_inputs, dim=0).cuda()
torch.onnx.export(model_ft, img_tensor_batch, './batch2resnet50.onnx', opset_version=14)
这个代码可以生成一个batchsize为2的模型,当需要多图推理时,也需要对cpp文件进行相应的修改,主要是读取的图像数量,内存分配,数据拷贝,以及一些后处理方式,根据不同的任务需要修改相应的代码,这里只看ResNet的分类任务。
// OnnxRuntimeResNet.cpp : This file contains the 'main' function. Program execution begins and ends there.
#include <onnxruntime_cxx_api.h>
#include <algorithm>
#include <array>
#include <cstdlib>
#include <iostream>
#include <string>
#include <utility>
#include <vector>
#include "Helpers.cpp"

// Batched (batch size 2) ResNet-50 classification with ONNX Runtime:
// load the labels, read Batchsize images from ./assets, run one inference
// call, and print the Top-5 classes for every image in the batch.
int main()
{
    Ort::Env env;
    Ort::RunOptions runOptions;
    Ort::Session session(nullptr);

    // Model geometry: the exported ONNX graph has a fixed batch of 2.
    constexpr int64_t Batchsize = 2;
    constexpr int64_t numChannels = 3;
    constexpr int64_t width = 224;
    constexpr int64_t height = 224;
    constexpr int64_t numClasses = 1000;
    constexpr int64_t numInputElements = numChannels * height * width;

    // FIX: the original path "./assets./imagenet_classes.txt" had a stray
    // '.' and could fail to open.
    const std::string labelFile = "./assets/imagenet_classes.txt";
    auto modelPath = L"./assets/batch2resnet50.onnx";

    // Load class labels.
    std::vector<std::string> labels = loadLabels(labelFile);
    if (labels.empty()) {
        std::cout << "Failed to load labels: " << labelFile << std::endl;
        return 1;
    }

    // Collect input images; there must be at least Batchsize of them.
    const std::string imagePath = "./assets/";
    std::string _strPattern = imagePath + "*.jpg"; // test_images
    std::vector<cv::String> filesVec;
    cv::glob(_strPattern, filesVec);
    if (filesVec.size() < static_cast<size_t>(Batchsize)) {
        std::cout << "Need at least " << Batchsize << " images in " << imagePath << std::endl;
        return 1;
    }
    std::vector<std::vector<float>> ImageBatch;
    for (int64_t i = 0; i < Batchsize; i++)
    {
        const std::vector<float> imageVec = loadImage(filesVec[i]);
        if (imageVec.empty()) {
            std::cout << "Failed to load image: " << filesVec[i] << std::endl;
            return 1;
        }
        if (imageVec.size() != static_cast<size_t>(numInputElements)) {
            std::cout << "Invalid image format. Must be 224x224 RGB image." << std::endl;
            return 1;
        }
        ImageBatch.push_back(imageVec);
    }

    // Create the session on the CUDA execution provider.
    Ort::SessionOptions ort_session_options;
    OrtCUDAProviderOptions options;
    options.device_id = 0;
    OrtSessionOptionsAppendExecutionProvider_CUDA(ort_session_options, options.device_id);
    session = Ort::Session(env, modelPath, ort_session_options);
    // CPU alternative:
    //session = Ort::Session(env, modelPath, Ort::SessionOptions{ nullptr });

    // Tensor shapes and host-side buffers sized for the whole batch.
    const std::array<int64_t, 4> inputShape = { Batchsize, numChannels, height, width };
    const std::array<int64_t, 2> outputShape = { Batchsize, numClasses };
    std::vector<float> input(Batchsize * numInputElements);
    std::vector<float> results(Batchsize * numClasses);

    // Wrap the buffers as ONNX Runtime tensors (no data copy).
    auto memory_info = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
    auto inputTensor = Ort::Value::CreateTensor<float>(memory_info, input.data(), input.size(), inputShape.data(), inputShape.size());
    auto outputTensor = Ort::Value::CreateTensor<float>(memory_info, results.data(), results.size(), outputShape.data(), outputShape.size());

    // Copy each image into its slot of the batched input buffer.
    for (int64_t i = 0; i < Batchsize; ++i) {
        std::copy(ImageBatch[i].begin(), ImageBatch[i].end(), input.begin() + i * numInputElements);
    }

    // Query input/output names. FIX: the AllocatedStringPtr objects stay in
    // scope until after Run(), so the raw pointers remain valid; the
    // original release() calls leaked the allocations and are removed.
    Ort::AllocatorWithDefaultOptions ort_alloc;
    Ort::AllocatedStringPtr inputName = session.GetInputNameAllocated(0, ort_alloc);
    Ort::AllocatedStringPtr outputName = session.GetOutputNameAllocated(0, ort_alloc);
    const std::array<const char*, 1> inputNames = { inputName.get() };
    const std::array<const char*, 1> outputNames = { outputName.get() };

    // Run inference for the whole batch in one call.
    try {
        session.Run(runOptions, inputNames.data(), &inputTensor, 1, outputNames.data(), &outputTensor, 1);
    }
    catch (Ort::Exception& e) {
        std::cout << e.what() << std::endl;
        return 1;
    }

    // Sort results and show Top5 for each image in the batch.
    for (int64_t b = 0; b < Batchsize; ++b) {
        std::vector<std::pair<size_t, float>> indexValuePairs;
        for (size_t i = 0; i < static_cast<size_t>(numClasses); ++i) {
            indexValuePairs.emplace_back(i, results[b * numClasses + i]);
        }
        std::sort(indexValuePairs.begin(), indexValuePairs.end(), [](const auto& lhs, const auto& rhs) { return lhs.second > rhs.second; });
        std::cout << "Image " << b + 1 << ":" << std::endl;
        // Show Top5
        for (size_t i = 0; i < 5; ++i) {
            const auto& result = indexValuePairs[i];
            std::cout << i + 1 << ": " << labels[result.first] << " " << result.second << std::endl;
        }
    }
    system("pause");
    return 0;
}
ONNX杂记:ONNX动态推理
动态推理和多图推理几乎是一致的,首先还是生成对应的.onnx文件,可以在单图推理得到的onnx基础上修改,也可以自己直接生成。如果有Netron,打开三个onnx文件就可以看到它们的输入维度分别是:[1,3,224,224],[8,3,224,224],[batch_size,3,224,224]。
import onnx
from onnx import shape_inference

# Rewrite a fixed-batch ONNX model so its batch dimension is dynamic:
# dim 0 of every graph input AND output becomes the symbolic "batch_size".
# (The original only rewrote input[0]; looping over all inputs also covers
# multi-input models and is identical for the single-input ResNet case.)
model = onnx.load(r"*.onnx")  # replace with the real model path
for graph_input in model.graph.input:
    graph_input.type.tensor_type.shape.dim[0].dim_param = "batch_size"
for graph_output in model.graph.output:
    graph_output.type.tensor_type.shape.dim[0].dim_param = "batch_size"
model = shape_inference.infer_shapes(model)  # re-infer intermediate shapes
onnx.checker.check_model(model)              # validate the modified graph
onnx.save(model, "dynamic_*.onnx")           # save under a new name
对于动态推理,同样需要修改相关的CPP代码,主要是batch_size数值的确定,也就是将之前写死的batchsize在读取图像时作为一个变量.
// Read every image matching the pattern; with a dynamic-batch model the
// batch size is simply the number of valid images found.
const std::string imagePath = "./assets/";
std::string _strPattern = imagePath + "*.jpg"; // test_images
std::vector<cv::String> filesVec;
cv::glob(_strPattern, filesVec);
int Batchsize = 0; // Main change: batch size is counted at runtime, not fixed
std::vector<std::vector<float>> ImageBatch;
for (int i = 0; i < filesVec.size(); i++)
{
const std::vector<float> imageVec = loadImage(filesVec[i]);
if (imageVec.empty()) {
std::cout << "Failed to load image: " << filesVec[i] << std::endl;
return 1;
}
if (imageVec.size() != numInputElements) {
std::cout << "Invalid image format. Must be 224x224 RGB image." << std::endl;
return 1;
}
Batchsize++; // one more valid image in this batch
ImageBatch.push_back(imageVec);
}