1,介绍
onnxruntime是一个用于onnx模型推理的引擎。
2,安装
2.1 cuda,cudnn
2.2 cmake,版本>=3.13.0
# Build CMake 3.23.4 from source and install it under /usr.
sudo apt-get install libssl-dev # installed before ./bootstrap (presumably for CMake's OpenSSL dependency)
sudo apt-get autoremove cmake # uninstall the existing packaged cmake
wget https://cmake.org/files/v3.23/cmake-3.23.4.tar.gz
# Extract the archive that was just downloaded (the name must include .tar.gz).
tar -xf cmake-3.23.4.tar.gz
cd cmake-3.23.4
./bootstrap --prefix=/usr
make -j 8
sudo make install
cmake -version # verify the install; should report 3.23.4
cmake version 3.23.4
CMake suite maintained and supported by Kitware (kitware.com/cmake).
2.3 tensorrt
2.4 onnxruntime
# Build ONNX Runtime from source with CUDA and TensorRT enabled,
# then install the Python wheel produced by the build.
conda activate py36 # switch to the target virtual environment
git clone https://github.com/microsoft/onnxruntime.git
cd onnxruntime
git submodule sync
git submodule update --init --recursive
./build.sh \
--use_cuda \
--cuda_version=11.0 \
--cuda_home=/usr/local/cuda \
--cudnn_home=/usr/local/cuda \
--use_tensorrt --tensorrt_home="$TENSORRT_ROOT" \
--build_shared_lib --enable_pybind \
--build_wheel --update --build
# Install the built wheel; the file name encodes the ORT version and Python ABI,
# so adjust it to match the file actually present in the dist directory.
pip install build/Linux/Debug/dist/onnxruntime_gpu_tensorrt-1.6.0-cp36-cp36m-linux_x86_64.whl
# Fetch a specific release (v1.6.0) instead of the default branch.
git clone -b v1.6.0 https://github.com/microsoft/onnxruntime.git
cd onnxruntime
# Create a local working branch; it is named differently from the tag so that
# the ref "v1.6.0" does not become ambiguous between a tag and a branch.
git checkout -b v1.6.0-local
git submodule sync
git submodule update --init --recursive
3,pip 安装
# Install prebuilt packages with pip. Run only ONE of the two lines below:
# the trailing note on each line names the CUDA version that line is meant for.
pip install onnxruntime-gpu==1.6.0 onnx==1.9.0 onnxconverter_common==1.6.0 # cuda 10.2
pip install onnxruntime-gpu==1.8.1 onnx==1.9.0 onnxconverter_common==1.8.1 # cuda 11.0
# Minimal usage sketch. onnx_path, input_name, output_name and x are
# placeholders that the caller must define before running this.
import onnxruntime

# List the execution providers available in the installed build.
onnxruntime.get_available_providers()
# Pick the provider at construction time rather than calling set_providers()
# afterwards: GPU builds of ONNX Runtime >= 1.9 refuse to create a session
# unless `providers` is given explicitly.
sess = onnxruntime.InferenceSession(
    onnx_path,
    providers=['CUDAExecutionProvider'],
    provider_options=[{'device_id': 0}],
)
result = sess.run([output_name], {input_name: x})  # x is the input array
4,onnxruntime and pytorch inference
# -*- coding: utf-8 -*-
# Run a pretrained ResNet-18 in PyTorch, export it to ONNX, run the exported
# model with ONNX Runtime on the GPU, and print the mean squared error
# between the two outputs.
import torch
from torchvision import models
import onnxruntime
import numpy as np

model = models.resnet18(pretrained=True)
model.eval().cuda()

## pytorch
x = torch.rand(2, 3, 224, 224).cuda()
# Inference only: disable autograd so no graph is recorded for the forward pass.
with torch.no_grad():
    out_pt = model(x)
print(out_pt.size())

# onnx
onnx_path = "resnet18.onnx"
# Mark dim 0 of both the input and the output as dynamic so the exported
# graph is not tied to the batch size of the example input.
dynamic_axes = {
    'input': {0: 'batch_size'},
    'output': {0: 'batch_size'},
}
torch.onnx.export(
    model,
    x,
    onnx_path,
    export_params=True,
    opset_version=11,
    do_constant_folding=True,
    input_names=['input'],
    output_names=['output'],
    dynamic_axes=dynamic_axes,
)

# Select the execution provider when the session is created. GPU builds of
# ONNX Runtime >= 1.9 raise if `providers` is omitted, and passing it here
# avoids creating the session with default providers and reconfiguring it.
sess = onnxruntime.InferenceSession(
    onnx_path,
    providers=['CUDAExecutionProvider'],
    provider_options=[{'device_id': 0}],
)
input_name = sess.get_inputs()[0].name
output_name = sess.get_outputs()[0].name

# onnxruntime inference
# sess.run is fed a host-side float32 numpy array, so copy x off the GPU first.
xx = x.cpu().numpy().astype(np.float32)
result = sess.run([output_name], {input_name: xx})  # xx is the input
print(result[0].shape)

# MSE
mse = np.mean((out_pt.detach().cpu().numpy() - result[0]) ** 2)
print(mse)
输出结果
torch.Size([2, 1000])
(2, 1000)
9.275762e-13