1、将PyTorch模型导出为ONNX文件
import torch
# Export a saved PyTorch model to ONNX with a dynamic batch dimension.

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the pickled PyTorch model. map_location remaps the stored tensors onto
# `device`, so a checkpoint saved on a GPU still loads on a CPU-only host.
# Assumes the file stores a whole nn.Module (not a state_dict), because
# .eval() is called on the result below.
# NOTE(review): torch.load unpickles arbitrary objects -- only load trusted
# files. Recent PyTorch releases default to weights_only=True, which rejects
# whole-model pickles; confirm against the installed version.
model = torch.load("model_data/last_model.pth", map_location=device)

# The dummy input below is placed on `device`; the model must live on the same
# device or tracing fails with a device-mismatch error.
model = model.to(device)
model.eval()  # switch to inference mode before tracing

batch_size = 1  # batch size of the dummy input
input_shape = (3, 224, 224)  # per-sample input shape; change to your own

# Dummy tensor used only to trace the model during export.
x = torch.randn(batch_size, *input_shape)
x = x.to(device)

export_onnx_file = "model.onnx"  # destination ONNX file name
torch.onnx.export(
    model,
    x,
    export_onnx_file,
    opset_version=10,
    do_constant_folding=True,  # apply constant-folding optimization
    input_names=["input"],  # name of the graph input
    output_names=["output"],  # name of the graph output
    # Mark axis 0 of both input and output as a variable batch dimension.
    dynamic_axes={
        "input": {0: "batch_size"},
        "output": {0: "batch_size"},
    },
)
2、使用ONNX Runtime测试ONNX文件的输出
import numpy as np
import onnxruntime as ort

# Run the exported ONNX model once with ONNX Runtime and print its output.

# Load the file produced by the export step ("model.onnx").
ort_session = ort.InferenceSession('model.onnx')

# Random float32 input shaped like the dummy input used for the export
# (batch 1, 3 x 224 x 224). Replace it with real preprocessed data to check
# actual predictions.
test_arr = np.random.randn(1, 3, 224, 224).astype(np.float32)

# None as the first argument asks for every model output; the feed key
# 'input' must match the input name given at export time.
outputs = ort_session.run(None, {'input': test_arr})
print('onnx result:', outputs[0])
3、将ONNX文件转换为OpenVINO IR格式(xml文件和bin文件)
from openvino.runtime import Core
from openvino.runtime import serialize
# Convert the ONNX model into OpenVINO IR files (an .xml plus a .bin file).

# Source and destination paths.
onnx_model_path = "model.onnx"
ir_xml_path = "model.xml"
ir_bin_path = "model.bin"

# Read the ONNX file through the OpenVINO runtime.
ie = Core()
model_onnx = ie.read_model(onnx_model_path)

# Compile for the CPU device. The compiled model is not passed to serialize()
# below; presumably this acts as a check that the model loads on CPU --
# TODO confirm whether this step is still wanted.
compiled_model_onnx = ie.compile_model(model_onnx, device_name="CPU")

# Write the (uncompiled) model out as IR.
# NOTE(review): "UNSPECIFIED" presumably lets OpenVINO choose the IR version.
serialize(
    model_onnx,
    xml_path=ir_xml_path,
    bin_path=ir_bin_path,
    version="UNSPECIFIED",
)