# -*- coding: utf-8 -*-
from tqdm import tqdm
import pandas as pd
import time
import torch
import torchvision
import numpy as np
import onnxruntime as ort
def save_onnx(path, device):
dummy_input = torch.randn(3, 3, 224, 224).to(device)
model = torchvision.models.alexnet(pretrained=False).to(device)
input_names = ["actual_input_1"] + ["learned_%d" % i for i in range(16)]
output_names = ["output1"]
torch.onnx.export(model, dummy_input, path, verbose=False, input_names=input_names, output_names=output_names,
dynamic_axes={'actual_input_1': [0], 'output1': [0]})
# print(model)
return model
def torch_t(model, batch_size, device):
s = time.time()
input_ = torch.randn(batch_size, 3, 224, 224).to(device)
for i in range(epoch):
model(input_)
cost = time.time() - s
return round(cost, 2)
def onnx_t_fun(path, batch_size, device):
if device == 'cpu':
ort_session = ort.InferenceSession(path, providers=['CPUExecutionProvider'])
else:
ort_session = ort.InferenceSession(path, providers=['CUDAExecutionProvider'])
s = time.time()
input_ = np.random.randn(batch_size, 3, 224, 224).astype(np.float32)
for i in range(epoch):
ort_session.run(
None,
{"actual_input_1": input_},
)
cost = time.time() - s
return round(cost, 2)
if __name__ == '__main__':
batch_size_list = [2 ** i for i in range(8)]
device_list = ['cpu', torch.device(0)]
tuples = [(device, batch_size) for device in device_list for batch_size in batch_size_list]
index = pd.MultiIndex.from_tuples(tuples)
epoch = 100
path = "alexnet.onnx"
df = pd.DataFrame(columns=['torch', 'onnx'], index=index)
for batch_size in tqdm(batch_size_list):
for device in device_list:
model = save_onnx(path, device)
cost_torch = torch_t(model, batch_size, device)
cost_onnx = onnx_t_fun(path, batch_size, device)
df.loc[(device, batch_size), :] = [cost_torch, cost_onnx]
print(df)
torch onnx
cpu 1 1.2 0.89
2 2.47 1.15
4 2.86 1.68
8 3.67 2.7
16 5.93 4.72
32 9.47 8.85
64 17.33 17.26
128 32.77 34.27
cuda:0 1 0.07 0.42
2 0.08 0.65
4 0.12 0.6
8 0.2 0.27
16 0.24 0.4
32 0.38 0.89
64 0.69 1.72
128 1.26 3.12
可以发现cpu时,onnx在batch_size较小时,速度能提升部分,随着batch_size变大,越来越慢
gpu上反而变慢了,不知道是否合理
cpu查询命令为cat /proc/cpuinfo | grep 'model name' |uniq
结果为 Intel(R) Core(TM) i7-9700K CPU @ 3.60GHz