简介
YOLOv8是一个广泛使用的目标检测、图像分割工具,笔者在机器人感知任务中也曾有所应用。目前深度学习的部署框架也有很多,各有优势,这里针对三个常用的部署框架:PyTorch、ONNXRuntime、OpenVINO对YOLOv8的推理进行一个资源占用和推理效率的比较。
依赖
PyTorch 2.0、ONNXRuntime、OpenVINO
笔者的算力较低:NVIDIA GeForce MX250,2G显存
YOLOv8项目可直接导出PyTorch、ONNXRuntime、OpenVINO三种推理引擎所需的模型文件
代码
PyTorch推理
"""Benchmark average inference latency / FPS of a YOLOv8 model via PyTorch."""
import time
import os

from ultralytics import YOLO

# Load the trained YOLOv8 weights.
model = YOLO('/home/lz/yolov5/runs/train/best_20230921switch.pt')

# Directory with the benchmark images.
source_dir = '/home/lz/yolov5/data/robot_arm_vision/fps_test'
# Only feed actual image files to the model; os.listdir() may also return
# hidden files or sub-directories, which would make model() raise.
image_exts = ('.jpg', '.jpeg', '.png', '.bmp')
sources = [f for f in os.listdir(source_dir)
           if f.lower().endswith(image_exts)]

inference_time = []
for idx, source in enumerate(sources):
    image = os.path.join(source_dir, source)
    start_time = time.time()
    results = model(image, imgsz=1280)  # list of Results objects
    end_time = time.time()
    # Skip the first call: it includes model warm-up / CUDA context
    # initialization and would skew the average.
    if idx > 0:
        inference_time.append(end_time - start_time)

# Guard against an empty (or single-image) benchmark directory, which would
# otherwise raise ZeroDivisionError.
if inference_time:
    average_inference_time = sum(inference_time) / len(inference_time)
    print(f"Inference time is {average_inference_time} seconds")
    fps = 1.0 / average_inference_time
    print(f"FPS is {fps}")
else:
    print("No timed inferences; check the benchmark directory")
ONNXRuntime推理
"""Benchmark average inference latency / FPS of a YOLOv8 ONNX model via ONNX Runtime."""
import time
import os

import onnxruntime
from PIL import Image
import torchvision.transforms as transforms

# Preprocess: resize to the network input size and convert to a CHW float
# tensor in [0, 1].
transform = transforms.Compose([
    transforms.Resize((736, 1280)),
    transforms.ToTensor(),
])

# Build the ONNX Runtime session on the GPU provider.
ort_session = onnxruntime.InferenceSession(
    "/home/lz/yolov5/runs/train/best_20230921switch.onnx",
    providers=["CUDAExecutionProvider"],
)
input_name = ort_session.get_inputs()[0].name
output_name = ort_session.get_outputs()[0].name

# Directory with the benchmark images; only time actual image files.
source_dir = '/home/lz/yolov5/data/robot_arm_vision/fps_test'
image_exts = ('.jpg', '.jpeg', '.png', '.bmp')
sources = [f for f in os.listdir(source_dir)
           if f.lower().endswith(image_exts)]

inference_time = []
for idx, source in enumerate(sources):
    image = os.path.join(source_dir, source)
    img = Image.open(image).convert('RGB')
    # unsqueeze(0) adds the batch dimension: (1, 3, 736, 1280).
    input_array = transform(img).unsqueeze(0).numpy()
    start_time = time.time()
    result = ort_session.run([output_name], {input_name: input_array})
    end_time = time.time()
    print(result)
    # Exclude the first run: it includes provider warm-up and would skew
    # the average.
    if idx > 0:
        inference_time.append(end_time - start_time)

# Guard against an empty (or single-image) benchmark directory.
if inference_time:
    average_inference_time = sum(inference_time) / len(inference_time)
    print(f"Inference time is {average_inference_time} seconds")
    fps = 1.0 / average_inference_time
    print(f"FPS is {fps}")
else:
    print("No timed inferences; check the benchmark directory")
OpenVINO推理
"""Benchmark average inference latency / FPS of a YOLOv8 model via OpenVINO."""
import time
import os

from PIL import Image
import numpy as np
from openvino.inference_engine import IECore
import cv2

ie = IECore()
net = ie.read_network(
    model="/home/lz/yolov5/runs/train/best_20230921switch_openvino_model/best_20230921switch.xml",
    weights="/home/lz/yolov5/runs/train/best_20230921switch_openvino_model/best_20230921switch.bin",
)
# Compile the network for the target device (change device_name as needed).
exec_net = ie.load_network(network=net, device_name="CPU")

# Hoist loop-invariant lookups: the input blob name and its NCHW shape are
# fixed properties of the network, so resolve them once, not per image.
input_blob = next(iter(net.input_info))
n, c, h, w = net.input_info[input_blob].input_data.shape

# Directory with the benchmark images; only time actual image files.
source_dir = '/home/lz/yolov5/data/robot_arm_vision/fps_test'
image_exts = ('.jpg', '.jpeg', '.png', '.bmp')
sources = [f for f in os.listdir(source_dir)
           if f.lower().endswith(image_exts)]

inference_time = []
for idx, source in enumerate(sources):
    image = os.path.join(source_dir, source)
    img = Image.open(image).convert('RGB')
    image_array = np.array(img)
    # Preprocess: resize to the network input, HWC -> CHW, add batch dim.
    image_resized = cv2.resize(image_array, (w, h))
    image_resized = image_resized.transpose((2, 0, 1))
    image_resized = image_resized.reshape((n, c, h, w))
    start_time = time.time()
    result = exec_net.infer(inputs={input_blob: image_resized})
    end_time = time.time()
    print(result)
    # Exclude the first run: it includes plugin warm-up and would skew
    # the average.
    if idx > 0:
        inference_time.append(end_time - start_time)

# Guard against an empty (or single-image) benchmark directory.
if inference_time:
    average_inference_time = sum(inference_time) / len(inference_time)
    print(f"Inference time is {average_inference_time} seconds")
    fps = 1.0 / average_inference_time
    print(f"FPS is {fps}")
else:
    print("No timed inferences; check the benchmark directory")