1.创建一个lat.py (名字可以自己取)
2.修改自己模型yaml的路径 例如我这里是
r'D:\Desktop\v10-11\ultralytics\cfg\models\v8\yolov8-ADown.yaml',
3.相关环境要已经装好,保证显卡是可以用的,这里就不多赘述了
4.导入源码
import warnings
warnings.filterwarnings('ignore')  # silence noisy framework deprecation/user warnings
from ultralytics import YOLO
import os
import torch
import time
# Work around the "duplicate OpenMP runtime" crash common on Windows conda setups.
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
def measure_latency(model, input_size, num_iterations=100):
    """Measure the average single-image inference latency of *model*.

    Args:
        model: A callable model (e.g. an Ultralytics ``YOLO`` instance or any
            ``nn.Module``) that accepts a ``(1, 3, H, W)`` float tensor.
        input_size: Square input resolution ``H == W`` in pixels.
        num_iterations: Number of timed forward passes to average over.

    Returns:
        Average latency per forward pass, in milliseconds (float).
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    # Normalized random input in [0, 1), shaped like one RGB image.
    dummy_input = torch.rand(1, 3, input_size, input_size).to(device)
    # Suppress per-call console output (Ultralytics-specific attribute;
    # harmless no-op for a plain nn.Module).
    model.verbose = False
    with torch.no_grad():
        # Warm-up: the original version ran these passes with autograd
        # enabled, building throwaway graphs; keep them under no_grad too.
        for _ in range(10):
            _ = model(dummy_input)
        # CUDA kernels launch asynchronously: without a synchronize() the
        # wall-clock below would mostly measure launch overhead, not the
        # actual inference time.
        if device.type == "cuda":
            torch.cuda.synchronize()
        start_time = time.time()
        for _ in range(num_iterations):
            _ = model(dummy_input)
        if device.type == "cuda":
            torch.cuda.synchronize()
        end_time = time.time()
    avg_latency = (end_time - start_time) / num_iterations
    return avg_latency * 1000  # convert seconds -> milliseconds
def main():
    """Benchmark inference latency for each model configuration listed below."""
    model_configs = [
        # r'D:\Desktop\v10-11\ultralytics\cfg\models\v8\yolov8.yaml',
        r'D:\Desktop\v10-11\ultralytics\cfg\models\v8\yolov8-ADown.yaml',
    ]
    img_size = 640  # standard YOLO input resolution
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"设备: {device}")
    print(f"输入大小: {img_size}x{img_size}")
    print("模型\t\t延迟 (ms)")
    print("-" * 30)
    for cfg_path in model_configs:
        detector = YOLO(cfg_path)
        latency_ms = measure_latency(detector, img_size)
        # File name without its extension serves as the model label.
        model_name = os.path.basename(cfg_path).split('.')[0]
        print(f"{model_name}\t\t{latency_ms:.2f}")


if __name__ == "__main__":
    main()
5.手动点击运行即可,或者在终端中运行。
运行结果如下:
从中选了两个简单的模型进行测试:可以看到,加入新的轻量化ADown下采样模块(从YOLOv9引入)之后,推理时间变短了。