Lite-Mono 使用工业数据集 KITTI 进行训练,目的是使用单目摄像头实现物体深度预测。关于 KITTI 数据集的介绍和下载,请参考相关文章。
另一篇介绍lite-mono的帖子可以提前看看
本文主要尝试把模型转换为 ONNX,再利用这个单目深度估计模型 Lite-Mono 进行推理。
权重的下载(如果 weights 目录不存在,请先执行 mkdir -p weights)
wget -O weights/lite-mono_640x192.zip 'https://surfdrive.surf.nl/files/index.php/s/CUjiK221EFLyXDY/download'
wget -O weights/lite-mono-small_640x192.zip 'https://surfdrive.surf.nl/files/index.php/s/8cuZNH1CkNtQwxQ/download'
wget -O weights/lite-mono-tiny_640x192.zip 'https://surfdrive.surf.nl/files/index.php/s/TFDlF3wYQy0Nhmg/download'
wget -O weights/lite-mono-8m_640x192.zip 'https://surfdrive.surf.nl/files/index.php/s/UlkVBi1p99NFWWI/download'
wget -O weights/lite-mono_1024x320.zip 'https://surfdrive.surf.nl/files/index.php/s/IK3VtPj6b5FkVnl/download'
wget -O weights/lite-mono-small_1024x320.zip 'https://surfdrive.surf.nl/files/index.php/s/w8mvJMkB1dP15pu/download'
wget -O weights/lite-mono-tiny_1024x320.zip 'https://surfdrive.surf.nl/files/index.php/s/myxcplTciOkgu5w/download'
wget -O weights/lite-mono-8m_1024x320.zip 'https://surfdrive.surf.nl/files/index.php/s/mgonNFAvoEJmMas/download'
解压
unzip weights/lite-mono_640x192.zip -d weights
unzip weights/lite-mono-small_640x192.zip -d weights
unzip weights/lite-mono-tiny_640x192.zip -d weights
unzip weights/lite-mono-8m_640x192.zip -d weights
unzip weights/lite-mono_1024x320.zip -d weights
unzip weights/lite-mono-small_1024x320.zip -d weights
unzip weights/lite-mono-tiny_1024x320.zip -d weights
unzip weights/lite-mono-8m_1024x320.zip -d weights
Lite-Mono 模型加载
import os
import torch
from torchvision import transforms, datasets
import networks
def load_network(model='lite-mono', load_weights_folder=None, device='cuda'):
    """Build the Lite-Mono encoder and depth decoder and load their weights.

    Args:
        model: Variant name forwarded to ``networks.LiteMono``.
        load_weights_folder: Directory containing ``encoder.pth`` and
            ``depth.pth``.
        device: Device (string or ``torch.device``) the checkpoints are
            mapped to and the networks are moved to.

    Returns:
        Tuple ``(encoder, depth_decoder)``, both on ``device`` and in eval
        mode.
    """
    # Honour the caller's device instead of always forcing CUDA.
    device = torch.device(device)

    encoder_path = os.path.join(load_weights_folder, 'encoder.pth')
    decoder_path = os.path.join(load_weights_folder, 'depth.pth')

    # map_location lets checkpoints saved on a GPU load on any target device.
    encoder_dict = torch.load(encoder_path, map_location=device)
    decoder_dict = torch.load(decoder_path, map_location=device)

    # The encoder checkpoint also stores the input resolution it expects.
    feed_height = encoder_dict['height']
    feed_width = encoder_dict['width']

    encoder = networks.LiteMono(
        model=model,
        height=feed_height,
        width=feed_width,
    )
    # Keep only entries that are real parameters of the network; this drops
    # extra checkpoint keys such as 'height' and 'width'.
    model_dict = encoder.state_dict()
    encoder.load_state_dict(
        {k: v for k, v in encoder_dict.items() if k in model_dict})
    encoder.to(device)
    encoder.eval()

    depth_decoder = networks.DepthDecoder(encoder.num_ch_enc, scales=range(3))
    depth_model_dict = depth_decoder.state_dict()
    depth_decoder.load_state_dict(
        {k: v for k, v in decoder_dict.items() if k in depth_model_dict})
    depth_decoder.to(device)
    depth_decoder.eval()

    return encoder, depth_decoder
ONNX 模型变换方法
def convert_to_onnx(
    input_shape=(640, 192),
    output_dir='',
    encoder=None,
    decoder=None,
    device='cpu',
):
    """Export the encoder and the decoder as two separate ONNX files.

    Writes ``encoder.onnx`` and ``decoder.onnx`` into ``output_dir``.

    Args:
        input_shape: ``(width, height)`` of the dummy input image.
        output_dir: Destination directory, created if missing. An empty
            string means the current working directory.
        encoder: Encoder module; exported with input ``input_image`` and
            output ``features``.
        decoder: Decoder module; exported with inputs ``features_1`` to
            ``features_3`` and output ``depth``.
        device: Device on which the dummy tensors are created. The modules
            are not moved here, so they must already live on this device.
    """
    # os.makedirs('') raises, so only create a directory when one is named.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # encoder
    # input_shape is (width, height) while the tensor layout is N, C, H, W.
    input_image = torch.randn(1, 3, input_shape[1], input_shape[0]).to(device)
    input_layer_names = ['input_image']
    output_layer_names = ['features']

    torch.onnx.export(
        encoder,
        input_image,
        # Build the path from output_dir, not from a module-level global.
        os.path.join(output_dir, 'encoder.onnx'),
        verbose=True,
        input_names=input_layer_names,
        output_names=output_layer_names,
        do_constant_folding=False,
        opset_version=13,
    )

    # decoder
    # Only the shapes of the encoder outputs are needed, so skip autograd.
    with torch.no_grad():
        encoder_results = encoder(input_image)

    features = [
        torch.randn(*list(encoder_results[index].shape)).to(device)
        for index in range(3)
    ]
    input_layer_names = ['features_1', 'features_2', 'features_3']
    output_layer_names = ['depth']

    torch.onnx.export(
        decoder,
        # NOTE(review): passed as a list, exactly as before; torch.onnx.export
        # documents ``args`` as a tuple or tensor - confirm against the
        # decoder's forward signature.
        features,
        os.path.join(output_dir, 'decoder.onnx'),
        verbose=True,
        input_names=input_layer_names,
        output_names=output_layer_names,
        do_constant_folding=False,
        opset_version=13,
    )
onnx格式变换
# Export every released Lite-Mono checkpoint to ONNX.
# Each entry is (checkpoint directory name, network variant, (width, height)).
# The loop variables are deliberately module-level names: the ONNX files are
# written into a directory named after ``file_name``.
for file_name, model, input_shape in (
    ('lite-mono_640x192', 'lite-mono', (640, 192)),
    ('lite-mono-small_640x192', 'lite-mono-small', (640, 192)),
    ('lite-mono-tiny_640x192', 'lite-mono-tiny', (640, 192)),
    ('lite-mono-8m_640x192', 'lite-mono-8m', (640, 192)),
    ('lite-mono_1024x320', 'lite-mono', (1024, 320)),
    ('lite-mono-small_1024x320', 'lite-mono-small', (1024, 320)),
    ('lite-mono-tiny_1024x320', 'lite-mono-tiny', (1024, 320)),
    ('lite-mono-8m_1024x320', 'lite-mono-8m', (1024, 320)),
):
    load_weights_folder = 'weights/' + file_name
    encoder, decoder = load_network(model, load_weights_folder)
    convert_to_onnx(
        input_shape=input_shape,
        output_dir=file_name,
        encoder=encoder,
        decoder=decoder,
        device='cuda:0',
    )
使用onnx进行推理
- 视频捕获:首先初始化视频捕获设备。
- 模型加载:加载编码器和解码器的 ONNX 模型。
- 主循环:在无限循环中读取视频帧,进行推理,处理输出图像,直到按下 ESC 键。
- 资源释放:在结束时释放视频捕获资源并关闭所有窗口。
需要以下库
os:用于文件和路径操作。
copy:用于深拷贝对象。
time:用于时间测量。
argparse:用于处理命令行参数。
cv2:OpenCV库,用于图像处理。
numpy:用于处理数组和矩阵。
onnxruntime:用于运行 ONNX 模型。
def run_inference(encoder, decoder, image):
- 输入:编码器和解码器模型,以及输入图像。
- 处理:
- 预处理:调整图像大小、颜色空间转换、转置和归一化。
- 推理:将处理后的图像输入编码器和解码器,生成深度图。
- 后处理:归一化深度图并转换为 uint8 格式。
def main():
- 解析命令行参数:包括设备编号、视频文件路径和模型路径。
- 视频捕获:使用 OpenCV 从指定设备或视频文件读取帧。
- 加载模型:使用 ONNX Runtime 加载编码器和解码器模型。
- 循环处理:
- 捕获视频帧,进行深度推理,并绘制调试信息。
- 显示输入和输出图像。
def draw_debug(image, elapsed_time, depth_map):
- 输入:原始图像、推理耗时和深度图。
- 处理:
- 使用 cv.applyColorMap 为深度图应用色彩映射。
- 使用 cv.putText 在调试图像上显示推理耗时。
完整代码如下(下面的代码只定义了函数,作为脚本运行时需要在文件末尾加上 if __name__ == '__main__': main()):
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import copy
import time
import argparse
import cv2 as cv
import numpy as np
import onnxruntime
def run_inference(encoder, decoder, image):
    """Run the encoder/decoder ONNX sessions on one frame.

    Args:
        encoder: Session object exposing ``get_inputs()`` and ``run()``; its
            first input's shape is read as (N, C, H, W).
        decoder: Session object with at least three inputs, fed with the
            first three encoder outputs in order.
        image: BGR image array (it is converted with ``COLOR_BGR2RGB``).

    Returns:
        ``uint8`` array: the first decoder output, squeezed and min-max
        normalized to 0..255. A map with no value range (constant or NaN)
        yields all zeros.
    """
    # ONNX Input Size
    input_size = encoder.get_inputs()[0].shape
    input_width = input_size[3]
    input_height = input_size[2]

    # Pre process: Resize, BGR->RGB, HWC->CHW, add batch axis, float32 in [0, 1]
    input_image = cv.resize(image, dsize=(input_width, input_height))
    input_image = cv.cvtColor(input_image, cv.COLOR_BGR2RGB)
    input_image = input_image.transpose(2, 0, 1)
    input_image = np.expand_dims(input_image, axis=0)
    input_image = input_image.astype('float32')
    input_image = input_image / 255.0

    # Inference
    input_name = encoder.get_inputs()[0].name
    features = encoder.run(None, {input_name: input_image})

    decoder_inputs = decoder.get_inputs()
    decoder_feed = {
        decoder_inputs[index].name: features[index] for index in range(3)
    }
    depth_map = decoder.run(None, decoder_feed)

    # Post process
    depth_map = np.squeeze(depth_map[0])
    d_min = np.min(depth_map)
    d_max = np.max(depth_map)
    d_range = d_max - d_min
    if d_range > 0:
        depth_map = (depth_map - d_min) / d_range
    else:
        # Avoid 0/0: dividing here would produce NaN and an undefined uint8
        # cast. The comparison is also False for NaN, so that lands here too.
        depth_map = np.zeros_like(depth_map)
    depth_map = depth_map * 255.0
    depth_map = np.asarray(depth_map, dtype="uint8")

    return depth_map
def main():
    """Run depth estimation on a camera or video stream and display it.

    Command-line arguments:
        --device: Integer camera index (default 0).
        --movie: Path of a video file; when given it replaces ``--device``.
        --model: Directory containing ``encoder.onnx`` and ``decoder.onnx``.

    Frames are processed until the stream ends or ESC is pressed. The
    capture and all windows are released even if an error occurs.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument("--device", type=int, default=0)
    parser.add_argument("--movie", type=str, default=None)
    parser.add_argument(
        "--model",
        type=str,
        default='model/lite-mono-tiny_640x192',
    )

    args = parser.parse_args()
    model_dir = args.model
    encoder_path = os.path.join(model_dir, 'encoder.onnx')
    decoder_path = os.path.join(model_dir, 'decoder.onnx')

    # Initialize video capture
    cap_device = args.device
    if args.movie is not None:
        cap_device = args.movie
    cap = cv.VideoCapture(cap_device)

    try:
        # Load model; both sessions get the same provider list in this order.
        providers = [
            'CUDAExecutionProvider',
            'CPUExecutionProvider',
        ]
        encoder = onnxruntime.InferenceSession(
            encoder_path,
            providers=providers,
        )
        decoder = onnxruntime.InferenceSession(
            decoder_path,
            providers=providers,
        )

        while True:
            start_time = time.time()

            # Capture read
            ret, frame = cap.read()
            if not ret:
                break
            debug_image = copy.deepcopy(frame)

            # Inference execution
            depth_map = run_inference(
                encoder,
                decoder,
                frame,
            )

            elapsed_time = time.time() - start_time

            # Draw
            debug_image, depth_image = draw_debug(
                debug_image,
                elapsed_time,
                depth_map,
            )

            # Show first, then poll the keyboard: waitKey is what processes
            # the GUI events, so this order also renders the final frame.
            cv.imshow('Input', debug_image)
            cv.imshow('Output', depth_image)

            key = cv.waitKey(1)
            if key == 27:  # ESC
                break
    finally:
        # Release the device and close the windows on every exit path.
        cap.release()
        cv.destroyAllWindows()
def draw_debug(image, elapsed_time, depth_map):
    """Build the two images shown to the user.

    Args:
        image: Input frame; it is copied, never modified.
        elapsed_time: Processing time; multiplied by 1000 and labelled "ms".
        depth_map: Depth map passed to ``cv.applyColorMap``.

    Returns:
        Tuple ``(debug_image, depth_image)``: the frame copy with the timing
        text drawn on it, and the JET-colored depth map resized to the
        frame's width and height.
    """
    frame_height, frame_width = image.shape[0], image.shape[1]

    # Colorize the depth map, then scale it to the frame size.
    colored_depth = cv.resize(
        cv.applyColorMap(depth_map, cv.COLORMAP_JET),
        dsize=(frame_width, frame_height),
    )

    # Draw the timing text on a copy so the caller's frame stays untouched.
    annotated = copy.deepcopy(image)
    elapsed_ms = elapsed_time * 1000
    label = "Elapsed Time : " + '{:.1f}'.format(elapsed_ms) + "ms"
    cv.putText(
        annotated,
        label,
        (10, 40),
        cv.FONT_HERSHEY_SIMPLEX,
        1.0,
        (0, 255, 0),
        2,
        cv.LINE_AA,
    )

    return annotated, colored_depth
在 Windows 下启动 main 进行测试时会报如下错误:
cv2.error: OpenCV(4.10.0) D:\a\opencv-python\opencv-python\opencv\modules\highgui\src\window.cpp:1301: error: (-2:Unspecified error) The function is not implemented. Rebuild the library with Windows, GTK+ 2.x or Cocoa support. If you are on Ubuntu or Debian, install libgtk2.0-dev and pkg-config, then re-run cmake or configure script in function 'cvShowImage'
解决方法参考
安装下面模块
pip install opencv-contrib-python
使用一个行车记录仪视频进行测试