YOLOV8 + 双目实现三维跟踪测速
相关文章
1. YOLOv5+双目测距(python)
2. YOLOv7+双目测距(python)
3. YOLOv8+双目测距(python)
下载链接:https://github.com/up-up-up-up/yolov8-stereo-speed—(求STAR)
1. 相关工作
本代码是在双目测距的基础上完成的,具体内容可见: YOLOV8 + 双目测距
2. 测速流程和原理(必读)
(1)速度是根据提取的前后两帧的三维坐标位置计算的,位置感知的准确度会对其造成影响。此外计算值的准确度还与GPU 速度相关
(2)自己做这一块时候走了一些弯路,一开始看到官方有个估计速度的代码,按照 使用Ultralytics YOLOv8 🚀 进行速度估计 ,他的大体思想是画一根线,然后计算前后帧在x方向上的坐标差,然后除以帧率计算速度
(3)但是尝试修改,把三维坐标设置为全局变量传进去,后续运行不知道哪里出问题了,一直报错,也没改出来,下边把自己改的代码贴出来,有会改的也可以按照这个思路继续改一下
# Ultralytics YOLO 🚀, AGPL-3.0 license
import math
from collections import defaultdict
from time import time
import cv2
import numpy as np
from ultralytics.utils.checks import check_imshow
from ultralytics.utils.plotting import Annotator, colors
import gol
class SpeedEstimator:
"""A class to estimation speed of objects in real-time video stream based on their tracks."""
def __init__(self):
"""Initializes the speed-estimator class with default values for Visual, Image, track and speed parameters."""
# Visual & im0 information
self.im0 = None
self.annotator = None
self.view_img = False
# Region information
# self.reg_pts = [(20, 400), (1260, 400)]
self.region_thickness = 3
# Predict/track information
self.clss = None
self.names = None
self.boxes = None
self.trk_ids = None
self.trk_pts = None
self.line_thickness = 2
self.trk_history = defaultdict(list)
# Speed estimator information
self.current_time = 0
self.dist_data = {}
self.trk_idslist = []
self.spdl_dist_thresh = 10
self.trk_previous_times = {}
self.trk_previous_points = {}
# Check if environment support imshow
self.env_check = check_imshow(warn=True)
def set_args(
self,
# reg_pts,
names,
view_img=False,
line_thickness=2,
region_thickness=5,
spdl_dist_thresh=10,
):
"""
Configures the speed estimation and display parameters.
Args:
reg_pts (list): Initial list of points defining the speed calculation region.
names (dict): object detection classes names
view_img (bool): Flag indicating frame display
line_thickness (int): Line thickness for bounding boxes.
region_thickness (int): Speed estimation region thickness
spdl_dist_thresh (int): Euclidean distance threshold for speed line
"""
# if reg_pts is None:
# print("Region points not provided, using default values")
# else:
# self.reg_pts = reg_pts
self.names = names
self.view_img = view_img
self.line_thickness = line_thickness
self.region_thickness = region_thickness
self.spdl_dist_thresh = spdl_dist_thresh
def extract_tracks(self, tracks):
self.boxes = tracks[0].boxes.xyxy.cpu()
self.clss = tracks[0].boxes.cls.cpu().tolist()
self.trk_ids = tracks[0].boxes.id.int().cpu().tolist()
def store_track_info(self, track_id, box):
track = self.trk_history[track_id]
bbox_center = (float((box[0] + box[2]) / 2), float((box[1] + box[3]) / 2))
x_center = (box[0] + box[2]) / 2
y_center = (box[1] + box[3]) / 2
# points_3d = gol.get_value('points_3d')
# a = points_3d[int(y_center), int(x_center), 0] / 1000
# b = points_3d[int(y_center), int(x_center), 1] / 1000
# c = points_3d[int(y_center), int(x_center), 2] / 1000
a = gol.get_value('a')
b = gol.get_value('b')
c = gol.get_value('c')
bbox_center_3d = (float(a), float(b), float(c))
# track.append(bbox_center)
track.append(bbox_center_3d)
if len(track) > 30:
track.pop(0)
self.trk_pts = np.hstack(track).astype(np.int32).reshape((-1, 1, 3)) # 修改
return track
def plot_box_and_track(self, track_id, box, cls, track):
speed_label = f"{int(self.dist_data[track_id])}km/ph" if track_id in self.dist_data else self.names[int(cls)]
bbox_color = colors(int(track_id)) if track_id in self.dist_data else (255, 0, 255)
self.annotator.box_label(box, speed_label, bbox_color)
# cv2.polylines(self.im0, [self.trk_pts], isClosed=False, color=(0, 255, 0), thickness=1)
# cv2.circle(self.im0, (int(track[-1][0]), int(track[-1][1])), 5, bbox_color, -1)
# 绘制轨迹
for i in range(len(track) - 1):
cv2.line(self.im0, (int(track[i][0]), int(track[i][1])), (int(track[i + 1][0]), int(track[i + 1][1])),
(0, 255, 0), 1)
# 绘制最后一个点
cv2.circle(self.im0, (int(track[-1][0]), int(track[-1][1])), 5, bbox_color, -1)
def calculate_speed(self, trk_id, track):
if self.trk_previous_times[trk_id] != 0 and trk_id not in self.trk_idslist:
self.trk_idslist.append(trk_id)
time_difference = time() - self.trk_previous_times[trk_id]
if time_difference > 0:
dist_difference_0 = np.abs(track[-1][0] - self.trk_previous_points[trk_id][0])
dist_difference_1 = np.abs(track[-1][1] - self.trk_previous_points[trk_id][1])
dist_difference_2 = np.abs(track[-1][2] - self.trk_previous_points[trk_id][2])
dist_difference = math.sqrt(dist_difference_0 ** 2 + dist_difference_1 ** 2 + dist_difference_2 ** 2)
speed = dist_difference / time_difference
self.dist_data[trk_id] = speed
self.trk_previous_times[trk_id] = time()
self.trk_previous_points[trk_id] = track[-1]
def estimate_speed(self, im0, tracks, region_color=(255, 0, 0)):
self.im0 = im0
if tracks[0].boxes.id is None:
if self.view_img and self.env_check:
self.display_frames()
return im0
self.extract_tracks(tracks)
self.annotator = Annotator(self.im0, line_width=2)
# self.annotator.draw_region(reg_pts=self.reg_pts, color=region_color, thickness=self.region_thickness)
for box, trk_id, cls in zip(self.boxes, self.trk_ids, self.clss):
track = self.store_track_info(trk_id, box) # 将返回的轨迹信息存储在 track 中
if trk_id not in self.trk_previous_times:
self.trk_previous_times[trk_id] = 0
self.plot_box_and_track(trk_id, box, cls, track)
self.calculate_speed(trk_id, track) # 计算速度
if self.view_img and self.env_check:
self.display_frames()
return im0
def display_frames(self):
"""Display frame."""
cv2.imshow("Ultralytics Speed Estimation", self.im0)
if cv2.waitKey(1) & 0xFF == ord("q"):
return
if __name__ == "__main__":
SpeedEstimator()
(4)上述代码改不同于是换了了个思路,在主代码修改,使用跟踪方式获取前后帧的三维位置信息,把前一帧三维坐标存储在一个名为previous_positions
列表中,如果有下一帧位置出现,计算两者之间坐标差,除以时间,然后更新列表previous_positions
,将新一帧坐标存储进去
(5)本代码可能计算效果不是很好,但是思想是对的,希望大家多多学习一下计算思想,有问题可以讨论
3. 代码部分解析
3.1 测距部分
这一部分我用了多线程加快速度,计算目标检测框中心点的深度值
config = stereoconfig_040_2.stereoCamera()
map1x, map1y, map2x, map2y, Q = getRectifyTransform(720, 1280, config)
thread = MyThread(stereo_threading, args=(config, im0, map1x, map1y, map2x, map2y, Q))
thread.start()
results = model.predict(im0, save=False, conf=0.5)
annotated_frame = results[0].plot()
boxes = results[0].boxes.xywh.cpu()
for i, box in enumerate(boxes):
# for box, class_idx in zip(boxes, classes):
x_center, y_center, width, height = box.tolist()
x1 = x_center - width / 2
y1 = y_center - height / 2
x2 = x_center + width / 2
y2 = y_center + height / 2
if (0 < x2 < 1280):
thread.join()
points_3d = thread.get_result()
a = points_3d[int(y_center), int(x_center), 0] / 1000
b = points_3d[int(y_center), int(x_center), 1] / 1000
c = points_3d[int(y_center), int(x_center), 2] / 1000
distance = ((a ** 2 + b ** 2 + c ** 2) ** 0.5)
3.2 测速部分
previous_positions = {}
tracks = model.track(im0, persist=True)
bbox_center_3d = (float(a), float(b), float(c))
current_position = (a, b, c)
if i not in previous_positions:
previous_positions[i] = current_position
continue
tracks.append(bbox_center_3d)
previous_x, previous_y, previous_z = previous_positions[i]
displacement = ((a - previous_x) ** 2 + (b - previous_y) ** 2 + + (c - previous_z) ** 2) ** 0.5
end_time = time.time()
run_time = end_time-start_time
speed = displacement / run_time
print(run_time)
# 更新上一帧位置
previous_positions[i] = current_position
# 在这里使用速度进行你想要的操作,比如打印出来
# print("目标 {} 的速度为: {} m/s".format(i, speed))
print("速度为: {} m/s".format(speed))
3.3 绘制轨迹
track_history = defaultdict(lambda: [])
track = track_history[track_id]
track.append((float(x_center), float(y_center))) # x, y center point
if len(track) > 30: # retain 90 tracks for 90 frames
track.pop(0)
# Draw the tracking lines
points = np.hstack(track).astype(np.int32).reshape((-1, 1, 2))
cv2.polylines(annotated_frame, [points], isClosed=False, color=(0, 0, 255), thickness=3)
3.4 主代码
版本一:stereo-speed.py(绘制轨迹)
(1)加入了多线程处理,加快处理速度
(2)具体帧率开始和结束时间怎么设定看你自己,官方设定是model.track后进行speed_estimation是开始时间,计算速度结束后为结束时间,中间的时间差就是他们用于计算速度的时间。但是我感觉应该用在model.track前,因为这是整个一帧所用的时间
import cv2
import torch
import argparse
from ultralytics import YOLO
from stereo import stereoconfig_040_2
from stereo.stereo import stereo_40
from stereo.stereo import stereo_threading, MyThread
from stereo.dianyuntu_yolo import preprocess, undistortion, getRectifyTransform, draw_line, rectifyImage, \
stereoMatchSGBM
def main():
cap = cv2.VideoCapture('ultralytics/assets/a2.mp4')
model = YOLO('yolov8n.pt')
cv2.namedWindow('00', cv2.WINDOW_NORMAL)
cv2.resizeWindow('00', 1280, 360) # 设置宽高
out_video = cv2.VideoWriter('output.avi', cv2.VideoWriter_fourcc(*'XVID'), 30, (2560, 720))
while True:
ret, im0 = cap.read()
if not ret:
print("Video frame is empty or video processing has been successfully completed.")
break
# img = cv2.cvtColor(image_net, cv2.COLOR_BGRA2BGR)
im0 = cv2.resize(im0, (2560, 720))
config = stereoconfig_040_2.stereoCamera()
map1x, map1y, map2x, map2y, Q = getRectifyTransform(720, 1280, config)
thread = MyThread(stereo_threading, args=(config, im0, map1x, map1y, map2x, map2y, Q))
thread.start()
results = model.track(im0, save=False, conf=0.5)
annotated_frame = results[0].plot()
boxes = results[0].boxes.xywh.cpu()
for i, box in enumerate(boxes):
# for box, class_idx in zip(boxes, classes):
x_center, y_center, width, height = box.tolist()
x1 = x_center - width / 2
y1 = y_center - height / 2
x2 = x_center + width / 2
y2 = y_center + height / 2
print('x2',x2)
if (0 < x2 < 1280):
thread.join()
points_3d = thread.get_result()
# gol.set_value('points_3d', points_3d)
a = points_3d[int(y_center), int(x_center), 0] / 1000
b = points_3d[int(y_center), int(x_center), 1] / 1000
c = points_3d[int(y_center), int(x_center), 2] / 1000
distance = ((a ** 2 + b ** 2 + c ** 2) ** 0.5)
if (distance != 0):
text_dis_avg = "dis:%0.2fm" % distance
cv2.putText(annotated_frame, text_dis_avg, (int(x2 + 5), int(y1 + 30)), cv2.FONT_ITALIC, 1.2,
(0, 255, 255), 3)
cv2.imshow('00', annotated_frame)
out_video.write(annotated_frame)
key = cv2.waitKey(1)
if key == 'q':
break
out_video.release()
cap.release()
cv2.destroyAllWindows()
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--weights', type=str, default='yolov8n.pt', help='model.pt path(s)')
parser.add_argument('--svo', type=str, default=None, help='optional svo file')
parser.add_argument('--img_size', type=int, default=416, help='inference size (pixels)')
parser.add_argument('--conf_thres', type=float, default=0.4, help='object confidence threshold')
opt = parser.parse_args()
with torch.no_grad():
main()
版本二:stereo-speed-n.py(不绘制轨迹)
import time
import cv2
from ultralytics import YOLO
from stereo import stereoconfig_040_2
from stereo.stereo import stereo_40
from stereo.stereo import stereo_threading, MyThread
from stereo.dianyuntu_yolo import preprocess, undistortion, getRectifyTransform, draw_line, rectifyImage, \
stereoMatchSGBM
def detect():
model = YOLO("yolov8n.pt")
cv2.namedWindow('Speed Estimation', cv2.WINDOW_NORMAL)
cv2.resizeWindow('Speed Estimation', 1280, 360) # 设置宽高
cap = cv2.VideoCapture('ultralytics/assets/a2.mp4')
out_video = cv2.VideoWriter('output.avi', cv2.VideoWriter_fourcc(*'XVID'), 30, (2560, 720))
previous_positions = {}
assert cap.isOpened(), "Error reading video file"
while cap.isOpened():
success, im0 = cap.read()
if not success:
print("Video frame is empty or video processing has been successfully completed.")
break
# start_time = time.time()
config = stereoconfig_040_2.stereoCamera()
map1x, map1y, map2x, map2y, Q = getRectifyTransform(720, 1280, config)
thread = MyThread(stereo_threading, args=(config, im0, map1x, map1y, map2x, map2y, Q))
thread.start()
start_time = time.time()
tracks = model.track(im0, persist=True)
annotated_frame = tracks[0].plot()
boxes = tracks[0].boxes.xywh.cpu()
for i, box in enumerate(boxes):
x_center, y_center, width, height = box.tolist()
x1 = x_center - width / 2
y1 = y_center - height / 2
x2 = x_center + width / 2
y2 = y_center + height / 2
if (0 < x2 < 1280):
thread.join()
points_3d = thread.get_result()
a = points_3d[int(y_center), int(x_center), 0] / 1000
b = points_3d[int(y_center), int(x_center), 1] / 1000
c = points_3d[int(y_center), int(x_center), 2] / 1000
distance = ((a ** 2 + b ** 2 + c ** 2) ** 0.5)
bbox_center_3d = (float(a), float(b), float(c))
current_position = (a, b, c)
if i not in previous_positions:
previous_positions[i] = current_position
continue
tracks.append(bbox_center_3d)
previous_x, previous_y, previous_z = previous_positions[i]
displacement = ((a - previous_x) ** 2 + (b - previous_y) ** 2 + + (c - previous_z) ** 2) ** 0.5
end_time = time.time()
run_time = end_time-start_time
speed = displacement / run_time
print(run_time)
# 更新上一帧位置
previous_positions[i] = current_position
# 在这里使用速度进行你想要的操作,比如打印出来
# print("目标 {} 的速度为: {} m/s".format(i, speed))
print("速度为: {} m/s".format(speed))
if (distance != 0):
text_dis_avg = "dis:%0.2fm" % distance
cv2.putText(annotated_frame, text_dis_avg, (int(x2 + 5), int(y1 + 30)), cv2.FONT_ITALIC, 1.0,
(0, 255, 255), 2)
text_speed = "v:%0.2fm/s" % speed
cv2.putText(annotated_frame, text_speed, (int(x2 + 5), int(y1 + 60)), cv2.FONT_ITALIC, 1.0,
(255, 255, 0), 2)
cv2.imshow("Speed Estimation", annotated_frame)
out_video.write(annotated_frame)
if cv2.waitKey(1) == ord('q'):
break
out_video.release()
cap.release()
cv2.destroyAllWindows()
if __name__ == '__main__':
detect()
3.4 调用摄像头测速
如果想打开相机,直接把cap = cv2.VideoCapture('ultralytics/assets/a2.mp4')
改成cap = cv2.VideoCapture(0)
即可
4. 实验结果
可实现测距、跟踪和分割功能,实现不同功能仅需修改以下代码,具体见 此篇文章