项目简介
项目基于deepsort和Yolov5实现了一个简单的船舶追踪算法,deepsort是一个基于检测的目标追踪算法,可以方便地替换检测算法。Yolov5是船舶检测模型,使用onnx模型进行推理,也可以替换为自己的检测模型。项目中追踪和检测分模块进行实现,方便替换自己的检测算法模型。
项目github地址: https://github.com/wzl639/yolov5-deepsort?tab=readme-ov-file
deepsort目标追踪
DeepSORT(Deep Simple Online Realtime Tracking)是一种在线的多目标跟踪算法,它能够实现连续帧中对同一个目标的关联,并对目标进行ID分配,以便在整个视频序列中对目标进行跟踪。
项目中追踪算法的实现在deep_sort模块,通过DeepSort类实现追踪功能,DeepSort类中update方法执行轨迹的更新操作,方法接收当前帧目标检测信息,返回当前帧追踪的目标信息。
def update(self, bbox_xywh, confidences, ori_img):
    """
    Advance the tracker by one frame using that frame's detections.

    :param bbox_xywh: detection boxes as a numpy array, one row per box; the
        demo caller passes rows of [center_x, center_y, width, height]
    :param confidences: numpy array with one confidence score per box
    :param ori_img: current frame as an image array (as read by cv2)
    :return: list of [x1, y1, x2, y2, track_id], one entry per reported track
    """
    self.height, self.width = ori_img.shape[:2]

    # Wrap every sufficiently confident box, together with its feature
    # vector from self._get_features, into a Detection.
    appearance = self._get_features(bbox_xywh, ori_img)
    tlwh_boxes = self._xywh_to_tlwh(bbox_xywh)
    candidates = []
    for idx, score in enumerate(confidences):
        if score > self.min_confidence:
            candidates.append(Detection(tlwh_boxes[idx], score, appearance[idx]))

    # Non-maximum suppression over the surviving candidates.
    candidate_boxes = np.array([c.tlwh for c in candidates])
    candidate_scores = np.array([c.confidence for c in candidates])
    kept = non_max_suppression(candidate_boxes, self.nms_max_overlap, candidate_scores)
    candidates = [candidates[k] for k in kept]

    # Predict first, then associate the detections with the tracks.
    self.tracker.predict()
    self.tracker.update(candidates)

    # Report only confirmed tracks whose time_since_update is at most 1.
    outputs = []
    for track in self.tracker.tracks:
        if track.is_confirmed() and track.time_since_update <= 1:
            x1, y1, x2, y2 = self._tlwh_to_xyxy(track.to_tlwh())
            outputs.append([x1, y1, x2, y2, track.track_id])
    return outputs
Yolov5目标检测
Yolov5是一种基于anchor的单阶段目标检测算法,通过卷积神经网络对图片进行特征提取,然后对图片进行密集预测,输出目标的类别和位置信息,从而将目标检测任务变成分类和回归任务。
项目中使用yolov5 onnx模型进行推理,模型是作者自己训练的船舶检测模型,也可以修改为自己的检测模型。目标检测算法的实现在detector.yolov5_onnx.py模块的Yolov5Onnx类,该类实现了onnx模型加载、数据预处理和模型后处理,以及整体的图片检测流程。
数据预处理:
def preprocess(self, img, size=(640, 640)):
    """
    Convert a cv2 image into the input array for the YOLO model.

    :param img: image as read by cv2 (converted here from BGR to RGB)
    :param size: target size handed to ``cv2.resize`` as its ``dsize`` argument
    :return: float32 numpy array laid out as (1, channels, height, width),
        with every value divided by 255
    """
    rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    resized = cv2.resize(rgb, size)
    # HWC -> CHW, then scale by 1/255 and cast to float32.
    chw = np.transpose(resized, (2, 0, 1))
    scaled = (chw / 255.0).astype(np.float32)
    # Prepend the batch axis.
    return np.expand_dims(scaled, axis=0)
模型后处理:
def postprocess(self, pre, conf_thres, iou_thres, model_size=(640, 640), img_size=(1920, 1400), label={0: "boat"}):
    """
    Turn decoded YOLOv5 output into final boxes in original-image coordinates.

    Steps: objectness filtering, class selection by argmax, xywh -> xyxy
    conversion via ``self.xywh2xyxy``, per-class NMS via ``self.nms``, and
    rescaling from the model input size to the original image size.

    :param pre: numpy array of model output, e.g. shape [1, 25200, 5 + num_classes];
        each row is (x, y, w, h, objectness, class scores...), already decoded
    :param conf_thres: objectness threshold; only rows strictly above it are kept
    :param iou_thres: threshold forwarded to ``self.nms``
    :param model_size: model input size as (h, w)
    :param img_size: original image size as (h, w)
    :param label: mapping from class index to display name; an index missing
        from the mapping is returned unchanged. Only read here, never modified.
    :return: list of [x1, y1, x2, y2, score, cls]; [] when no row passes the
        objectness threshold
    """
    # Flatten to (rows, fields) explicitly instead of relying on np.squeeze,
    # which collapses every size-1 axis, not just the batch axis.
    rows = np.asarray(pre)
    rows = rows.reshape(-1, rows.shape[-1])

    # Objectness filtering.
    kept = rows[rows[:, 4] > conf_thres]
    if len(kept) == 0:
        return []

    # Pick the best class per row in one vectorised call and store its index
    # in column 5: (x, y, w, h, score, class scores...) -> (x, y, w, h, score, cls).
    class_ids = np.argmax(kept[:, 5:], axis=1)
    box = np.concatenate([kept[:, :5], class_ids[:, None].astype(kept.dtype)], axis=1)

    # (x, y, w, h, score, cls) -> (x1, y1, x2, y2, score, cls)
    box = self.xywh2xyxy(box)

    # Per-coordinate factors mapping model-input pixels to original-image
    # pixels: x uses the width ratio, y uses the height ratio.
    img_scale = np.array([img_size[1] / model_size[1], img_size[0] / model_size[0]] * 2)

    # Run NMS independently for each detected class.
    out_boxs = []
    for cls_id in set(class_ids.tolist()):
        cls_boxes = box[box[:, 5] == cls_id]
        for k in self.nms(cls_boxes, iou_thres):
            coords = cls_boxes[k][:4] * img_scale
            score = cls_boxes[k][4]
            cls_idx = int(cls_boxes[k][5])
            out_boxs.append([coords[0], coords[1], coords[2], coords[3], score, label.get(cls_idx, cls_idx)])
    return out_boxs
图片检测流程
def detect(self, image, conf_thres=0.7, iou_thres=0.3):
    """
    Run the full detection pipeline on one image.

    :param image: image array as read by cv2; its first two shape entries are
        taken as (height, width)
    :param conf_thres: objectness threshold forwarded to ``self.postprocess``
    :param iou_thres: NMS threshold forwarded to ``self.postprocess``
    :return: whatever ``self.postprocess`` returns, i.e. a list of
        [x1, y1, x2, y2, score, cls] in original-image coordinates
    """
    h, w = image.shape[:2]
    input_np = self.preprocess(image)
    # Only the first ONNX output is used; the original author observed it as
    # a numpy array of shape (1, 25200, 15) for their model.
    pre = self.session.run(None, {self.input_name: input_np})[0]
    # Decode, filter and rescale the raw predictions to the input image size.
    return self.postprocess(pre, conf_thres, iou_thres, (640, 640), (h, w))
演示示例
main.py读取了一个船舶过境视频,然后对该船舶进行追踪。直接运行脚本,即可可视化船舶追踪过程。
def main(rid_model_path, detect_model_path, video_path):
    """
    Detect and track ships in a video and display the result frame by frame.

    :param rid_model_path: path to the re-identification model used by DeepSort
    :param detect_model_path: path to the ONNX detection model
    :param video_path: path of the video handed to ``cv2.VideoCapture``
    """
    # Tracker
    deepsort = DeepSort(model_path=rid_model_path,
                        max_dist=0.2, min_confidence=0.3,
                        nms_max_overlap=0.5, max_iou_distance=0.7,
                        max_age=70, n_init=3, nn_budget=100, use_cuda=True)
    # Detector
    model = Yolov5Onnx(detect_model_path)

    cap = cv2.VideoCapture(video_path)
    try:
        # Create and size the display window once rather than on every frame.
        cv2.namedWindow('show', 0)
        cv2.resizeWindow('show', 1000, 600)

        while True:
            ret, frame = cap.read()
            if not ret:
                break
            frame = cv2.resize(frame, (1920, 1080))

            # Detect on the current frame.
            det_boxes = model.detect(frame)

            # The tracker is only updated when there is at least one
            # detection, as before; the frame itself is always displayed so
            # playback does not freeze on frames without detections.
            if len(det_boxes) > 0:
                # Convert [x1, y1, x2, y2, ...] into [cx, cy, w, h] for the tracker.
                bbox_xywh = []
                confs = []
                for detbox in det_boxes:
                    x1, y1, x2, y2 = detbox[:4]
                    bbox_xywh.append([int((x1 + x2) / 2), int((y1 + y2) / 2), x2 - x1, y2 - y1])
                    confs.append(detbox[4])

                # Track: each output row is [x1, y1, x2, y2, track_id].
                outputs = deepsort.update(np.array(bbox_xywh), np.array(confs), frame)

                # Draw the tracked boxes and their ids.
                for (x1, y1, x2, y2, track_id) in outputs:
                    p1, p2 = (int(x1), int(y1)), (int(x2), int(y2))
                    cv2.putText(frame, "track_id:" + str(track_id), p1, cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
                    cv2.rectangle(frame, p1, p2, (0, 255, 0), thickness=3, lineType=cv2.LINE_8)

            cv2.imshow('show', frame)
            cv2.waitKey(1)
    finally:
        # Release the capture and close the window even if a frame fails.
        cap.release()
        cv2.destroyAllWindows()
if __name__ == '__main__':
    # Inputs for the demo run.
    rid_model_path = "./deep_sort/deep/checkpoint/ckpt.t7"  # ReID model path
    detect_model_path = "./detector/boat_det_siyang.onnx"  # detection model path
    video_path = "test.mp4"  # video path
    main(
        rid_model_path=rid_model_path,
        detect_model_path=detect_model_path,
        video_path=video_path,
    )