【目标检测】基于yolo3和sort的多目标检测与跟踪

最新推荐文章于 2025-01-30 01:00:00 发布

望天边星宿

最新推荐文章于 2025-01-30 01:00:00 发布

阅读量5.7k

点赞数 6

分类专栏：深度学习 Python 文章标签：算法 python 深度学习人工智能

本文链接：https://blog.csdn.net/See_Star/article/details/108804256

版权

Python 同时被 2 个专栏收录

66 篇文章

订阅专栏

深度学习

34 篇文章

订阅专栏

前言

本来是毕业论文的一部分，但是一直懒得写复现过程，不过最近又要做相关的内容了，所以重新搞了搞，简单的写一下过程。

yolo3论文：https://arxiv.org/abs/1804.02767
yolo3源码：https://github.com/qqwweee/keras-yolo3

sort论文：https://arxiv.org/abs/1602.00763
sort源码：https://github.com/abewley/sort

依赖：

目标检测：
tensorflow-gpu==1.12.0
keras-gpu==2.2.4
opencv==4.2.0
pillow==6.2.2
numpy
matplotlib

多目标跟踪：
filterpy==1.4.5
numba==0.49.0
scikit-image==0.14.0
lap==0.4.0

一、Yolo3目标检测

第一步是检测，只有对已检测的目标才能形成跟踪，检测部分采用yolov3。

【目标检测】基于YOLOv3的海上船舶目标检测分类（Tensorflow/keras）

二、Sort多目标跟踪

原理：匈牙利算法+卡尔曼滤波器。具体原理这里不展开，简单来说：卡尔曼滤波器根据已有轨迹预测每个目标在当前帧中的位置，匈牙利算法再按IoU把当前帧的检测框与这些预测框一一匹配；详细推导可参考前面给出的sort论文。

流程图：
在这里插入图片描述
SORT是基于检测的跟踪算法，其跟踪效果主要取决于检测结果，其跟踪准确率取决于检测准确率。

2.1 创建文件夹`sort`

在检测算法（keras-yolo3）目录下创建sort目录。
在这里插入图片描述
input中存放待识别的视频和图片。
output中存放识别后的结果。

2.2 `sort/sort.py`

下载Sort源码，将sort源码内的sort.py文件复制到刚刚创建的sort目录下。并进行修改：

注释掉第26行：from skimage import io

2.3 `sort/utils.py`

同时，在该目录下创建utils.py文件（文件名需与主函数中的 `import sort.utils` 保持一致）：

import numpy as np
import sort.sort


def delete_repeat_bbox(out_boxes, out_scores, out_classes, iou_threshold):
    """Drop near-duplicate detections, keeping the better-scoring box of each pair.

    Boxes are compared pairwise in index order. When the IoU of two surviving
    boxes is at least ``iou_threshold``, the one with the lower score is
    discarded; on equal scores the earlier box is kept.

    Args:
        out_boxes: sequence of boxes, each given as ``[y1, x1, y2, x2]``.
        out_scores: sequence of scores, parallel to ``out_boxes``.
        out_classes: sequence of class values, parallel to ``out_boxes``.
        iou_threshold: IoU at or above which two boxes count as duplicates.

    Returns:
        Tuple ``(boxes, scores, classes)`` of numpy arrays containing the
        surviving entries in their original order. The input sequences are
        left untouched, so lists, tuples and arrays are all accepted.
    """
    total = len(out_classes)
    dropped = set()  # indices already discarded; a set keeps the lookups cheap

    for i in range(total - 1):
        for j in range(i + 1, total):
            if i in dropped:
                # box i lost an earlier comparison, nothing left to compare it with
                break
            if j in dropped:
                continue
            # bounding box 1
            y1_1, x1_1, y2_1, x2_1 = out_boxes[i]
            # bounding box 2
            y1_2, x1_2, y2_2, x2_2 = out_boxes[j]
            # the corners are handed to sort.sort.iou as [x1, y1, x2, y2]
            overlap = sort.sort.iou([x1_1, y1_1, x2_1, y2_1], [x1_2, y1_2, x2_2, y2_2])
            if overlap >= iou_threshold:
                dropped.add(j if out_scores[i] >= out_scores[j] else i)

    kept_boxes = [box for k, box in enumerate(out_boxes) if k not in dropped]
    kept_scores = [score for k, score in enumerate(out_scores) if k not in dropped]
    kept_classes = [cls for k, cls in enumerate(out_classes) if k not in dropped]

    return np.array(kept_boxes), np.array(kept_scores), np.array(kept_classes)


def sort_image(sort_class, out_boxes, out_scores, out_classes):
    """Run one frame of detections through a tracker and unpack its output.

    Args:
        sort_class: tracker object; its ``update(dets)`` method is called once
            with an array whose rows are ``[x1, y1, x2, y2, score, type]``.
        out_boxes: detected boxes, each given as ``[y1, x1, y2, x2]``.
        out_scores: scores, parallel to ``out_boxes``.
        out_classes: class values, parallel to ``out_boxes``.

    Returns:
        Tuple ``(boxes, scores, classes, object_ids)`` of numpy arrays built
        from the rows returned by ``update``; boxes are converted back to
        ``[y1, x1, y2, x2]``.
    """
    rows = [
        [box[1], box[0], box[3], box[2], out_scores[i], out_classes[i]]
        for i, box in enumerate(out_boxes)
    ]
    # With no detections np.array([]) would be 1-D; hand the tracker an
    # explicit (0, 6) array so row/column indexing still works.
    dets = np.array(rows) if rows else np.empty((0, 6))
    trackers = sort_class.update(dets)

    tracked_boxes = []
    tracked_scores = []
    tracked_classes = []
    object_id = []

    # each row d is [x1, y1, x2, y2, object_id, score, type]
    for d in trackers:
        tracked_boxes.append([d[1], d[0], d[3], d[2]])
        object_id.append(int(d[4]))
        tracked_scores.append(float(d[5]))
        tracked_classes.append(int(d[6]))

    return (np.array(tracked_boxes), np.array(tracked_scores),
            np.array(tracked_classes), np.array(object_id))

2.4 主函数：yolo_sort.py

将sort与yolo中类相结合，形成新的类
注意修改86-99行的配置信息（即 yolo_child 类中的 `_defaults` 字典：模型路径、类别文件、输出目录等）
main函数内配置跟踪视频或图片的路径

import cv2
import numpy as np
import sort.utils

from sort.sort import Sort, associate_detections_to_trackers, KalmanBoxTracker
from yolo import YOLO
from PIL import Image, ImageFont, ImageDraw
from keras import backend as K
from timeit import default_timer as timer
from yolo3.utils import letterbox_image


# 继承sort文件中的Sort类
class Sort_child(Sort):
    """SORT tracker variant that also keeps a score and a type for every track.

    ``scores`` and ``types`` are lists parallel to ``trackers``: entry ``k``
    holds the score and type of the detection last assigned to tracker ``k``.
    """

    def __init__(self, max_age=2, min_hits=3):
        """
        Sets key parameters for SORT

        Params:
        max_age - a tracker is removed once its ``time_since_update`` exceeds this value.
        min_hits - ``hit_streak`` a tracker needs before it is reported
                   (not enforced while ``frame_count <= min_hits``).

        The base-class initialiser is not called; every attribute read by
        ``update`` is set here.
        """
        self.max_age = max_age
        self.min_hits = min_hits
        self.trackers = []
        self.scores = []
        self.types = []
        self.frame_count = 0

    def update(self, dets):
        """
        Params:
        dets - a numpy array of detections in the format [[x1,y1,x2,y2,score,type],[x1,y1,x2,y2,score,type],...]
        Requires: this method must be called once for each frame even with empty detections.
        Returns a numpy array in the format [x1,y1,x2,y2,object_id,score,type]
        (shape (0, 7) when there is nothing to report).
        NOTE: The number of objects returned may differ from the number of detections provided.
        """
        dets = np.asarray(dets)
        if dets.size == 0:
            # An empty frame may arrive as a 1-D array; the row/column indexing
            # below needs the 2-D (0, 6) layout.
            dets = np.empty((0, 6))

        self.frame_count += 1
        # get predicted locations from existing trackers.
        trks = np.zeros((len(self.trackers), 6))
        to_del = []
        ret = []
        for t, trk in enumerate(trks):
            pos = self.trackers[t].predict()[0]
            trk[:] = [pos[0], pos[1], pos[2], pos[3], self.scores[t], self.types[t]]
            if np.any(np.isnan(pos)):
                to_del.append(t)
        # drop the rows with invalid predictions, then the matching trackers
        trks = np.ma.compress_rows(np.ma.masked_invalid(trks))
        for t in reversed(to_del):
            self.trackers.pop(t)
            self.scores.pop(t)
            self.types.pop(t)
        matched, unmatched_dets, unmatched_trks = associate_detections_to_trackers(dets, trks)

        # update matched trackers with assigned detections
        for t, trk in enumerate(self.trackers):
            if t not in unmatched_trks:
                d = matched[np.where(matched[:, 1] == t)[0], 0]
                assigned = dets[d, :][0]
                trk.update(assigned)
                self.scores[t] = assigned[4]
                self.types[t] = assigned[5]

        # create and initialise new trackers for unmatched detections
        for i in unmatched_dets:
            trk = KalmanBoxTracker(dets[i, 0:5])
            self.trackers.append(trk)
            self.scores.append(dets[i, :][4])
            self.types.append(dets[i, :][5])

        # Walk the trackers from the end so that popping index i never shifts
        # an entry that is still to be visited.
        i = len(self.trackers)
        for trk in reversed(self.trackers):
            pos = trk.get_state()[0]
            i -= 1
            if (trk.time_since_update < 1) and (trk.hit_streak >= self.min_hits or self.frame_count <= self.min_hits):
                # +1 as MOT benchmark requires positive
                row = np.concatenate((pos, [trk.id + 1], [self.scores[i]], [self.types[i]]))
                ret.append(row.reshape(1, -1))
            # remove dead tracklet
            if trk.time_since_update > self.max_age:
                self.trackers.pop(i)
                self.scores.pop(i)
                self.types.pop(i)

        if len(ret) > 0:
            return np.concatenate(ret)
        # same 7-column layout as the populated result
        return np.empty((0, 7))


# 继承yolo中的YOLO类
class yolo_child(YOLO):
    """YOLO detector extended with duplicate-box removal, optional SORT tracking and result logging."""

    _defaults = {
        "model_path": 'model/001/trained_weights_final.h5',  # trained model weights
        "anchors_path": 'model_data/yolo_anchors.txt',
        "classes_path": 'model_data/my_classes.txt',
        "score": 0.3,
        "iou": 0.45,
        "model_image_size": (416, 416),
        "gpu_num": 1,
        # settings added by this subclass
        "tracker": True,  # run detections through the tracker
        "write_to_file": True,  # append one line per box to the result file
        "output_path": 'sort/output/',
        "repeat_iou": 0.95,  # IoU threshold passed to delete_repeat_bbox
    }

    def __init__(self, **kwargs):
        """Initialise the detector, apply the defaults and create the tracker.

        Keyword arguments override entries of ``_defaults``.
        NOTE(review): assumes ``YOLO.__init__`` accepts keyword overrides -
        confirm against yolo.py.
        """
        super(yolo_child, self).__init__(**kwargs)
        self.__dict__.update(self._defaults)
        # re-apply the caller's overrides, which the line above would otherwise undo
        self.__dict__.update(kwargs)
        self.frame = 1
        self.mot_tracker = Sort_child()

    def detect_image(self, image, fo=None):
        """Detect, optionally track, log and draw the objects of one frame.

        Args:
            image: PIL image; the boxes and labels are drawn onto it.
            fo: writable text file for the per-box result lines. Lines are
                only written when ``write_to_file`` is set and ``fo`` is given.

        Returns:
            The same ``image`` object with the annotations drawn.
        """
        start = timer()

        if self.model_image_size != (None, None):
            assert self.model_image_size[0] % 32 == 0, 'Multiples of 32 required'
            assert self.model_image_size[1] % 32 == 0, 'Multiples of 32 required'
            boxed_image = letterbox_image(image, tuple(reversed(self.model_image_size)))
        else:
            new_image_size = (image.width - (image.width % 32),
                              image.height - (image.height % 32))
            boxed_image = letterbox_image(image, new_image_size)
        image_data = np.array(boxed_image, dtype='float32')

        print(image_data.shape)
        image_data /= 255.
        image_data = np.expand_dims(image_data, 0)  # Add batch dimension.

        out_boxes, out_scores, out_classes = self.sess.run(
            [self.boxes, self.scores, self.classes],
            feed_dict={
                self.yolo_model.input: image_data,
                self.input_image_shape: [image.size[1], image.size[0]],
                K.learning_phase(): 0
            })

        # delete repeat bbox
        out_boxes, out_scores, out_classes = \
            sort.utils.delete_repeat_bbox(list(out_boxes), list(out_scores), list(out_classes), self.repeat_iou)

        # open or close tracker
        if self.tracker and (self.mot_tracker is not None):
            out_boxes, out_scores, out_classes, object_id = \
                sort.utils.sort_image(self.mot_tracker, out_boxes, out_scores, out_classes)
        else:
            # without tracking every box gets id 0
            object_id = np.zeros(len(out_boxes))

        # write to file (skipped when the caller supplied no file object)
        if self.write_to_file and fo is not None:
            # NOTE(review): boxes are [top, left, bottom, right], so the columns
            # written after the id are top, left, height, width - confirm that
            # the consumer of the result file expects this order.
            for i in reversed(range(0, len(out_boxes))):
                result = [self.frame, object_id[i], out_boxes[i][0], out_boxes[i][1],
                          abs(out_boxes[i][2] - out_boxes[i][0]), abs(out_boxes[i][3] - out_boxes[i][1]),
                          out_scores[i], -1, -1, -1]
                fo.write(', '.join(map(str, result)))
                fo.write('\n')

        print('Found {} boxes for {}'.format(len(out_boxes), 'img'))
        # 'font/times.ttf'
        font = ImageFont.truetype(font='font/FiraMono-Medium.otf',
                                  size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32'))
        thickness = (image.size[0] + image.size[1]) // 300

        for i, c in reversed(list(enumerate(out_classes))):
            predicted_class = self.class_names[c]
            box = out_boxes[i]
            score = out_scores[i]
            track_id = int(object_id[i])

            # bounding box, rounded and clipped to the image
            top, left, bottom, right = box
            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
            right = min(image.size[0], np.floor(right + 0.5).astype('int32'))

            label = '{} {:.2f} id:{}'.format(predicted_class, score, track_id)
            draw = ImageDraw.Draw(image)
            if hasattr(draw, 'textsize'):
                label_size = draw.textsize(label, font)
            else:
                # Pillow releases without textsize(): measure with textbbox()
                box_l, box_t, box_r, box_b = draw.textbbox((0, 0), label, font=font)
                label_size = (box_r - box_l, box_b - box_t)

            print(label, (left, top), (right, bottom))

            # put the label above the box when it fits, otherwise just inside it
            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            # My kingdom for a good redistributable image drawing library.
            # A thick outline is drawn as nested one-pixel rectangles.
            for offset in range(thickness):
                draw.rectangle(
                    [left + offset, top + offset, right - offset, bottom - offset],
                    outline=self.colors[c])
            draw.rectangle(
                [tuple(text_origin), tuple(text_origin + label_size)],
                fill=self.colors[c])
            draw.text(text_origin, label, fill=(0, 0, 0), font=font)
            del draw

        end = timer()
        print('process time:', end - start, 's')
        self.frame = self.frame + 1
        return image


# detect_video 在 detect_image 的基础上逐帧处理视频
def detect_video(yolo, video_path, output_path=""):
    """Run ``yolo.detect_image`` on every frame of a video and display the result.

    Args:
        yolo: detector object; this function uses its ``detect_image``,
            ``write_to_file``, ``output_path`` and ``close_session`` members.
        video_path: source passed to ``cv2.VideoCapture``.
        output_path: when not empty, the annotated frames are also written to
            this video file.

    Raises:
        IOError: if the video source cannot be opened.

    When ``yolo.write_to_file`` is set, the per-box result lines are written to
    ``yolo.output_path + 'result.dat'``. Processing stops at the end of the
    stream or when 'q' is pressed in the preview window. The capture, the
    writer, the result file and the detector session are released even if
    processing fails part-way.
    """
    vid = cv2.VideoCapture(video_path)
    if not vid.isOpened():
        raise IOError("Couldn't open webcam or video")
    video_FourCC = int(vid.get(cv2.CAP_PROP_FOURCC))
    video_fps = vid.get(cv2.CAP_PROP_FPS)
    video_size = (int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)),
                  int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    isOutput = output_path != ""

    out = None
    result_file = None
    try:
        if isOutput:
            print("!!! TYPE:", type(output_path), type(video_FourCC), type(video_fps), type(video_size))
            out = cv2.VideoWriter(output_path, video_FourCC, video_fps, video_size)
        if yolo.write_to_file:
            result_file = open(yolo.output_path + 'result.dat', 'w')

        prev_time = timer()
        while True:
            return_value, frame = vid.read()
            if not return_value or frame is None:
                # end of stream (or read failure)
                break
            # NOTE(review): the frame is passed to PIL without any channel
            # reordering - confirm the channel order the detector expects.
            image = Image.fromarray(frame)
            image = yolo.detect_image(image, result_file)
            # np.array copies, so cv2.putText below always gets a writable buffer
            result = np.array(image)

            curr_time = timer()
            exec_time = curr_time - prev_time
            prev_time = curr_time
            # guard against a zero interval from a coarse timer
            fps = "FPS: " + (str(1. / exec_time) if exec_time > 0 else "??")

            cv2.putText(result, text=fps, org=(3, 15), fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                        fontScale=0.50, color=(255, 0, 0), thickness=2)
            cv2.namedWindow("result", cv2.WINDOW_NORMAL)
            cv2.imshow("result", result)
            if isOutput:
                out.write(result)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        if result_file is not None:
            result_file.close()
        if out is not None:
            # finalises the output video file
            out.release()
        vid.release()
        yolo.close_session()




if __name__ == '__main__':
    # Switch the two modes on or off here.
    run_on_image = False
    run_on_video = True

    # The instance gets its own name so that the yolo_child class is not rebound.
    detector = yolo_child()

    # detect and track base on image
    if run_on_image:
        image_name = '000887.jpg'   # image directory: sort/input/
        image_path = 'sort/input/'
        image = Image.open(image_path+image_name)

        # the with-block closes the result file once the frame has been logged
        with open('sort/output/result.dat', 'w') as output:
            r_image = detector.detect_image(image, output)
        r_image.save(detector.output_path + image_name)

    # detect and track base on video
    if run_on_video:
        video_path = 'sort/input/video10.mp4'
        output = 'sort/output/video10.mp4'
        detect_video(detector, video_path, output)