前言
本来是毕业论文的一部分,但是一直懒得写复现过程,不过最近又要做相关的内容了,所以重新搞了搞,简单地写一下过程。
yolo3论文:https://arxiv.org/abs/1804.02767
yolo3源码:https://github.com/qqwweee/keras-yolo3
sort论文:https://arxiv.org/abs/1602.00763
sort源码:https://github.com/abewley/sort
依赖:
目标检测:
tensorflow-gpu==1.12.0
keras-gpu==2.2.4
opencv==4.2.0
pillow==6.2.2
numpy
matplotlib
多目标跟踪:
filterpy==1.4.5
numba==0.49.0
scikit-image==0.14.0
lap==0.4.0
一、Yolo3目标检测
第一步是检测,只有对已检测的目标才能形成跟踪,检测部分采用yolov3。
【目标检测】基于YOLOv3的海上船舶目标检测分类(Tensorflow/keras)
二、Sort多目标跟踪
原理:匈牙利算法+卡尔曼滤波器,具体原理我也解释不清楚。
流程图:
SORT是基于检测的跟踪算法,其跟踪效果主要取决于检测结果,即跟踪准确率取决于检测准确率。
2.1 创建文件夹Sort
在检测算法(keras-yolo3)目录下创建sort目录。
input中存放待识别的视频和图片。
output中存放识别后的结果。
2.2 sort/sort.py
下载Sort源码,将sort源码内的sort.py
文件复制到刚刚创建的sort目录下。并进行修改:
注释掉第26行:from skimage import io
2.3 sort/utils.py
同时,在该目录下创建utils.py(文件名需与主函数中的 import sort.utils 保持一致)
文件:
import numpy as np
import sort.sort
def delete_repeat_bbox(out_boxes, out_scores, out_classes, iou_threshold):
    """Drop the lower-scoring box of every pair that overlaps too much.

    Boxes are compared pairwise in index order. When the IoU of a pair is
    at least ``iou_threshold``, the box with the lower score is discarded
    (on a tie the later box is discarded). A discarded box takes no part
    in any further comparison.

    Args:
        out_boxes: sequence of boxes, each ``[y1, x1, y2, x2]``.
        out_scores: sequence of confidence scores, parallel to ``out_boxes``.
        out_classes: sequence of class ids, parallel to ``out_boxes``.
        iou_threshold: minimum IoU at which two boxes count as duplicates.

    Returns:
        Tuple ``(boxes, scores, classes)`` of numpy arrays holding the
        surviving entries in their original order.

    The inputs are only read, never modified, so lists, tuples and numpy
    arrays are all accepted.
    """
    count = len(out_classes)
    removed = set()
    for i in range(count - 1):
        for j in range(i + 1, count):
            if i in removed:
                # Box i lost an earlier comparison; nothing left to check.
                break
            if j in removed:
                continue
            y1_a, x1_a, y2_a, x2_a = out_boxes[i]
            y1_b, x1_b, y2_b, x2_b = out_boxes[j]
            # sort.sort.iou is called with corners ordered [x1, y1, x2, y2].
            overlap = sort.sort.iou([x1_a, y1_a, x2_a, y2_a],
                                    [x1_b, y1_b, x2_b, y2_b])
            if overlap >= iou_threshold:
                removed.add(j if out_scores[i] >= out_scores[j] else i)

    keep = [k for k in range(count) if k not in removed]
    return (np.array([out_boxes[k] for k in keep]),
            np.array([out_scores[k] for k in keep]),
            np.array([out_classes[k] for k in keep]))
def sort_image(sort_class, out_boxes, out_scores, out_classes):
    """Run one tracking step on a frame's detections.

    Args:
        sort_class: tracker object exposing ``update(dets)``; it receives
            an ``(N, 6)`` float array with rows
            ``[x1, y1, x2, y2, score, class]`` and must return rows of
            ``[x1, y1, x2, y2, object_id, score, class]``.
        out_boxes: detected boxes, each ``[y1, x1, y2, x2]``.
        out_scores: confidence scores, parallel to ``out_boxes``.
        out_classes: class ids, parallel to ``out_boxes``.

    Returns:
        Tuple ``(boxes, scores, classes, object_ids)`` of numpy arrays
        built from the tracker output, with boxes converted back to
        ``[y1, x1, y2, x2]``.
    """
    # Swap each box from (y1, x1, y2, x2) to the tracker's (x1, y1, x2, y2).
    dets = [
        [box[1], box[0], box[3], box[2], score, cls]
        for box, score, cls in zip(out_boxes, out_scores, out_classes)
    ]
    # reshape keeps the (N, 6) layout for a frame without detections;
    # np.array([]) alone would be 1-D with shape (0,).
    dets = np.array(dets, dtype=float).reshape(-1, 6)
    trackers = sort_class.update(dets)

    tracked_boxes = []
    tracked_scores = []
    tracked_classes = []
    object_id = []
    # d is [x1, y1, x2, y2, object_id, score, class]
    for d in trackers:
        tracked_boxes.append([d[1], d[0], d[3], d[2]])
        object_id.append(int(d[4]))
        tracked_scores.append(float(d[5]))
        tracked_classes.append(int(d[6]))
    return (np.array(tracked_boxes), np.array(tracked_scores),
            np.array(tracked_classes), np.array(object_id))
2.4 主函数:yolo_sort.py
- 将sort与yolo中类相结合,形成新的类
- 注意修改yolo_child类中_defaults字典里的配置信息(模型路径、类别文件、输出路径等,对应作者原文件的第86-99行)
- main函数内配置跟踪视频或图片的路径
import cv2
import numpy as np
import sort.utils
from sort.sort import Sort, associate_detections_to_trackers, KalmanBoxTracker
from yolo import YOLO
from PIL import Image, ImageFont, ImageDraw
from keras import backend as K
from timeit import default_timer as timer
from yolo3.utils import letterbox_image
# Subclass of Sort (sort/sort.py) that additionally carries every track's
# latest detection score and class id through the tracking step.
class Sort_child(Sort):
    def __init__(self, max_age=2, min_hits=3):
        """Set the key parameters for SORT.

        Args:
            max_age: a tracker is removed once its ``time_since_update``
                exceeds this value.
            min_hits: a tracker is reported only when its ``hit_streak``
                reaches this value (or while ``frame_count`` is still
                no larger than it).
        """
        # NOTE(review): the parent initialiser is not invoked here; every
        # attribute used by update() below is set explicitly instead.
        # Confirm Sort.__init__ does not set anything else that is needed.
        self.max_age = max_age
        self.min_hits = min_hits
        self.trackers = []
        # scores[k] / types[k] belong to trackers[k]; the three lists are
        # always appended to and popped from together.
        self.scores = []
        self.types = []
        self.frame_count = 0

    def update(self, dets):
        """Advance the tracker by one frame.

        Params:
          dets - a numpy array of detections in the format
                 [[x1,y1,x2,y2,score,type],[x1,y1,x2,y2,score,type],...]
        Requires: this method must be called once for each frame even with
        empty detections.
        Returns a numpy array with rows [x1,y1,x2,y2,object_id,score,type];
        when nothing is reported the array has shape (0, 7).

        NOTE: The number of objects returned may differ from the number of
        detections provided.
        """
        self.frame_count += 1

        # Get predicted locations from existing trackers.
        trks = np.zeros((len(self.trackers), 6))
        to_del = []
        ret = []
        for t, trk in enumerate(trks):
            pos = self.trackers[t].predict()[0]
            trk[:] = [pos[0], pos[1], pos[2], pos[3], self.scores[t], self.types[t]]
            if np.any(np.isnan(pos)):
                to_del.append(t)
        # Drop rows containing invalid values, then drop the trackers that
        # produced a NaN prediction (highest index first so earlier
        # indices stay valid).
        trks = np.ma.compress_rows(np.ma.masked_invalid(trks))
        for t in reversed(to_del):
            self.trackers.pop(t)
            self.scores.pop(t)
            self.types.pop(t)

        matched, unmatched_dets, unmatched_trks = associate_detections_to_trackers(dets, trks)

        # Update matched trackers with their assigned detections.
        for t, trk in enumerate(self.trackers):
            if t not in unmatched_trks:
                d = matched[np.where(matched[:, 1] == t)[0], 0]
                det = dets[d, :][0]
                trk.update(det)
                self.scores[t] = det[4]
                self.types[t] = det[5]

        # Create and initialise new trackers for unmatched detections.
        for i in unmatched_dets:
            trk = KalmanBoxTracker(dets[i, 0:5])
            self.trackers.append(trk)
            self.scores.append(dets[i, :][4])
            self.types.append(dets[i, :][5])

        # Walk the trackers from last to first so that popping index i
        # never shifts an entry that has not been visited yet.
        i = len(self.trackers)
        for trk in reversed(self.trackers):
            pos = trk.get_state()[0]
            i -= 1
            if (trk.time_since_update < 1) and (
                    trk.hit_streak >= self.min_hits or self.frame_count <= self.min_hits):
                # +1 as MOT benchmark requires positive ids
                ret.append(np.concatenate(
                    (pos, [trk.id + 1], [self.scores[i]], [self.types[i]])).reshape(1, -1))
            # Remove dead tracklet.
            if trk.time_since_update > self.max_age:
                self.trackers.pop(i)
                self.scores.pop(i)
                self.types.pop(i)

        if len(ret) > 0:
            return np.concatenate(ret)
        # Same 7-column layout as the non-empty result.
        return np.empty((0, 7))
# Subclass of YOLO (yolo.py) whose detect_image also removes duplicate
# boxes, optionally runs the SORT tracker, and logs results to a file.
class yolo_child(YOLO):
    _defaults = {
        "model_path": 'model/001/trained_weights_final.h5',  # trained model weights
        "anchors_path": 'model_data/yolo_anchors.txt',
        "classes_path": 'model_data/my_classes.txt',
        "score": 0.3,
        "iou": 0.45,
        "model_image_size": (416, 416),
        "gpu_num": 1,
        # Settings added by this subclass
        "tracker": True,            # run the SORT tracker on the detections
        "write_to_file": True,      # append one line per box to the result file
        "output_path": 'sort/output/',
        "repeat_iou": 0.95,         # IoU at which two boxes count as duplicates
    }

    def __init__(self, **kwargs):
        """Create the detector and its tracker.

        Args:
            **kwargs: optional overrides for entries of ``_defaults``.
        """
        # Forward the overrides so the parent can use them while it sets
        # itself up.
        # NOTE(review): assumes YOLO.__init__ accepts **kwargs, as the
        # keras-yolo3 implementation does - confirm for the version in use.
        super(yolo_child, self).__init__(**kwargs)
        self.__dict__.update(self._defaults)
        # Re-apply the overrides so the defaults above do not undo them.
        self.__dict__.update(kwargs)
        self.frame = 1  # 1-based index of the frame being processed
        self.mot_tracker = Sort_child()

    def detect_image(self, image, fo=None):
        """Detect (and optionally track) objects in one image and draw them.

        Args:
            image: PIL image; boxes and labels are drawn onto it in place.
            fo: writable text file for the per-box result lines, or None
                to skip writing even when ``write_to_file`` is set.

        Returns:
            The same image object with the annotations drawn.
        """
        start = timer()

        if self.model_image_size != (None, None):
            assert self.model_image_size[0] % 32 == 0, 'Multiples of 32 required'
            assert self.model_image_size[1] % 32 == 0, 'Multiples of 32 required'
            boxed_image = letterbox_image(image, tuple(reversed(self.model_image_size)))
        else:
            new_image_size = (image.width - (image.width % 32),
                              image.height - (image.height % 32))
            boxed_image = letterbox_image(image, new_image_size)
        image_data = np.array(boxed_image, dtype='float32')

        print(image_data.shape)
        image_data /= 255.
        image_data = np.expand_dims(image_data, 0)  # Add batch dimension.

        out_boxes, out_scores, out_classes = self.sess.run(
            [self.boxes, self.scores, self.classes],
            feed_dict={
                self.yolo_model.input: image_data,
                self.input_image_shape: [image.size[1], image.size[0]],
                K.learning_phase(): 0
            })

        # Delete repeated (heavily overlapping) boxes.
        out_boxes, out_scores, out_classes = sort.utils.delete_repeat_bbox(
            list(out_boxes), list(out_scores), list(out_classes), self.repeat_iou)

        # Tracker on: boxes/scores/classes are replaced by the tracker's
        # output. Tracker off: every box gets object id 0.
        if self.tracker and (self.mot_tracker is not None):
            out_boxes, out_scores, out_classes, object_id = sort.utils.sort_image(
                self.mot_tracker, out_boxes, out_scores, out_classes)
        else:
            object_id = np.zeros(len(out_boxes))

        # Write one line per box. Skipped when no file was supplied, so the
        # default fo=None no longer fails on fo.write.
        if self.write_to_file and fo is not None:
            for i in reversed(range(0, len(out_boxes))):
                # NOTE(review): boxes are [top, left, bottom, right], so the
                # four geometry columns written here are top, left, height,
                # width. MOT-style result files usually order them left,
                # top, width, height - confirm what the consumer expects.
                result = [self.frame, object_id[i], out_boxes[i][0], out_boxes[i][1],
                          abs(out_boxes[i][2] - out_boxes[i][0]),
                          abs(out_boxes[i][3] - out_boxes[i][1]), out_scores[i],
                          -1, -1, -1]
                fo.write(', '.join(map(str, result)))
                fo.write('\n')

        print('Found {} boxes for {}'.format(len(out_boxes), 'img'))

        # 'font/times.ttf'
        font = ImageFont.truetype(font='font/FiraMono-Medium.otf',
                                  size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32'))
        thickness = (image.size[0] + image.size[1]) // 300

        for i, c in reversed(list(enumerate(out_classes))):
            predicted_class = self.class_names[c]
            box = out_boxes[i]
            score = out_scores[i]
            track_id = int(object_id[i])

            # Round the box to whole pixels and clamp it to the image.
            top, left, bottom, right = box
            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
            right = min(image.size[0], np.floor(right + 0.5).astype('int32'))

            label = '{} {:.2f} id:{}'.format(predicted_class, score, track_id)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)
            print(label, (left, top), (right, bottom))

            # Put the label above the box if it fits, otherwise just inside.
            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            # My kingdom for a good redistributable image drawing library.
            # A thick outline is drawn as nested one-pixel rectangles.
            for offset in range(thickness):
                draw.rectangle(
                    [left + offset, top + offset, right - offset, bottom - offset],
                    outline=self.colors[c])
            draw.rectangle(
                [tuple(text_origin), tuple(text_origin + label_size)],
                fill=self.colors[c])
            draw.text(text_origin, label, fill=(0, 0, 0), font=font)
            del draw

        end = timer()
        print('process time:', end - start, 's')
        self.frame = self.frame + 1
        return image
# detect_video: applies yolo.detect_image to every frame of a video.
def detect_video(yolo, video_path, output_path=""):
    """Detect/track objects frame by frame, display and optionally save them.

    Args:
        yolo: detector exposing ``detect_image(image, fo)``,
            ``write_to_file``, ``output_path`` and ``close_session()``.
        video_path: source passed to ``cv2.VideoCapture``.
        output_path: file for the annotated video; "" disables saving.

    Raises:
        IOError: if the video source cannot be opened.

    Processing stops at the end of the stream or when 'q' is pressed in
    the preview window. The capture, writer, result file, preview window
    and detector session are released even if a frame fails.
    """
    vid = cv2.VideoCapture(video_path)
    if not vid.isOpened():
        raise IOError("Couldn't open webcam or video")

    out = None
    result_file = None
    try:
        video_FourCC = int(vid.get(cv2.CAP_PROP_FOURCC))
        video_fps = vid.get(cv2.CAP_PROP_FPS)
        video_size = (int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)),
                      int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)))
        isOutput = output_path != ""
        if isOutput:
            print("!!! TYPE:", type(output_path), type(video_FourCC), type(video_fps), type(video_size))
            out = cv2.VideoWriter(output_path, video_FourCC, video_fps, video_size)

        if yolo.write_to_file:
            result_file = open(yolo.output_path + 'result.dat', 'w')

        prev_time = timer()
        while True:
            return_value, frame = vid.read()
            if not return_value:
                # No frame delivered: end of stream or read failure.
                break

            # OpenCV delivers BGR frames while PIL treats 3-channel data
            # as RGB, so convert before detection and back afterwards.
            image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            image = yolo.detect_image(image, result_file)
            result = cv2.cvtColor(np.asarray(image), cv2.COLOR_RGB2BGR)

            curr_time = timer()
            exec_time = curr_time - prev_time
            prev_time = curr_time
            curr_fps = 1. / exec_time if exec_time > 0 else float('inf')
            fps = "FPS: " + str(curr_fps)
            cv2.putText(result, text=fps, org=(3, 15), fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                        fontScale=0.50, color=(255, 0, 0), thickness=2)
            cv2.namedWindow("result", cv2.WINDOW_NORMAL)
            cv2.imshow("result", result)
            if out is not None:
                out.write(result)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        if result_file is not None:
            result_file.close()
        if out is not None:
            # The writer has to be released for the output file to be finalised.
            out.release()
        vid.release()
        cv2.destroyAllWindows()
        yolo.close_session()
if __name__ == '__main__':
    # Use a separate name for the instance so the yolo_child class itself
    # is not overwritten.
    detector = yolo_child()

    # Switch the two modes on or off here.
    run_on_image = False
    run_on_video = True

    # Detect and track on a single image.
    if run_on_image:
        image_name = '000887.jpg'  # image directory: sort/input/
        image_path = 'sort/input/'
        image = Image.open(image_path + image_name)
        # The with-block closes the result file once detection is done.
        with open('sort/output/result.dat', 'w') as output:
            r_image = detector.detect_image(image, output)
        r_image.save(detector.output_path + image_name)

    # Detect and track on a video.
    if run_on_video:
        video_path = 'sort/input/video10.mp4'
        output = 'sort/output/video10.mp4'
        detect_video(detector, video_path, output)