前言
- 本次利用Tensorflow-Keras实现YOLO-V3模型,目的是自己动手实现YOLO-V3的重要结构,这样才能更为深入了解模型以及提升编程能力;略去了不太重要的结构,如tiny版本就没有继续实现;
- 重点研究了模型结构,在 Levio 所作结构图的基础上增加了各层的参数设置,力图将模型展示得更加清晰明了;
- 实现了较为细节但也很重要的部分:非极大值抑制(NMS)、loss函数,以及如何生成true label等;
- 另外做了些测试:图像目标检测、视频实时目标检测测试;
- 详细代码步骤见:GitHub - Teslaxhub/YOLO_V3_Tensorflow_Keras_from_scratch: YOLO-V3 implementation step by step using Tensorflow-Keras from scratch with Structure Detail Figure 含测试样例;
- 转图请留言并注明出处,谢谢;
- 如有错误,欢迎指正。
部分代码
import tensorflow as tf
import tensorflow.keras.backend as K
import keras
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import ZeroPadding2D
from tensorflow.keras.layers import Add
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import LeakyReLU
from tensorflow.keras.layers import UpSampling2D
from tensorflow.keras.layers import Flatten, Concatenate
from tensorflow.keras import Model, Input
from tensorflow.keras.losses import binary_crossentropy
import numpy as np
import pandas as pd
import cv2 as cv
import matplotlib.pyplot as plt
from PIL import Image, ImageDraw, ImageFont
import os
from time import time
def preprocesing_image(file_path):
    """Load an image file and turn it into a model-ready batch of one.

    Pipeline: open -> force 3-channel RGB -> resize to the 416x416 model
    input size -> scale pixel values to [0, 1] -> add a leading batch axis.

    Parameters
    ----------
    file_path: path (or file object) accepted by ``PIL.Image.open``

    Returns
    -------
    img_resized_array: float array, shape (1, 416, 416, 3)
    """
    # The context manager closes the underlying file once the pixels are read.
    with Image.open(file_path) as img:
        # Grayscale, palette and RGBA files would otherwise yield arrays with
        # 1 or 4 channels (or none at all) and break the documented shape.
        img_resized = img.convert("RGB").resize((416, 416))
    img_resized_array = np.asarray(img_resized) / 255.
    img_resized_array = np.expand_dims(img_resized_array, 0)
    return img_resized_array
def load_anchors(file_path):
    """Read the anchor sizes stored on the first line of a text file.

    The first line is expected to hold comma-separated integers; they are
    grouped pairwise into rows.

    Returns
    -------
    anchors: integer array, shape (number_of_pairs, 2)
    """
    with open(file_path) as handle:
        first_line = handle.readline()
    values = []
    for token in first_line.split(','):
        values.append(int(token.strip()))
    return np.array(values).reshape([-1, 2])
def load_class_name(file_path):
    """Read class names from a text file, one name per line.

    Leading and trailing whitespace (including the newline) is stripped
    from every line.

    Returns
    -------
    class_name: list of str, in file order
    """
    class_name = []
    with open(file_path) as handle:
        for raw_line in handle:
            class_name.append(raw_line.strip())
    return class_name
def sigmoid(x):
    """Return the logistic function 1 / (1 + exp(-x)), applied element-wise."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator
def iou(box1, box2):
    """Compute the intersection-over-union of two axis-aligned boxes.

    Parameters
    ----------
    box1, box2: sequences whose first four entries are xmin, ymin, xmax, ymax
        (any further entries, e.g. a trailing score, are ignored)

    Returns
    -------
    iou_score: scalar float; 0 when the boxes do not overlap or when the
        union has no area
    """
    intersect_x_min = np.maximum(box1[0], box2[0])
    intersect_y_min = np.maximum(box1[1], box2[1])
    intersect_x_max = np.minimum(box1[2], box2[2])
    intersect_y_max = np.minimum(box1[3], box2[3])

    # Clamp at zero: for disjoint boxes the raw differences are negative, and
    # two negatives would multiply into a bogus positive intersection.
    intersect_w = np.maximum(intersect_x_max - intersect_x_min, 0)
    intersect_h = np.maximum(intersect_y_max - intersect_y_min, 0)
    intersect_area = intersect_w * intersect_h

    box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
    box2_area = (box2[2] - box2[0]) * (box2[3] - box2[1])
    union_area = box1_area + box2_area - intersect_area

    # Two zero-area boxes would otherwise divide 0 by 0.
    if union_area <= 0:
        return np.float64(0.0)
    iou_score = intersect_area / union_area
    return iou_score
def iou_batch_array(box1, box2):
    """Compute element-wise IoU between two batches of axis-aligned boxes.

    Parameters
    ----------
    box1, box2: tensors whose last axis holds xmin, ymin, xmax, ymax,
        e.g. shape (batch, n, n, 3, 4)

    Returns
    -------
    iou_score: tensor with the last axis removed, e.g. shape (batch, n, n, 3)

    Notes
    -----
    No epsilon is added to the denominator, so a pair of boxes that both have
    zero area still divides 0 by 0.
    """
    intersect_x_min = K.maximum(box1[..., 0], box2[..., 0])
    intersect_y_min = K.maximum(box1[..., 1], box2[..., 1])
    intersect_x_max = K.minimum(box1[..., 2], box2[..., 2])
    intersect_y_max = K.minimum(box1[..., 3], box2[..., 3])

    # Clamp at zero: for disjoint boxes the raw differences are negative, and
    # two negatives would multiply into a bogus positive intersection.
    intersect_w = intersect_x_max - intersect_x_min
    intersect_h = intersect_y_max - intersect_y_min
    intersect_w = K.maximum(intersect_w, K.zeros_like(intersect_w))
    intersect_h = K.maximum(intersect_h, K.zeros_like(intersect_h))
    intersect_area = intersect_w * intersect_h

    box1_area = (box1[..., 2] - box1[..., 0]) * (box1[..., 3] - box1[..., 1])
    box2_area = (box2[..., 2] - box2[..., 0]) * (box2[..., 3] - box2[..., 1])
    iou_score = intersect_area / (box1_area + box2_area - intersect_area)
    return iou_score
def _iou_one_to_many(box, others):
    """Return the IoU between one box and every row of ``others`` (xmin, ymin, xmax, ymax first)."""
    # Widths/heights are clamped at zero so disjoint boxes contribute no overlap.
    inter_w = np.maximum(
        np.minimum(box[2], others[:, 2]) - np.maximum(box[0], others[:, 0]), 0)
    inter_h = np.maximum(
        np.minimum(box[3], others[:, 3]) - np.maximum(box[1], others[:, 1]), 0)
    inter_area = inter_w * inter_h
    box_area = (box[2] - box[0]) * (box[3] - box[1])
    other_areas = (others[:, 2] - others[:, 0]) * (others[:, 3] - others[:, 1])
    union = box_area + other_areas - inter_area
    # Avoid 0/0 for degenerate (zero-area) pairs; their IoU is reported as 0.
    has_area = union > 0
    safe_union = np.where(has_area, union, 1)
    return np.where(has_area, inter_area / safe_union, 0.0)


def non_max_suspension(scores, boxes, iou_threshold, max_box):
    """Run greedy non-maximum suppression on the boxes of a single class.

    The highest-scoring remaining box is kept, every remaining box whose IoU
    with it is at or above ``iou_threshold`` is discarded, and the process
    repeats until no boxes are left.

    Parameters
    ----------
    scores: scores for one class, shape (n,) or (n, 1)
    boxes: boxes for one class as xmin, ymin, xmax, ymax, shape (n, 4)
    iou_threshold: boxes overlapping a kept box with IoU >= this value are dropped
    max_box: intended cap on the number of boxes kept for the class;
        currently unused (not implemented)

    Returns
    -------
    box_collect: list of kept rows, each an array (xmin, ymin, xmax, ymax, score),
        ordered by descending score; empty list when there is no input
    """
    # Collects the boxes that survive suppression.
    box_collect = []
    scores = np.asarray(scores)
    if scores.size == 0:
        return box_collect

    box_score_array = np.concatenate([boxes, scores.reshape((-1, 1))], axis=-1)

    # Sort once by descending score. Filtering below preserves the order, so no
    # re-sort is needed per round; a stable sort keeps ties in input order.
    order = np.argsort(-box_score_array[:, -1], kind="stable")
    remaining = box_score_array[order]

    while remaining.shape[0] > 0:
        best = remaining[0]
        box_collect.append(best)
        rest = remaining[1:]
        overlaps = _iou_one_to_many(best, rest)
        # Keep everything that is not suppressed by the box just selected.
        remaining = rest[~(overlaps >= iou_threshold)]
    return box_collect
测试部分
见github
参考及致谢:
yolo系列之yolo v3【深度解析】_木盏的博客-CSDN博客_yolo3
GitHub - qqwweee/keras-yolo3: A Keras implementation of YOLOv3 (Tensorflow backend)