从头实现YOLO V3：利用Tensorflow-Keras，含YOLO模型结构图（详细）

置顶 AIIIIZ

已于 2022-06-15 00:04:51 修改

阅读量4.9k

点赞数 3

分类专栏：深度学习目标检测 YOLO 文章标签：目标检测 YOLO YOLO-V3 Tensorflow Keras

于 2019-07-02 21:21:39 首次发布

本文链接：https://blog.csdn.net/aaa_aaa1sdf/article/details/94484328

版权

深度学习同时被 3 个专栏收录

1 篇文章

订阅专栏

目标检测

1 篇文章

订阅专栏

YOLO

1 篇文章

订阅专栏

前言

本次利用Tensorflow-Keras实现YOLO-V3模型，目的是自己动手实现YOLO-V3的重要结构，这样才能更为深入了解模型以及提升编程能力；略去了不太重要的结构，如tiny版本就没有继续实现；
重点研究了模型结构，在levio作图的基础上增加了各层的参数设置，以图将模型展示得更加清晰明了；
实现了较为细节也很重要的非最大值抑制、loss函数、如何生成true label等；
另外做了些测试：图像目标检测、视频实时目标检测测试；
详细代码步骤见：GitHub - Teslaxhub/YOLO_V3_Tensorflow_Keras_from_scratch: YOLO-V3 implementation step by step using Tensorflow-Keras from scratch with Structure Detail Figure 含测试样例；
转图请留言并注明出处，谢谢；
如有错误，欢迎指正。

部分代码

import tensorflow as tf
import tensorflow.keras.backend as K
import keras
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import ZeroPadding2D
from tensorflow.keras.layers import Add
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import LeakyReLU
from tensorflow.keras.layers import UpSampling2D
from tensorflow.keras.layers  import Flatten, Concatenate
from tensorflow.keras import Model, Input
from tensorflow.keras.losses import binary_crossentropy

import numpy as np
import pandas as pd

import cv2 as cv
import matplotlib.pyplot as plt
from PIL import Image, ImageDraw, ImageFont

import os
from time import time

def preprocesing_image(file_path):
    '''
    Load an image file and turn it into a batch of one for the network.

    Pipeline: open --> force 3-channel RGB --> resize to the 416x416 model
    input size --> scale pixel values to [0, 1] --> add a leading batch axis.

    Parameters
    ----------
    file_path: path (or file object) handed to PIL's Image.open

    Returns
    -------
    img_resized_array: shape(1,416,416,3), float values in [0, 1]
    '''
    # The context manager releases the underlying file handle as soon as the
    # pixels have been read, instead of leaving it to garbage collection.
    with Image.open(file_path) as img:
        # Grayscale / palette / RGBA inputs would otherwise produce arrays
        # with 1 or 4 channels (or none), breaking the (1,416,416,3) contract.
        img_resized = img.convert("RGB").resize((416, 416))

    img_resized_array = np.asarray(img_resized) / 255.
    img_resized_array = np.expand_dims(img_resized_array, 0)

    return img_resized_array


def load_anchors(file_path):
    '''
    Read the anchor definitions from a text file.

    Only the first line of the file is used. It is expected to be a
    comma-separated list of integers, which is grouped into (width, height)
    style pairs, i.e. rows of two values.

    Returns
    -------
    anchors: integer array of shape (k, 2); shape(9,2) for the standard
             nine-anchor file
    '''
    with open(file_path) as anchor_file:
        first_line = anchor_file.readline()

    # Surrounding whitespace (including the trailing newline) is stripped
    # from every token before the integer conversion.
    flat_values = []
    for token in first_line.split(','):
        flat_values.append(int(token.strip()))

    return np.array(flat_values).reshape([-1, 2])




def load_class_name(file_path):
    '''
    Read the class-name file, one name per line.

    Leading and trailing whitespace (including the newline) is removed from
    every line; blank lines are kept as empty strings.

    Returns
    -------
    class_name: list of str, in file order
    '''
    with open(file_path) as name_file:
        return [line.strip() for line in name_file]



def sigmoid(x):
    '''
    Logistic function 1 / (1 + exp(-x)), evaluated with NumPy.

    x may be a scalar or a NumPy array; arrays are processed element-wise.
    '''
    denominator = 1 + np.exp(-x)
    return 1 / denominator





def iou(box1, box2):
    '''
    Intersection-over-union of two axis-aligned boxes.

    Parameters
    ----------
    box1, box2: indexable as (xmin, ymin, xmax, ymax); only the first four
                entries are read, so longer rows (e.g. with a trailing
                score) are accepted.

    Returns
    -------
    iou_score: scalar float in [0, 1]; 0 when the boxes do not overlap or
               when the union has no area.
    '''
    intersect_x_min = np.max([box1[0], box2[0]])
    intersect_y_min = np.max([box1[1], box2[1]])
    intersect_x_max = np.min([box1[2], box2[2]])
    intersect_y_max = np.min([box1[3], box2[3]])

    # Clamp each side at zero: for disjoint boxes the raw differences are
    # negative, and the product of two negatives would otherwise look like a
    # positive intersection area (or one negative would give a negative IoU).
    intersect_w = np.maximum(intersect_x_max - intersect_x_min, 0)
    intersect_h = np.maximum(intersect_y_max - intersect_y_min, 0)
    intersect_area = intersect_w * intersect_h

    box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
    box2_area = (box2[2] - box2[0]) * (box2[3] - box2[1])
    union_area = box1_area + box2_area - intersect_area

    # Degenerate (zero-area) boxes: avoid a 0/0 division.
    if union_area <= 0:
        return 0.0

    return intersect_area / union_area





def iou_batch_array(box1, box2):
    '''
    Element-wise intersection-over-union of two box tensors, computed with
    the Keras backend.

    Parameters
    ----------
    box1, box2: the trailing axis holds (xmin, ymin, xmax, ymax). The
                original author documents the shape as (batch,n,n,3,4).

    Returns
    --------
    iou_score: the input shape without the trailing axis, i.e.
               (batch,n,n,3) for the documented input shape.

    NOTE(review): a pair of zero-area boxes still divides by a zero union;
    confirm whether callers can feed such padding entries.
    '''
    intersect_x_min = K.maximum(box1[..., 0], box2[..., 0])
    intersect_y_min = K.maximum(box1[..., 1], box2[..., 1])
    intersect_x_max = K.minimum(box1[..., 2], box2[..., 2])
    intersect_y_max = K.minimum(box1[..., 3], box2[..., 3])

    # Clamp each side at zero: for disjoint boxes the raw differences are
    # negative, and multiplying two negatives would otherwise produce a
    # positive "intersection" (or a negative IoU when only one is negative).
    intersect_w = intersect_x_max - intersect_x_min
    intersect_h = intersect_y_max - intersect_y_min
    intersect_w = K.maximum(intersect_w, K.zeros_like(intersect_w))
    intersect_h = K.maximum(intersect_h, K.zeros_like(intersect_h))
    intersect_area = intersect_w * intersect_h

    box1_area = (box1[..., 2] - box1[..., 0]) * (box1[..., 3] - box1[..., 1])
    box2_area = (box2[..., 2] - box2[..., 0]) * (box2[..., 3] - box2[..., 1])

    iou_score = intersect_area / (box1_area + box2_area - intersect_area)

    return iou_score




def _iou_one_to_many(box, others):
    '''IoU between one box and every row of `others` (xmin,ymin,xmax,ymax first).'''
    # Clamp width/height at zero so disjoint boxes get IoU 0 instead of a
    # spurious positive (or negative) value.
    inter_w = np.maximum(
        np.minimum(box[2], others[:, 2]) - np.maximum(box[0], others[:, 0]), 0)
    inter_h = np.maximum(
        np.minimum(box[3], others[:, 3]) - np.maximum(box[1], others[:, 1]), 0)
    inter_area = inter_w * inter_h

    box_area = (box[2] - box[0]) * (box[3] - box[1])
    other_areas = (others[:, 2] - others[:, 0]) * (others[:, 3] - others[:, 1])
    union_area = box_area + other_areas - inter_area

    # Rows whose union has no area keep an IoU of 0 rather than dividing by 0.
    scores = np.zeros(union_area.shape, dtype=float)
    np.divide(inter_area, union_area, out=scores, where=union_area > 0)
    return scores


def non_max_suspension(scores, boxes, iou_threshold, max_box):
    '''
    Greedy non-maximum suppression for the boxes of a single class.
    (The function name is kept as-is for existing callers.)

    Repeatedly keeps the highest-scoring remaining box and discards every
    other remaining box whose IoU with it is >= iou_threshold.

    Parameters
    ----------
    scores: scores for one class, shape(n,1) or (n,)
    boxes: boxes for one class, shape(n,4) as (xmin,ymin,xmax,ymax)
    iou_threshold: boxes overlapping a kept box by at least this IoU are
                   removed
    max_box: TODO, intended as the maximum number of boxes to keep for this
             class; accepted but currently not applied.

    Returns
    -------
    box_collect: list of kept rows, each a 1-D array
                 (xmin,ymin,xmax,ymax,score), ordered by descending score.
                 Empty list when there are no input boxes.
    '''
    scores = np.asarray(scores).reshape((-1, 1))

    # Nothing to suppress; the original indexing would fail on empty input.
    if scores.shape[0] == 0:
        return []

    candidates = np.concatenate([np.asarray(boxes), scores], axis=-1)

    # Filtering below preserves row order, so one sort up front is enough.
    candidates = candidates[np.argsort(-candidates[:, -1], kind="stable")]

    box_collect = []
    while len(candidates) > 0:
        best = candidates[0]
        box_collect.append(best)

        rest = candidates[1:]
        if len(rest) == 0:
            break

        # Keep only the boxes that do not overlap the chosen one too much.
        overlaps = _iou_one_to_many(best, rest)
        candidates = rest[~(overlaps >= iou_threshold)]

    return box_collect