从头实现YOLO V3:利用Tensorflow-Keras,含YOLO模型结构图(详细)

前言

  • 本次利用Tensorflow-Keras实现YOLO-V3模型,目的是自己动手实现YOLO-V3的重要结构,这样才能更为深入了解模型以及提升编程能力;略去了不太重要的结构,如tiny版本就没有继续实现;
  • 重点研究了模型结构,在Levio作图的基础上增加了各层的参数设置,力求将模型展示得更加清晰明了;
  • 实现了较为细节但也很重要的部分:非极大值抑制(NMS)、loss函数、true label的生成等;
  • 另外做了些测试:图像目标检测、视频实时目标检测测试;
  • 详细代码步骤见:GitHub - Teslaxhub/YOLO_V3_Tensorflow_Keras_from_scratch: YOLO-V3 implementation step by step using Tensorflow-Keras from scratch with Structure Detail Figure   含测试样例;
  • 转图请留言并注明出处,谢谢;
  • 如有错误,欢迎指正。

 

部分代码

import tensorflow as tf
import tensorflow.keras.backend as K
import keras
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import ZeroPadding2D
from tensorflow.keras.layers import Add
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import LeakyReLU
from tensorflow.keras.layers import UpSampling2D
from tensorflow.keras.layers  import Flatten, Concatenate
from tensorflow.keras import Model, Input
from tensorflow.keras.losses import binary_crossentropy

import numpy as np
import pandas as pd

import cv2 as cv
import matplotlib.pyplot as plt
from PIL import Image, ImageDraw, ImageFont

import os
from time import time
def preprocesing_image(file_path):
    '''
    Load an image file and turn it into a model-ready batch of one.

    Pipeline: open --> force 3-channel RGB --> resize to the 416x416 model
    input size --> scale pixel values to [0, 1] --> add a leading batch axis.

    Parameters
    ----------
    file_path: path (or file object) accepted by ``PIL.Image.open``.

    Returns
    -------
    img_resized_array: float array, shape (1, 416, 416, 3)
    '''
    # The context manager closes the underlying file as soon as the pixel
    # data has been copied out by convert()/resize().
    with Image.open(file_path) as img:
        # Grayscale, palette or RGBA inputs would otherwise produce an array
        # with 1/4 channels (or no channel axis at all), which does not match
        # the documented (1, 416, 416, 3) shape.
        img_resized = img.convert('RGB').resize((416, 416))

    img_resized_array = np.asarray(img_resized) / 255.
    img_resized_array = np.expand_dims(img_resized_array, 0)

    return img_resized_array


def load_anchors(file_path):
    '''
    Read the anchor sizes stored on the first line of a text file.

    The first line is expected to be a comma-separated list of integers;
    consecutive values are paired into rows. Only the first line is read.

    Parameters
    ----------
    file_path: path of the anchors file.

    Returns
    -------
    anchors: integer array, shape (k, 2) where k is half the number of
             values on the line (e.g. (9, 2) for nine anchors).
    '''
    with open(file_path) as anchor_file:
        first_line = anchor_file.readline()

    flat_values = []
    for token in first_line.split(','):
        flat_values.append(int(token.strip()))

    return np.array(flat_values).reshape([-1, 2])




def load_class_name(file_path):
    '''
    Read class names from a text file, one name per line.

    Leading and trailing whitespace (including the newline) is stripped
    from every line; blank lines are kept as empty strings.

    Parameters
    ----------
    file_path: path of the class-name file.

    Returns
    -------
    class_name: list of str, in file order.
    '''
    class_name = []
    with open(file_path) as name_file:
        for raw_line in name_file:
            class_name.append(raw_line.strip())

    return class_name



def sigmoid(x):
    '''
    Element-wise logistic function 1 / (1 + exp(-x)).

    Parameters
    ----------
    x: scalar, numpy array, or a list/tuple of numbers.

    Returns
    -------
    y: same shape as ``x``, values in [0, 1].
    '''
    # Plain Python sequences do not support unary minus; promote them.
    if isinstance(x, (list, tuple)):
        x = np.asarray(x)

    # For large negative x, exp(-x) overflows to inf. The final result
    # 1 / (1 + inf) == 0.0 is still correct, so only the spurious overflow
    # RuntimeWarning is silenced here.
    with np.errstate(over='ignore'):
        y = 1 / (1 + np.exp(-x))
    return y





def iou(box1, box2):
    '''
    Intersection-over-union of two axis-aligned boxes.

    Parameters
    ----------
    box1, box2: sequences whose first four entries are
                xmin, ymin, xmax, ymax (extra trailing entries are ignored).

    Returns
    -------
    iou_score: float scalar in [0, 1]; 0.0 when the boxes do not overlap
               or when the union has zero area.
    '''
    intersect_x_min = np.max([box1[0], box2[0]])
    intersect_y_min = np.max([box1[1], box2[1]])
    intersect_x_max = np.min([box1[2], box2[2]])
    intersect_y_max = np.min([box1[3], box2[3]])

    # Clamp at zero: for disjoint boxes the raw differences are negative, and
    # multiplying two negatives would report a bogus positive overlap.
    intersect_w = np.maximum(intersect_x_max - intersect_x_min, 0)
    intersect_h = np.maximum(intersect_y_max - intersect_y_min, 0)
    intersect_area = intersect_w * intersect_h

    box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
    box2_area = (box2[2] - box2[0]) * (box2[3] - box2[1])
    union_area = box1_area + box2_area - intersect_area

    # Two degenerate (zero-area) boxes: avoid 0/0 -> NaN.
    if union_area <= 0:
        return 0.0

    iou_score = intersect_area / union_area

    return iou_score





def iou_batch_array(box1, box2):
    '''
    Element-wise intersection-over-union for batches of boxes.

    Parameters
    ----------
    box1, box2: tensors whose last axis holds xmin, ymin, xmax, ymax,
                e.g. shape (batch, n, n, 3, 4).

    Returns
    -------
    iou_score: tensor with the last axis removed, e.g. shape (batch, n, n, 3).
    '''
    intersect_x_min = K.maximum(box1[..., 0], box2[..., 0])
    intersect_y_min = K.maximum(box1[..., 1], box2[..., 1])
    intersect_x_max = K.minimum(box1[..., 2], box2[..., 2])
    intersect_y_max = K.minimum(box1[..., 3], box2[..., 3])

    # Clamp width/height at zero: for disjoint boxes the raw differences are
    # negative, and the product of two negatives would be a bogus positive
    # intersection area.
    intersect_w = intersect_x_max - intersect_x_min
    intersect_h = intersect_y_max - intersect_y_min
    intersect_w = K.maximum(intersect_w, K.zeros_like(intersect_w))
    intersect_h = K.maximum(intersect_h, K.zeros_like(intersect_h))
    intersect_area = intersect_w * intersect_h

    box1_area = (box1[..., 2] - box1[..., 0]) * (box1[..., 3] - box1[..., 1])
    box2_area = (box2[..., 2] - box2[..., 0]) * (box2[..., 3] - box2[..., 1])

    iou_score = intersect_area / (box1_area + box2_area - intersect_area)

    return iou_score




def non_max_suspension(scores, boxes, iou_threshold, max_box):
    '''
    Greedy non-maximum suppression for the boxes of a single class.

    Repeatedly keeps the highest-scoring remaining box and discards every
    other remaining box whose IoU with it is >= ``iou_threshold``.

    Parameters
    ----------
    scores: numpy array of scores for one class, shape (n,) or (n, 1)
    boxes: numpy array of boxes for one class, shape (n, 4),
           columns xmin, ymin, xmax, ymax
    iou_threshold: boxes overlapping a kept box by at least this IoU are dropped
    max_box: TODO, intended cap on the number of kept boxes per class;
             currently not used.

    Returns
    -------
    box_collect: list of kept rows, each a 1-D array
                 (xmin, ymin, xmax, ymax, score), highest score first.
                 Empty list when there are no input boxes.
    '''
    box_score_array = np.concatenate([boxes, scores.reshape((-1, 1))], axis=-1)

    # Sort once by descending score; filtering below preserves this order,
    # so the first remaining row is always the current best.
    remaining = box_score_array[np.argsort(-box_score_array[:, -1])]

    box_collect = []
    while len(remaining) > 0:
        best = remaining[0]
        box_collect.append(best)

        rest = remaining[1:]
        overlaps = np.array([iou(best, candidate) for candidate in rest],
                            dtype=float)
        # Written as "not >=" rather than "<" so a NaN IoU keeps the box.
        remaining = rest[~(overlaps >= iou_threshold)]

    return box_collect

测试部分

见github

参考及致谢:

yolo系列之yolo v3【深度解析】_木盏的博客-CSDN博客_yolo3

GitHub - qqwweee/keras-yolo3: A Keras implementation of YOLOv3 (Tensorflow backend)

评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值