序言:由于本人最近在做目标检测的方向,对YOLO系列目标检测算法仰慕已久,但我发现网上的教程大多比较复杂和工程化(对于新手来说很难对代码进行修改,大佬勿喷),对于刚入门或者想做一些研究的人,看着一堆代码是真的上头,说实话我最不喜欢改别人的代码,为此我想写一份简单版的YOLO v3代码,其目的就是代码极简风格、注释多、方便阅读,我计划分为四节分别发布:
四个方面进行极简代码构建,没有冗余的代码和其他复杂的功能,今天先出网络架构。
本博客只适用于想充分了解YOLO系列算法实现细节的新手和算法研究人员,不适用于工程应用,请不要说YOLO v5都出来了,还搞什么YOLO v3。如果有描述错误欢迎大家留言,我会及时更正,也欢迎大家的批评指正。
(一)YOLO v3 网络架构图
图片引用博客:https://blog.csdn.net/litt1e/article/details/88907542 画得非常好看给作者点赞。
下面将严格按照上述网络架构图进行代码还原,对于刚刚接触目标检测的新手,要充分理解上面的架构图的流程,再结合代码,你要相信一次看不懂,就多看几次,只要你想看懂,你就一定能看懂。
代码注解非常详细,死看就行了。
如果对你能有一些帮助,点个赞不过分叭,点赞是我继续创作的动力。
(二)YOLO v3 网络架构代码详解
# A highlighted block
'''
作者:小小博
时间:2022.3.18
环境:一个睡不着的下午
'''
import torch
from numpy import unique
from torch import nn
import torch.nn.functional as F
# -------------------------------------------------------#
# 图中的DBL模块
# 一个卷积层
# 批量归一化处理
# LeakyReLU 激活函数
# -------------------------------------------------------#
class DBL_Layer(nn.Module):
    """Conv2d -> BatchNorm2d -> LeakyReLU(0.1): the "DBL" unit of the diagram.

    Args:
        in_channels: Number of channels of the input feature map.
        out_channels: Number of channels produced by the convolution.
        kernal_size: Convolution kernel size (spelling kept as-is so that
            existing keyword callers keep working).
        stride: Convolution stride.
        padding: Zero padding applied on each spatial border.
    """

    def __init__(self, in_channels, out_channels, kernal_size, stride, padding):
        super().__init__()
        # The three stages are kept in one Sequential named ``dbl`` so that
        # parameter names stay ``dbl.0.*`` (conv) and ``dbl.1.*`` (batch norm).
        self.dbl = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernal_size, stride, padding),
            nn.BatchNorm2d(out_channels),
            nn.LeakyReLU(0.1),
        )

    def forward(self, x):
        """Apply convolution, batch normalisation and LeakyReLU to ``x``."""
        return self.dbl(x)
# -------------------------------------------------------#
# 图中的块Res_unit模块
# 两个 DBL_Layer层
# 操作先降低通道数一半,再提高通道数一倍 = 输入通道和输出通道不变
# 再做残差连接
# -------------------------------------------------------#
class Res_Layer(nn.Module):
    """Residual unit: two DBL layers plus an identity skip connection.

    The first DBL (1x1) reduces the channel count to ``in_channels // 2``; the
    second DBL (3x3, padding 1) restores it to ``in_channels``. The result is
    added to the input, so the output has the same shape as the input.

    Args:
        in_channels: Channel count of both the input and the output.
    """

    def __init__(self, in_channels):
        super().__init__()
        self.res_unit = nn.Sequential(
            DBL_Layer(in_channels, in_channels // 2, kernal_size=1, stride=1, padding=0),
            DBL_Layer(in_channels // 2, in_channels, kernal_size=3, stride=1, padding=1),
        )

    def forward(self, x):
        """Return ``x`` plus the output of the two-layer bottleneck."""
        return x + self.res_unit(x)
# -------------------------------------------------------#
# DownSample下采样模块
# 功能是通过步长为2的3x3卷积将特征图的尺寸减半 (c_in,h,w) -> (c_out,h/2,w/2)
# -------------------------------------------------------#
class DS_Layer(nn.Module):
    """Downsampling layer: a single 3x3 DBL with stride 2 and padding 1.

    Args:
        in_channels: Number of channels of the input feature map.
        out_channels: Number of channels of the downsampled output.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        # Wrapped in a Sequential named ``ds`` so parameter names stay
        # ``ds.0.dbl.*``.
        self.ds = nn.Sequential(
            DBL_Layer(in_channels, out_channels, kernal_size=3, stride=2, padding=1),
        )

    def forward(self, x):
        """Apply the stride-2 convolution block to ``x``."""
        return self.ds(x)
# -------------------------------------------------------#
# UpSample上采样模块
# 最近邻上采样方式
# -------------------------------------------------------#
class US_Layer(nn.Module):
    """Parameter-free upsampling layer: nearest-neighbour, scale factor 2."""

    def __init__(self):
        super().__init__()

    def forward(self, x):
        """Return ``x`` upsampled by 2 with nearest-neighbour interpolation."""
        return F.interpolate(x, scale_factor=2, mode='nearest')
# -------------------------------------------------------#
# DBL5模块
# 使用5个DBL合成一个DBL5模块
# -------------------------------------------------------#
class DBL5_Layer(nn.Module):
    """Five chained DBL layers alternating 1x1 and 3x3 convolutions.

    Channel flow: ``in -> out -> in -> out -> in -> out``. The 1x1 layers map
    ``in_channel`` to ``out_channel`` and the 3x3 layers (padding 1) map back,
    so the block ends with ``out_channel`` channels and leaves the spatial
    size unchanged.

    Args:
        in_channel: Channel count of the input feature map.
        out_channel: Channel count of the output feature map.
    """

    def __init__(self, in_channel, out_channel):
        super().__init__()
        self.dbl5 = nn.Sequential(
            DBL_Layer(in_channel, out_channel, kernal_size=1, stride=1, padding=0),
            DBL_Layer(out_channel, in_channel, kernal_size=3, stride=1, padding=1),
            DBL_Layer(in_channel, out_channel, kernal_size=1, stride=1, padding=0),
            DBL_Layer(out_channel, in_channel, kernal_size=3, stride=1, padding=1),
            DBL_Layer(in_channel, out_channel, kernal_size=1, stride=1, padding=0),
        )

    def forward(self, x):
        """Run ``x`` through the five DBL layers in order."""
        return self.dbl5(x)
# -------------------------------------------------------#
# Yolo v3整体网络
# 主干网络 Darknet53
# FPN特征金字塔融合结构
# 多尺度检测头(大、中、小)
# 根据网络架构图片可看到
# 五次下采样 两次上采样
# -------------------------------------------------------#
class YOLOV3(nn.Module):
    """YOLOv3 network: convolutional backbone plus three detection heads.

    The backbone downsamples five times (stride 2 each). Two upsample/concat
    steps then merge the deepest features back into the two shallower scales.
    Every head outputs ``3 * (num_calss + 5)`` channels: 3 anchors per cell,
    each carrying ``(x, y, w, h, objectness)`` plus one score per class.

    Shapes in the comments below assume a ``(batch_size, 3, 416, 416)`` input.
    Other sizes work as long as each upsampled map matches the spatial size of
    the backbone map it is concatenated with (e.g. multiples of 32).

    Args:
        num_calss: Number of object classes (spelling kept as-is so existing
            keyword callers and the ``num_calss`` attribute keep working).
    """

    def __init__(self, num_calss):
        super().__init__()

        # ------------------------- backbone ------------------------------ #
        # Stage ending at stride 8: (B, 3, 416, 416) -> (B, 256, 52, 52).
        # Later concatenated into the 52x52 detection branch.
        self.head52_1 = nn.Sequential(
            DBL_Layer(3, 32, 3, 1, 1),      # (B, 3, 416, 416)   -> (B, 32, 416, 416)
            DS_Layer(32, 64),               # 1st downsample     -> (B, 64, 208, 208)
            *self._res_stack(64, 1),        # shape unchanged
            DS_Layer(64, 128),              # 2nd downsample     -> (B, 128, 104, 104)
            *self._res_stack(128, 2),       # shape unchanged
            DS_Layer(128, 256),             # 3rd downsample     -> (B, 256, 52, 52)
            *self._res_stack(256, 8),       # shape unchanged
        )
        # Stage ending at stride 16: (B, 256, 52, 52) -> (B, 512, 26, 26).
        # Later concatenated into the 26x26 detection branch.
        self.head26_1 = nn.Sequential(
            DS_Layer(256, 512),             # 4th downsample     -> (B, 512, 26, 26)
            *self._res_stack(512, 8),       # shape unchanged
        )
        # Stage ending at stride 32: (B, 512, 26, 26) -> (B, 1024, 13, 13).
        self.head13_1 = nn.Sequential(
            DS_Layer(512, 1024),            # 5th downsample     -> (B, 1024, 13, 13)
            *self._res_stack(1024, 4),      # shape unchanged
        )
        # The three stages above hold the backbone's 52 convolution layers;
        # everything below builds the detection heads.

        self.num_calss = num_calss  # number of classes in the training set

        # --------------------- DBL5 blocks per scale --------------------- #
        self.head13_2 = nn.Sequential(
            DBL5_Layer(1024, 512)           # (B, 1024, 13, 13) -> (B, 512, 13, 13)
        )
        self.head26_2 = nn.Sequential(
            DBL5_Layer(768, 256)            # (B, 768, 26, 26)  -> (B, 256, 26, 26)
        )
        self.head52_2 = nn.Sequential(
            DBL5_Layer(384, 128)            # (B, 384, 52, 52)  -> (B, 128, 52, 52)
        )

        # ------------------- final prediction layers --------------------- #
        # One 3x3 DBL followed by a plain 1x1 convolution (no batch norm or
        # activation) that produces the raw predictions. With 80 classes the
        # channel count is 255 = 3 * (5 + 80): 3 anchors, 5 values
        # (x, y, w, h, objectness) and 80 class scores per anchor.
        self.head13_3 = nn.Sequential(
            DBL_Layer(512, 1024, 3, 1, 1),              # (B, 512, 13, 13)  -> (B, 1024, 13, 13)
            nn.Conv2d(1024, 3 * (num_calss + 5), 1, 1, 0)  # -> (B, 3*(num_calss+5), 13, 13)
        )
        self.head26_3 = nn.Sequential(
            DBL_Layer(256, 512, 3, 1, 1),               # (B, 256, 26, 26)  -> (B, 512, 26, 26)
            nn.Conv2d(512, 3 * (num_calss + 5), 1, 1, 0)   # -> (B, 3*(num_calss+5), 26, 26)
        )
        self.head52_3 = nn.Sequential(
            DBL_Layer(128, 256, 3, 1, 1),               # (B, 128, 52, 52)  -> (B, 256, 52, 52)
            nn.Conv2d(256, 3 * (num_calss + 5), 1, 1, 0)   # -> (B, 3*(num_calss+5), 52, 52)
        )

        # ------------------------ upsample paths ------------------------- #
        # Takes the head13_2 output up to the 26x26 scale.
        self.head26_up = nn.Sequential(
            DBL_Layer(512, 256, 1, 1, 0),   # (B, 512, 13, 13) -> (B, 256, 13, 13)
            US_Layer()                      # (B, 256, 13, 13) -> (B, 256, 26, 26)
        )
        # Takes the head26_2 output up to the 52x52 scale.
        self.head52_up = nn.Sequential(
            DBL_Layer(256, 128, 1, 1, 0),   # (B, 256, 26, 26) -> (B, 128, 26, 26)
            US_Layer()                      # (B, 128, 26, 26) -> (B, 128, 52, 52)
        )

    @staticmethod
    def _res_stack(channels, count):
        """Return ``count`` freshly built ``Res_Layer(channels)`` modules."""
        return [Res_Layer(channels) for _ in range(count)]

    def forward(self, x):
        """Compute the raw predictions of the three detection heads.

        Args:
            x: Input image batch, ``(batch_size, 3, H, W)``.

        Returns:
            Tuple ``(head13_3, head26_3, head52_3)``; for a 416x416 input the
            shapes are ``(B, C, 13, 13)``, ``(B, C, 26, 26)`` and
            ``(B, C, 52, 52)`` with ``C = 3 * (num_calss + 5)``.
        """
        # Backbone features at three depths.
        head52_1 = self.head52_1(x)          # (B, 256, 52, 52)
        head26_1 = self.head26_1(head52_1)   # (B, 512, 26, 26)
        head13_1 = self.head13_1(head26_1)   # (B, 1024, 13, 13)

        # 13x13 head: deepest layer, largest receptive field, suited to
        # large objects.
        head13_2 = self.head13_2(head13_1)   # (B, 512, 13, 13); also feeds the 26x26 branch
        head13_3 = self.head13_3(head13_2)   # (B, 3*(num_calss+5), 13, 13)

        # 26x26 head: mid-level features, suited to medium objects.
        # First upsample + concat along the channel dimension: 256 + 512 = 768.
        head26_up = self.head26_up(head13_2)                      # (B, 256, 26, 26)
        concat_13_26 = torch.cat((head26_up, head26_1), dim=1)    # (B, 768, 26, 26)
        head26_2 = self.head26_2(concat_13_26)   # (B, 256, 26, 26); also feeds the 52x52 branch
        head26_3 = self.head26_3(head26_2)       # (B, 3*(num_calss+5), 26, 26)

        # 52x52 head: shallow features, smallest receptive field, suited to
        # small objects.
        # Second upsample + concat along the channel dimension: 128 + 256 = 384.
        head52_up = self.head52_up(head26_2)                      # (B, 128, 52, 52)
        concat_26_52 = torch.cat((head52_up, head52_1), dim=1)    # (B, 384, 52, 52)
        head52_2 = self.head52_2(concat_26_52)   # (B, 128, 52, 52)
        head52_3 = self.head52_3(head52_2)       # (B, 3*(num_calss+5), 52, 52)

        return head13_3, head26_3, head52_3
# -------------------------------------------------------#
# 网络测试
# -------------------------------------------------------#
img = torch.randn(1, 3, 416, 416)  # simulated input image batch
net = YOLOV3(80)  # number of classes
# Only the output shapes are inspected, so no autograd graph is needed.
with torch.no_grad():
    for head_output in net(img):  # one tensor per detection head
        print(head_output.shape)
# -------------------------------------------------------#
# 结果
# torch.Size([1, 255, 13, 13])
# torch.Size([1, 255, 26, 26])
# torch.Size([1, 255, 52, 52])
# -------------------------------------------------------#