yolo通常是通过配置文件来构建模型的。
例如:yolov9的模型配置文件yolov9.yaml内容如下:
# YOLOv9 model definition: each list entry below describes one layer.
# parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple (scales repeat counts greater than 1)
width_multiple: 1.0 # layer channel multiple (scales output channels)
# anchors
anchors: 3 # an integer here is taken directly as the number of anchors
# YOLOv9 backbone
backbone:
# [from, repeats, module, args]
# from: index of the input layer(s); -1 means the previous layer
- [-1, 1, Silence, []] # 0 # takes no args; layer 26 reads this layer's output (from=0)
- [-1, 1, Conv, [64, 3, 2]] # 1-P1/2 # conv down
- [-1, 1, Conv, [128, 3, 2]] # 2-P2/4 # conv down
- [-1, 1, RepNCSPELAN4, [256, 128, 64, 1]] # 3 # elan-1 block
- [-1, 1, Conv, [256, 3, 2]] # 4-P3/8 # conv down
- [-1, 1, RepNCSPELAN4, [512, 256, 128, 1]] # 5 # elan-2 block
- [-1, 1, Conv, [512, 3, 2]] # 6-P4/16 # conv down
- [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]] # 7 # elan-2 block
- [-1, 1, Conv, [512, 3, 2]] # 8-P5/32 # conv down
- [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]] # 9 # elan-2 block
# YOLOv9 head
head:
# [from, repeats, module, args]
- [-1, 1, SPPELAN, [512, 256]] # 10 # elan-spp block
- [-1, 1, nn.Upsample, [None, 2, 'nearest']] # 11 # up-concat merge
- [[-1, 7], 1, Concat, [1]] # 12-P4 # cat backbone (layer 7)
- [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]] # 13 # elan-2 block
- [-1, 1, nn.Upsample, [None, 2, 'nearest']] # 14 # up-concat merge
- [[-1, 5], 1, Concat, [1]] # 15-P3 # cat backbone (layer 5)
- [-1, 1, RepNCSPELAN4, [256, 256, 128, 1]] # 16 (P3/8-small) # elan-2 block
- [-1, 1, Conv, [256, 3, 2]] # 17 # conv-down merge
- [[-1, 13], 1, Concat, [1]] # 18-P4 # cat head (layer 13)
- [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]] # 19 (P4/16-medium) # elan-2 block
- [-1, 1, Conv, [512, 3, 2]] # 20 # conv-down merge
- [[-1, 10], 1, Concat, [1]] # 21 # cat head P5 (layer 10)
- [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]] # 22 (P5/32-large) # elan-2 block
# layers 23-37 form a second branch fed from backbone layers 0, 5, 7 and 9
- [5, 1, CBLinear, [[256]]] # 23 # routing
- [7, 1, CBLinear, [[256, 512]]] # 24 # routing
- [9, 1, CBLinear, [[256, 512, 512]]] # 25 # routing
- [0, 1, Conv, [64, 3, 2]] # 26-P1/2 # conv down
- [-1, 1, Conv, [128, 3, 2]] # 27-P2/4 # conv down
- [-1, 1, RepNCSPELAN4, [256, 128, 64, 1]] # 28 # elan-1 block
- [-1, 1, Conv, [256, 3, 2]] # 29-P3/8 # conv down fuse
- [[23, 24, 25, -1], 1, CBFuse, [[0, 0, 0]]] # 30
- [-1, 1, RepNCSPELAN4, [512, 256, 128, 1]] # 31 # elan-2 block
- [-1, 1, Conv, [512, 3, 2]] # 32-P4/16 # conv down fuse
- [[24, 25, -1], 1, CBFuse, [[1, 1]]] # 33
- [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]] # 34 # elan-2 block
- [-1, 1, Conv, [512, 3, 2]] # 35-P5/32 # conv down fuse
- [[25, -1], 1, CBFuse, [[2]]] # 36
- [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]] # 37 # elan-2 block
- [[31, 34, 37, 16, 19, 22], 1, DualDDetect, [nc]] # 38 # DualDDetect(A3, A4, A5, P3, P4, P5) # detect
根据该配置文件构建模型时,解析配置的代码如下:
from kit import LOGGER
from kit.utils.torch_utils import make_divisible
from kit.utils.commom import colorstr
# 导入通用的层
from .layer import *
# 导入特殊的层,通常是模型的最后一层
from .yolo import (Detect, DualDetect, TripleDetect, DDetect, DualDDetect, TripleDDetect, Segment)
class ParseModel():
    """Build a sequential model from a YOLO-style configuration dict.

    The configuration lists layers under ``'backbone'`` and ``'head'`` as
    ``[from, repeats, module, args]`` entries. ``parse()`` walks them in
    order, tracks the output channels of every layer, and instantiates
    each module with its input channels filled in.
    """

    def __init__(self, cfg: dict, ich: int = 3) -> None:
        """
        cfg: model configuration; reads 'anchors', 'nc', 'depth_multiple',
             'width_multiple' and, optionally, 'activation'
             ('backbone' and 'head' are read later by parse())
        ich: number of input channels
        """
        self.cfg = cfg
        self.ich = ich
        self.anchors = cfg['anchors']
        self.nc = cfg['nc']
        self.gd = cfg['depth_multiple']
        self.gw = cfg['width_multiple']
        self.act = cfg.get('activation')
        # redefine default activation, i.e. Conv.default_act = nn.SiLU()
        if self.act:
            # NOTE(security): eval() executes arbitrary code taken from the
            # configuration; only load configuration files you trust.
            Conv.default_act = eval(self.act)
            LOGGER.info(f"{colorstr('activation:')} {self.act}")
        # number of anchors: a list gives (x, y) pairs per level, an int is used as-is
        self.na = (len(self.anchors[0]) // 2) if isinstance(self.anchors, list) else self.anchors
        # number of outputs = anchors * (classes + 5)
        self.no = self.na * (self.nc + 5)

    @staticmethod
    def _resolve_args(args, nc):
        """Return a copy of *args* with the strings "None" and "nc" resolved.

        A copy is returned so that parsing never mutates the caller's
        configuration; other strings (e.g. 'nearest') are kept unchanged.
        """
        resolved = list(args)
        for j, arg in enumerate(resolved):
            if not isinstance(arg, str):
                continue
            if arg == "None":
                resolved[j] = None
            elif arg == "nc":
                resolved[j] = nc
        return resolved

    def parse(self):
        """Instantiate every configured layer.

        Returns a tuple ``(model, save)``: ``model`` is an ``nn.Sequential``
        of all layers in configuration order, and ``save`` is the sorted
        list of layer indices that some later layer names explicitly in
        its ``from`` field.
        """
        layers, save = [], []  # layers, savelist
        LOGGER.info(f"\n{'index':>3}{'from':>18}{'n':>3}{'params':>10} {'module':<40}{'arguments':<30}")
        # output channels of each layer; seeded with the input channels so
        # that layer 0 can look up ichannels[-1]
        ichannels = [self.ich]
        # from, number, module, args
        for i, (f, number, module, args) in enumerate(self.cfg['backbone'] + self.cfg['head']):
            # Turn the module name into the class object.
            # NOTE(security): eval() executes arbitrary code taken from the
            # configuration; only load configuration files you trust.
            module = eval(module) if isinstance(module, str) else module
            # Resolve placeholder strings on a copy of the argument list.
            args = self._resolve_args(args, self.nc)
            # depth gain: scale the number of stacked repeats
            number = max(round(number * self.gd), 1) if number > 1 else number
            # work out input channels c1 and output channels c2 per module kind
            if module in {Conv, AConv, ConvTranspose, Bottleneck, SPP, SPPF, DWConv, BottleneckCSP,
                          nn.ConvTranspose2d, DWConvTranspose2d, SPPCSPC, ADown, RepNCSPELAN4, SPPELAN}:
                c1, c2 = ichannels[f], args[0]
                if c2 != self.no:  # if not output
                    c2 = make_divisible(c2 * self.gw, 8)
                args = [c1, c2, *args[1:]]
                # These modules take the repeat count as an argument instead
                # of being stacked, so the check must be on the module class.
                if module in {BottleneckCSP, SPPCSPC}:
                    args.insert(2, number)  # number of repeats
                    number = 1
            elif module is nn.BatchNorm2d:
                args = [ichannels[f]]
                # NOTE(review): c2 is not assigned in this branch, so the
                # value from the previous layer is recorded - confirm intended.
            elif module is Concat:
                c2 = sum(ichannels[x] for x in f)
            elif module is Shortcut:
                c2 = ichannels[f[0]]
            elif module is ReOrg:
                c2 = ichannels[f] * 4
            elif module is CBLinear:
                c2 = args[0]  # a list of channel counts, recorded as-is
                c1 = ichannels[f]
                args = [c1, c2, *args[1:]]
            elif module is CBFuse:
                c2 = ichannels[f[-1]]
            # TODO: channel, gw, gd
            elif module in {Detect, DualDetect, TripleDetect, DDetect, DualDDetect, TripleDDetect, Segment}:
                # pass the channel count of every input layer to the head;
                # c2 is left at the previous layer's value here
                args.append([ichannels[x] for x in f])
                if module in {Segment}:
                    args[2] = make_divisible(args[2] * self.gw, 8)
            elif module is Contract:
                c2 = ichannels[f] * args[0] ** 2
            elif module is Expand:
                c2 = ichannels[f] // args[0] ** 2
            else:
                c2 = ichannels[f]

            # instantiate the layer, stacking it when repeated
            m_ = nn.Sequential(*(module(*args) for _ in range(number))) if number > 1 else module(*args)
            t = str(module)[8:-2].replace('__main__.', '')  # module type
            n_params = sum(x.numel() for x in m_.parameters())  # number of parameters
            # attach index, 'from' index, type, number of parameters
            m_.i, m_.f, m_.type, m_.np = i, f, t, n_params
            LOGGER.info(f'{i:>3}{str(f):>18}{number:>3}{n_params:10.0f} {t:<40}{str(args):<30}')
            # record every non -1 source; x % i turns negative indices into absolute ones
            save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1)
            layers.append(m_)
            if i == 0:
                # drop the input-channel seed so ichannels[k] is layer k's output
                ichannels = []
            ichannels.append(c2)
        # chain the layers one after another with nn.Sequential
        return nn.Sequential(*layers), sorted(save)
如果有用,请点个三连呗:点赞、关注、收藏。
你的鼓励是我最大的动力。