在修改 YOLOv8 模型结构时,弄清楚模型的配置文件是如何被解析的十分重要,下面将对负责解析配置的 parse_model 函数进行讲解。
def parse_model(d, ch, verbose=True):  # model_dict, input_channels(3)
    """Build a YOLO network from a parsed model.yaml dictionary.

    Args:
        d: Model configuration dict. The keys ``nc`` (number of classes),
            ``depth_multiple``, ``width_multiple``, ``backbone`` and ``head``
            are required; ``activation`` is optional. Every ``backbone`` /
            ``head`` entry is a ``[from, number, module, args]`` row.
        ch: Number of input channels fed to the first layer (e.g. 3).
        verbose: When true, log one table row per constructed layer.

    Returns:
        A tuple ``(model, save)`` where ``model`` is an ``nn.Sequential`` of
        all layers and ``save`` is the sorted list of layer indices that some
        layer references through a ``from`` value other than -1.

    Side effects:
        If ``d`` contains a truthy ``activation``, ``Conv.default_act`` is
        overwritten with the evaluated expression.

    NOTE(security): module names, string arguments and the activation are
    passed to ``eval``; only parse configuration files from a trusted source.
    """
    if verbose:
        LOGGER.info(f"\n{'':>3}{'from':>20}{'n':>3}{'params':>10} {'module':<45}{'arguments':<30}")
    # Class count, depth (repeat) multiplier, width (channel) multiplier and the
    # optional activation; d.get() yields None when 'activation' is absent.
    nc, gd, gw, act = d['nc'], d['depth_multiple'], d['width_multiple'], d.get('activation')
    if act:
        # eval() turns the configured string into a Python object.
        Conv.default_act = eval(act)  # redefine default activation, i.e. Conv.default_act = nn.SiLU()
        if verbose:
            LOGGER.info(f"{colorstr('activation:')} {act}")  # print
    # ch starts as [input channels]; after layer 0 it is reset so that ch[k]
    # is the output channel count of layer k.
    ch = [ch]
    layers, save, c2 = [], [], ch[-1]  # layers, savelist, ch out
    # Walk the backbone rows followed by the head rows.
    for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']):  # from, number, module, args
        # Work on a copy so the caller's config dict is not modified; otherwise
        # parsing the same dict twice would, e.g., append the Detect channel
        # list a second time.
        args = list(args)
        m = eval(m) if isinstance(m, str) else m  # eval strings
        for j, a in enumerate(args):
            # Strings that name something in scope (e.g. 'nc') or are Python
            # literals are replaced by their value; a string that raises
            # NameError (e.g. a plain word) is left unchanged.
            with contextlib.suppress(NameError):
                args[j] = eval(a) if isinstance(a, str) else a  # eval strings

        # Scale the repeat count only when it is greater than 1; never below 1.
        n = n_ = max(round(n * gd), 1) if n > 1 else n  # depth gain
        if m in {
                Classify, Conv, ConvTranspose, GhostConv, Bottleneck, GhostBottleneck, SPP, SPPF, DWConv, Focus,
                BottleneckCSP, C1, C2, C2f, C3, C3TR, C3Ghost, nn.ConvTranspose2d, DWConvTranspose2d, C3x}:
            # c1: input channels taken from the 'from' layer; c2: requested output channels.
            c1, c2 = ch[f], args[0]
            if c2 != nc:  # if c2 not equal to number of classes (i.e. for Classify() output)
                # Apply the width multiplier; make_divisible is called with a
                # divisor of 8 (helper defined outside this function).
                c2 = make_divisible(c2 * gw, 8)

            # Final constructor arguments: (c1, c2, <remaining configured args>).
            args = [c1, c2, *args[1:]]
            if m in {BottleneckCSP, C1, C2, C2f, C3, C3TR, C3Ghost, C3x}:
                # These modules receive the repeat count as an argument, so the
                # module itself is instantiated only once.
                args.insert(2, n)  # number of repeats
                n = 1
        elif m is nn.BatchNorm2d:
            args = [ch[f]]
        elif m is Concat:
            # Output channels are the sum of all referenced layers' channels.
            c2 = sum(ch[x] for x in f)
        elif m in {Detect, Segment}:
            # Heads get the list of channel counts of every referenced layer.
            args.append([ch[x] for x in f])
            if m is Segment:
                args[2] = make_divisible(args[2] * gw, 8)
        else:
            c2 = ch[f]

        # Repeat counts above 1 are wrapped into a single nn.Sequential.
        m_ = nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args)  # module
        # str(m) looks like "<class 'pkg.Name'>"; slice off the wrapper.
        t = str(m)[8:-2].replace('__main__.', '')  # module type
        # Store the parameter count on the layer instance, not on the module
        # class: a class attribute would be shared (and overwritten) by every
        # layer built from the same class.
        m_.np = sum(x.numel() for x in m_.parameters())  # number params
        m_.i, m_.f, m_.type = i, f, t  # attach index, 'from' index, type
        if verbose:
            LOGGER.info(f'{i:>3}{str(f):>20}{n_:>3}{m_.np:10.0f} {t:<45}{str(args):<30}')  # print
        # Record every referenced layer other than -1; x % i maps negative
        # relative indices to absolute layer indices.
        save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1)  # append to savelist
        layers.append(m_)
        if i == 0:
            # Drop the network input channel entry so ch is indexed by layer.
            ch = []
        ch.append(c2)
    return nn.Sequential(*layers), sorted(save)