模型初始化
模型初始化位于 centerfusion/src/main.py 第42行左右,debug 时会进入 create_model 函数:
def create_model(arch, head, head_conv, opt=None):
    """Instantiate the backbone network selected by ``arch``.

    ``arch`` is split at its first underscore: the part before it is the key
    looked up in ``_network_factory`` and the part after it is parsed as the
    integer layer count (``0`` when ``arch`` has no underscore). For example
    ``'dla_34'`` looks up ``'dla'`` and passes ``num_layers=34``.

    Args:
        arch: Backbone identifier, e.g. ``'dla_34'``.
        head: Mapping from head name to its number of output channels,
            forwarded to the model class as ``heads``. The original notes
            give the following example (see also
            https://blog.csdn.net/gui_hai/article/details/129120671):

                {'hm': 10,            heatmap; 10 classes, hence 10 channels
                 'reg': 2,            sub-pixel offset of the box centre (x, y)
                 'wh': 2,             width and height of the 2D box
                 'dep': 1,            depth of the box
                 'rot': 8,            rotation angle (the original notes say
                                      the meaning of the 8 values is unclear)
                 'dim': 3,            length, width and height of the 3D box
                 'amodel_offset': 2,  sub-pixel offset relative to the actual
                                      centre point
                 'dep_sec': 1,
                 'rot_sec': 8,
                 'nuscenes_att': 8,   box state (moving, stationary, ...);
                                      per the notes, 8 classes defined in the
                                      nuScenes conversion script
                 'velocity': 3}       velocity along x, y and z
        head_conv: Per-head convolution configuration, forwarded to the model
            class as ``head_convs``.
        opt: Options object forwarded unchanged to the model class.

    Returns:
        The model instance built by the selected class.

    Raises:
        KeyError: If the name part of ``arch`` is not in ``_network_factory``.
        ValueError: If the part after the underscore is not an integer.
    """
    base_name, underscore, depth = arch.partition('_')
    num_layers = int(depth) if underscore else 0
    # Pick the backbone class that is about to be initialised.
    model_class = _network_factory[base_name]
    return model_class(num_layers, heads=head, head_convs=head_conv, opt=opt)
这里首先讲述模型的初始化部分,所以涉及到的模型类只放了 __init__ 函数。
定义 model 的时候,首先会进入到 DLASeg 类中。这个类继承了 BaseModel 类,所以在运行 super 函数的时候会调用 BaseModel 类的 __init__。下面先介绍 BaseModel 类,之后再对 DLASeg 类进行介绍。
class BaseModel(nn.Module):
    """Base network that owns one prediction branch per entry of ``heads``.

    Every quantity in ``heads`` that has to be classified or regressed gets
    its own small convolutional branch, registered on the module under the
    head's name. The original notes give this example for the ``hm`` head:

        Sequential(
          (0): Conv2d(64, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
          (1): ReLU(inplace=True)
          (2): Conv2d(256, 10, kernel_size=(1, 1), stride=(1, 1)))
    """

    def __init__(self, heads, head_convs, num_stacks, last_channel, opt=None):
        """Create and register the head branches.

        Args:
            heads: Mapping from head name to its number of output channels.
            head_convs: Mapping from head name to a sequence of hidden channel
                widths. An empty sequence yields a single 1x1 convolution;
                otherwise one convolution per width is created, each followed
                by a ReLU, and a final 1x1 output convolution.
            num_stacks: Stored as ``self.num_stacks``.
            last_channel: Number of input channels fed to a primary head.
            opt: Options object. ``head_kernel``, ``secondary_heads``,
                ``pc_feat_lvl`` and ``prior_bias`` are read from it. Although
                the default is ``None``, ``opt.secondary_heads`` is read
                unconditionally, so ``None`` raises ``AttributeError``.
        """
        super(BaseModel, self).__init__()
        self.opt = opt
        if opt is not None and opt.head_kernel != 3:
            print('Using head kernel:', opt.head_kernel)
            head_kernel = opt.head_kernel
        else:
            head_kernel = 3
        self.num_stacks = num_stacks
        self.heads = heads
        self.secondary_heads = opt.secondary_heads

        last_channels = {head: last_channel for head in heads}
        for head in self.secondary_heads:
            # Secondary heads (the second-stage heads of the paper) consume the
            # image features concatenated with the radar features, so their
            # input width is the image width plus one channel per entry of
            # opt.pc_feat_lvl.
            last_channels[head] = last_channel + len(opt.pc_feat_lvl)

        # Build the convolutional branch of every head.
        for head in self.heads:
            classes = self.heads[head]
            head_conv = head_convs[head]
            if len(head_conv) > 0:
                # Creation order (output conv, first conv, remaining convs) is
                # kept as before so that seeded weight initialisation does not
                # change.
                out = nn.Conv2d(head_conv[-1], classes,
                                kernel_size=1, stride=1, padding=0, bias=True)
                conv = nn.Conv2d(last_channels[head], head_conv[0],
                                 kernel_size=head_kernel,
                                 padding=head_kernel // 2, bias=True)
                convs = [conv]
                for k in range(1, len(head_conv)):
                    convs.append(nn.Conv2d(head_conv[k - 1], head_conv[k],
                                           kernel_size=1, bias=True))
                # Assemble conv -> ReLU pairs followed by the output conv.
                # Building from a list supports any number of hidden
                # convolutions; the previous hand-written cases covered only
                # 1-4 and left `fc` unset (or stale from the previous head)
                # beyond that.
                layers = []
                for hidden in convs:
                    layers.append(hidden)
                    layers.append(nn.ReLU(inplace=True))
                layers.append(out)
                fc = nn.Sequential(*layers)
                out_layer = out
            else:
                fc = nn.Conv2d(last_channels[head], classes,
                               kernel_size=1, stride=1, padding=0, bias=True)
                out_layer = fc

            if 'hm' in head:
                # Heads whose name contains 'hm' start with the configured
                # prior bias on their output convolution.
                out_layer.bias.data.fill_(opt.prior_bias)
            else:
                fill_fc_weights(fc)

            # Register the branch as an attribute named after the head. With
            # head == 'hm' and the example above this amounts to
            #     self.hm = Sequential((0), (1), (2))
            setattr(self, head, fc)
class DLASeg(BaseModel):
    """DLA-based network; this excerpt shows the first part of its constructor."""

    def __init__(self, num_layers, heads, head_convs, opt):
        """Initialise the heads through ``BaseModel`` and create the backbone.

        Args:
            num_layers: Layer count of the DLA variant; also selects the
                module-level factory named ``dla<num_layers>``.
            heads: Forwarded to ``BaseModel``.
            head_convs: Forwarded to ``BaseModel``.
            opt: Options object; ``dla_node`` and ``load_model`` are read here.
        """
        # Head input width: 64 for the 34-layer variant, 128 for any other.
        last_channel = 64 if num_layers == 34 else 128
        super(DLASeg, self).__init__(
            heads, head_convs, 1, last_channel, opt=opt)

        down_ratio = 4
        self.opt = opt
        self.node_type = DLA_NODE[opt.dla_node]
        print('Using node type:', self.node_type)
        self.first_level = int(np.log2(down_ratio))  # log2(4) -> 2
        self.last_level = 5

        # globals() returns this module's namespace (variables, functions and
        # classes) as a dict, so the backbone factory can be fetched by name;
        # with num_layers == 34 this is `dla34`. The pretrained weights are
        # requested only when no checkpoint path is given in opt.load_model.
        backbone_factory = globals()['dla{}'.format(num_layers)]
        self.base = backbone_factory(
            pretrained=(opt.load_model == ''), opt=opt)
        # The constructor continues after the dla34 listing in this document.
def dla34(pretrained=True, **kwargs):  # DLA-34
    """Build the DLA-34 backbone, optionally loading ImageNet weights.

    Background on the DLA architecture:
    https://blog.csdn.net/oYeZhou/article/details/114964836

    According to the original notes, the stock CenterNet model was not
    trained for the velocity and attribute heads, so the CenterFusion author
    added those two heads and trained for 30 epochs to obtain the checkpoint
    used as the pretrained model; in that setup the ImageNet weights are not
    loaded here.

    Args:
        pretrained: When true, load the ``dla34`` ImageNet weights; otherwise
            only print a warning.
        **kwargs: Forwarded to the ``DLA`` constructor.

    Returns:
        The constructed ``DLA`` model.
    """
    # Presumably per-level block counts and channel widths; see the DLA class.
    levels = [1, 1, 1, 2, 2, 1]
    channels = [16, 32, 64, 128, 256, 512]
    model = DLA(levels, channels, block=BasicBlock, **kwargs)
    if not pretrained:
        print('Warning: No ImageNet pretrain!!')
        return model
    model.load_pretrained_model(
        data='imagenet', name='dla34', hash='ba72cf86')
    return model
DLASeg 的 __init__ 代码继续(接上文 self.base 的创建之后):
# Continuation of DLASeg.__init__: build the two upsampling modules on top of
# the backbone.
channels = self.base.channels
# Only the levels from first_level onwards are passed to DLAUp.
used_channels = channels[self.first_level:]
scales = [2 ** i for i in range(len(used_channels))]
self.dla_up = DLAUp(
    self.first_level, used_channels, scales,
    node_type=self.node_type)

out_channel = channels[self.first_level]
ida_level_count = self.last_level - self.first_level
self.ida_up = IDAUp(
    out_channel, channels[self.first_level:self.last_level],
    [2 ** i for i in range(ida_level_count)],
    node_type=self.node_type)
# DLAUp and IDAUp are described in the DLA article linked in the dla34 notes.
# This completes the construction of the backbone network.
接下来介绍模型的执行部分,即 forward 函数。
首先会执行 ModelWithLoss 类中的 forward 函数,大约在 src\lib\trainer.py 的110行:
class ModelWithLoss(torch.nn.Module):
    """Bundles a network with its loss so both run in one forward call."""

    def __init__(self, model, loss, opt):
        """Store the network, the loss callable and the options object."""
        super(ModelWithLoss, self).__init__()
        self.opt = opt
        self.model = model
        self.loss = loss

    def forward(self, batch, phase):
        """Run the network on one batch.

        Args:
            batch: Mapping with at least ``'image'`` and ``'calib'``;
                ``'pc_dep'`` and ``'pc_hm'`` are optional.
            phase: Not used in the part of the method shown here.
        """
        # Radar inputs are optional and default to None. Per the original
        # notes, pc_dep holds the radar features generated from pillars and
        # pc_hm is the radar heatmap derived from pc_dep by the frustum step.
        radar_depth = batch.get('pc_dep')
        radar_heatmap = batch.get('pc_hm')
        # Drop dimension 0 of the calibration tensor when it has size 1.
        calib = batch['calib'].squeeze(0)

        ## run the first stage
        image = batch['image']
        outputs = self.model(
            image, pc_hm=radar_heatmap, pc_dep=radar_depth, calib=calib)
        # forward continues: the loss computation and the return statement are
        # listed later in this document.
模型首先会进入 BaseModel 类的 forward 函数:src\lib\model\networks\base_model.py 89行左右
class BaseModel(nn.Module):
    """Forward-pass excerpt of the base model (first and second stage heads)."""

    def __init__(self, heads, head_convs, num_stacks, last_channel, opt=None):
        # Constructor body omitted in this excerpt.
        pass

    def img2feats(self, x):
        """Extract the image feature map that the heads consume.

        In the real code this method is implemented by DLASeg, not BaseModel;
        it is reproduced here only to make the walkthrough easier to follow.

        Args:
            x: Input image batch.

        Returns:
            A one-element list holding the last entry of the upsampled
            feature list.
        """
        # Per the original notes: the DLA-34 backbone yields features at six
        # scales, and dla_up upsamples them to the x2/x4/x8/x16 scales.
        x = self.base(x)
        x = self.dla_up(x)
        y = [x[i] for i in range(self.last_level - self.first_level)]
        # The result of ida_up is not used; y is read afterwards, so ida_up
        # presumably updates the list in place.
        self.ida_up(y, 0, len(y))
        return [y[-1]]

    def forward(self, x, pc_hm=None, pc_dep=None, calib=None):
        """Run the primary heads and, with radar enabled, the secondary heads.

        Args:
            x: Input image batch.
            pc_hm: Radar heatmap. Used as given in training mode; recomputed
                from ``pc_dep`` in evaluation mode.
            pc_dep: Radar depth features. With ``opt.disable_frustum`` and
                ``opt.normalize_depth`` in evaluation mode this tensor is
                modified in place.
            calib: Calibration data forwarded to ``generate_pc_hm``.

        Returns:
            A list with one dict per stack, mapping head name to its output
            (plus ``'pc_hm'`` when ``opt.pointcloud`` is set).
        """
        ## extract features from image
        # Dispatches to DLASeg.img2feats in the real code, since DLASeg
        # inherits from BaseModel and BaseModel itself does not implement it.
        feats = self.img2feats(x)
        out = []

        for s in range(self.num_stacks):
            z = {}

            ## Run the first stage heads
            for head in self.heads:
                if head not in self.secondary_heads:
                    # Look up the branch registered under the head's name;
                    # only first-stage heads run here.
                    z[head] = getattr(self, head)(feats[s])

            if self.opt.pointcloud:
                ind = self.opt.pc_feat_channels['pc_dep']

                ## get pointcloud heatmap
                if not self.training:
                    if self.opt.disable_frustum:
                        pc_hm = pc_dep  # alias: the division below mutates pc_dep
                        if self.opt.normalize_depth:
                            # Normalise the depth channel on the channel axis
                            # (dim 1), the same axis the pc_hm[:, ind] slice
                            # below uses. Indexing dim 0 with the channel
                            # index would pick a batch element instead.
                            pc_hm[:, ind] /= self.opt.max_pc_dist
                    else:
                        pc_hm = generate_pc_hm(z, pc_dep, calib, self.opt)
                z['pc_hm'] = pc_hm[:, ind, :, :].unsqueeze(1)

                ## Run the second stage heads
                # Concatenate image and radar features along the channel axis.
                sec_feats = torch.cat([feats[s], pc_hm], 1)
                for head in self.secondary_heads:
                    z[head] = getattr(self, head)(sec_feats)

            out.append(z)

        return out
ModelWithLoss 的 forward 代码继续:
# Continuation of ModelWithLoss.forward: evaluate the loss on the network
# outputs and the batch, then return the last entry of `outputs` together with
# the loss and its statistics.
loss, loss_stats = self.loss(outputs, batch)
return outputs[-1], loss, loss_stats