point_pillar_fcooper
(紧扣PointPillarFCooper的框架结构,一点一点看代码)
PointPillarFCooper
# -*- coding: utf-8 -*-
# Author: Runsheng Xu <rxx3386@ucla.edu>
# License: TDG-Attribution-NonCommercial-NoDistrib
import pprint
import torch.nn as nn
from opencood.models.sub_modules.pillar_vfe import PillarVFE
from opencood.models.sub_modules.point_pillar_scatter import PointPillarScatter
from opencood.models.sub_modules.base_bev_backbone import BaseBEVBackbone
from opencood.models.sub_modules.downsample_conv import DownsampleConv
from opencood.models.sub_modules.naive_compress import NaiveCompressor
from opencood.models.fuse_modules.f_cooper_fuse import SpatialFusion
class PointPillarFCooper(nn.Module):
    """
    F-Cooper implementation with point pillar backbone.

    Parameters
    ----------
    args : dict
        Model configuration. The constructor reads the keys 'max_cav',
        'pillar_vfe', 'voxel_size', 'lidar_range', 'point_pillar_scatter',
        'base_bev_backbone', 'compression', 'anchor_number' and
        'backbone_fix'. The key 'shrink_header' is optional.
    """

    def __init__(self, args):
        super(PointPillarFCooper, self).__init__()

        # Dump the received configuration so it can be inspected at start-up.
        print("args: ")
        pprint.pprint(args)

        self.max_cav = args['max_cav']

        # Pillar VFE (Voxel Feature Encoding).
        self.pillar_vfe = PillarVFE(args['pillar_vfe'],
                                    num_point_features=4,
                                    voxel_size=args['voxel_size'],
                                    point_cloud_range=args['lidar_range'])
        self.scatter = PointPillarScatter(args['point_pillar_scatter'])
        # NOTE(review): 64 presumably has to agree with the scatter module's
        # feature width -- confirm against the config before changing either.
        self.backbone = BaseBEVBackbone(args['base_bev_backbone'], 64)

        # Optional module used to downsample the feature map for efficient
        # computation; it is only built when the config provides
        # 'shrink_header'.
        self.shrink_flag = False
        if 'shrink_header' in args:
            self.shrink_flag = True
            self.shrink_conv = DownsampleConv(args['shrink_header'])

        # Optional feature compressor; only built when 'compression' > 0.
        self.compression = False
        if args['compression'] > 0:
            self.compression = True
            # NOTE(review): the hard-coded 256 presumably is the channel
            # count of the feature map entering the compressor -- confirm.
            self.naive_compressor = NaiveCompressor(256, args['compression'])

        self.fusion_net = SpatialFusion()

        # 1x1 convolution heads: the classification head emits
        # 'anchor_number' channels and the regression head emits
        # 7 * 'anchor_number' channels.
        self.cls_head = nn.Conv2d(128 * 2, args['anchor_number'],
                                  kernel_size=1)
        self.reg_head = nn.Conv2d(128 * 2, 7 * args['anchor_number'],
                                  kernel_size=1)

        # Freeze the already-built modules when the config requests it.
        if args['backbone_fix']:
            self.backbone_fix()
- args: 其实就是从hypes_yaml配置文件里传来的参数
args:
{
'anchor_number': 2,
'backbone_fix': False,
'base_bev_backbone': {
'layer_nums': [3, 5, 8],
'layer_strides': [2, 2, 2],
'num_filters': [64, 128, 256],
'num_upsample_filter': [128, 128, 128],
'upsample_strides': [1, 2, 4]},
'compression': 0,
'lidar_range': [-140.8, -40, -3, 140.8, 40, 1],
'max_cav': 5,
'pillar_vfe': {
'num_filters': [64],
'use_absolute_xyz': True,
'use_norm': True,
'with_distance': False},
'point_pillar_scatter': {
'grid_size': array([704, 200, 1], dtype=int64),
'num_features': 64},
'shrink_header': {
'dim': [256],
'input_dim': 384,
'kernal_size': [1],
'padding': [0],
'stride': [1]},
'voxel_size': [0.4, 0.4, 4]}
def backbone_fix(self):
    """
    Fix the parameters of backbone during finetune on timedelay.

    Sets ``requires_grad = False`` on every parameter of pillar_vfe,
    scatter, backbone, cls_head and reg_head, and additionally on
    naive_compressor when ``self.compression`` is truthy and on shrink_conv
    when ``self.shrink_flag`` is truthy. ``fusion_net`` is not touched.
    """
    frozen_modules = [self.pillar_vfe, self.scatter, self.backbone]

    # The optional modules only exist as attributes when their flag is set,
    # so they must be looked up behind the flag.
    if self.compression:
        frozen_modules.append(self.naive_compressor)
    if self.shrink_flag:
        frozen_modules.append(self.shrink_conv)

    frozen_modules.append(self.cls_head)
    frozen_modules.append(self.reg_head)

    for module in frozen_modules:
        for p in module.parameters():
            p.requires_grad = False
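backbone_fix 方法用于在模型微调(finetune)过程中冻结已有模块的参数,以避免它们被更新。注意它冻结的不仅仅是骨干网络:
它遍历 pillar_vfe、scatter、backbone、cls_head、reg_head(以及启用时的 naive_compressor 和 shrink_conv),并将这些模块所有参数的 requires_grad 属性设置为 False,这意味着这些参数不会再计算梯度,也就不会被优化器更新;fusion_net 不在冻结之列。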
我们来看 forward 方法:
def forward(self, data_dict):
    """
    Run the full pipeline on one batch and return the raw head outputs.

    Parameters
    ----------
    data_dict : dict
        Must contain 'processed_lidar' (a dict with 'voxel_features',
        'voxel_coords' and 'voxel_num_points') and 'record_len'.

    Returns
    -------
    dict
        'psm' is the output of cls_head and 'rm' is the output of reg_head,
        both computed on the fused feature map.
    """
    processed_lidar = data_dict['processed_lidar']
    # NOTE(review): record_len presumably holds the number of agents per
    # sample; it is only forwarded to the sub-modules here -- confirm.
    record_len = data_dict['record_len']

    batch_dict = {
        'voxel_features': processed_lidar['voxel_features'],
        'voxel_coords': processed_lidar['voxel_coords'],
        'voxel_num_points': processed_lidar['voxel_num_points'],
        'record_len': record_len,
    }

    # n, 4 -> n, c
    batch_dict = self.pillar_vfe(batch_dict)
    # n, c -> N, C, H, W
    batch_dict = self.scatter(batch_dict)
    batch_dict = self.backbone(batch_dict)

    spatial_features_2d = batch_dict['spatial_features_2d']

    # Downsample the feature map to reduce memory (only when configured).
    if self.shrink_flag:
        spatial_features_2d = self.shrink_conv(spatial_features_2d)

    # Compress the feature map (only when configured).
    if self.compression:
        spatial_features_2d = self.naive_compressor(spatial_features_2d)

    fused_feature = self.fusion_net(spatial_features_2d, record_len)

    return {
        'psm': self.cls_head(fused_feature),
        'rm': self.reg_head(fused_feature),
    }
forward 方法定义了模型的前向传播过程。它接受一个数据字典作为输入,包含了经过处理的点云数据。
首先,从输入字典中提取出点云特征、体素坐标、体素点数等信息。
然后,依次将数据通过 pillar_vfe、scatter 和 backbone 这几个模块进行处理,得到了一个包含了空间特征的张量 spatial_features_2d。
如果启用了特征图的下采样(shrink_flag 为 True),则对 spatial_features_2d 进行下采样。
如果启用了特征压缩(compression 为 True),则对 spatial_features_2d 进行压缩。
最后,将(可能经过下采样和压缩的)特征连同 record_len 一起送入 fusion_net 进行融合,再通过 cls_head 和 reg_head 分别得到分类输出 psm 和回归输出 rm,作为预测结果返回。
整个 forward 方法实现了模型的数据流动过程,从输入数据到最终输出结果的计算过程。
- PointPillarFCooper 结构
PointPillarFCooper(
(pillar_vfe): PillarVFE(
(pfn_layers): ModuleList(
(0): PFNLayer(
(linear): Linear(in_features=10, out_features=64, bias=False)
(norm): BatchNorm1d(64, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
)
)
)
(scatter): PointPillarScatter()
(backbone): BaseBEVBackbone(
(blocks): ModuleList(
(0): Sequential(
(0): ZeroPad2d(padding=(1, 1, 1, 1), value=0.0)
(1): Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), bias=False)
(2): BatchNorm2d(64, eps=0.001, momentum=0.01, affine=True, track_running_stats