最近在研究点云物体检测,基于OpenPCDet框架进行算法开发可以节约大量的重复性工作,专心集中在核心算法的设计上,大量节约时间。同时,因为框架由大公司专业团队进行维护,代码质量稳定。本文以小项目的形式记录如何采用OpenPCDet自定义模块、网络、loss实现训练。
目标任务:
以OpenPCDet中的pointrcnn为基础,单独剥离其中的前景点分割网络,将POINT_HEAD模块替换为自定义的CLS_HEAD。采用新建detector、新建模块、新建网络的方式实现任务。目标网络结构简单:pointnet + [256, 256] 的 MLP。
步骤一:网络构建
在OpenPCDet中,有八个基本模块:'vfe'、'backbone_3d'、'map_to_bev_module'、'pfe'、'backbone_2d'、'dense_head'、'point_head'、'roi_head'。每个模块中都有若干网络可供选择。
①自定义detector:新建detector.py。若需要在8个基础模块外添加模块,把新建模块名加入 module_topology 列表:
class PointRCNN_cls(Detector3DTemplate):
    """Custom detector: PointRCNN stripped down to foreground-point
    classification, with the extra 'cls_head' stage registered in the
    module topology."""

    def __init__(self, model_cfg, num_class, dataset):
        super().__init__(model_cfg=model_cfg, num_class=num_class, dataset=dataset)
        # Extend the framework's default topology with the custom cls_head
        # stage, placed right after the 3D backbone.
        topology = ['vfe', 'backbone_3d', 'cls_head', 'map_to_bev_module',
                    'pfe', 'backbone_2d', 'dense_head', 'point_head',
                    'roi_head']
        self.module_topology = topology
        # Only stages with a matching yaml section are actually built.
        self.module_list = self.build_networks()
把新的 detector 类加入注册字典(detectors 包 __init__.py 中的 __all__):
# Registry mapping the yaml NAME field of a detector config to its class.
__all__ = dict(
    Detector3DTemplate=Detector3DTemplate,
    SECONDNet=SECONDNet,
    PartA2Net=PartA2Net,
    PVRCNN=PVRCNN,
    PointPillar=PointPillar,
    PointRCNN=PointRCNN,
    SECONDNetIoU=SECONDNetIoU,
    CaDDN=CaDDN,
    VoxelRCNN=VoxelRCNN,
    CenterPoint=CenterPoint,
    PVRCNNPlusPlus=PVRCNNPlusPlus,
    PointRCNN_cls=PointRCNN_cls,  # custom detector
)
②自定义模块:在detector3d_template定义新模块
def build_cls_head(self, model_info_dict):
    """Build the optional CLS_HEAD module described in the model yaml.

    Args:
        model_info_dict: shared build-state dict; reads 'num_point_features'
            and appends the new module to 'module_list'.
    Returns:
        (module or None, model_info_dict)
    """
    head_cfg = self.model_cfg.get('CLS_HEAD', None)
    if head_cfg is None:
        # No CLS_HEAD section in the yaml -> nothing to build.
        return None, model_info_dict
    # Class-agnostic heads predict a single foreground/background channel
    # instead of one channel per class.
    num_class = 1 if head_cfg.CLASS_AGNOSTIC else self.num_class
    # Look up the concrete head class by its yaml NAME and instantiate it.
    head_module = dense_heads.__all__[head_cfg.NAME](
        model_cfg=head_cfg,
        input_channels=model_info_dict['num_point_features'],
        num_class=num_class,
    )
    model_info_dict['module_list'].append(head_module)
    return head_module, model_info_dict
③自定义网络:
新建cls2_head_template.py,为之后的ClsHead继承使用:
import torch
import torch.nn as nn
import torch.nn.functional as F
from ...ops.roiaware_pool3d import roiaware_pool3d_utils
from ...utils import common_utils, loss_utils
class CLS2_HeadTemplate(nn.Module):
    """Template for a point-wise foreground/background classification head.

    A stripped-down variant of OpenPCDet's PointHeadTemplate that keeps only
    what point classification needs: per-point target assignment against GT
    boxes and a sigmoid focal classification loss.  Subclasses build the
    actual MLP with make_fc_layers() and implement forward().
    """

    def __init__(self, model_cfg, num_class):
        super().__init__()
        self.model_cfg = model_cfg  # the CLS_HEAD node of the model yaml
        self.num_class = num_class
        self.build_losses(self.model_cfg.LOSS_CONFIG)
        # Filled by the subclass forward(); consumed by get_*_loss().
        self.forward_ret_dict = None

    def build_losses(self, losses_cfg):
        """Register the classification loss (sigmoid focal loss).

        NOTE(review): get_box_layer_loss() below reads self.reg_loss_func,
        which is never registered here -- confirm the box loss path is
        unused in this classification-only head.
        """
        self.add_module(
            'cls_loss_func',
            loss_utils.SigmoidFocalClassificationLoss(alpha=0.25, gamma=2.0)
        )

    @staticmethod
    def make_fc_layers(fc_cfg, input_channels, output_channels):
        """Build an MLP: [Linear -> BatchNorm1d -> ReLU] per entry of fc_cfg,
        followed by a final Linear projecting to output_channels.

        Args:
            fc_cfg: list of hidden widths, e.g. [256, 256]
            input_channels: width of the input features
            output_channels: width of the final layer (e.g. num_class)
        Returns:
            nn.Sequential implementing the MLP
        """
        fc_layers = []
        c_in = input_channels
        for k in range(0, fc_cfg.__len__()):
            fc_layers.extend([
                # bias disabled because the following BatchNorm has one
                nn.Linear(c_in, fc_cfg[k], bias=False),
                nn.BatchNorm1d(fc_cfg[k]),
                nn.ReLU(),
            ])
            c_in = fc_cfg[k]
        fc_layers.append(nn.Linear(c_in, output_channels, bias=True))
        return nn.Sequential(*fc_layers)

    def assign_stack_targets(self, points, gt_boxes, extend_gt_boxes=None,
                             ret_box_labels=False, ret_part_labels=False,
                             set_ignore_flag=True, use_ball_constraint=False, central_radius=2.0):
        """Assign a classification label to every point of a stacked batch.

        Args:
            points: (N1 + N2 + N3 + ..., 4) [bs_idx, x, y, z]
            gt_boxes: (B, M, 8)
            extend_gt_boxes: [B, M, 8] enlarged GT boxes (for the ignore band)
            ret_box_labels: also allocate per-point box regression targets
            ret_part_labels: also allocate per-point part targets
            set_ignore_flag: label points inside the enlarged-but-not-real
                box as ignored (-1)
            use_ball_constraint: alternative fg test by distance to box centre
            central_radius: radius used by the ball constraint
        Returns:
            targets_dict with
                point_cls_labels: (N1 + N2 + N3 + ...), long type,
                    0: background, -1: ignored, >0: class id
        """
        assert len(points.shape) == 2 and points.shape[1] == 4, 'points.shape=%s' % str(points.shape)
        assert len(gt_boxes.shape) == 3 and gt_boxes.shape[2] == 8, 'gt_boxes.shape=%s' % str(gt_boxes.shape)
        assert extend_gt_boxes is None or len(extend_gt_boxes.shape) == 3 and extend_gt_boxes.shape[2] == 8, \
            'extend_gt_boxes.shape=%s' % str(extend_gt_boxes.shape)
        assert set_ignore_flag != use_ball_constraint, 'Choose one only!'
        # Batch size, so target assignment can run frame by frame.
        batch_size = gt_boxes.shape[0]
        # Batch index of every stacked point.
        bs_idx = points[:, 0]
        # Per-point class label, initialised to 0 = background; shape (total_points,)
        point_cls_labels = points.new_zeros(points.shape[0]).long()
        # Optional per-point box / part targets (unused by the cls-only path).
        point_box_labels = gt_boxes.new_zeros((points.shape[0], 8)) if ret_box_labels else None
        point_part_labels = gt_boxes.new_zeros((points.shape[0], 3)) if ret_part_labels else None
        # Process one frame of the batch at a time.
        for k in range(batch_size):
            # Boolean mask selecting the points that belong to frame k.
            bs_mask = (bs_idx == k)
            # xyz of this frame's points, shape (num_points_k, 3)
            points_single = points[bs_mask][:, 1:4]
            # This frame's labels, initialised to 0 = background.
            point_cls_labels_single = point_cls_labels.new_zeros(bs_mask.sum())
            # Index of the GT box containing each point (-1 if outside all).
            box_idxs_of_pts = roiaware_pool3d_utils.points_in_boxes_gpu(
                points_single.unsqueeze(dim=0), gt_boxes[k:k + 1, :, 0:7].contiguous()
            ).long().squeeze(dim=0)
            # Foreground mask: point lies inside some GT box.
            box_fg_flag = (box_idxs_of_pts >= 0)
            # Optionally ignore points that only fall in the enlarged boxes.
            if set_ignore_flag:
                # Which points fall inside the enlarged GT boxes.
                extend_box_idxs_of_pts = roiaware_pool3d_utils.points_in_boxes_gpu(
                    points_single.unsqueeze(dim=0), extend_gt_boxes[k:k+1, :, 0:7].contiguous()
                ).long().squeeze(dim=0)
                # Foreground points.
                fg_flag = box_fg_flag
                # XOR: true only for points inside the enlarged box but not
                # the real one, i.e. the band around each GT box.
                ignore_flag = fg_flag ^ (extend_box_idxs_of_pts >= 0)
                # Label that band -1 so the loss skips these points.
                point_cls_labels_single[ignore_flag] = -1
            elif use_ball_constraint:
                # Foreground only if also within central_radius of the box
                # centre (centre lifted to mid-height via + h/2).
                box_centers = gt_boxes[k][box_idxs_of_pts][:, 0:3].clone()
                box_centers[:, 2] += gt_boxes[k][box_idxs_of_pts][:, 5] / 2
                ball_flag = ((box_centers - points_single).norm(dim=1) < central_radius)
                fg_flag = box_fg_flag & ball_flag
            else:
                raise NotImplementedError
            # GT box matched to each foreground point,
            # shape (num_fg_points, 8); the 8 dims are
            # x, y, z, l, w, h, heading, class_id.
            gt_box_of_fg_points = gt_boxes[k][box_idxs_of_pts[fg_flag]]
            # Copy the class id onto the foreground points (1 when the head
            # is single-class / class-agnostic).
            point_cls_labels_single[fg_flag] = 1 if self.num_class == 1 else gt_box_of_fg_points[:, -1].long()
            # Write this frame's labels back into the batch tensor.
            point_cls_labels[bs_mask] = point_cls_labels_single
        # Only classification labels are produced by this template.
        targets_dict = {
            'point_cls_labels': point_cls_labels,
        }
        return targets_dict

    def get_cls_layer_loss(self, tb_dict=None):
        """Sigmoid focal classification loss over all points.

        Returns:
            point_loss_cls: weighted scalar loss
            tb_dict: tensorboard scalars ('point_loss_cls', 'point_pos_num')
        """
        # GT class of every point: 0 bg, 1..C fg, -1 ignored.
        point_cls_labels = self.forward_ret_dict['point_cls_labels'].view(-1)
        # print(torch.unique(point_cls_labels))
        # Predicted logits, (total_points, num_class).
        point_cls_preds = self.forward_ret_dict['point_cls_preds'].view(-1, self.num_class)
        # Foreground mask (label > 0).
        positives = (point_cls_labels > 0)
        # Background points (label == 0) get weight 1.0 ...
        negative_cls_weights = (point_cls_labels == 0) * 1.0
        # ... and so do foreground points; ignored (-1) points keep weight 0.
        cls_weights = (negative_cls_weights + 1.0 * positives).float()
        # Normalise by the foreground count so each fg point contributes
        # equally within the batch.
        pos_normalizer = positives.sum(dim=0).float()
        cls_weights /= torch.clamp(pos_normalizer, min=1.0)
        # One-hot targets incl. a background slot: (total_points, num_class + 1)
        one_hot_targets = point_cls_preds.new_zeros(*list(point_cls_labels.shape), self.num_class + 1)
        # Scatter labels to one-hot; ignored labels (-1) are clamped to slot 0.
        one_hot_targets.scatter_(-1, (point_cls_labels * (point_cls_labels >= 0).long()).unsqueeze(dim=-1).long(), 1.0)
        # Drop the background slot: background becomes the all-zero vector.
        one_hot_targets = one_hot_targets[..., 1:]
        # Focal loss per point/class.
        cls_loss_src = self.cls_loss_func(point_cls_preds, one_hot_targets, weights=cls_weights)
        # Sum over points and classes.
        point_loss_cls = cls_loss_src.sum()
        # Scale by the configured classification loss weight.
        loss_weights_dict = self.model_cfg.LOSS_CONFIG.LOSS_WEIGHTS
        point_loss_cls = point_loss_cls * loss_weights_dict['point_cls_weight']
        if tb_dict is None:
            tb_dict = {}
        tb_dict.update({
            'point_loss_cls': point_loss_cls.item(),
            'point_pos_num': pos_normalizer.item()
        })
        return point_loss_cls, tb_dict

    def get_part_layer_loss(self, tb_dict=None):
        """BCE loss for intra-object part regression (legacy from
        PointHeadTemplate).

        NOTE(review): requires 'point_part_labels'/'point_part_preds' in
        forward_ret_dict, which this classification-only pipeline never
        fills -- confirm this method is unused.
        """
        pos_mask = self.forward_ret_dict['point_cls_labels'] > 0
        pos_normalizer = max(1, (pos_mask > 0).sum().item())
        point_part_labels = self.forward_ret_dict['point_part_labels']
        point_part_preds = self.forward_ret_dict['point_part_preds']
        point_loss_part = F.binary_cross_entropy(torch.sigmoid(point_part_preds), point_part_labels, reduction='none')
        # Average over foreground points and the 3 part dimensions.
        point_loss_part = (point_loss_part.sum(dim=-1) * pos_mask.float()).sum() / (3 * pos_normalizer)
        loss_weights_dict = self.model_cfg.LOSS_CONFIG.LOSS_WEIGHTS
        point_loss_part = point_loss_part * loss_weights_dict['point_part_weight']
        if tb_dict is None:
            tb_dict = {}
        tb_dict.update({'point_loss_part': point_loss_part.item()})
        return point_loss_part, tb_dict

    def get_box_layer_loss(self, tb_dict=None):
        """Regression loss for per-point box predictions (legacy from
        PointHeadTemplate).

        NOTE(review): relies on self.reg_loss_func, which build_losses()
        above never registers -- confirm this method is unused in the
        classification-only pipeline.
        """
        pos_mask = self.forward_ret_dict['point_cls_labels'] > 0
        point_box_labels = self.forward_ret_dict['point_box_labels']
        point_box_preds = self.forward_ret_dict['point_box_preds']
        # Only foreground points contribute; normalise by their count.
        reg_weights = pos_mask.float()
        pos_normalizer = pos_mask.sum().float()
        reg_weights /= torch.clamp(pos_normalizer, min=1.0)
        point_loss_box_src = self.reg_loss_func(
            point_box_preds[None, ...], point_box_labels[None, ...], weights=reg_weights[None, ...]
        )
        point_loss_box = point_loss_box_src.sum()
        loss_weights_dict = self.model_cfg.LOSS_CONFIG.LOSS_WEIGHTS
        point_loss_box = point_loss_box * loss_weights_dict['point_box_weight']
        if tb_dict is None:
            tb_dict = {}
        tb_dict.update({'point_loss_box': point_loss_box.item()})
        return point_loss_box, tb_dict

    def generate_predicted_boxes(self, points, point_cls_preds, point_box_preds):
        """
        Args:
            points: (N, 3) absolute coordinates of each point
            point_cls_preds: (N, num_class) per-point class predictions
            point_box_preds: (N, box_code_size) per-point box regression
        Returns:
            point_cls_preds: (N, num_class)
            point_box_preds: (N, box_code_size) decoded boxes

        NOTE(review): relies on self.box_coder, which is not created in this
        template -- confirm this method is unused here.
        """
        _, pred_classes = point_cls_preds.max(dim=-1)
        point_box_preds = self.box_coder.decode_torch(point_box_preds, points, pred_classes + 1)
        return point_cls_preds, point_box_preds

    def forward(self, **kwargs):
        # Subclasses (e.g. ClsHead) must implement the forward pass.
        raise NotImplementedError
新建cls_head.py定义模块内的具体网络,这里采用两层mlp,网络的预测结果要存在batch_dict,方便模块化。
class ClsHead(CLS2_HeadTemplate):
    """Point-wise classification head: an MLP (widths from the yaml CLS_FC
    list) mapping backbone point features to per-point class logits.
    Inherits make_fc_layers() from CLS2_HeadTemplate."""

    def __init__(self, num_class, input_channels, model_cfg, **kwargs):
        super().__init__(model_cfg=model_cfg, num_class=num_class)
        # Build the MLP described by the yaml, e.g. [256, 256] -> num_class.
        self.cls_layers = self.make_fc_layers(
            fc_cfg=self.model_cfg.CLS_FC,
            input_channels=input_channels,
            output_channels=num_class
        )

    def forward(self, batch_dict):
        # Per-point features from the 3D backbone, shape (total_points, C).
        features = batch_dict['point_features']
        # Per-point class logits, shape (total_points, num_class).
        logits = self.cls_layers(features)
        # Best class score per point, squashed to [0, 1] for downstream use.
        best_logit, _ = logits.max(dim=-1)
        batch_dict['point_cls_scores'] = torch.sigmoid(best_logit)
        ret_dict = {'point_cls_preds': logits}
        if self.training:
            # Build per-point fg/bg labels from the GT boxes for the loss.
            # NOTE(review): assign_targets() is not defined in the
            # CLS2_HeadTemplate shown here -- presumably it wraps
            # assign_stack_targets(); confirm it exists.
            targets_dict = self.assign_targets(batch_dict)
            ret_dict['point_cls_labels'] = targets_dict['point_cls_labels']
        # Stash predictions (and labels) for get_cls_layer_loss().
        self.forward_ret_dict = ret_dict
        return batch_dict
把新网络加入所在模块的注册字典 __all__:
# Registry mapping the yaml NAME field of a head config to its class.
__all__ = dict(
    AnchorHeadTemplate=AnchorHeadTemplate,
    AnchorHeadSingle=AnchorHeadSingle,
    PointIntraPartOffsetHead=PointIntraPartOffsetHead,
    PointHeadSimple=PointHeadSimple,
    PointHeadBox=PointHeadBox,
    AnchorHeadMulti=AnchorHeadMulti,
    CenterHead=CenterHead,
    ClsHead=ClsHead,  # custom head
)
④新建detector的yaml配置文件
注意YAML中的NAME和簇里的名字需要保持一致,也就是和类名保持一致
# Model configuration for the custom foreground-segmentation detector.
# Every NAME below must match a class name registered in an __all__ dict.
MODEL:
    NAME: PointRCNN_cls  # must match the detector class: PointRCNN_cls(Detector3DTemplate)

    BACKBONE_3D:
        NAME: PointNet2MSG
        SA_CONFIG:
            # Set-abstraction levels: points kept per level, ball radii,
            # samples per ball, and the per-scale MLP widths.
            NPOINTS: [4096, 1024, 256, 64]
            RADIUS: [[0.1, 0.5], [0.5, 1.0], [1.0, 2.0], [2.0, 4.0]]
            NSAMPLE: [[16, 32], [16, 32], [16, 32], [16, 32]]
            MLPS: [[[16, 16, 32], [32, 32, 64]],
                   [[64, 64, 128], [64, 96, 128]],
                   [[128, 196, 256], [128, 196, 256]],
                   [[256, 256, 512], [256, 384, 512]]]
        # Feature-propagation (upsampling) MLP widths.
        FP_MLPS: [[128, 128], [256, 256], [512, 512], [512, 512]]

    CLS_HEAD:  # custom module section, read by build_cls_head()
        NAME: ClsHead  # must match the head class: ClsHead(CLS2_HeadTemplate)
        CLS_FC: [ 256, 256 ]
        REG_FC: [ 256, 256 ]
        CLASS_AGNOSTIC: False
        USE_POINT_FEATURES_BEFORE_FUSION: False
        LOSS_CONFIG:
            # NOTE(review): contents truncated in this excerpt -- the head
            # reads LOSS_WEIGHTS['point_cls_weight'] from here.
此时网络已构建完毕,可以通过detector里的self.module_list = self.build_networks()查看网络。
步骤二:loss构建
具体的loss如何计算在cls2_head_template中定义,get_cls_layer_loss作为CLS2_HeadTemplate的成员函数,通过self.cls_head.get_loss()调用,get_training_loss的作用是整理一下不同模块的loss,如pointrcnn里第一阶段loss和第二阶段loss。
①在detector中写forward,预测结果存在batch_dict里。
②在cls2 head template中修改assign_stack_targets,去除box估计的部分,只考虑分类。在detector中写get loss,整合一下不同网络的loss,如第一阶段里的cls和box的两部分loss。
def forward(self, batch_dict):
    """Run every registered sub-module in topology order, threading the
    batch dict through; then either collect losses (training) or
    post-process predictions (inference).

    Returns:
        training: ({'loss': loss}, tb_dict, disp_dict)
        inference: (pred_dicts, recall_dicts)
    """
    # e.g. pointnet backbone -> cls_head -> ... each module reads from and
    # writes back into the shared batch dict.
    for module in self.module_list:
        batch_dict = module(batch_dict)
    if not self.training:
        # Inference: filter/format predictions and compute recall stats.
        pred_dicts, recall_dicts = self.post_processing(batch_dict)
        return pred_dicts, recall_dicts
    # Training: gather the losses produced by the heads.
    loss, tb_dict, disp_dict = self.get_training_loss()
    return {'loss': loss}, tb_dict, disp_dict
def get_training_loss(self):
    """Aggregate the losses of the model's heads.

    In this pipeline POINT_HEAD was replaced by the custom CLS_HEAD, so the
    head module is registered as self.cls_head -- self.point_head does not
    exist here and calling it would raise AttributeError.

    Returns:
        loss: total scalar loss
        tb_dict: tensorboard scalars from the head
        disp_dict: extra display info (empty here)
    """
    disp_dict = {}
    # NOTE(review): get_loss() is expected to wrap get_cls_layer_loss();
    # confirm the head defines it.
    loss_cls, tb_dict = self.cls_head.get_loss()
    # Only one head contributes in this classification-only detector.
    loss = loss_cls
    return loss, tb_dict, disp_dict
至此,自定义内部完毕,可以开始训练咯!!
python train.py + 自定义的yaml
参考: