openpcdet之pointpillar代码阅读——第一篇：数据增强与数据处理

原创已于 2022-07-18 08:24:47 修改 · 6.4k 阅读

89 ·

CC 4.0 BY-SA版权

文章标签：

#openpcdet #pointpillar #激光雷达 #3D目标检测 #数据增强

于 2022-07-13 13:42:28 首次发布

目标检测实战专栏收录该内容

10 篇文章

订阅专栏

文章目录

1. 数据增强
2. 数据处理
3. 数据收集——dataloader

pointpillar相关的其它文章链接如下：

【论文阅读】CVPR 2019| PointPillars: 基于点云的快速编码目标检测框架(Fast Encoders for Object Detection from Point Clouds)
OpenPCDet v0.5版本的安装与测试
openpcdet之pointpillar代码阅读——第一篇：数据增强与数据处理
openpcdet之pointpillar代码阅读——第二篇：网络结构
openpcdet之pointpillar代码阅读——第三篇：损失函数的计算

1. 数据增强

数据增强部分，相对比较清晰，整体流程如下所示。后续openpcdet也出了一些新的数据增强方法，不过目前本人暂时还没有使用。

在这里插入图片描述

数据增强部分代码在：pcdet/datasets/augmentor/data_augmentor.py

1.1 gt数据采集——gt_sampling

该模块思路很简单，就是为了丰富训练数据，也就是将其它帧gt的点云以及box放入待训练帧中的空余位置。下面是这部分的配置文件，官方这部分训练了3种类型。

            - NAME: gt_sampling
              USE_ROAD_PLANE: True
              DB_INFO_PATH:
                  - kitti_dbinfos_train.pkl
              PREPARE: {
                 filter_by_min_points: ['Car:5', 'Pedestrian:5', 'Cyclist:5'],
                 filter_by_difficulty: [-1],
              }

              SAMPLE_GROUPS: ['Car:15','Pedestrian:15', 'Cyclist:15']
              NUM_POINT_FEATURES: 4
              DATABASE_WITH_FAKELIDAR: False
              REMOVE_EXTRA_WIDTH: [0.0, 0.0, 0.0]
              LIMIT_WHOLE_SCENE: False

首先对采样的gt进行最小点过滤。代码注释如下：

class DataAugmentor(object):
    """Builds an ordered queue of augmentation callables from a configuration.

    Every entry of the configuration names a method of this class; that method
    is called once with ``config=<entry>`` and whatever it returns is stored in
    ``data_augmentor_queue``.
    """

    def __init__(self, root_path, augmentor_configs, class_names, logger=None):
        """
        Args:
            root_path: forwarded to augmentors that need it (see ``gt_sampling``)
            augmentor_configs: either a plain list of augmentor configs, or an
                object exposing ``AUG_CONFIG_LIST`` and ``DISABLE_AUG_LIST``
            class_names: class names forwarded to augmentors
            logger: optional logger forwarded to augmentors
        """
        self.root_path = root_path
        self.class_names = class_names
        self.logger = logger
        self.data_augmentor_queue = []

        # A plain list carries no DISABLE_AUG_LIST, so nothing can be skipped.
        configs_are_plain_list = isinstance(augmentor_configs, list)
        if configs_are_plain_list:
            aug_config_list = augmentor_configs
        else:
            aug_config_list = augmentor_configs.AUG_CONFIG_LIST

        for cur_cfg in aug_config_list:
            if not configs_are_plain_list and cur_cfg.NAME in augmentor_configs.DISABLE_AUG_LIST:
                continue
            # Look the augmentor up by name and bind its config now; the
            # returned callable is what gets queued for later use.
            build_augmentor = getattr(self, cur_cfg.NAME)
            self.data_augmentor_queue.append(build_augmentor(config=cur_cfg))

    def gt_sampling(self, config=None):
        """Create the ground-truth database sampler configured by ``config``."""
        return database_sampler.DataBaseSampler(
            root_path=self.root_path,
            sampler_cfg=config,
            class_names=self.class_names,
            logger=self.logger,
        )

其中DataBaseSampler的代码如下：

class DataBaseSampler(object):
    """Loads per-class ground-truth database infos and prepares sampling state."""

    def __init__(self, root_path, sampler_cfg, class_names, logger=None):
        """
        Args:
            root_path: dataset root; must support ``.resolve()`` and the ``/``
                operator (e.g. ``pathlib.Path``)
            sampler_cfg: config exposing ``get``, ``DB_INFO_PATH``, ``PREPARE``
                and ``SAMPLE_GROUPS``
            class_names: names of the classes whose database entries are kept
            logger: optional logger used by the filter functions
        """
        self.root_path = root_path
        self.class_names = class_names
        self.sampler_cfg = sampler_cfg
        self.logger = logger
        # One bucket of database infos per requested class.
        self.db_infos = {class_name: [] for class_name in class_names}

        # Defaults to False when the key is absent from the config.
        self.use_shared_memory = sampler_cfg.get('USE_SHARED_MEMORY', False)

        for db_info_path in sampler_cfg.DB_INFO_PATH:
            db_info_path = self.root_path.resolve() / db_info_path
            # NOTE(review): pickle.load can execute arbitrary code contained in
            # the file; DB_INFO_PATH must only point at trusted database files.
            with open(str(db_info_path), 'rb') as f:
                infos = pickle.load(f)
            # Merge this file's entries into the matching per-class bucket.
            for cur_class in class_names:
                self.db_infos[cur_class].extend(infos[cur_class])

        # Each PREPARE key names a filter method of this class (for example
        # filter_by_min_points); every filter returns the new db_infos.
        for func_name, val in sampler_cfg.PREPARE.items():
            self.db_infos = getattr(self, func_name)(self.db_infos, val)

        # load_db_to_shared_memory is not part of this excerpt; it is only
        # called when shared memory is enabled.
        self.gt_database_data_key = self.load_db_to_shared_memory() if self.use_shared_memory else None

        self.sample_groups = {}  # per class: sample_num, pointer and indices
        self.sample_class_num = {}  # per class: sample_num
        self.limit_whole_scene = sampler_cfg.get('LIMIT_WHOLE_SCENE', False)

        for x in sampler_cfg.SAMPLE_GROUPS:
            # Entries look like 'Car:15'. sample_num is kept as the raw string
            # produced by split(); it is not converted to int here.
            class_name, sample_num = x.split(':')
            if class_name not in class_names:
                continue
            self.sample_class_num[class_name] = sample_num
            self.sample_groups[class_name] = {
                'sample_num': sample_num,
                # The pointer starts at the end of the list.
                # NOTE(review): presumably this forces a reshuffle on first
                # use -- confirm against the sampling code.
                'pointer': len(self.db_infos[class_name]),
                'indices': np.arange(len(self.db_infos[class_name]))
            }

    def filter_by_min_points(self, db_infos, min_gt_points_list):
        """Drop database entries whose box contains too few points.

        Args:
            db_infos: dict mapping class name to a list of info dicts, each
                with a ``'num_points_in_gt'`` entry
            min_gt_points_list: strings of the form ``'<class>:<min_points>'``

        Returns:
            The same ``db_infos`` dict, with the filtered lists assigned in place.
        """
        for name_num in min_gt_points_list:
            # Each class is filtered independently.
            name, min_num = name_num.split(':')
            min_num = int(min_num)
            if min_num > 0 and name in db_infos:
                # Keep entries with at least min_num points inside the box.
                filtered_infos = [
                    info for info in db_infos[name]
                    if info['num_points_in_gt'] >= min_num
                ]

                if self.logger is not None:
                    self.logger.info('Database filter by min points %s: %d => %d' %
                                     (name, len(db_infos[name]), len(filtered_infos)))
                db_infos[name] = filtered_infos

        return db_infos

1.2 全局翻转——random_world_flip

这部分配置如下，这部分的意义为使points和gt_boxes进行 X轴的全局翻转 。

        - NAME: random_world_flip
          ALONG_AXIS_LIST: ['x']

官方这部分只做了X轴的对称翻转，翻转概率为50%。

    def random_world_flip(self, data_dict=None, config=None):
        """Apply the configured random flips to ``gt_boxes`` and ``points``.

        Called without ``data_dict`` it only binds ``config`` and returns a
        partial of itself, so the augmentation can be queued and run later.
        """
        if data_dict is None:
            return partial(self.random_world_flip, config=config)

        boxes = data_dict['gt_boxes']
        cloud = data_dict['points']
        for cur_axis in config['ALONG_AXIS_LIST']:
            assert cur_axis in ['x', 'y']
            # Resolve random_flip_along_x / random_flip_along_y by name.
            flip_fn = getattr(augmentor_utils, 'random_flip_along_%s' % cur_axis)
            boxes, cloud = flip_fn(boxes, cloud)

        data_dict['gt_boxes'] = boxes
        data_dict['points'] = cloud
        return data_dict
 # x轴对称翻转函数
def random_flip_along_x(gt_boxes, points):
    """
    Mirror boxes and points across the x axis (y -> -y) with probability 0.5.

    Args:
        gt_boxes: (N, 7 + C), [x, y, z, dx, dy, dz, heading, [vx], [vy]]
        points: (M, 3 + C)
    Returns:
        (gt_boxes, points); both arrays are modified in place when the flip
        is drawn and returned untouched otherwise.
    """
    do_flip = np.random.choice([False, True], replace=False, p=[0.5, 0.5])
    if not do_flip:
        return gt_boxes, points

    # Column 1 is the box centre's y coordinate and column 6 the heading;
    # both change sign when mirroring across the x axis.
    for col in (1, 6):
        gt_boxes[:, col] = -gt_boxes[:, col]
    points[:, 1] = -points[:, 1]

    # With extra box columns present, column 8 (vy in the layout above) is
    # negated as well.
    if gt_boxes.shape[1] > 7:
        gt_boxes[:, 8] = -gt_boxes[:, 8]

    return gt_boxes, points

1.3 全局旋转——random_world_rotation

这部分配置如下，这部分的意义为使points和gt_boxes进行 绕Z轴的旋转 波动。

        - NAME: random_world_rotation
          WORLD_ROT_ANGLE: [-0.78539816, 0.78539816] #值为弧度

需要说明的是，这里Z轴旋转的角度为弧度。

    def random_world_rotation(self, data_dict=None, config=None):
        """Rotate ``gt_boxes`` and ``points`` by one random angle about the z axis.

        Called without ``data_dict`` it only binds ``config`` and returns a
        partial of itself, so the augmentation can be queued and run later.

        Args:
            data_dict: dict with ``'gt_boxes'`` and ``'points'`` entries
            config: mapping with ``'WORLD_ROT_ANGLE'``, either a ``[min, max]``
                list or a single value ``a`` that is expanded to ``[-a, a]``

        Returns:
            ``data_dict`` with the rotated ``'gt_boxes'`` and ``'points'``.
        """
        if data_dict is None:
            return partial(self.random_world_rotation, config=config)
        rot_range = config['WORLD_ROT_ANGLE']
        if not isinstance(rot_range, list):
            rot_range = [-rot_range, rot_range]
        gt_boxes, points = augmentor_utils.global_rotation(
            data_dict['gt_boxes'], data_dict['points'], rot_range=rot_range
        )

        # Store the results and hand the dict back, as the other world-level
        # augmentations do; without this the rotated points are discarded and
        # the caller receives None.
        data_dict['gt_boxes'] = gt_boxes
        data_dict['points'] = points
        return data_dict
# 旋转函数
def global_rotation(gt_boxes, points, rot_range):
    """
    Rotate points and boxes about the z axis by one uniformly drawn angle.

    Args:
        gt_boxes: (N, 7 + C), [x, y, z, dx, dy, dz, heading, [vx], [vy]]
        points: (M, 3 + C),
        rot_range: [min, max]
    Returns:
        (gt_boxes, points): ``gt_boxes`` is updated in place, ``points`` is the
        array returned by the rotation helper.
    """
    noise_rotation = np.random.uniform(rot_range[0], rot_range[1])
    # The rotation helper works on batches, so wrap the angle in a
    # one-element array and add a leading batch axis to every input.
    angle = np.array([noise_rotation])
    rotate = common_utils.rotate_points_along_z

    points = rotate(points[np.newaxis, :, :], angle)[0]
    gt_boxes[:, 0:3] = rotate(gt_boxes[np.newaxis, :, 0:3], angle)[0]
    gt_boxes[:, 6] += noise_rotation

    if gt_boxes.shape[1] > 7:
        # Columns 7:9 are padded with a zero third component so they can go
        # through the same rotation; only the first two outputs are kept.
        padded = np.hstack((gt_boxes[:, 7:9], np.zeros((gt_boxes.shape[0], 1))))
        gt_boxes[:, 7:9] = rotate(padded[np.newaxis, :, :], angle)[0][:, 0:2]

    return gt_boxes, points
  
# 具体调用的旋转函数为
def rotate_points_along_z(points, angle):
    """
    Rotate the first three channels of each batch of points about the z axis.

    Args:
        points: (B, N, 3 + C)
        angle: (B), angle along z-axis, angle increases x ==> y
    Returns:
        Rotated points of the same layout; channels from index 3 on are passed
        through unchanged. A numpy array is returned when ``is_numpy`` from
        ``check_numpy_to_torch`` is true, otherwise the torch tensor.
    """
    points, is_numpy = check_numpy_to_torch(points)
    angle, _ = check_numpy_to_torch(angle)

    cos_a, sin_a = torch.cos(angle), torch.sin(angle)
    batch = points.shape[0]
    zeros = angle.new_zeros(batch)
    ones = angle.new_ones(batch)

    # Nine per-batch entries, row-major; applied as points @ matrix, which
    # gives x' = x*cos - y*sin and y' = x*sin + y*cos.
    entries = (
        cos_a, sin_a, zeros,
        -sin_a, cos_a, zeros,
        zeros, zeros, ones,
    )
    rot_matrix = torch.stack(entries, dim=1).view(-1, 3, 3).float()

    xyz_rot = torch.matmul(points[:, :, 0:3], rot_matrix)
    rotated = torch.cat((xyz_rot, points[:, :, 3:]), dim=-1)
    if is_numpy:
        return rotated.numpy()
    return rotated

1.4 全局尺度变换——random_world_scaling

这部分配置如下，这部分的意义为使points和gt_boxes进行 尺度的缩放。

        - NAME: random_world_scaling
          WORLD_SCALE_RANGE: [0.95, 1.05]

    def random_world_scaling(self, data_dict=None, config=None):
        """Scale ``gt_boxes`` and ``points`` by a random factor from the config.

        Called without ``data_dict`` it only binds ``config`` and returns a
        partial of itself, so the augmentation can be queued and run later.
        """
        if data_dict is None:
            return partial(self.random_world_scaling, config=config)

        # The helper receives the [min, max] range stored under
        # WORLD_SCALE_RANGE and returns (gt_boxes, points).
        scaled = augmentor_utils.global_scaling(
            data_dict['gt_boxes'], data_dict['points'], config['WORLD_SCALE_RANGE']
        )
        data_dict['gt_boxes'], data_dict['points'] = scaled
        return data_dict

#尺度函数
def global_scaling(gt_boxes, points, scale_range):
    """
    Scale point coordinates and box geometry by one uniformly drawn factor.

    Args:
        gt_boxes: (N, 7), [x, y, z, dx, dy, dz, heading]
        points: (M, 3 + C),
        scale_range: [min, max]
    Returns:
        (gt_boxes, points), both modified in place; returned untouched when
        the range is narrower than 1e-3.
    """
    low, high = scale_range[0], scale_range[1]
    range_is_degenerate = high - low < 1e-3
    if range_is_degenerate:
        return gt_boxes, points

    noise_scale = np.random.uniform(low, high)
    # Only xyz of the points and the first six box columns (centre and size)
    # are scaled; the heading and extra channels are left alone.
    points[:, :3] *= noise_scale
    gt_boxes[:, :6] *= noise_scale
    return gt_boxes, points

2. 数据处理

数据处理有3个函数，分别为限制点云和box的范围、随机打乱点云顺序，以及点云至voxels(或者pillars)的变换。这部分的流程如下：

在这里插入图片描述

这部分的代码位于：pcdet/datasets/processor/data_processor.py

class DataProcessor(object):
    """Builds an ordered queue of data-processing callables from a configuration."""

    def __init__(self, processor_configs, point_cloud_range, training, num_point_features):
        """
        Args:
            processor_configs: iterable of configs; each ``NAME`` selects a
                method of this class, which is called with ``config=<entry>``
            point_cloud_range: range used by the processing steps
            training: selects ``'train'`` or ``'test'`` mode
            num_point_features: number of features per point
        """
        self.point_cloud_range = point_cloud_range
        self.training = training
        self.num_point_features = num_point_features
        if training:
            self.mode = 'train'
        else:
            self.mode = 'test'

        # Unset at construction; transform_points_to_voxels assigns grid_size,
        # voxel_size and voxel_generator.
        self.grid_size = None
        self.voxel_size = None
        self.voxel_generator = None

        self.data_processor_queue = []
        enqueue = self.data_processor_queue.append
        for cur_cfg in processor_configs:
            # Each processor binds its config now; the result is queued.
            make_processor = getattr(self, cur_cfg.NAME)
            enqueue(make_processor(config=cur_cfg))

2.1 数据范围限制

点云的mask和box的mask是分别制作的。

    def mask_points_and_boxes_outside_range(self, data_dict=None, config=None):
        """Drop points, and optionally ground-truth boxes, outside the point cloud range.

        Called without ``data_dict`` it only binds ``config`` and returns a
        partial of itself. Points and boxes are masked independently.
        """
        if data_dict is None:
            return partial(self.mask_points_and_boxes_outside_range, config=config)

        cloud = data_dict.get('points', None)
        if cloud is not None:
            keep_points = common_utils.mask_points_by_range(cloud, self.point_cloud_range)
            data_dict['points'] = cloud[keep_points]

        boxes = data_dict.get('gt_boxes', None)
        # Boxes are filtered only when they exist, the config asks for it and
        # we are in training mode.
        if boxes is not None and config.REMOVE_OUTSIDE_BOXES and self.training:
            keep_boxes = box_utils.mask_boxes_outside_range_numpy(
                boxes, self.point_cloud_range, min_num_corners=config.get('min_num_corners', 1)
            )
            data_dict['gt_boxes'] = boxes[keep_boxes]

        return data_dict

#制作点云的mask的函数
def mask_points_by_range(points, limit_range):
    """Return a boolean mask of the points whose x and y lie inside ``limit_range``.

    ``limit_range`` is indexed as [minx, miny, _, maxx, maxy, _]; bounds are
    inclusive and the z entries are not checked.
    """
    x, y = points[:, 0], points[:, 1]
    inside_x = (x >= limit_range[0]) & (x <= limit_range[3])
    inside_y = (y >= limit_range[1]) & (y <= limit_range[4])
    return inside_x & inside_y
# gt_box的mask函数
def mask_boxes_outside_range_numpy(boxes, limit_range, min_num_corners=1):
    """
    Mark the boxes that have enough corners inside ``limit_range``.

    Args:
        boxes: (N, 7) [x, y, z, dx, dy, dz, heading, ...], (x, y, z) is the box center
        limit_range: [minx, miny, minz, maxx, maxy, maxz]
        min_num_corners: minimum number of corners that must lie inside the range

    Returns:
        (N) boolean mask, True for boxes to keep
    """
    # Only the first seven columns are passed to the corner computation.
    box7 = boxes[:, 0:7] if boxes.shape[1] > 7 else boxes
    corners = boxes_to_corners_3d(box7)  # (N, 8, 3)

    lower, upper = limit_range[0:3], limit_range[3:6]
    # A corner counts as inside only when all three coordinates are in range.
    corner_inside = ((corners >= lower) & (corners <= upper)).all(axis=2)
    return corner_inside.sum(axis=1) >= min_num_corners  # (N)

2.2 点云随机

这里使用np.random.permutation方法，随机打乱点云的顺序。

    def shuffle_points(self, data_dict=None, config=None):
        """Randomly permute the rows of ``data_dict['points']`` when enabled for the mode.

        Called without ``data_dict`` it only binds ``config`` and returns a
        partial of itself. ``config.SHUFFLE_ENABLED`` is looked up with
        ``self.mode`` as the key.
        """
        if data_dict is None:
            return partial(self.shuffle_points, config=config)

        if not config.SHUFFLE_ENABLED[self.mode]:
            return data_dict

        cloud = data_dict['points']
        order = np.random.permutation(cloud.shape[0])
        data_dict['points'] = cloud[order]
        return data_dict

2.3 点云变换至pillar

这部分的内容为将3D的点云信息(N个点，每个点含x、y、z坐标)，以俯视图的形式分配至x-y平面上大小均等的立方柱体中，这个立方柱就被称为pillar。经过这一步之后，会生成三份数据：

voxels ：代表了每个生成的pillar数据，维度是[M,32,4]；
coordinates ：代表了每个生成的pillar所在的zyx轴坐标，维度是[M,3],其中z恒为0；
num_points ：代表了每个生成的pillar中有多少个有效的点，维度是[M,]；pillar内的点不满32个时，voxels中对应的位置会被0填充。

官网中的pillars设置为：[69.12, 79.36, 4]/[0.16, 0.16, 4] = [432, 496, 1]

    def transform_points_to_voxels(self, data_dict=None, config=None):
        """
        Convert the point cloud into voxels (pillars) with VoxelGeneratorWrapper.

        Called without ``data_dict`` it derives ``grid_size`` and ``voxel_size``
        from the config and returns a partial of itself. Called with data it
        adds ``'voxels'``, ``'voxel_coords'`` and ``'voxel_num_points'`` to
        ``data_dict`` and returns it.
        """
        if data_dict is None:
            # Grid resolution = range extent / voxel size. For example, range
            # [0, -39.68, -3, 69.12, 39.68, 1] with voxel size [0.16, 0.16, 4]
            # gives [69.12, 79.36, 4] / [0.16, 0.16, 4] = [432, 496, 1].
            extent = self.point_cloud_range[3:6] - self.point_cloud_range[0:3]
            cells = extent / np.array(config.VOXEL_SIZE)
            self.grid_size = np.round(cells).astype(np.int64)
            self.voxel_size = config.VOXEL_SIZE
            # just bind the config, we will create the VoxelGeneratorWrapper later,
            # to avoid pickling issues in multiprocess spawn
            return partial(self.transform_points_to_voxels, config=config)

        if self.voxel_generator is None:
            # Created once, on the first real call, and reused afterwards.
            self.voxel_generator = VoxelGeneratorWrapper(
                vsize_xyz=config.VOXEL_SIZE,  # size of one voxel / pillar
                coors_range_xyz=self.point_cloud_range,  # extent of the point cloud
                num_point_features=self.num_point_features,  # features per point
                max_num_points_per_voxel=config.MAX_POINTS_PER_VOXEL,  # cap on points per voxel
                max_num_voxels=config.MAX_NUMBER_OF_VOXELS[self.mode],  # cap on voxels, chosen per mode
            )

        # The generator returns three pieces: the voxel data, the coordinates
        # of each voxel and the number of points in each voxel.
        generated = self.voxel_generator.generate(data_dict['points'])
        voxels, coordinates, num_points = generated

        if not data_dict['use_lead_xyz']:
            voxels = voxels[..., 3:]  # remove xyz in voxels(N, 3)

        data_dict['voxels'] = voxels
        data_dict['voxel_coords'] = coordinates
        data_dict['voxel_num_points'] = num_points
        return data_dict

3. 数据收集——dataloader

如果batch size为1的话，直接从__getitem__中获取data_dict；如果batch size > 1的话，就需要特殊处理了，处理代码如下所示。

这部分代码在：pcdet/datasets/dataset.py

    # Build the batch loader. Samples are merged by dataset.collate_batch, and
    # shuffling is requested only when no sampler is given and we are training.
    # NOTE(review): DataLoader is presumably torch.utils.data.DataLoader --
    # confirm against the file's imports.
    dataloader = DataLoader(
        dataset, batch_size=batch_size, pin_memory=True, num_workers=workers,
        shuffle=(sampler is None) and training, collate_fn=dataset.collate_batch,
        drop_last=False, sampler=sampler, timeout=0
    )

def collate_batch(batch_list, _unused=False):
    """
    Merge a list of per-sample dicts into one batch dict.

    How each key is merged:
        * ``voxels`` / ``voxel_num_points``: concatenated along axis 0.
        * ``points`` / ``voxel_coords``: each sample gets one leading column
          holding its index in the batch, then everything is concatenated.
        * ``gt_boxes`` / ``gt_boxes2d``: zero-padded to the largest number of
          boxes in the batch, giving (batch_size, max_boxes, C) float32.
        * ``images`` / ``depth_maps``: padded with NaN to the largest height
          and width in the batch, then stacked.
        * anything else: stacked with ``np.stack``.

    Args:
        batch_list: list of sample dicts
        _unused: ignored

    Returns:
        dict with the merged entries plus ``'batch_size'``.

    Raises:
        TypeError: when a key cannot be merged; the underlying exception is
            attached as ``__cause__``.
    """
    data_dict = defaultdict(list)
    for cur_sample in batch_list:
        for key, val in cur_sample.items():
            data_dict[key].append(val)
    batch_size = len(batch_list)
    ret = {}

    for key, val in data_dict.items():
        try:
            if key in ['voxels', 'voxel_num_points']:
                ret[key] = np.concatenate(val, axis=0)
            elif key in ['points', 'voxel_coords']:
                # Prepend the sample index so rows stay attributable after
                # concatenation.
                coors = [
                    np.pad(coor, ((0, 0), (1, 0)), mode='constant', constant_values=i)
                    for i, coor in enumerate(val)
                ]
                ret[key] = np.concatenate(coors, axis=0)
            elif key in ['gt_boxes']:
                max_gt = max(len(x) for x in val)
                batch_gt_boxes3d = np.zeros((batch_size, max_gt, val[0].shape[-1]), dtype=np.float32)
                # Indexing by batch position keeps the failure loud when a
                # sample is missing this key.
                for k in range(batch_size):
                    batch_gt_boxes3d[k, :len(val[k]), :] = val[k]
                ret[key] = batch_gt_boxes3d
            elif key in ['gt_boxes2d']:
                max_boxes = max(len(x) for x in val)
                batch_boxes2d = np.zeros((batch_size, max_boxes, val[0].shape[-1]), dtype=np.float32)
                for k in range(batch_size):
                    if val[k].size > 0:
                        batch_boxes2d[k, :len(val[k]), :] = val[k]
                ret[key] = batch_boxes2d
            elif key in ["images", "depth_maps"]:
                # Get largest image size (H, W)
                max_h = max(image.shape[0] for image in val)
                max_w = max(image.shape[1] for image in val)

                # Change size of images
                images = []
                for image in val:
                    pad_h = common_utils.get_pad_params(desired_size=max_h, cur_size=image.shape[0])
                    pad_w = common_utils.get_pad_params(desired_size=max_w, cur_size=image.shape[1])
                    # "images" get a third axis entry of (0, 0), i.e. no
                    # padding on that axis; depth maps are padded on two axes.
                    if key == "images":
                        pad_width = (pad_h, pad_w, (0, 0))
                    else:
                        pad_width = (pad_h, pad_w)

                    # Pad with nan, to be replaced later in the pipeline.
                    image_pad = np.pad(image,
                                       pad_width=pad_width,
                                       mode='constant',
                                       constant_values=np.nan)
                    images.append(image_pad)
                ret[key] = np.stack(images, axis=0)
            else:
                ret[key] = np.stack(val, axis=0)
        except Exception as err:
            # Still a TypeError for callers, but now it names the key and
            # chains the real cause instead of discarding it.
            print('Error in collate_batch: key=%s' % key)
            raise TypeError('Error in collate_batch: key=%s' % key) from err

    ret['batch_size'] = batch_size
    return ret