mmdet3d预处理（上）

最新推荐文章于 2023-07-18 22:16:11 发布

moneymyone

最新推荐文章于 2023-07-18 22:16:11 发布

阅读量965

点赞数 2

文章标签： mmdetection3d

本文链接：https://blog.csdn.net/moneymyone/article/details/131779408

版权

mmdet3d预处理（上）

文章目录

mmdet3d预处理（上）

我们将在下图中展示一个最经典的数据集预处理流程，其中蓝色框表示预处理流程中的各项操作。随着预处理的进行，每一个操作都会添加新的键值（图中标记为绿色）到输出字典中，或者更新当前存在的键值（图中标记为橙色）。

data_pipeline

预处理流程中的各项操作主要分为数据加载、预处理、格式化、测试时的数据增强。

接下来将展示一个用于 PointPillars 模型的数据集预处理流程的例子。

# Training data pipeline for the PointPillars example. Each dict configures one
# transform, selected by its `type`; the entries are listed in the order
# loading -> preprocessing -> formatting.
# `backend_args`, `point_cloud_range` and `class_names` are defined outside
# this snippet.
# NOTE(review): `backend_args` and `DefaultFormatBundle3D` / `Collect3D` look
# like they come from different mmdet3d API generations -- confirm against the
# installed version before copying this config.
train_pipeline = [
    # Data loading: adds `points`.
    dict(
        type='LoadPointsFromFile',
        load_dim=5,
        use_dim=5,
        backend_args=backend_args),
    # Data loading: updates `points`.
    dict(
        type='LoadPointsFromMultiSweeps',
        sweeps_num=10,
        backend_args=backend_args),
    # Data loading: adds `gt_bboxes_3d` and `gt_labels_3d` (plus the related
    # field lists such as `bbox3d_fields`).
    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
    # Preprocessing: adds `pcd_trans`, `pcd_rotation`, `pcd_scale_factor`;
    # updates `points` and every entry of `bbox3d_fields`.
    dict(
        type='GlobalRotScaleTrans',
        rot_range=[-0.3925, 0.3925],
        scale_ratio_range=[0.95, 1.05],
        translation_std=[0, 0, 0]),
    # Preprocessing: adds `flip`, `pcd_horizontal_flip`, `pcd_vertical_flip`;
    # updates `points` and every entry of `bbox3d_fields`.
    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
    # Preprocessing: updates `points`.
    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
    # Preprocessing: updates `gt_bboxes_3d`, `gt_labels_3d`.
    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
    # Preprocessing: updates `gt_bboxes_3d`, `gt_labels_3d`.
    dict(type='ObjectNameFilter', classes=class_names),
    # Preprocessing: updates `points`.
    dict(type='PointShuffle'),
    # Formatting: updates `points`, `gt_bboxes_3d`, `gt_labels_3d`,
    # `gt_bboxes`, `gt_labels`.
    dict(type='DefaultFormatBundle3D', class_names=class_names),
    # Formatting: adds `img_meta` and removes every key not listed in `keys`.
    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
# Test-time data pipeline for the PointPillars example. The two loading steps
# match the training pipeline; the remaining transforms are nested inside the
# test-time augmentation wrapper. No annotation loading or ground-truth
# filtering step is configured here.
# `backend_args`, `point_cloud_range` and `class_names` are defined outside
# this snippet.
# NOTE(review): the wrapper is spelled `MultiScaleFlipAug` here; mmdet3d may
# register the 3D variant under a different name -- confirm against the
# installed version.
test_pipeline = [
    # Data loading: adds `points`.
    dict(
        type='LoadPointsFromFile',
        load_dim=5,
        use_dim=5,
        backend_args=backend_args),
    # Data loading: updates `points`.
    dict(
        type='LoadPointsFromMultiSweeps',
        sweeps_num=10,
        backend_args=backend_args),
    # Test-time augmentation: updates `scale`, `pcd_scale_factor`, `flip`,
    # `flip_direction`, `pcd_horizontal_flip`, `pcd_vertical_flip`.
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1333, 800),
        pts_scale_ratio=1.0,
        flip=False,
        pcd_horizontal_flip=False,
        pcd_vertical_flip=False,
        transforms=[
            # Rotation range, scale range and translation std are set to
            # 0, 1 and 0 respectively.
            dict(
                type='GlobalRotScaleTrans',
                rot_range=[0, 0],
                scale_ratio_range=[1., 1.],
                translation_std=[0, 0, 0]),
            dict(type='RandomFlip3D'),
            # Updates `points`.
            dict(
                type='PointsRangeFilter', point_cloud_range=point_cloud_range),
            # Formatting without labels (`with_label=False`).
            dict(
                type='DefaultFormatBundle3D',
                class_names=class_names,
                with_label=False),
            # Only `points` is kept for the model input.
            dict(type='Collect3D', keys=['points'])
        ])
]

对于每项操作，我们将列出相关的被添加/更新/移除的字典项。

数据加载

LoadPointsFromFile

添加：points

LoadPointsFromMultiSweeps

更新：points

LoadAnnotations3D

添加：gt_bboxes_3d, gt_labels_3d, gt_bboxes, gt_labels, pts_instance_mask, pts_semantic_mask, bbox3d_fields, pts_mask_fields, pts_seg_fields

预处理

GlobalRotScaleTrans

添加：pcd_trans, pcd_rotation, pcd_scale_factor
更新：points, *bbox3d_fields

RandomFlip3D

添加：flip, pcd_horizontal_flip, pcd_vertical_flip
更新：points, *bbox3d_fields

PointsRangeFilter

更新：points

ObjectRangeFilter

更新：gt_bboxes_3d, gt_labels_3d

ObjectNameFilter

更新：gt_bboxes_3d, gt_labels_3d

PointShuffle

更新：points

PointsRangeFilter

更新：points

格式化

DefaultFormatBundle3D

更新：points, gt_bboxes_3d, gt_labels_3d, gt_bboxes, gt_labels

Collect3D

添加：img_meta （由 meta_keys 指定的键值构成的 img_meta）
移除：所有除 keys 指定的键值以外的其他键值

测试时的数据增强

MultiScaleFlipAug

更新: scale, pcd_scale_factor, flip, flip_direction, pcd_horizontal_flip, pcd_vertical_flip （与这些指定的参数对应的增强后的数据列表）

下面会逐一拆解上述预处理涉及到的各个函数。

本篇首先介绍3d数据在预处理时的 Voxel（体素化）的过程。

基类 DetDataPreprocessor

文件路径：/home/randy/anaconda3/envs/mmdet3d_env_py38/lib/python3.8/site-packages/mmdet/models/data_preprocessors/data_preprocessor.py

class DetDataPreprocessor(ImgDataPreprocessor)

支持批量扩充。
考虑到目标检测任务，它将另外附加batch_input_shape和pad_shape到data_samples。

它提供如下数据预处理

整理数据并将其移动到目标设备。
将输入填充到当前批次的最大大小，并定义“pad_value”。填充大小可被定义的“pad_size_divisor”整除
将输入堆叠到batch_inputs。
如果输入的形状为（3， H， W），则将输入从 bgr 转换为 rgb。
使用定义的标准差（std）和均值（mean）对图像进行归一化。
在训练期间进行批量扩充。

派生类 Det3DDataPreprocessor

文件路径：mmdetection3d/mmdet3d/models/data_preprocessors/data_preprocessor.py

class Det3DDataPreprocessor(DetDataPreprocessor)

功能

Collate and move image and point cloud data to the target device.
For point cloud data:
- If no voxelization, directly return list of point cloud data.
- If voxelization is applied, voxelize point cloud according to `voxel_type` and obtain `voxels`.

体素化包括：

dynamic_voxelize_forward
hard_voxelize_forward：实现速度更快，但是牺牲了确定性

非确定性版本相当快，但不是确定性的。

配置文件设置

# Voxelization settings for the data preprocessor. `voxel_size` and
# `point_cloud_range` are defined outside this snippet.
data_preprocessor=dict(
    type='Det3DDataPreprocessor',
    voxel=True,
    voxel_layer=dict(
        max_num_points=30,  # maximum number of points kept in each voxel
        voxel_size=voxel_size,
        max_voxels=(30000, 40000),  # maximum number of voxels for (train, test)
        point_cloud_range=point_cloud_range)),

dynamic_voxelize_kernel

源码

gpu版：projects/BEVFusion/bevfusion/ops/voxel/src/voxelization_cuda.cu
cpu版：projects/BEVFusion/bevfusion/ops/voxel/src/voxelization_cpu.cpp

// Computes the integer voxel coordinate of every point.
//
// For point `index`, the first three features at points[index * num_features]
// are treated as x, y, z and mapped to
//   c = floor((p - coors_min) / voxel_size)
// per axis. The result is written to coors[index * NDim + {0, 1, 2}].
//
// Out-of-range handling (a coordinate is out of range when c < 0 or
// c >= grid): the axes are tested in x, y, z order, and on the first failing
// axis the entries up to and including that axis are set to -1; the remaining
// entries are left untouched. coors[index * NDim] == -1 therefore marks the
// point as invalid in every case.
//
// coors_x_max / coors_y_max / coors_z_max are accepted but not read here; the
// upper bound is enforced through grid_x / grid_y / grid_z.
//
// Out-of-range points skip to the next index with `continue`, not `return`:
// a `return` would end the thread and leave any further indices it is
// responsible for unwritten.
// NOTE(review): this assumes CUDA_1D_KERNEL_LOOP (defined outside this
// snippet) expands to a loop in which one thread may visit several indices --
// confirm against the macro definition.
template <typename T, typename T_int>
__global__ void dynamic_voxelize_kernel(
    const T* points, T_int* coors, const float voxel_x, const float voxel_y,
    const float voxel_z, const float coors_x_min, const float coors_y_min,
    const float coors_z_min, const float coors_x_max, const float coors_y_max,
    const float coors_z_max, const int grid_x, const int grid_y,
    const int grid_z, const int num_points, const int num_features,
    const int NDim) {
  CUDA_1D_KERNEL_LOOP(index, num_points) {
    // Per-point base pointers, computed once to save index arithmetic.
    auto points_offset = points + index * num_features;
    auto coors_offset = coors + index * NDim;

    int c_x = floor((points_offset[0] - coors_x_min) / voxel_x);
    if (c_x < 0 || c_x >= grid_x) {
      coors_offset[0] = -1;
      continue;
    }

    int c_y = floor((points_offset[1] - coors_y_min) / voxel_y);
    if (c_y < 0 || c_y >= grid_y) {
      coors_offset[0] = -1;
      coors_offset[1] = -1;
      continue;
    }

    int c_z = floor((points_offset[2] - coors_z_min) / voxel_z);
    if (c_z < 0 || c_z >= grid_z) {
      coors_offset[0] = -1;
      coors_offset[1] = -1;
      coors_offset[2] = -1;
    } else {
      coors_offset[0] = c_x;
      coors_offset[1] = c_y;
      coors_offset[2] = c_z;
    }
  }
}

思路：

根据 x、y、z 三个方向的体素尺寸 voxel_x、voxel_y、voxel_z，计算每个点对应的体素坐标（voxel id），并判断其是否超出各方向的体素个数 grid_x、grid_y、grid_z 所限定的边界。

   int c_x = floor((points_offset[0] - coors_x_min) / voxel_x);
   int c_y = floor((points_offset[1] - coors_y_min) / voxel_y);
   int c_z = floor((points_offset[2] - coors_z_min) / voxel_z);

赋值给offset

   coors_offset[0] = c_x;
   coors_offset[1] = c_y;
   coors_offset[2] = c_z;

hard_voxelize_kernel

不确定性在于不是每个点都要，也不是每个voxel都要，每个voxel保留一定的点，每帧数据保留一定量的voxel，其余的都需要舍去。

所以很多计算出来的点和voxelidx不符合要求直接省略了，不必继续存储了。

// CPU hard voxelization.
//
// Step 1: run dynamic voxelization into a temporary (num_points x NDim) int
//         tensor to obtain the voxel coordinate of every point; a first entry
//         of -1 marks a point to skip.
// Step 2: walk the points in index order. The first point seen in a voxel
//         allocates the next voxel slot (unless max_voxels has been reached,
//         where -1 means "no limit"), records the slot in coor_to_voxelidx
//         and stores the voxel coordinate in coors. Each point is then
//         appended to its voxel while the voxel holds fewer than max_points
//         points (-1 means "no limit"); further points are dropped.
//
// Outputs written: voxels, coors, num_points_per_voxel, coor_to_voxelidx and
// voxel_num (incremented once per allocated voxel).
// Lookups in coor_to_voxelidx treat -1 as "no voxel allocated yet".
// NOTE(review): this relies on the caller pre-filling coor_to_voxelidx with
// -1 and num_points_per_voxel with 0 -- confirm against the caller.
template <typename T, typename T_int>
void hard_voxelize_kernel(const torch::TensorAccessor<T, 2> points,
                          torch::TensorAccessor<T, 3> voxels,
                          torch::TensorAccessor<T_int, 2> coors,
                          torch::TensorAccessor<T_int, 1> num_points_per_voxel,
                          torch::TensorAccessor<T_int, 3> coor_to_voxelidx,
                          int& voxel_num, const std::vector<float> voxel_size,
                          const std::vector<float> coors_range,
                          const std::vector<int> grid_size,
                          const int max_points, const int max_voxels,
                          const int num_points, const int num_features,
                          const int NDim) {
  // Scratch tensor holding the per-point voxel coordinates.
  const auto cpu_int = at::TensorOptions().dtype(at::kInt).device(at::kCPU);
  at::Tensor temp_coors = at::zeros({num_points, NDim}, cpu_int);

  dynamic_voxelize_kernel<T, int>(points, temp_coors.accessor<int, 2>(),
                                  voxel_size, coors_range, grid_size,
                                  num_points, num_features, NDim);

  auto point_coor = temp_coors.accessor<int, 2>();

  for (int pt = 0; pt < num_points; ++pt) {
    const int cx = point_coor[pt][0];
    if (cx == -1) continue;  // point skipped by dynamic voxelization
    const int cy = point_coor[pt][1];
    const int cz = point_coor[pt][2];

    int voxel = coor_to_voxelidx[cx][cy][cz];
    if (voxel == -1) {
      // First point of this voxel: allocate a slot if the budget allows.
      if (max_voxels != -1 && voxel_num >= max_voxels) continue;
      voxel = voxel_num++;
      coor_to_voxelidx[cx][cy][cz] = voxel;
      for (int d = 0; d < NDim; ++d) {
        coors[voxel][d] = point_coor[pt][d];
      }
    }

    // Append the point unless the voxel is already full.
    const int filled = num_points_per_voxel[voxel];
    const bool has_room = (max_points == -1) || (filled < max_points);
    if (!has_room) continue;

    for (int f = 0; f < num_features; ++f) {
      voxels[voxel][filled][f] = points[pt][f];
    }
    num_points_per_voxel[voxel] += 1;
  }
}

思路：

使用动态体素化获取每个点的体素坐标（即体素序号），然后检查每个体素的最大点数（max_points）和最大体素个数（max_voxels）的限制；
记录每个点对应的 voxel idx
将点放进 voxel中，并记录每个voxel包含的点数

CUDA版本的步骤略有不同:

计算各方向的体素个数（网格尺寸 grid_x、grid_y、grid_z）

   const int grid_x = round((coors_x_max - coors_x_min) / voxel_x);
   const int grid_y = round((coors_y_max - coors_y_min) / voxel_y);
   const int grid_z = round((coors_z_max - coors_z_min) / voxel_z);

将点和voxel 序号进行对应
1. 计算点对应的体素坐标
2. 每个体素内只保留 max_points 个点，超过部分不要

      // For every valid point, finds its position inside its voxel.
      //
      // coor holds NDim ints per point; coor[index * NDim] == -1 marks an
      // invalid point, which is skipped. For a valid point the kernel scans
      // all earlier points (i < index) and counts those with the same
      // (x, y, z) voxel coordinate:
      //   * point_to_pointidx[index] receives the index of the first earlier
      //     point in the same voxel, or `index` itself when there is none;
      //   * point_to_voxelidx[index] receives that count (the point's slot in
      //     the voxel) when it is below max_points. Otherwise the entry is
      //     left untouched -- presumably pre-filled with -1 by the caller,
      //     TODO confirm.
      // max_voxels is accepted but not read here.
      //
      // Each point scans every earlier point, so the work per point grows
      // linearly with its index.
      //
      // Invalid or over-full points move on with `continue` / `break`, not
      // `return`: a `return` would end the thread and leave any further
      // indices it is responsible for unprocessed.
      // NOTE(review): this assumes CUDA_1D_KERNEL_LOOP (defined outside this
      // snippet) expands to a loop bounded by num_points in which one thread
      // may visit several indices -- confirm against the macro definition.
      template <typename T_int>
      __global__ void point_to_voxelidx_kernel(const T_int* coor,
                                               T_int* point_to_voxelidx,
                                               T_int* point_to_pointidx,
                                               const int max_points,
                                               const int max_voxels,
                                               const int num_points, const int NDim) {
        CUDA_1D_KERNEL_LOOP(index, num_points) {
          auto coor_offset = coor + index * NDim;
          // Skip invalid points.
          if (coor_offset[0] == -1) continue;

          int num = 0;
          int coor_x = coor_offset[0];
          int coor_y = coor_offset[1];
          int coor_z = coor_offset[2];
          // Only points that come before this one are examined.
          for (int i = 0; i < index; ++i) {
            auto prev_coor = coor + i * NDim;
            if (prev_coor[0] == -1) continue;

            // Count every earlier point that falls into the same voxel.
            if ((prev_coor[0] == coor_x) && (prev_coor[1] == coor_y) &&
                (prev_coor[2] == coor_z)) {
              num++;
              if (num == 1) {
                // Remember the first point that appeared in this voxel.
                point_to_pointidx[index] = i;
              } else if (num >= max_points) {
                // Voxel already full: stop scanning. Both checks below are
                // false in this case, so nothing more is written.
                break;
              }
            }
          }
          if (num == 0) {
            // First point of its voxel: it refers to itself.
            point_to_pointidx[index] = index;
          }
          if (num < max_points) {
            // Still room in the voxel: record this point's slot.
            point_to_voxelidx[index] = num;
          }
        }
      }

3. 确定体素个数以及体素坐标序号（该步骤可加速10倍）

      // Assigns a voxel index to every point and counts the points per voxel.
      //
      // The body is a plain sequential loop over all points and never reads a
      // thread or block index, so each thread that runs this kernel performs
      // the complete pass -- presumably it is launched with a single thread,
      // TODO confirm against the launch site.
      //
      // For point i, with slot = point_to_voxelidx[i]:
      //   * slot == -1: the point is skipped (invalid, or its voxel is full).
      //   * slot == 0 : the point opens a new voxel. Unless voxel_num[0] has
      //                 already reached max_voxels, the voxel gets index
      //                 voxel_num[0], the counter is incremented,
      //                 coor_to_voxelidx[i] is set to that index and the
      //                 voxel's point count is set to 1.
      //   * otherwise : the point joins the voxel of the first point that
      //                 shares its coordinate (point_to_pointidx[i]), provided
      //                 that point was given a voxel (index != -1).
      // max_points is accepted but not read here.
      template <typename T_int>
      __global__ void determin_voxel_num(
          T_int* num_points_per_voxel, T_int* point_to_voxelidx,
          T_int* point_to_pointidx, T_int* coor_to_voxelidx, T_int* voxel_num,
          const int max_points, const int max_voxels, const int num_points) {
        for (int pt = 0; pt < num_points; ++pt) {
          const int slot = point_to_voxelidx[pt];

          // Invalid point, or a point beyond the per-voxel limit.
          if (slot == -1) continue;

          if (slot != 0) {
            // Follow the link to the first point of the same voxel.
            const int first_pt = point_to_pointidx[pt];
            const int owner = coor_to_voxelidx[first_pt];
            if (owner != -1) {
              coor_to_voxelidx[pt] = owner;
              num_points_per_voxel[owner] += 1;
            }
            continue;
          }

          // First point of a voxel: open a new voxel if the budget allows.
          const int fresh = voxel_num[0];
          if (voxel_num[0] >= max_voxels) continue;
          voxel_num[0] += 1;
          coor_to_voxelidx[pt] = fresh;
          num_points_per_voxel[fresh] = 1;
        }
      }

拷贝点特征到体素；num_features 即每个点的特征个数（x, y, z, intensity 及其他特征）。

      // Copies point features into their voxel slots.
      //
      // The loop index enumerates (point, feature) pairs:
      //   point   = thread_idx / num_features
      //   feature = thread_idx % num_features
      // so nthreads is expected to cover num_features entries per point
      // (presumably num_points * num_features -- TODO confirm at the launch
      // site). A feature is copied only when the point has both a slot
      // (point_to_voxelidx > -1) and a voxel (coor_to_voxelidx > -1).
      //
      // Destination layout: voxels[voxel][slot][feature], flattened with
      // strides (max_points * num_features, num_features, 1).
      // num_points and NDim are accepted but not read here.
      template <typename T, typename T_int>
      __global__ void assign_point_to_voxel(const int nthreads, const T* points,
                                            T_int* point_to_voxelidx,
                                            T_int* coor_to_voxelidx, T* voxels,
                                            const int max_points,
                                            const int num_features,
                                            const int num_points, const int NDim) {
        CUDA_1D_KERNEL_LOOP(thread_idx, nthreads) {
          const int point = thread_idx / num_features;
          const int feature = thread_idx % num_features;

          const int slot = point_to_voxelidx[point];
          const int voxel = coor_to_voxelidx[point];
          const bool assigned = (slot > -1) && (voxel > -1);

          if (assigned) {
            // Start of this voxel's storage, then of the point's slot in it.
            auto voxel_base = voxels + voxel * max_points * num_features;
            auto slot_base = voxel_base + slot * num_features;
            slot_base[feature] = points[thread_idx];
          }
        }
      }

将体素坐标拷贝到每个体素（取自该体素内的第一个点，即 num == 0 的点）

// Writes the coordinate of every allocated voxel.
//
// The loop index enumerates (point, dimension) pairs:
//   point = thread_idx / NDim,  dim = thread_idx % NDim
// Only the first point of a voxel (point_to_voxelidx == 0) that was given a
// voxel (coor_to_voxelidx > -1) contributes: its coordinate entry
// coor[thread_idx] is copied to voxel_coors[voxel * NDim + dim].
// num_points is accepted but not read here.
template <typename T, typename T_int>
__global__ void assign_voxel_coors(const int nthreads, T_int* coor,
                                   T_int* point_to_voxelidx,
                                   T_int* coor_to_voxelidx, T_int* voxel_coors,
                                   const int num_points, const int NDim) {
  CUDA_1D_KERNEL_LOOP(thread_idx, nthreads) {
    const int point = thread_idx / NDim;
    const int dim = thread_idx % NDim;

    const int slot = point_to_voxelidx[point];
    const int voxel = coor_to_voxelidx[point];
    const bool is_voxel_head = (slot == 0) && (voxel > -1);

    if (is_voxel_head) {
      auto voxel_row = voxel_coors + voxel * NDim;
      voxel_row[dim] = coor[thread_idx];
    }
  }
}