OpenPcdet中的点云体素化-VFE实现

最新推荐文章于 2025-01-17 18:16:58 发布

Arthur.AI

最新推荐文章于 2025-01-17 18:16:58 发布

阅读量1k

点赞数 1

分类专栏：自动驾驶与3D视觉文章标签： python 开发语言

本文链接：https://blog.csdn.net/qq_34106574/article/details/130724496

版权

自动驾驶与3D视觉专栏收录该内容

22 篇文章

订阅专栏

1. 体素格子生成

该过程调用spconv库中的实现，主要过程为points_to_voxel

from spconv import spconv_utils
from spconv.spconv_utils import (non_max_suppression_cpu,
                                 points_to_voxel_3d_np,
                                 points_to_voxel_3d_np_mean,
                                 points_to_voxel_3d_with_filtering,
                                 rbbox_intersection, rbbox_iou,
                                 rotate_non_max_suppression_cpu)

try:
    from spconv.spconv_utils import non_max_suppression
except ImportError:
    pass


def points_to_voxel(points,
                    voxel_size,
                    coors_range,
                    coor_to_voxelidx,
                    max_points=35,
                    max_voxels=20000,
                    full_mean=False,
                    block_filtering=True,
                    block_factor=1,
                    block_size=8,
                    height_threshold=0.2,
                    height_high_threshold=3.0,
                    pad_output=False):
    """convert 3d points(N, >=3) to voxels. This version calculate
    everything in one loop. now it takes only 0.8ms(~6k voxels) 
    with c++ and 3.2ghz cpu.

    Args:
        points: [N, ndim] float tensor. points[:, :3] contain xyz points and
            points[:, 3:] contain other information such as reflectivity.
        voxel_size: [3] list/tuple or array, float. xyz, indicate voxel size
        coors_range: [6] list/tuple or array, float. indicate voxel range.
            format: xyzxyz, minmax
        coor_to_voxelidx: int array. used as a dense map.
        max_points: int. indicate maximum points contained in a voxel.
        max_voxels: int. indicate maximum voxels this function create.
            for voxelnet, 20000 is a good choice. you should shuffle points
            before call this function because max_voxels may drop some points.
        full_mean: bool. if true, all empty points in voxel will be filled with mean
            of exist points.
        block_filtering: filter voxels by height. used for lidar point cloud.
            use some visualization tool to see filtered result.
    Returns:
        voxels: [M, max_points, ndim] float tensor. only contain points.
        coordinates: [M, 3] int32 tensor. zyx format.
        num_points_per_voxel: [M] int32 tensor.
    """
    if full_mean:
        assert block_filtering is False
    if not isinstance(voxel_size, np.ndarray):
        voxel_size = np.array(voxel_size, dtype=points.dtype)
    if not isinstance(coors_range, np.ndarray):
        coors_range = np.array(coors_range, dtype=points.dtype)
    voxelmap_shape = (coors_range[3:] - coors_range[:3]) / voxel_size
    voxelmap_shape = tuple(np.round(voxelmap_shape).astype(np.int32).tolist())
    voxelmap_shape = voxelmap_shape[::-1]
    num_points_per_voxel = np.zeros(shape=(max_voxels, ), dtype=np.int32)
    voxels = np.zeros(shape=(max_voxels, max_points, points.shape[-1]),
                      dtype=points.dtype)
    voxel_point_mask = np.zeros(shape=(max_voxels, max_points),
                                dtype=points.dtype)
    coors = np.zeros(shape=(max_voxels, 3), dtype=np.int32)
    res = {
        "voxels": voxels,
        "coordinates": coors,
        "num_points_per_voxel": num_points_per_voxel,
        "voxel_point_mask": voxel_point_mask,
    }
    if full_mean:
        means = np.zeros(shape=(max_voxels, points.shape[-1]),
                         dtype=points.dtype)
        voxel_num = points_to_voxel_3d_np_mean(points, voxels,
                                               voxel_point_mask, means, coors,
                                               num_points_per_voxel,
                                               coor_to_voxelidx,
                                               voxel_size.tolist(),
                                               coors_range.tolist(),
                                               max_points, max_voxels)
    else:
        if block_filtering:
            block_shape = [*voxelmap_shape[1:]]
            block_shape = [b // block_factor for b in block_shape]
            mins = np.full(block_shape, 99999999, dtype=points.dtype)
            maxs = np.full(block_shape, -99999999, dtype=points.dtype)
            voxel_mask = np.zeros((max_voxels, ), dtype=np.int32)
            voxel_num = points_to_voxel_3d_with_filtering(
                points, voxels, voxel_point_mask, voxel_mask, mins, maxs,
                coors, num_points_per_voxel, coor_to_voxelidx,
                voxel_size.tolist(), coors_range.tolist(), max_points,
                max_voxels, block_factor, block_size, height_threshold,
                height_high_threshold)
            voxel_mask = voxel_mask.astype(np.bool_)
            coors_ = coors[voxel_mask]
            if pad_output:
                res["coordinates"][:voxel_num] = coors_
                res["voxels"][:voxel_num] = voxels[voxel_mask]
                res["voxel_point_mask"][:voxel_num] = voxel_point_mask[
                    voxel_mask]

                res["num_points_per_voxel"][:voxel_num] = num_points_per_voxel[
                    voxel_mask]
                res["coordinates"][voxel_num:] = 0
                res["voxels"][voxel_num:] = 0
                res["num_points_per_voxel"][voxel_num:] = 0
                res["voxel_point_mask"][voxel_num:] = 0
            else:
                res["coordinates"] = coors_
                res["voxels"] = voxels[voxel_mask]
                res["num_points_per_voxel"] = num_points_per_voxel[voxel_mask]
                res["voxel_point_mask"] = voxel_point_mask[voxel_mask]
            voxel_num = coors_.shape[0]
        else:
            voxel_num = points_to_voxel_3d_np(points, voxels, voxel_point_mask,
                                              coors, num_points_per_voxel,
                                              coor_to_voxelidx,
                                              voxel_size.tolist(),
                                              coors_range.tolist(), max_points,
                                              max_voxels)
    res["voxel_num"] = voxel_num
    res["voxel_point_mask"] = res["voxel_point_mask"].reshape(
        -1, max_points, 1)
    return res


class VoxelGenerator:
    def __init__(self,
                 voxel_size,
                 point_cloud_range,
                 max_num_points,
                 max_voxels=20000,
                 full_mean=True):
        point_cloud_range = np.array(point_cloud_range, dtype=np.float32)
        # [0, -40, -3, 70.4, 40, 1]
        voxel_size = np.array(voxel_size, dtype=np.float32)
        grid_size = (point_cloud_range[3:] -
                     point_cloud_range[:3]) / voxel_size
        grid_size = np.round(grid_size).astype(np.int64)
        voxelmap_shape = tuple(np.round(grid_size).astype(np.int32).tolist())
        voxelmap_shape = voxelmap_shape[::-1]

        self._coor_to_voxelidx = np.full(voxelmap_shape, -1, dtype=np.int32)
        self._voxel_size = voxel_size
        self._point_cloud_range = point_cloud_range
        self._max_num_points = max_num_points
        self._max_voxels = max_voxels
        self._grid_size = grid_size
        self._full_mean = full_mean

    def generate(self, points, max_voxels=None):
        res = points_to_voxel(points, self._voxel_size,
                              self._point_cloud_range, self._coor_to_voxelidx,
                              self._max_num_points, max_voxels
                              or self._max_voxels, self._full_mean)
        voxels = res["voxels"]
        coors = res["coordinates"]
        num_points_per_voxel = res["num_points_per_voxel"]
        voxel_num = res["voxel_num"]
        coors = coors[:voxel_num]
        voxels = voxels[:voxel_num]
        num_points_per_voxel = num_points_per_voxel[:voxel_num]

        return (voxels, coors, num_points_per_voxel)

    def generate_multi_gpu(self, points, max_voxels=None):
        res = points_to_voxel(points, self._voxel_size,
                              self._point_cloud_range, self._coor_to_voxelidx,
                              self._max_num_points, max_voxels
                              or self._max_voxels, self._full_mean)
        voxels = res["voxels"]
        coors = res["coordinates"]
        num_points_per_voxel = res["num_points_per_voxel"]
        voxel_num = res["voxel_num"]
        return (voxels, coors, num_points_per_voxel)

    @property
    def voxel_size(self):
        return self._voxel_size

    @property
    def max_num_points_per_voxel(self):
        return self._max_num_points

    @property
    def point_cloud_range(self):
        return self._point_cloud_range

    @property
    def grid_size(self):
        return self._grid_size

具体实现在point2voxel.h中以c++语言实现：


template <typename DType, int NDim>
int points_to_voxel_3d_np(py::array_t<DType> points, py::array_t<DType> voxels,
                          py::array_t<DType> voxel_point_mask,
                          py::array_t<int> coors,
                          py::array_t<int> num_points_per_voxel,
                          py::array_t<int> coor_to_voxelidx,
                          std::vector<DType> voxel_size,
                          std::vector<DType> coors_range, int max_points,
                          int max_voxels) {
  auto points_rw = points.template mutable_unchecked<2>();
  auto voxels_rw = voxels.template mutable_unchecked<3>();
  auto voxel_point_mask_rw = voxel_point_mask.template mutable_unchecked<2>();
  auto coors_rw = coors.mutable_unchecked<2>();
  auto num_points_per_voxel_rw = num_points_per_voxel.mutable_unchecked<1>();
  auto coor_to_voxelidx_rw = coor_to_voxelidx.mutable_unchecked<NDim>();
  auto N = points_rw.shape(0);
  auto num_features = points_rw.shape(1);
  // auto ndim = points_rw.shape(1) - 1;
  constexpr int ndim_minus_1 = NDim - 1;
  int voxel_num = 0;
  bool failed = false;
  int coor[NDim];
  int c;
  int grid_size[NDim];
  for (int i = 0; i < NDim; ++i) {
    grid_size[i] =
        round((coors_range[NDim + i] - coors_range[i]) / voxel_size[i]);
  }
  int voxelidx, num;
  for (int i = 0; i < N; ++i) {
    failed = false;
    for (int j = 0; j < NDim; ++j) {
      c = floor((points_rw(i, j) - coors_range[j]) / voxel_size[j]);
      if ((c < 0 || c >= grid_size[j])) {
        failed = true;
        break;
      }
      coor[ndim_minus_1 - j] = c;
    }
    if (failed)
      continue;
    voxelidx = coor_to_voxelidx_rw(coor[0], coor[1], coor[2]);
    if (voxelidx == -1) {
      voxelidx = voxel_num;
      if (voxel_num >= max_voxels)
        continue;
      voxel_num += 1;
      coor_to_voxelidx_rw(coor[0], coor[1], coor[2]) = voxelidx;
      for (int k = 0; k < NDim; ++k) {
        coors_rw(voxelidx, k) = coor[k];
      }
    }
    num = num_points_per_voxel_rw(voxelidx);
    if (num < max_points) {
      voxel_point_mask_rw(voxelidx, num) = DType(1);
      for (int k = 0; k < num_features; ++k) {
        voxels_rw(voxelidx, num, k) = points_rw(i, k);
      }
      num_points_per_voxel_rw(voxelidx) += 1;
    }
  }
  for (int i = 0; i < voxel_num; ++i) {
    coor_to_voxelidx_rw(coors_rw(i, 0), coors_rw(i, 1), coors_rw(i, 2)) = -1;
  }
  return voxel_num;
}

template <typename DType, int NDim>
int points_to_voxel_3d_np_mean(
    py::array_t<DType> points, py::array_t<DType> voxel_point_mask,
    py::array_t<DType> voxels, py::array_t<DType> means, py::array_t<int> coors,
    py::array_t<int> num_points_per_voxel, py::array_t<int> coor_to_voxelidx,
    std::vector<DType> voxel_size, std::vector<DType> coors_range,
    int max_points, int max_voxels) {
  auto points_rw = points.template mutable_unchecked<2>();
  auto means_rw = means.template mutable_unchecked<2>();
  auto voxels_rw = voxels.template mutable_unchecked<3>();
  auto voxel_point_mask_rw = voxel_point_mask.template mutable_unchecked<2>();
  auto coors_rw = coors.mutable_unchecked<2>();
  auto num_points_per_voxel_rw = num_points_per_voxel.mutable_unchecked<1>();
  auto coor_to_voxelidx_rw = coor_to_voxelidx.mutable_unchecked<NDim>();
  auto N = points_rw.shape(0);
  auto num_features = points_rw.shape(1);
  // auto ndim = points_rw.shape(1) - 1;
  constexpr int ndim_minus_1 = NDim - 1;
  int voxel_num = 0;
  bool failed = false;
  int coor[NDim];
  int c;
  int grid_size[NDim];
  for (int i = 0; i < NDim; ++i) {
    grid_size[i] =
        round((coors_range[NDim + i] - coors_range[i]) / voxel_size[i]);
  }
  int voxelidx, num;
  for (int i = 0; i < N; ++i) {
    failed = false;
    for (int j = 0; j < NDim; ++j) {
      c = floor((points_rw(i, j) - coors_range[j]) / voxel_size[j]);
      if ((c < 0 || c >= grid_size[j])) {
        failed = true;
        break;
      }
      coor[ndim_minus_1 - j] = c;
    }
    if (failed)
      continue;
    voxelidx = coor_to_voxelidx_rw(coor[0], coor[1], coor[2]);
    if (voxelidx == -1) {
      voxelidx = voxel_num;
      if (voxel_num >= max_voxels)
        continue;
      voxel_num += 1;
      coor_to_voxelidx_rw(coor[0], coor[1], coor[2]) = voxelidx;
      for (int k = 0; k < NDim; ++k) {
        coors_rw(voxelidx, k) = coor[k];
      }
    }
    num = num_points_per_voxel_rw(voxelidx);
    if (num < max_points) {
      voxel_point_mask_rw(voxelidx, num) = DType(1);
      for (int k = 0; k < num_features; ++k) {
        voxels_rw(voxelidx, num, k) = points_rw(i, k);
      }
      num_points_per_voxel_rw(voxelidx) += 1;
      for (int k = 0; k < num_features; ++k) {
        means_rw(voxelidx, k) +=
            (points_rw(i, k) - means_rw(voxelidx, k)) / DType(num + 1);
      }
    }
  }
  for (int i = 0; i < voxel_num; ++i) {
    coor_to_voxelidx_rw(coors_rw(i, 0), coors_rw(i, 1), coors_rw(i, 2)) = -1;
    num = num_points_per_voxel_rw(i);
    for (int j = num; j < max_points; ++j) {
      for (int k = 0; k < num_features; ++k) {
        voxels_rw(i, j, k) = means_rw(i, k);
      }
    }
  }
  return voxel_num;
}

2. 硬体素特征

这个过程比较好理解，就是对同一体素格子中求和，然后求均值。


class MeanVFE(VFETemplate):
    def __init__(self, model_cfg, num_point_features, **kwargs):
        super().__init__(model_cfg=model_cfg)
        self.num_point_features = num_point_features

    def get_output_feature_dim(self):
        return self.num_point_features

    def forward(self, batch_dict, **kwargs):
        """
        Args:
            batch_dict:
                voxels: (num_voxels, max_points_per_voxel, C)
                voxel_num_points: optional (num_voxels)
            **kwargs:

        Returns:
            vfe_features: (num_voxels, C)
        """
        voxel_features, voxel_num_points = batch_dict['voxels'], batch_dict['voxel_num_points']
        points_mean = voxel_features[:, :, :].sum(dim=1, keepdim=False)
        normalizer = torch.clamp_min(voxel_num_points.view(-1, 1), min=1.0).type_as(voxel_features)
        points_mean = points_mean / normalizer
        batch_dict['voxel_features'] = points_mean.contiguous()

        return batch_dict

3. 动态体素生成

过程也很简洁，直接使用torch.unique 输出体素坐标中非重复坐标，原始坐标在非重复坐标中的索引，以及非重复坐标的个数。然后使用scatter_mean直接求得均值。


class DynamicMeanVFE(VFETemplate):
    def __init__(self, model_cfg, num_point_features, voxel_size, grid_size, point_cloud_range, **kwargs):
        super().__init__(model_cfg=model_cfg)
        self.num_point_features = num_point_features

        self.grid_size = torch.tensor(grid_size).cuda()
        self.voxel_size = torch.tensor(voxel_size).cuda()
        self.point_cloud_range = torch.tensor(point_cloud_range).cuda()

        self.voxel_x = voxel_size[0]
        self.voxel_y = voxel_size[1]
        self.voxel_z = voxel_size[2]
        self.x_offset = self.voxel_x / 2 + point_cloud_range[0]
        self.y_offset = self.voxel_y / 2 + point_cloud_range[1]
        self.z_offset = self.voxel_z / 2 + point_cloud_range[2]

        self.scale_xyz = grid_size[0] * grid_size[1] * grid_size[2]
        self.scale_yz = grid_size[1] * grid_size[2]
        self.scale_z = grid_size[2]

    def get_output_feature_dim(self):
        return self.num_point_features

    @torch.no_grad()
    def forward(self, batch_dict, **kwargs):
        """
        Args:
            batch_dict:
                voxels: (num_voxels, max_points_per_voxel, C)
                voxel_num_points: optional (num_voxels)
            **kwargs:

        Returns:
            vfe_features: (num_voxels, C)
        """
        batch_size = batch_dict['batch_size']
        points = batch_dict['points'] # (batch_idx, x, y, z, i, e)

        # # debug
        point_coords = torch.floor((points[:, 1:4] - self.point_cloud_range[0:3]) / self.voxel_size).int()
        mask = ((point_coords >= 0) & (point_coords < self.grid_size)).all(dim=1)
        points = points[mask]
        point_coords = point_coords[mask]
        merge_coords = points[:, 0].int() * self.scale_xyz + \
                        point_coords[:, 0] * self.scale_yz + \
                        point_coords[:, 1] * self.scale_z + \
                        point_coords[:, 2]
        points_data = points[:, 1:].contiguous()
        
        unq_coords, unq_inv, unq_cnt = torch.unique(merge_coords, return_inverse=True, return_counts=True)

        points_mean = torch_scatter.scatter_mean(points_data, unq_inv, dim=0)
        
        unq_coords = unq_coords.int()
        voxel_coords = torch.stack((unq_coords // self.scale_xyz,
                                    (unq_coords % self.scale_xyz) // self.scale_yz,
                                    (unq_coords % self.scale_yz) // self.scale_z,
                                    unq_coords % self.scale_z), dim=1)
        voxel_coords = voxel_coords[:, [0, 3, 2, 1]]
        
        batch_dict['voxel_features'] = points_mean.contiguous()
        batch_dict['voxel_coords'] = voxel_coords.contiguous()
        return batch_dict

补充：

torch.unique(input, sorted=True, return_inverse=False, return_counts=False, dim=None)

input: 待处理的tensor

sorted：是否对返回的无重复张量按照数值进行排列，默认是生序排列的

return_inverse: 是否返回原始tensor中的每个元素在这个无重复张量中的索引

return_counts: 统计原始张量中每个独立元素的重复个数

dim: 值沿着哪个维度进行unique的处理，这个我试验后没有搞懂怎样的机理。如果处理的张量都是一维的，那么这个不需要理会。

import torch
 
x = torch.tensor([4,0,1,2,1,2,3])#生成一个tensor,作为实验输入
print(x)
 
out = torch.unique(x) #所有参数都设置为默认的
print(out)#将处理结果打印出来
#结果如下：
#tensor([0, 1, 2, 3, 4])   #将x中的不重复元素挑了出来，并且默认为生序排列
 
out = torch.unique(x,sorted=False)#将默认的生序排列改为False
print(out)
#输出结果如下：
#tensor([3, 2, 1, 0, 4])  #将x中的独立元素找了出来，就按照原始顺序输出
 
out = torch.unique(x,return_inverse=True)#将原始数据中的每个元素在新生成的独立元素张量中的索引输出
print(out)
#输出结果如下：
#(tensor([0, 1, 2, 3, 4]), tensor([4, 0, 1, 2, 1, 2, 3]))  #第一个张量是排序后输出的独立张量，第二个结果对应着原始数据中的每个元素在新的独立无重复张量中的索引，比如x[0]=4,在新的张量中的索引为4, x[1]=0,在新的张量中的索引为0，x[6]=3,在新的张量中的索引为3
 
out = torch.unique(x,return_counts=True) #返回每个独立元素的个数
print(out)
#输出结果如下
#(tensor([0, 1, 2, 3, 4]), tensor([1, 2, 2, 1, 1]))  #0这个元素在原始数据中的数量为1,1这个元素在原始数据中的数量为2