1. 体素格子生成
该过程调用spconv库中的实现,主要过程为points_to_voxel
from spconv import spconv_utils
from spconv.spconv_utils import (non_max_suppression_cpu,
points_to_voxel_3d_np,
points_to_voxel_3d_np_mean,
points_to_voxel_3d_with_filtering,
rbbox_intersection, rbbox_iou,
rotate_non_max_suppression_cpu)
try:
from spconv.spconv_utils import non_max_suppression
except ImportError:
pass
def points_to_voxel(points,
                    voxel_size,
                    coors_range,
                    coor_to_voxelidx,
                    max_points=35,
                    max_voxels=20000,
                    full_mean=False,
                    block_filtering=True,
                    block_factor=1,
                    block_size=8,
                    height_threshold=0.2,
                    height_high_threshold=3.0,
                    pad_output=False):
    """Convert 3d points (N, >=3) to voxels using the compiled spconv kernels.

    Args:
        points: [N, ndim] float array. points[:, :3] contain xyz and
            points[:, 3:] contain other information such as reflectivity.
        voxel_size: [3] list/tuple or array, float. xyz voxel size.
        coors_range: [6] list/tuple or array, float. Voxel range in
            xyzxyz (min, max) format.
        coor_to_voxelidx: int array used as a dense coordinate -> voxel index
            map. It is handed to the kernel as scratch space.
        max_points: int. Maximum number of points stored per voxel.
        max_voxels: int. Maximum number of voxels this function creates.
            You should shuffle points before calling this function because
            max_voxels may drop some points.
        full_mean: bool. If true, empty point slots of a voxel are filled
            with the mean of its existing points. Requires
            block_filtering=False.
        block_filtering: bool. Filter voxels by height (intended for lidar
            point clouds). Ignored when full_mean is true.
        block_factor, block_size, height_threshold, height_high_threshold:
            forwarded unchanged to points_to_voxel_3d_with_filtering; their
            exact semantics are defined by that kernel, which is not visible
            here.
        pad_output: bool. Only used with block_filtering. When true, the
            filtered results are written into the front of the max_voxels
            sized buffers and the remainder is zeroed; when false, the
            returned arrays are shrunk to the filtered voxels.

    Returns:
        dict with keys:
            "voxels": [M, max_points, ndim] float array.
            "coordinates": [M, 3] int32 array, zyx order.
            "num_points_per_voxel": [M] int32 array.
            "voxel_point_mask": [M, max_points, 1] array of points.dtype.
            "voxel_num": number of valid voxels.
        M is max_voxels unless block_filtering is on and pad_output is off,
        in which case the arrays hold only the voxels selected by the filter.
    """
    if full_mean:
        assert block_filtering is False, \
            "full_mean requires block_filtering=False"
    if not isinstance(voxel_size, np.ndarray):
        voxel_size = np.array(voxel_size, dtype=points.dtype)
    if not isinstance(coors_range, np.ndarray):
        coors_range = np.array(coors_range, dtype=points.dtype)
    voxelmap_shape = (coors_range[3:] - coors_range[:3]) / voxel_size
    voxelmap_shape = tuple(np.round(voxelmap_shape).astype(np.int32).tolist())
    # Reverse xyz -> zyx to match the coordinate order written by the kernels.
    voxelmap_shape = voxelmap_shape[::-1]
    num_points_per_voxel = np.zeros(shape=(max_voxels, ), dtype=np.int32)
    voxels = np.zeros(shape=(max_voxels, max_points, points.shape[-1]),
                      dtype=points.dtype)
    voxel_point_mask = np.zeros(shape=(max_voxels, max_points),
                                dtype=points.dtype)
    coors = np.zeros(shape=(max_voxels, 3), dtype=np.int32)
    res = {
        "voxels": voxels,
        "coordinates": coors,
        "num_points_per_voxel": num_points_per_voxel,
        "voxel_point_mask": voxel_point_mask,
    }
    if full_mean:
        means = np.zeros(shape=(max_voxels, points.shape[-1]),
                         dtype=points.dtype)
        # The mean kernel declares its buffers as
        # (points, voxel_point_mask, voxels, means, ...): the mask comes
        # BEFORE the voxel buffer, unlike points_to_voxel_3d_np. Passing them
        # the other way round hands a 3-D array to a 2-D accessor.
        voxel_num = points_to_voxel_3d_np_mean(points, voxel_point_mask,
                                               voxels, means, coors,
                                               num_points_per_voxel,
                                               coor_to_voxelidx,
                                               voxel_size.tolist(),
                                               coors_range.tolist(),
                                               max_points, max_voxels)
    else:
        if block_filtering:
            # Per-block min/max buffers over the two trailing axes of the
            # zyx voxel map, downsampled by block_factor.
            block_shape = [*voxelmap_shape[1:]]
            block_shape = [b // block_factor for b in block_shape]
            mins = np.full(block_shape, 99999999, dtype=points.dtype)
            maxs = np.full(block_shape, -99999999, dtype=points.dtype)
            voxel_mask = np.zeros((max_voxels, ), dtype=np.int32)
            voxel_num = points_to_voxel_3d_with_filtering(
                points, voxels, voxel_point_mask, voxel_mask, mins, maxs,
                coors, num_points_per_voxel, coor_to_voxelidx,
                voxel_size.tolist(), coors_range.tolist(), max_points,
                max_voxels, block_factor, block_size, height_threshold,
                height_high_threshold)
            voxel_mask = voxel_mask.astype(np.bool_)
            coors_ = coors[voxel_mask]
            if pad_output:
                # Compact the kept voxels to the front of the fixed-size
                # buffers, then clear everything behind them.
                res["coordinates"][:voxel_num] = coors_
                res["voxels"][:voxel_num] = voxels[voxel_mask]
                res["voxel_point_mask"][:voxel_num] = voxel_point_mask[
                    voxel_mask]
                res["num_points_per_voxel"][:voxel_num] = num_points_per_voxel[
                    voxel_mask]
                res["coordinates"][voxel_num:] = 0
                res["voxels"][voxel_num:] = 0
                res["num_points_per_voxel"][voxel_num:] = 0
                res["voxel_point_mask"][voxel_num:] = 0
            else:
                res["coordinates"] = coors_
                res["voxels"] = voxels[voxel_mask]
                res["num_points_per_voxel"] = num_points_per_voxel[voxel_mask]
                res["voxel_point_mask"] = voxel_point_mask[voxel_mask]
                voxel_num = coors_.shape[0]
        else:
            voxel_num = points_to_voxel_3d_np(points, voxels, voxel_point_mask,
                                              coors, num_points_per_voxel,
                                              coor_to_voxelidx,
                                              voxel_size.tolist(),
                                              coors_range.tolist(), max_points,
                                              max_voxels)
    res["voxel_num"] = voxel_num
    res["voxel_point_mask"] = res["voxel_point_mask"].reshape(
        -1, max_points, 1)
    return res
class VoxelGenerator:
    """Reusable voxelizer bound to a fixed voxel size and point-cloud range.

    The dense coordinate -> voxel-index lookup array is allocated once in the
    constructor and passed to points_to_voxel on every call.

    Args:
        voxel_size: [3] xyz voxel size.
        point_cloud_range: [6] xyzxyz (min, max) range,
            e.g. [0, -40, -3, 70.4, 40, 1].
        max_num_points: maximum number of points kept per voxel.
        max_voxels: default cap on the number of voxels per call.
        full_mean: forwarded to points_to_voxel as its full_mean flag.
    """

    def __init__(self,
                 voxel_size,
                 point_cloud_range,
                 max_num_points,
                 max_voxels=20000,
                 full_mean=True):
        point_cloud_range = np.array(point_cloud_range, dtype=np.float32)
        voxel_size = np.array(voxel_size, dtype=np.float32)
        grid_size = (point_cloud_range[3:] -
                     point_cloud_range[:3]) / voxel_size
        grid_size = np.round(grid_size).astype(np.int64)
        # The lookup array is indexed zyx, hence the reversed grid shape.
        voxelmap_shape = tuple(grid_size.astype(np.int32).tolist())[::-1]
        self._coor_to_voxelidx = np.full(voxelmap_shape, -1, dtype=np.int32)
        self._voxel_size = voxel_size
        self._point_cloud_range = point_cloud_range
        self._max_num_points = max_num_points
        self._max_voxels = max_voxels
        self._grid_size = grid_size
        self._full_mean = full_mean

    def _voxelize(self, points, max_voxels):
        """Run points_to_voxel with this generator's configuration."""
        # points_to_voxel asserts that block_filtering is off whenever
        # full_mean is on, while its own default is block_filtering=True.
        # Disable it explicitly in that case; with full_mean=False the
        # previous behaviour (block_filtering=True) is kept.
        return points_to_voxel(points, self._voxel_size,
                               self._point_cloud_range, self._coor_to_voxelidx,
                               self._max_num_points, max_voxels
                               or self._max_voxels, self._full_mean,
                               block_filtering=not self._full_mean)

    def generate(self, points, max_voxels=None):
        """Voxelize points and return arrays trimmed to the valid voxels.

        Args:
            points: [N, ndim] point array.
            max_voxels: optional per-call voxel cap; falls back to the
                constructor value when None or 0.

        Returns:
            (voxels, coordinates, num_points_per_voxel), each sliced to the
            first voxel_num entries.
        """
        res = self._voxelize(points, max_voxels)
        voxel_num = res["voxel_num"]
        return (res["voxels"][:voxel_num], res["coordinates"][:voxel_num],
                res["num_points_per_voxel"][:voxel_num])

    def generate_multi_gpu(self, points, max_voxels=None):
        """Voxelize points and return the arrays without trimming.

        Same arguments as generate; the arrays are returned exactly as
        points_to_voxel produced them.
        """
        res = self._voxelize(points, max_voxels)
        return (res["voxels"], res["coordinates"],
                res["num_points_per_voxel"])

    @property
    def voxel_size(self):
        return self._voxel_size

    @property
    def max_num_points_per_voxel(self):
        return self._max_num_points

    @property
    def point_cloud_range(self):
        return self._point_cloud_range

    @property
    def grid_size(self):
        return self._grid_size
具体实现在point2voxel.h中以c++语言实现:
// Scatter points into voxels in a single pass over the input.
//
// For every point inside coors_range, the voxel cell is computed per axis,
// stored in reversed axis order (so xyz input becomes zyx coordinates), and
// looked up in coor_to_voxelidx. A cell seen for the first time receives the
// next free voxel slot as long as fewer than max_voxels slots are in use.
// Up to max_points points are copied into each voxel; extra points are
// dropped. Before returning, every lookup entry written here is reset to -1.
//
// Returns the number of voxels created.
template <typename DType, int NDim>
int points_to_voxel_3d_np(py::array_t<DType> points, py::array_t<DType> voxels,
                          py::array_t<DType> voxel_point_mask,
                          py::array_t<int> coors,
                          py::array_t<int> num_points_per_voxel,
                          py::array_t<int> coor_to_voxelidx,
                          std::vector<DType> voxel_size,
                          std::vector<DType> coors_range, int max_points,
                          int max_voxels) {
  auto pts = points.template mutable_unchecked<2>();
  auto vox = voxels.template mutable_unchecked<3>();
  auto point_mask = voxel_point_mask.template mutable_unchecked<2>();
  auto vox_coors = coors.mutable_unchecked<2>();
  auto counts = num_points_per_voxel.mutable_unchecked<1>();
  auto lookup = coor_to_voxelidx.mutable_unchecked<NDim>();

  const auto total_points = pts.shape(0);
  const auto feature_dim = pts.shape(1);

  // Number of cells along each input axis.
  int extent[NDim];
  for (int axis = 0; axis < NDim; ++axis) {
    extent[axis] =
        round((coors_range[NDim + axis] - coors_range[axis]) / voxel_size[axis]);
  }

  int created = 0;
  int cell[NDim];
  for (int p = 0; p < total_points; ++p) {
    // Locate the cell; skip the point if any axis falls outside the grid.
    bool in_range = true;
    for (int axis = 0; axis < NDim; ++axis) {
      const int bin =
          floor((pts(p, axis) - coors_range[axis]) / voxel_size[axis]);
      if (bin < 0 || bin >= extent[axis]) {
        in_range = false;
        break;
      }
      cell[NDim - 1 - axis] = bin;  // reversed axis order
    }
    if (!in_range) {
      continue;
    }

    int slot = lookup(cell[0], cell[1], cell[2]);
    if (slot == -1) {
      // New cell: claim a slot unless the voxel budget is exhausted.
      if (created >= max_voxels) {
        continue;
      }
      slot = created;
      ++created;
      lookup(cell[0], cell[1], cell[2]) = slot;
      for (int d = 0; d < NDim; ++d) {
        vox_coors(slot, d) = cell[d];
      }
    }

    const int filled = counts(slot);
    if (filled >= max_points) {
      continue;  // voxel already full
    }
    point_mask(slot, filled) = DType(1);
    for (int f = 0; f < feature_dim; ++f) {
      vox(slot, filled, f) = pts(p, f);
    }
    counts(slot) += 1;
  }

  // Restore the lookup entries touched above so the array can be reused.
  for (int v = 0; v < created; ++v) {
    lookup(vox_coors(v, 0), vox_coors(v, 1), vox_coors(v, 2)) = -1;
  }
  return created;
}
// Same voxelization as points_to_voxel_3d_np, plus a running per-voxel mean.
//
// While points are scattered into voxels, means(voxel, :) is updated
// incrementally with each stored point. After the pass, every unused point
// slot of each created voxel is filled with that voxel's mean, and the
// lookup entries written here are reset to -1.
//
// NOTE: the buffer order is (points, voxel_point_mask, voxels, means, ...),
// i.e. the mask precedes the voxel buffer. This differs from
// points_to_voxel_3d_np, which takes (points, voxels, voxel_point_mask, ...).
//
// Returns the number of voxels created.
template <typename DType, int NDim>
int points_to_voxel_3d_np_mean(
    py::array_t<DType> points, py::array_t<DType> voxel_point_mask,
    py::array_t<DType> voxels, py::array_t<DType> means, py::array_t<int> coors,
    py::array_t<int> num_points_per_voxel, py::array_t<int> coor_to_voxelidx,
    std::vector<DType> voxel_size, std::vector<DType> coors_range,
    int max_points, int max_voxels) {
  auto pts = points.template mutable_unchecked<2>();
  auto mean_acc = means.template mutable_unchecked<2>();
  auto vox = voxels.template mutable_unchecked<3>();
  auto point_mask = voxel_point_mask.template mutable_unchecked<2>();
  auto vox_coors = coors.mutable_unchecked<2>();
  auto counts = num_points_per_voxel.mutable_unchecked<1>();
  auto lookup = coor_to_voxelidx.mutable_unchecked<NDim>();

  const auto total_points = pts.shape(0);
  const auto feature_dim = pts.shape(1);

  // Number of cells along each input axis.
  int extent[NDim];
  for (int axis = 0; axis < NDim; ++axis) {
    extent[axis] =
        round((coors_range[NDim + axis] - coors_range[axis]) / voxel_size[axis]);
  }

  int created = 0;
  int cell[NDim];
  for (int p = 0; p < total_points; ++p) {
    // Locate the cell; skip the point if any axis falls outside the grid.
    bool in_range = true;
    for (int axis = 0; axis < NDim; ++axis) {
      const int bin =
          floor((pts(p, axis) - coors_range[axis]) / voxel_size[axis]);
      if (bin < 0 || bin >= extent[axis]) {
        in_range = false;
        break;
      }
      cell[NDim - 1 - axis] = bin;  // reversed axis order
    }
    if (!in_range) {
      continue;
    }

    int slot = lookup(cell[0], cell[1], cell[2]);
    if (slot == -1) {
      // New cell: claim a slot unless the voxel budget is exhausted.
      if (created >= max_voxels) {
        continue;
      }
      slot = created;
      ++created;
      lookup(cell[0], cell[1], cell[2]) = slot;
      for (int d = 0; d < NDim; ++d) {
        vox_coors(slot, d) = cell[d];
      }
    }

    const int filled = counts(slot);
    if (filled >= max_points) {
      continue;  // voxel already full; the point does not affect the mean
    }
    point_mask(slot, filled) = DType(1);
    for (int f = 0; f < feature_dim; ++f) {
      vox(slot, filled, f) = pts(p, f);
    }
    counts(slot) += 1;
    // Incremental mean: m += (x - m) / n, with n the new point count.
    for (int f = 0; f < feature_dim; ++f) {
      mean_acc(slot, f) += (pts(p, f) - mean_acc(slot, f)) / DType(filled + 1);
    }
  }

  for (int v = 0; v < created; ++v) {
    // Restore the lookup entry so the array can be reused.
    lookup(vox_coors(v, 0), vox_coors(v, 1), vox_coors(v, 2)) = -1;
    // Pad the remaining point slots of this voxel with its mean.
    const int used = counts(v);
    for (int s = used; s < max_points; ++s) {
      for (int f = 0; f < feature_dim; ++f) {
        vox(v, s, f) = mean_acc(v, f);
      }
    }
  }
  return created;
}
2. 硬体素特征
这个过程比较好理解:对同一个体素格子内所有点的特征按点维度求和,再除以该体素内的实际点数(至少按 1 计,避免除零),得到该体素的均值特征。
class MeanVFE(VFETemplate):
    """Voxel feature encoder that averages the point features of each voxel."""

    def __init__(self, model_cfg, num_point_features, **kwargs):
        super().__init__(model_cfg=model_cfg)
        self.num_point_features = num_point_features

    def get_output_feature_dim(self):
        """Return the per-voxel feature width (same as the input point width)."""
        return self.num_point_features

    def forward(self, batch_dict, **kwargs):
        """Write the per-voxel mean feature into batch_dict['voxel_features'].

        Args:
            batch_dict:
                voxels: (num_voxels, max_points_per_voxel, C)
                voxel_num_points: (num_voxels)
            **kwargs: unused.

        Returns:
            The same batch_dict with 'voxel_features' of shape (num_voxels, C).
        """
        voxels = batch_dict['voxels']
        point_counts = batch_dict['voxel_num_points']

        # Sum over the point axis, then divide by the point count. The count
        # is clamped to at least 1 so an empty voxel does not divide by zero.
        feature_sum = voxels.sum(dim=1)
        denom = torch.clamp_min(point_counts.view(-1, 1), min=1.0).type_as(voxels)

        batch_dict['voxel_features'] = (feature_sum / denom).contiguous()
        return batch_dict
3. 动态体素生成
过程也很简洁:先把每个点的 (batch_idx, x, y, z) 体素坐标合并成一个整数编码,再直接使用 torch.unique 得到其中的非重复编码、每个点的编码在非重复编码中的索引,以及每个非重复编码出现的次数(即该体素内的点数)。然后使用 scatter_mean 按索引直接求得每个体素的均值特征。
class DynamicMeanVFE(VFETemplate):
    """Dynamic voxelization: group points by voxel and average their features.

    No fixed-size voxel buffers are used. Each point's (batch, x, y, z) voxel
    index is folded into one integer key, the keys are deduplicated with
    torch.unique, and features are averaged per key with scatter_mean.
    """

    def __init__(self, model_cfg, num_point_features, voxel_size, grid_size, point_cloud_range, **kwargs):
        super().__init__(model_cfg=model_cfg)
        self.num_point_features = num_point_features

        self.grid_size = torch.tensor(grid_size).cuda()
        self.voxel_size = torch.tensor(voxel_size).cuda()
        self.point_cloud_range = torch.tensor(point_cloud_range).cuda()

        self.voxel_x = voxel_size[0]
        self.voxel_y = voxel_size[1]
        self.voxel_z = voxel_size[2]
        self.x_offset = self.voxel_x / 2 + point_cloud_range[0]
        self.y_offset = self.voxel_y / 2 + point_cloud_range[1]
        self.z_offset = self.voxel_z / 2 + point_cloud_range[2]

        # Strides used to pack (batch, x, y, z) into a single key.
        self.scale_xyz = grid_size[0] * grid_size[1] * grid_size[2]
        self.scale_yz = grid_size[1] * grid_size[2]
        self.scale_z = grid_size[2]

    def get_output_feature_dim(self):
        """Return the per-voxel feature width."""
        return self.num_point_features

    @torch.no_grad()
    def forward(self, batch_dict, **kwargs):
        """Compute mean voxel features directly from the raw point list.

        Args:
            batch_dict:
                points: (N, 1 + C) tensor; column 0 is the batch index and
                    columns 1:4 are x, y, z.
            **kwargs: unused.

        Returns:
            The same batch_dict with
                voxel_features: (num_voxels, C) mean of points[:, 1:] per voxel
                voxel_coords: (num_voxels, 4) int32, ordered (batch, z, y, x)
        """
        points = batch_dict['points']  # (batch_idx, x, y, z, i, e)

        point_coords = torch.floor((points[:, 1:4] - self.point_cloud_range[0:3]) / self.voxel_size).int()
        # Drop points that fall outside the voxel grid.
        mask = ((point_coords >= 0) & (point_coords < self.grid_size)).all(dim=1)
        points = points[mask]
        point_coords = point_coords[mask]

        # Build the key in int64: batch_idx * scale_xyz can exceed the int32
        # range for large grids combined with large batch indices.
        coords64 = point_coords.long()
        merge_coords = points[:, 0].long() * self.scale_xyz + \
            coords64[:, 0] * self.scale_yz + \
            coords64[:, 1] * self.scale_z + \
            coords64[:, 2]
        points_data = points[:, 1:].contiguous()

        unq_coords, unq_inv = torch.unique(merge_coords, return_inverse=True)
        points_mean = torch_scatter.scatter_mean(points_data, unq_inv, dim=0)

        # Unpack the key back into (batch, x, y, z) ...
        voxel_coords = torch.stack((unq_coords // self.scale_xyz,
                                    (unq_coords % self.scale_xyz) // self.scale_yz,
                                    (unq_coords % self.scale_yz) // self.scale_z,
                                    unq_coords % self.scale_z), dim=1)
        # ... and reorder to (batch, z, y, x). Each component fits in int32,
        # so the output dtype stays int32 as before.
        voxel_coords = voxel_coords[:, [0, 3, 2, 1]].int()

        batch_dict['voxel_features'] = points_mean.contiguous()
        batch_dict['voxel_coords'] = voxel_coords.contiguous()
        return batch_dict
补充:
torch.unique(input, sorted=True, return_inverse=False, return_counts=False, dim=None)
input: 待处理的tensor
sorted:是否对返回的无重复张量按照数值进行排列,默认是升序排列的
return_inverse: 是否返回原始tensor中的每个元素在这个无重复张量中的索引
return_counts: 统计原始张量中每个独立元素的重复个数
dim: 沿着哪个维度进行unique的处理。默认为None,此时先把输入展平,再对所有元素去重;指定dim后,沿该维度切出的每个子张量(例如二维张量在dim=0时的每一行)会被当作一个整体元素来去重。如果处理的张量都是一维的,那么这个不需要理会。
import torch

# Small 1-D input with repeated values, used for every call below.
x = torch.tensor([4, 0, 1, 2, 1, 2, 3])
print(x)

# Run torch.unique once per keyword configuration and print each result.
for options in (
    # All defaults: the distinct values in ascending order.
    #   tensor([0, 1, 2, 3, 4])
    {},
    # sorted=False: the distinct values with no ordering guarantee. The
    # original note recorded tensor([3, 2, 1, 0, 4]) here, which is not the
    # order of first appearance in x.
    {"sorted": False},
    # return_inverse=True: also returns, for every element of x, its index in
    # the unique tensor (x[0]=4 -> 4, x[1]=0 -> 0, x[6]=3 -> 3).
    #   (tensor([0, 1, 2, 3, 4]), tensor([4, 0, 1, 2, 1, 2, 3]))
    {"return_inverse": True},
    # return_counts=True: also returns how often each unique value occurs
    # (0 once, 1 twice, 2 twice, ...).
    #   (tensor([0, 1, 2, 3, 4]), tensor([1, 2, 2, 1, 1]))
    {"return_counts": True},
):
    out = torch.unique(x, **options)
    print(out)