PointPillars
pointpillars.py
A good article on VoxelNet: VoxelNet.
A good article on PointPillars: Pillar
paper_read_PointPillar
Introduction to Autonomous Vehicle Systems (28): VoxelNet-based LiDAR Point Cloud Vehicle Detection and a ROS Implementation
In short, VoxelNet extracts a feature for each voxel with VFE layers, then applies a 3D CNN to aggregate global spatial features.
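That per-voxel max-pool followed by 3D convolution can be sketched in a few lines of plain PyTorch (hypothetical shapes and layer sizes, not the actual SECOND configuration):

import torch
from torch import nn

K, T, C = 1000, 35, 7                  # K voxels, up to T points each, C features per point
points = torch.randn(K, T, C)          # zero-padded per-voxel point features

vfe = nn.Linear(C, 128)                # pointwise embedding (BN/ReLU of the real VFE omitted)
voxel_feats = torch.relu(vfe(points)).max(dim=1)[0]   # [K, 128], one feature per voxel

# After scattering voxel_feats into a dense grid [N, 128, D, H, W], VoxelNet
# runs 3D convolutions to aggregate spatial context:
conv3d = nn.Conv3d(128, 64, kernel_size=3, stride=(2, 1, 1), padding=1)
grid = torch.randn(1, 128, 10, 40, 36) # hypothetical (small) dense grid
out = conv3d(grid)                     # [1, 64, 5, 40, 36]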
For training, PointPillars uses two separate models: one trained on cars alone, and one trained on pedestrians and cyclists together.
Image source: zhihu_pillar
Image source: Zhihu
VoxelNet details: Zhihu, "Apple's LiDAR processing approach for self-driving cars"
"""
PointPillars fork from SECOND.
Code written by Alex Lang and Oscar Beijbom, 2018.
Licensed under MIT License [see LICENSE].
"""
import torch
from torch import nn
from torch.nn import functional as F
from second.pytorch.utils import get_paddings_indicator
from torchplus.nn import Empty
from torchplus.tools import change_default_args
class PFNLayer(nn.Module):
def __init__(self,
in_channels,
out_channels,
use_norm=True,
last_layer=False):
"""
Pillar Feature Net Layer.
The Pillar Feature Net could be composed of a series of these layers, but the PointPillars paper results only
used a single PFNLayer. This layer performs a similar role as second.pytorch.voxelnet.VFELayer.
:param in_channels: <int>. Number of input channels.
:param out_channels: <int>. Number of output channels.
:param use_norm: <bool>. Whether to include BatchNorm.
:param last_layer: <bool>. If last_layer, there is no concatenation of features.
"""
super().__init__()
self.name = 'PFNLayer'
self.last_vfe = last_layer
if not self.last_vfe:
out_channels = out_channels // 2
self.units = out_channels
"""
" / "就表示 浮点数除法,返回浮点结果;" // "表示整数除法。
在 Python 2.2 :要引用: from __future__ import division
"""
if use_norm:
BatchNorm1d = change_default_args(eps=1e-3, momentum=0.01)(nn.BatchNorm1d)
Linear = change_default_args(bias=False)(nn.Linear)
else:
BatchNorm1d = Empty
Linear = change_default_args(bias=True)(nn.Linear)
self.linear = Linear(in_channels, self.units)
self.norm = BatchNorm1d(self.units)
def forward(self, inputs):
# inputs: [num_pillars, num_points_per_pillar, in_channels]
x = self.linear(inputs)
# BatchNorm1d normalizes over the channel dim, hence the permutes around it
x = self.norm(x.permute(0, 2, 1).contiguous()).permute(0, 2, 1).contiguous()
x = F.relu(x)
# max-pool over the points of each pillar: [num_pillars, 1, units]
x_max = torch.max(x, dim=1, keepdim=True)[0]
if self.last_vfe:
return x_max
else:
# broadcast the pillar-wise max back to every point and concatenate
x_repeat = x_max.repeat(1, inputs.shape[1], 1)
x_concatenated = torch.cat([x, x_repeat], dim=2)
return x_concatenated
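A quick shape check for PFNLayer (a sketch, assuming the imports above resolve; all data random):

# Hypothetical smoke test: 100 pillars, 32 points per pillar, 9 input channels
# (x, y, z, r plus the 5 decoration channels added by PillarFeatureNet below).
pfn = PFNLayer(in_channels=9, out_channels=64, use_norm=True, last_layer=True)
pillars = torch.randn(100, 32, 9)
out = pfn(pillars)
print(out.shape)  # torch.Size([100, 1, 64]) -- one 64-d feature per pillar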
class PillarFeatureNet(nn.Module):
def __init__(self,
num_input_features=4,
use_norm=True,
num_filters=(64,),
with_distance=False,
voxel_size=(0.2, 0.2, 4),
pc_range=(0, -40, -3, 70.4, 40, 1)):
"""
Pillar Feature Net.
The network prepares the pillar features and performs forward pass through PFNLayers. This net performs a
similar role to SECOND's second.pytorch.voxelnet.VoxelFeatureExtractor.
:param num_input_features: <int>. Number of input features, either x, y, z or x, y, z, r.
:param use_norm: <bool>. Whether to include BatchNorm.
:param num_filters: (<int>: N). Number of features in each of the N PFNLayers.
:param with_distance: <bool>. Whether to include Euclidean distance to points.
:param voxel_size: (<float>: 3). Size of voxels, only utilize x and y size.
:param pc_range: (<float>: 6). Point cloud range, only utilize x and y min.
"""
super().__init__()
self.name = 'PillarFeatureNet'
assert len(num_filters) > 0
num_input_features += 5  # decoration adds x_c, y_c, z_c (cluster offsets) and x_p, y_p (pillar-center offsets)
if with_distance:
num_input_features += 1
self._with_distance = with_distance
# Create PillarFeatureNet layers
num_filters = [num_input_features] + list(num_filters)
pfn_layers = []
for i in range(len(num_filters) - 1):
in_filters = num_filters[i]
out_filters = num_filters[i + 1]
if i < len(num_filters) - 2:
last_layer = False
else:
last_layer = True
pfn_layers.append(PFNLayer(in_filters, out_filters, use_norm, last_layer=last_layer))
self.pfn_layers = nn.ModuleList(pfn_layers)
# Need pillar (voxel) size and x/y offset in order to calculate pillar offset
self.vx = voxel_size[0]
self.vy = voxel_size[1]
self.x_offset = self.vx / 2 + pc_range[0]
self.y_offset = self.vy / 2 + pc_range[1]
def forward(self, features, num_voxels, coors):
# Find distance of x, y, and z from cluster center
points_mean = features[:, :, :3].sum(dim=1, keepdim=True) / num_voxels.type_as(features).view(-1, 1, 1)
f_cluster = features[:, :, :3] - points_mean
# Find distance of x, y, and z from pillar center
f_center = torch.zeros_like(features[:, :, :2])
f_center[:, :, 0] = features[:, :, 0] - (coors[:, 3].float().unsqueeze(1) * self.vx + self.x_offset)
f_center[:, :, 1] = features[:, :, 1] - (coors[:, 2].float().unsqueeze(1) * self.vy + self.y_offset)
# Combine together feature decorations
features_ls = [features, f_cluster, f_center]
if self._with_distance:
points_dist = torch.norm(features[:, :, :3], 2, 2, keepdim=True)
features_ls.append(points_dist)
features = torch.cat(features_ls, dim=-1)
# The feature decorations were calculated without regard to whether pillar was empty. Need to ensure that
# empty pillars remain set to zeros.
voxel_count = features.shape[1]
mask = get_paddings_indicator(num_voxels, voxel_count, axis=0)
mask = torch.unsqueeze(mask, -1).type_as(features)
features *= mask
# Forward pass through PFNLayers
for pfn in self.pfn_layers:
features = pfn(features)
return features.squeeze()
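To make the pillar-center decoration concrete, here is a small worked example using the default voxel_size and pc_range above (all numbers hypothetical):

vx, x_min = 0.2, 0.0
x_offset = vx / 2 + x_min                   # 0.1
ix = 17                                     # hypothetical pillar column index (coors[:, 3])
pillar_center_x = ix * vx + x_offset        # 3.5
point_x = 3.42                              # hypothetical point inside that pillar
f_center_x = point_x - pillar_center_x      # -0.08, the x_p decoration channel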
class PointPillarsScatter(nn.Module):
def __init__(self,
output_shape,
num_input_features=64):
"""
Point Pillar's Scatter.
Converts learned features from dense tensor to sparse pseudo image. This replaces SECOND's
second.pytorch.voxelnet.SparseMiddleExtractor.
:param output_shape: ([int]: 4). Required output shape of features.
:param num_input_features: <int>. Number of input features.
"""
super().__init__()
self.name = 'PointPillarsScatter'
self.output_shape = output_shape
self.ny = output_shape[2]
self.nx = output_shape[3]
self.nchannels = num_input_features
def forward(self, voxel_features, coords, batch_size):
# batch_canvas will be the final output.
batch_canvas = []
for batch_itt in range(batch_size):
# Create the canvas for this sample
canvas = torch.zeros(self.nchannels, self.nx * self.ny, dtype=voxel_features.dtype,
device=voxel_features.device)
# Only include non-empty pillars
batch_mask = coords[:, 0] == batch_itt
this_coords = coords[batch_mask, :]
indices = this_coords[:, 2] * self.nx + this_coords[:, 3]
indices = indices.type(torch.long)
voxels = voxel_features[batch_mask, :]
voxels = voxels.t()
# Now scatter the blob back to the canvas.
canvas[:, indices] = voxels
# Append to a list for later stacking.
batch_canvas.append(canvas)
# Stack to 3-dim tensor (batch-size, nchannels, nrows*ncols)
batch_canvas = torch.stack(batch_canvas, 0)
# Undo the column stacking to final 4-dim tensor
batch_canvas = batch_canvas.view(batch_size, self.nchannels, self.ny, self.nx)
return batch_canvas
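A tiny end-to-end sketch of the scatter step (hypothetical grid size, random features):

scatter = PointPillarsScatter(output_shape=[1, 1, 4, 5, 2], num_input_features=2)
voxel_features = torch.randn(3, 2)          # 3 non-empty pillars, 2 channels
coords = torch.tensor([[0, 0, 1, 2],        # rows are (batch, z, y, x); z is unused for pillars
                       [0, 0, 0, 0],
                       [0, 0, 3, 4]])
canvas = scatter(voxel_features, coords, batch_size=1)
print(canvas.shape)  # torch.Size([1, 2, 4, 5]); cells without a pillar stay zero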
voxelnet.py
import time
from enum import Enum
from functools import reduce
import numpy as np
import sparseconvnet as scn
import torch
from torch import nn
from torch.nn import functional as F
import torchplus
from torchplus import metrics
from torchplus.nn import Empty, GroupNorm, Sequential
from torchplus.ops.array_ops import gather_nd, scatter_nd
from torchplus.tools import change_default_args
from second.pytorch.core import box_torch_ops
from second.pytorch.core.losses import (WeightedSigmoidClassificationLoss,
WeightedSmoothL1LocalizationLoss,
WeightedSoftmaxClassificationLoss)
from second.pytorch.models.pointpillars import PillarFeatureNet, PointPillarsScatter
from second.pytorch.utils import get_paddings_indicator
def _get_pos_neg_loss(cls_loss, labels):
# cls_loss: [N, num_anchors, num_class]
# labels: [N, num_anchors]
batch_size = cls_loss.shape[0]
if cls_loss.shape[-1] == 1 or len(cls_loss.shape) == 2:
cls_pos_loss = (labels > 0).type_as(cls_loss) * cls_loss.view(
batch_size, -1)
cls_neg_loss = (labels == 0).type_as(cls_loss) * cls_loss.view(
batch_size, -1)
cls_pos_loss = cls_pos_loss.sum() / batch_size
cls_neg_loss = cls_neg_loss.sum() / batch_size
else:
cls_pos_loss = cls_loss[..., 1:].sum() / batch_size
cls_neg_loss = cls_loss[..., 0].sum() / batch_size
return cls_pos_loss, cls_neg_loss
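A sketch of how the positive/negative split behaves (hypothetical tensors):

cls_loss = torch.tensor([[0.5, 0.2, 0.1, 0.7]])  # [N=1, num_anchors=4], scalar loss per anchor
labels = torch.tensor([[1, 0, 0, 2]])            # label > 0 is positive, == 0 is negative
cls_pos, cls_neg = _get_pos_neg_loss(cls_loss, labels)
print(cls_pos.item(), cls_neg.item())            # 1.2 (0.5 + 0.7) and ~0.3 (0.2 + 0.1)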
class VFELayer(nn.Module):
def __init__(self, in_channels, out_channels, use_norm=True, name='vfe'):
super(VFELayer, self).__init__()
self.name = name
self.units = int(out_channels / 2)
if use_norm:
BatchNorm1d = change_default_args(
eps=1e-3, momentum=0.01)(nn.BatchNorm1d)
Linear = change_default_args(bias=False)(nn.Linear)
else:
BatchNorm1d = Empty
Linear = change_default_args(bias=True)(nn.Linear)
self.linear = Linear(in_channels, self.units)
self.norm = BatchNorm1d(self.units)
def forward(self, inputs):
# [K, T, 7] tensordot [7, units] = [K, T, units]
voxel_count = inputs.shape[1]
x = self.linear(inputs)
x = self.norm(x.permute(0, 2, 1).contiguous()).permute(0, 2,
1).contiguous()
pointwise = F.relu(x)
# [K, T, units]
aggregated = torch.max(pointwise, dim=1, keepdim=True)[0]
# [K, 1, units]
repeated = aggregated.repeat(1, voxel_count, 1)
concatenated = torch.cat([pointwise, repeated], dim=2)
# [K, T, 2 * units]
return concatenated
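For comparison with PFNLayer, a quick shape check (a sketch; random data): VFELayer always concatenates the pointwise features with the repeated per-voxel max, so its output width is the requested out_channels split as 2 * units.

vfe = VFELayer(in_channels=7, out_channels=32)
x = torch.randn(50, 35, 7)
print(vfe(x).shape)  # torch.Size([50, 35, 32]) -- 16 pointwise + 16 repeated max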
class VoxelFeatureExtractor(nn.Module):
def __init__(self,
num_input_features=4,
use_norm=True,
num_filters=[32, 128],
with_distance=False,
name='VoxelFeatureExtractor'):
super(VoxelFeatureExtractor, self).__init__()
self.name = name
if use_norm:
BatchNorm1d = change_default_args(
eps=1e-3, momentum=0.01)(nn.BatchNorm1d)
Linear = change_default_args(bias=False)(nn.Linear)
else:
BatchNorm1d = Empty
Linear = change_default_args(bias=True)(nn.Linear)
assert len(num_filters) == 2
num_input_features += 3 # add mean features
if with_distance:
num_input_features += 1
self._with_distance = with_distance
self.vfe1 = VFELayer(num_input_features, num_filters[0], use_norm)
self.vfe2 = VFELayer(num_filters[0], num_filters[1], use_norm)
self.linear = Linear(num_filters[1], num_filters[1])
# var_torch_init(self.linear.weight)
# var_torch_init(self.linear.bias)
self.norm = BatchNorm1d(num_filters[1])
def forward(self, features, num_voxels, coors):
# features: [concated_num_points, num_voxel_size, 3(4)]
# num_voxels: [concated_num_points]
points_mean = features[:, :, :3].sum(
dim=1, keepdim=True) / num_voxels.type_as(features).view(-1, 1, 1)
features_relative = features[:, :, :3] - points_mean
if self._with_distance:
points_dist = torch.norm(features[:, :, :3], 2, 2, keepdim=True)
features = torch.cat(
[features, features_relative, points_dist], dim=-1)
else:
features = torch.cat([features, features_relative], dim=-1)
voxel_count = features.shape[1]
mask = get_paddings_indicator(num_voxels, voxel_count, axis=0)
mask = torch.unsqueeze(mask, -1).type_as(features)
# mask = features.max(dim=2, keepdim=True)[0] != 0
x = self.vfe1(features)
x *= mask
x = self.vfe2(x)
x *= mask
x = self.linear(x)
x = self.norm(x.permute(0, 2, 1).contiguous()).permute(0, 2,
1).contiguous()
x = F.relu(x)
x *= mask
# x: [concated_num_points, num_voxel_size, 128]
voxelwise = torch.max(x, dim=1)[0]
return voxelwise
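A shape walkthrough of the full extractor (a sketch, assuming the imports above resolve; note that coors is accepted but unused by this forward):

extractor = VoxelFeatureExtractor(num_input_features=4, num_filters=[32, 128])
features = torch.randn(200, 35, 4)          # 200 voxels, up to 35 points, (x, y, z, r)
num_voxels = torch.randint(1, 36, (200,))   # actual point count per voxel
out = extractor(features, num_voxels, coors=None)
print(out.shape)  # torch.Size([200, 128]) -- one 128-d feature per voxel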
class VoxelFeatureExtractorV2(nn.Module):
def __init__(self,
num_input_features=4,
use_norm=True,
num_filters=[32, 128],
with_distance=False,
name='VoxelFeatureExtractor'):
super(VoxelFeatureExtractorV2, self).__init__()
self.name = name
if use_norm:
BatchNorm1d = change_default_args(
eps=1e-3, momentum=0.01)(nn.BatchNorm1d)
Linear = change_default_args(bias=False)(nn.Linear)
else:
BatchNorm1d = Empty
Linear = change_default_args(bias=True)(nn.Linear)
assert len(num_filters) > 0
num_input_features += 3
if with_distance:
num_input_features += 1
self._with_distance = with_distance
num_filters = [num_input_features] + num_filters
filters_pairs = [[num_filters[i], num_filters[i + 1]]
for i in range(len(num_filters) - 1)]
self.vfe_layers = nn.ModuleList(
[VFELayer(i, o, use_norm) for i, o in filters_pairs])
self.linear = Linear(num_filters[-1], num_filters[-1])
# var_torch_init(self.linear.weight)
# var_torch_init(self.linear.bias)
self.norm = BatchNorm1d(num_filters[-1])
def forward(self, features, num_voxels, coors):
# features: [concated_num_points, num_voxel_size, 3(4)]
# num_voxels: [concated_num_points]
points_mean = features[:, :, :3].sum(
dim=1, keepdim=True) / num_voxels.type_as(features).view(-1, 1, 1)
features_relative = features[:, :, :3] - points_mean
if self._with_distance:
points_dist = torch.norm(features[:, :, :3], 2, 2, keepdim=True)
features = torch.cat(
[features, features_relative, points_dist], dim=-1)
else:
features = torch.cat([features, features_relative], dim=-1)
voxel_count = features.shape[1]
mask = get_paddings_indicator(num_voxels, voxel_count, axis=0)
mask = torch.unsqueeze(mask, -1).type_as(features)
for vfe in self.vfe_layers:
features = vfe(features)
features *= mask
features = self.linear(features)
features = self.norm(features.permute(0, 2, 1).contiguous()).permute(
0, 2, 1).contiguous()
features = F.relu(features)
features *= mask
# x: [concated_num_points, num_voxel_size, 128]
voxelwise = torch.max(features, dim=1)[0]
return voxelwise
class SparseMiddleExtractor(nn.Module):
def __init__(self,
output_shape,
use_norm=True,
num_input_features=128,
num_filters_down1=[64],
num_filters_down2=[64, 64],
name='SparseMiddleExtractor'):
super(SparseMiddleExtractor, self).__init__()
self.name = name
if use_norm:
BatchNorm1d = change_default_args(
eps=1e-3, momentum=0.01)(nn.BatchNorm1d)
Linear = change_default_args(bias=False)(nn.Linear)
else:
BatchNorm1d = Empty
Linear = change_default_args(bias=True)(nn.Linear)
sparse_shape = np.array(output_shape[1:4]) + [1, 0, 0]
# sparse_shape[0] = 11
print(sparse_shape)
self.scn_input = scn.InputLayer(3, sparse_shape.tolist())
self.voxel_output_shape = output_shape
middle_layers = []
num_filters = [num_input_features] + num_filters_down1
# num_filters = [64] + num_filters_down1
filters_pairs_d1 = [[num_filters[i], num_filters[i + 1]]
for i in range(len(num_filters) - 1)]
for i, o in filters_pairs_d1:
middle_layers.append(scn.SubmanifoldConvolution(3, i, o, 3, False))
middle_layers.append(scn.BatchNormReLU(o, eps=1e-3, momentum=0.99))
middle_layers.append(
scn.Convolution(
3,
num_filters[-1],
num_filters[-1], (3, 1, 1), (2, 1, 1),
bias=False))
middle_layers.append(
scn.BatchNormReLU(num_filters[-1], eps=1e-3, momentum=0.99))
# assert len(num_filters_down2) > 0
if len(num_filters_down1) == 0:
num_filters = [num_filters[-1]] + num_filters_down2
else:
num_filters = [num_filters_down1[-1]] + num_filters_down2
filters_pairs_d2 = [[num_filters[i], num_filters[i + 1]]
for i in range(len(num_filters) - 1)]
for i, o in filters_pairs_d2:
middle_layers.append(scn.SubmanifoldConvolution(3, i, o, 3, False))
middle_layers.append(scn.BatchNormReLU(o, eps=1e-3, momentum=0.99))
middle_layers.append(
scn.Convolution(
3,
num_filters[-1],
num_filters[-1], (3, 1, 1), (2, 1, 1),
bias=False))
middle_layers.append(
scn.BatchNormReLU(num_filters[-1], eps=1e-3, momentum=0.99))
middle_layers.append(scn.SparseToDense(3, num_filters[-1]))
self.middle_conv = Sequential(*middle_layers)
def forward(self, voxel_features, coors, batch_size):
# coors[:, 1] += 1
coors = coors.int()[:, [1, 2, 3, 0]]
ret = self.scn_input((coors.cpu(), voxel_features, batch_size))
ret = self.middle_conv(ret)
N, C, D, H, W = ret.shape
ret = ret.view(N, C * D, H, W)
return ret
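The coordinate reorder at the top of forward moves the batch index from the first column to the last, which is the layout SparseConvNet's InputLayer expects (to my understanding). Plain indexing shows the effect:

coors = torch.tensor([[0, 1, 7, 9],    # rows arrive as (batch, z, y, x)
                      [1, 2, 3, 4]])
print(coors[:, [1, 2, 3, 0]])
# tensor([[1, 7, 9, 0],
#         [2, 3, 4, 1]])   -- now (z, y, x, batch)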
class ZeroPad3d(nn.ConstantPad3d):
def __init__(self, padding):
super(ZeroPad3d, self).__init__(padding, 0)
class MiddleExtractor(nn.Module):
def __init__(self,
output_shape,
use_norm=True,
num_input_features=128,
num_filters_down1=[64],
num_filters_down2=[64, 64],
name='MiddleExtractor'):
super(MiddleExtractor, self).__init__()
self.name = name
if use_norm:
BatchNorm3d = change_default_args(
eps=1e-3, momentum=0.01)(nn.BatchNorm3d)
# BatchNorm3d = change_default_args(
# group=32, eps=1e-3, momentum=0.01)(GroupBatchNorm3d)
Conv3d = change_default_args(bias=False)(nn.Conv3d)
else:
BatchNorm3d = Empty
Conv3d = change_default_args(bias=True)(nn.Conv3d)
self.voxel_output_shape = output_shape
self.middle_conv = Sequential(
ZeroPad3d(1),
Conv3d(num_input_features, 64, 3, stride=(2, 1, 1)),
BatchNorm3d(64),
nn.ReLU(),
ZeroPad3d([1, 1, 1, 1, 0, 0]),
Conv3d(64, 64, 3, stride=1),
BatchNorm3d(64),
nn.ReLU(),
ZeroPad3d(1),
Conv3d(64, 64, 3, stride=(2, 1, 1)),
BatchNorm3d(64),
nn.ReLU(),
)
def forward(self, voxel_features, coors, batch_size):
output_shape = [batch_size] + self.voxel_output_shape[1:]
ret = scatter_nd(coors.long(), voxel_features, output_shape)
# print('scatter_nd fw:', time.time() - t)
ret = ret.permute(0, 4, 1, 2, 3)
ret = self.middle_conv(ret)
N, C, D, H, W = ret.shape
ret = ret.view(N, C * D, H, W)
return ret
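Both middle extractors end the same way: the downsampled depth axis is folded into the channel axis, yielding the 2D pseudo-image the RPN consumes. The reshape is just a view (hypothetical shapes):

ret = torch.randn(1, 64, 2, 200, 176)    # [N, C, D, H, W] after the strided 3D convs
N, C, D, H, W = ret.shape
pseudo_image = ret.view(N, C * D, H, W)  # [1, 128, 200, 176]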
# The RPN is skipped here; I don't need it.
class LossNormType(Enum):
NormByNumPositives = "norm_by_num_positives"
NormByNumExamples = "norm_by_num_examples"
NormByNumPosNeg = "norm_by_num_pos_neg"
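These values select how the classification/regression losses are normalized; for instance, NormByNumPositives divides the per-anchor weights by the number of positive anchors per sample. A sketch of that option (hypothetical tensors; the real logic lives in the RPN/loss code skipped above):

labels = torch.tensor([[1, 0, 0, 2]])
pos_normalizer = (labels > 0).sum(dim=1, keepdim=True).float().clamp(min=1.0)
reg_weights = (labels > 0).float() / pos_normalizer  # each positive weighted 1/2 here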
VoxelNet
group_pointcloud.py
class VFELayer(object):
def __init__(self, out_channels, name):
super(VFELayer, self).__init__()
self.units = int(out_channels / 2)
with tf.variable_scope(name, reuse=tf.AUTO_REUSE) as scope:
self.dense = tf.layers.Dense(
self.units, tf.nn.relu, name='dense', _reuse=tf.AUTO_REUSE, _scope=scope)
self.batch_norm = tf.layers.BatchNormalization(
name='batch_norm', fused=True, _reuse=tf.AUTO_REUSE, _scope=scope)
def apply(self, inputs, mask, training):
# [K, T, 7] tensordot [7, units] = [K, T, units]
pointwise = self.batch_norm.apply(self.dense.apply(inputs), training)
# [K, 1, units]
aggregated = tf.reduce_max(pointwise, axis=1, keep_dims=True)
# [K, T, units]
repeated = tf.tile(aggregated, [1, cfg.VOXEL_POINT_COUNT, 1])
# [K, T, 2 * units]
concatenated = tf.concat([pointwise, repeated], axis=2)
mask = tf.tile(mask, [1, 1, 2 * self.units])
concatenated = tf.multiply(concatenated, tf.cast(mask, tf.float32))
return concatenated
class FeatureNet(object):
def __init__(self, training, batch_size, name=''):
super(FeatureNet, self).__init__()
self.training = training
# scalar
self.batch_size = batch_size
# [ΣK, 35/45, 7]
self.feature = tf.placeholder(
tf.float32, [None, cfg.VOXEL_POINT_COUNT, 7], name='feature')
# [ΣK]
self.number = tf.placeholder(tf.int64, [None], name='number')
# [ΣK, 4], each row stores (batch, d, h, w)
self.coordinate = tf.placeholder(
tf.int64, [None, 4], name='coordinate')
with tf.variable_scope(name, reuse=tf.AUTO_REUSE) as scope:
self.vfe1 = VFELayer(32, 'VFE-1')
self.vfe2 = VFELayer(128, 'VFE-2')
# boolean mask [K, T, 2 * units]
mask = tf.not_equal(tf.reduce_max(
self.feature, axis=2, keep_dims=True), 0)
x = self.vfe1.apply(self.feature, mask, self.training)
x = self.vfe2.apply(x, mask, self.training)
# [ΣK, 128]
voxelwise = tf.reduce_max(x, axis=1)
# car: [N * 10 * 400 * 352 * 128]
# pedestrian/cyclist: [N * 10 * 200 * 240 * 128]
self.outputs = tf.scatter_nd(
self.coordinate, voxelwise, [self.batch_size, 10, cfg.INPUT_HEIGHT, cfg.INPUT_WIDTH, 128])
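The final tf.scatter_nd writes each voxel's 128-d feature into its (batch, d, h, w) cell of a dense grid, leaving untouched cells at zero. The same semantics sketched with plain PyTorch indexing, for consistency with the rest of these notes (hypothetical small grid):

out = torch.zeros(1, 10, 4, 5, 128)
coordinate = torch.tensor([[0, 2, 1, 3],    # rows are (batch, d, h, w)
                           [0, 9, 0, 0]])
voxelwise = torch.randn(2, 128)
out[coordinate[:, 0], coordinate[:, 1], coordinate[:, 2], coordinate[:, 3]] = voxelwise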