以PVRCNN代码为例,笔记记录:OpenPCDet如何准备数据、加载数据,了解点云输入准备,以便后续对点云进行各种修改。
大概流程图如下:
train.py中的代码:
# train.py: build the dataset, the DataLoader and the (optional distributed)
# sampler in a single call; configuration comes from the YAML-backed cfg
# object plus command-line arguments.
train_set, train_loader, train_sampler = build_dataloader(
    dataset_cfg=cfg.DATA_CONFIG,      # DATA_CONFIG section of the model yaml
    class_names=cfg.CLASS_NAMES,      # detection classes, e.g. Car/Pedestrian/Cyclist
    batch_size=args.batch_size,
    dist=dist_train, workers=args.workers,
    logger=logger,
    training=True,
    merge_all_iters_to_one_epoch=args.merge_all_iters_to_one_epoch,
    total_epochs=args.epochs
)
利用build_dataloader 将制作数据集和DataLoader加载数据的方法封装。
def build_dataloader(dataset_cfg, class_names, batch_size, dist, root_path=None, workers=4,
                     logger=None, training=True, merge_all_iters_to_one_epoch=False, total_epochs=0):
    """Instantiate the dataset named in the config and wrap it in a DataLoader.

    Returns:
        (dataset, dataloader, sampler) — sampler is None when not running
        distributed.
    """
    # Resolve the concrete dataset class (e.g. KittiDataset) from the registry
    # by the name given in the config, then build it.
    dataset = __all__[dataset_cfg.DATASET](
        dataset_cfg=dataset_cfg,
        class_names=class_names,
        root_path=root_path,
        training=training,
        logger=logger,
    )

    if merge_all_iters_to_one_epoch:
        # The dataset must support folding all iterations into one epoch.
        assert hasattr(dataset, 'merge_all_iters_to_one_epoch')
        dataset.merge_all_iters_to_one_epoch(merge=True, epochs=total_epochs)

    # Choose a sampler: shuffled DistributedSampler for distributed training,
    # a deterministic one for distributed evaluation, none otherwise.
    sampler = None
    if dist:
        if training:
            sampler = torch.utils.data.distributed.DistributedSampler(dataset)
        else:
            rank, world_size = common_utils.get_dist_info()
            sampler = DistributedSampler(dataset, world_size, rank, shuffle=False)

    # Wrap the dataset; shuffling is only done when no sampler drives the order.
    dataloader = DataLoader(
        dataset, batch_size=batch_size, pin_memory=True, num_workers=workers,
        shuffle=(sampler is None) and training, collate_fn=dataset.collate_batch,
        drop_last=False, sampler=sampler, timeout=0
    )

    return dataset, dataloader, sampler
这里主要关注制作数据的部分(通过kitti_dataset.py实例化数据集)
这里kitti_dataset继承于DatasetTemplate,而DatasetTemplate继承于torch_data.Dataset,torch_data.Dataset抽象于多个数据集的共同部分,kitti_dataset和nuscenes_dataset等继承并实例化。
DatasetTemplate:
需要重写的几个核心的方法:
-
init
-
len(子类实现)
-
getitem(子类实现)
init中:
初始化各类参数以及三个方法:PointFeatureEncoder、DataAugmentor、DataProcessor(在getitem中调用forward)
-
PointFeatureEncoder(点云特征编码):根据配置文件(kitti_dataset.yaml文件)实例化
-
DataAugmentor(数据增强):同上
-
DataProcessor(数据处理):同上
DataProcessor中的各个函数:
# Factory pattern: each processor name from the config is looked up as a
# method on self; calling it once with the config returns the per-sample
# processing function.
for cur_cfg in processor_configs:
    cur_processor = getattr(self, cur_cfg.NAME)(config=cur_cfg)
    # Queued functions are later invoked in order by the forward method.
    self.data_processor_queue.append(cur_processor)
mask_points_and_boxes_outside_range(移除点云范围外的点point_cloud_range)
transform_points_to_voxels(点云转为体像素)
sample_points(采样点云,多退少补)
calculate_grid_size(计算网格范围)
downsample_depth_map(降采样深度图)
kitti_dataset(继承于datatemplate):
需要重写的几个核心的方法:
-
init
-
len
-
getitem
init:根据split选择是训练集下的数据还是测试下的数据
# __init__ excerpt: select the training/testing split and read the sample id list.
self.split = self.dataset_cfg.DATA_SPLIT[self.mode]
self.root_split_path = self.root_path / ('training' if self.split != 'test' else 'testing')

split_dir = self.root_path / 'ImageSets' / (self.split + '.txt')
# Each line of ImageSets/<split>.txt is one sample id (frame number).
self.sample_id_list = [x.strip() for x in open(split_dir).readlines()] if split_dir.exists() else None

self.kitti_infos = []
self.include_kitti_data(self.mode)  # load per-sample metadata from the pkl info files
-
kitti_infos每个记录数据:点云文件名、对应图像名、内参数calib、注释annos
pkl文件解释:mmdetection3d kitti (持续更新)_kitti_infos_train.pkl_Coding的叶子的博客-CSDN博客
Dataloader加载数据时通过len 计算加载数据的长度
训练时train_one_epoch会调用__getitem__迭代加载数据进行训练:
def __getitem__(self, index):
    """Load one KITTI sample by index and run it through prepare_data.

    Called by the DataLoader worker for every training/eval iteration;
    returns the fully preprocessed data dict for this frame.
    """
    # index = 4
    if self._merge_all_iters_to_one_epoch:
        # All iterations are folded into one epoch, so wrap the index.
        index = index % len(self.kitti_infos)

    # kitti_infos is the per-sample metadata list loaded in __init__
    # (from the pkl info files); deep-copy so the cached info is not mutated.
    info = copy.deepcopy(self.kitti_infos[index])

    # frame index of this point-cloud sample
    sample_idx = info['point_cloud']['lidar_idx']
    # size of the corresponding camera image
    img_shape = info['image']['image_shape']
    # calibration (camera/lidar transforms)
    calib = self.get_calib(sample_idx)
    # which items to load; defaults to the point cloud only
    get_item_list = self.dataset_cfg.get('GET_ITEM_LIST', ['points'])

    # input dict starts with the frame id and the calibration object
    input_dict = {
        'frame_id': sample_idx,
        'calib': calib,
    }

    if 'annos' in info:
        annos = info['annos']
        # drop every annotation labelled DontCare
        annos = common_utils.drop_info_with_name(annos, name='DontCare')
        # box location (xyz), dimensions (lhw) and yaw angle
        loc, dims, rots = annos['location'], annos['dimensions'], annos['rotation_y']
        # class names of the annotations
        gt_names = annos['name']
        # 3D boxes in the camera frame ...
        gt_boxes_camera = np.concatenate([loc, dims, rots[..., np.newaxis]], axis=1).astype(np.float32)
        # ... converted into the lidar frame
        gt_boxes_lidar = box_utils.boxes3d_kitti_camera_to_lidar(gt_boxes_camera, calib)
        # add the ground-truth boxes and names to the input features
        input_dict.update({
            'gt_names': gt_names,
            'gt_boxes': gt_boxes_lidar
        })
        # 2D image boxes — not requested for the PV-RCNN config
        if "gt_boxes2d" in get_item_list:
            input_dict['gt_boxes2d'] = annos["bbox"]

        # Road plane is optional; KITTI does not always provide it, in which
        # case this branch is skipped.
        road_plane = self.get_road_plane(sample_idx)
        if road_plane is not None:
            input_dict['road_plane'] = road_plane

    if "points" in get_item_list:
        # raw lidar points for this frame
        points = self.get_lidar(sample_idx)
        if self.dataset_cfg.FOV_POINTS_ONLY:
            # Project lidar points into the rectified camera frame:
            # pts_rect = np.dot(pts_lidar_hom, np.dot(self.V2C.T, self.R0.T))
            pts_rect = calib.lidar_to_rect(points[:, 0:3])
            # keep only points that fall inside the camera field of view
            fov_flag = self.get_fov_flag(pts_rect, img_shape, calib)
            points = points[fov_flag]
        # points that can be mapped onto the camera image plane
        input_dict['points'] = points

    # The following three branches are not executed for the PV-RCNN config.
    if "images" in get_item_list:
        input_dict['images'] = self.get_image(sample_idx)
    if "depth_maps" in get_item_list:
        input_dict['depth_maps'] = self.get_depth_map(sample_idx)
    if "calib_matricies" in get_item_list:
        input_dict["trans_lidar_to_cam"], input_dict["trans_cam_to_img"] = kitti_utils.calib_to_matricies(calib)

    # keep the calibration object in the dict
    input_dict['calib'] = calib
    # common pipeline: augmentation, point-feature encoding, voxelization, ...
    data_dict = self.prepare_data(data_dict=input_dict)

    # attach the image size for downstream use
    data_dict['image_shape'] = img_shape
    return data_dict
其中比较重要的:data_dict = self.prepare_data(data_dict=input_dict)(准备输入数据)
def prepare_data(self, data_dict):
    """
    Args:
        data_dict:
            points: optional, (N, 3 + C_in)
            gt_boxes: optional, (N, 7 + C) [x, y, z, dx, dy, dz, heading, ...]
            gt_names: optional, (N), string
            ...

    Returns:
        data_dict:
            frame_id: string
            points: (N, 3 + C_in)
            gt_boxes: optional, (N, 7 + C) [x, y, z, dx, dy, dz, heading, ...]
            gt_names: optional, (N), string
            use_lead_xyz: bool
            voxels: optional (num_voxels, max_points_per_voxel, 3 + C)
            voxel_coords: optional (num_voxels, 3)
            voxel_num_points: optional (num_voxels)
            ...
    """
    if self.training:
        assert 'gt_boxes' in data_dict, 'gt_boxes should be provided for training'
        # mask of boxes whose class belongs to the configured detection classes
        gt_boxes_mask = np.array([n in self.class_names for n in data_dict['gt_names']], dtype=np.bool_)

        calib = data_dict['calib']
        points_before_aug = copy.deepcopy(data_dict['points'])
        # keep a copy of the points before augmentation
        # NOTE(review): points_before_aug looks like a local addition on top of
        # upstream OpenPCDet — confirm downstream consumers exist.
        data_dict['points_before_aug'] = points_before_aug
        # run data augmentation on the boxes of the detection classes
        data_dict = self.data_augmentor.forward(
            data_dict={
                **data_dict,
                'gt_boxes_mask': gt_boxes_mask
            }
        )
        # restore the calibration object after augmentation
        data_dict['calib'] = calib
        points_after_aug = copy.deepcopy(data_dict['points'])
        points_before_aug = data_dict['points_before_aug']

    if data_dict.get('gt_boxes', None) is not None:
        # keep only boxes of the configured classes, then append the 1-based
        # class id as an extra column of gt_boxes
        selected = common_utils.keep_arrays_by_name(data_dict['gt_names'], self.class_names)
        data_dict['gt_boxes'] = data_dict['gt_boxes'][selected]
        data_dict['gt_names'] = data_dict['gt_names'][selected]
        gt_classes = np.array([self.class_names.index(n) + 1 for n in data_dict['gt_names']], dtype=np.int32)
        gt_boxes = np.concatenate((data_dict['gt_boxes'], gt_classes.reshape(-1, 1).astype(np.float32)), axis=1)
        data_dict['gt_boxes'] = gt_boxes

        if data_dict.get('gt_boxes2d', None) is not None:
            # apply the same class filter to the 2D boxes
            data_dict['gt_boxes2d'] = data_dict['gt_boxes2d'][selected]

    # select which point attributes (x, y, z, intensity, ...) are used
    if data_dict.get('points', None) is not None:
        data_dict = self.point_feature_encoder.forward(data_dict)

    # point-cloud preprocessing: remove points outside point_cloud_range,
    # shuffle point order, and convert the points to voxels
    data_dict = self.data_processor.forward(
        data_dict=data_dict
    )

    # if augmentation/filtering left no gt boxes, resample a random frame
    if self.training and len(data_dict['gt_boxes']) == 0:
        new_index = np.random.randint(self.__len__())
        return self.__getitem__(new_index)

    data_dict.pop('gt_names', None)
    return data_dict