使用labelCloud打标签
安装与使用(参考安装与使用)
git clone https://github.com/ch-sa/labelCloud.git
cd labelCloud
conda create -n label3.8 python=3.8 -y
conda activate label3.8
pip install -r requirements.txt
python labelCloud.py
1.点击Span Bounding Box开始标注
2.box微调控制台:完成第1步的标注后,用它调整box的位置和大小
3.标注信息的显示
OpenPCDet数据准备与源码阅读
data文件
1.ImageSets:存放数据索引号
2.label_2:存放标签文件
3.数据集中的点云
注意文件名要对应,标签文件勿改动
数据集预处理,生成数据.pkl
python3 -m pcdet.datasets.custom.custom_dataset create_custom_infos tools/cfgs/dataset_configs/custom_dataset.yaml
下面阅读custom_dataset.py的主要部分(不想了解代码的请跳过)
if __name__ == '__main__':
    import sys

    # Usage: python3 -m pcdet.datasets.custom.custom_dataset create_custom_infos <dataset_cfg.yaml>
    if len(sys.argv) > 1 and sys.argv[1] == 'create_custom_infos':
        import yaml
        from pathlib import Path
        from easydict import EasyDict

        # Read the YAML config inside a context manager so the file handle is closed.
        with open(sys.argv[2]) as cfg_file:
            dataset_cfg = EasyDict(yaml.safe_load(cfg_file))
        # Three directory levels above this file; 'data/custom' is resolved below it.
        ROOT_DIR = (Path(__file__).resolve().parent / '../../../').resolve()
        create_custom_infos(
            dataset_cfg=dataset_cfg,
            class_names=['Car', 'Pedestrian', 'Cyclist'],
            data_path=ROOT_DIR / 'data' / 'custom',
            save_path=ROOT_DIR / 'data' / 'custom'
        )
主要执行 create_custom_infos()
def create_custom_infos(dataset_cfg, class_names, data_path, save_path, workers=4):
    """Generate the train/test info .pkl files and the ground-truth database.

    Args:
        dataset_cfg: Dataset configuration forwarded to CustomDataset.
        class_names: Class names forwarded to CustomDataset.
        data_path: Dataset root, forwarded to CustomDataset as root_path.
        save_path: Output directory; must support the `/` path operator.
        workers: Number of workers forwarded to dataset.get_infos().
    """
    # Instantiate the CustomDataset object used for every split below.
    dataset = CustomDataset(dataset_cfg=dataset_cfg, class_names=class_names, root_path=data_path, training=False)
    train_split = 'train'

    # No evaluation: only the train and test info files are written,
    # so no val / trainval file names are needed here.
    train_filename = save_path / ('custom_infos_%s.pkl' % train_split)
    test_filename = save_path / 'custom_infos_test.pkl'

    print('------------------------Start to generate data infos------------------------')

    dataset.set_split(train_split)
    custom_infos_train = dataset.get_infos(num_workers=workers, has_label=True, count_inside_pts=True)
    with open(train_filename, 'wb') as f:
        pickle.dump(custom_infos_train, f)
    print('Custom info train file is save to %s' % train_filename)

    # The test split is processed without labels.
    dataset.set_split('test')
    custom_infos_test = dataset.get_infos(num_workers=workers, has_label=False, count_inside_pts=False)
    with open(test_filename, 'wb') as f:
        pickle.dump(custom_infos_test, f)
    print('Custom info test file is saved to %s' % test_filename)

    print('------------------------Start create groundtruth database for data augmentation------------------------')
    dataset.set_split(train_split)
    # Feed the train info file written above (custom_infos_train.pkl) to build the gt database.
    dataset.create_groundtruth_database(train_filename, split=train_split)
    print('------------------------Data preparation done------------------------')
实例化CustomDataset创建对象dataset
dataset = CustomDataset(dataset_cfg=dataset_cfg, class_names=class_names, root_path=data_path, training=False)
给dataset的属性sample_id_list赋值,sample_id_list是一个存放了所有数据名字的列表
dataset.set_split(train_split)
def set_split(self, split):
    """Switch the dataset to another split and reload its sample id list.

    Args:
        split: Split name. 'test' selects the 'testing' folder, any other
            value selects 'training'.
    """
    # Re-run the parent initializer with the settings already stored on self.
    super().__init__(
        dataset_cfg=self.dataset_cfg, class_names=self.class_names, training=self.training,
        root_path=self.root_path, logger=self.logger
    )
    self.split = split
    self.root_split_path = self.root_path / ('training' if self.split != 'test' else 'testing')

    # Path of the index file that lists the sample ids of this split, e.g.
    # /home/kin/workspace/OpenPCDet/data/custom/ImageSets/train.txt
    split_dir = self.root_path / 'ImageSets' / (self.split + '.txt')
    print("split_dir=", split_dir)
    if split_dir.exists():
        # Use a context manager so the index file is closed after reading.
        with open(split_dir) as f:
            self.sample_id_list = [x.strip() for x in f]
    else:
        self.sample_id_list = None
split_dir是train.txt的绝对路径,如split_dir= /home/kin/workspace/OpenPCDet/data/custom/ImageSets/train.txt
open(split_dir).readlines() 返回一个list,每个元素是文件中的一行字符串(末尾带换行符),如['000000\n', '000001\n', '000002\n'];对每个元素执行x.strip()后得到['000000', '000001', '000002', '000003', '000004', '000005', '000006', '000007']
x.strip()方法常用于清除字符串中的不必要的空白字符
get_infos()
def get_infos(self, num_workers=4, has_label=True, count_inside_pts=True, sample_id_list=None):
    """Run the per-frame worker over every sample id and collect the results.

    Args:
        num_workers: Size of the thread pool.
        has_label: Not used directly in this listing (the worker body is elided).
        count_inside_pts: Not used directly in this listing.
        sample_id_list: Ids to process; defaults to self.sample_id_list when None.

    Returns:
        list with one entry per sample id.
    """
    import concurrent.futures as futures

    # Per-frame worker; its body is elided in this listing.
    def process_single_scene(sample_idx): ...

    # Fall back to the ids of the current split when the caller supplies none.
    if sample_id_list is None:
        sample_id_list = self.sample_id_list

    # A thread pool lets several frames be processed concurrently.
    with futures.ThreadPoolExecutor(num_workers) as executor:
        infos = executor.map(process_single_scene, sample_id_list)
    # Materialize the result iterator: one element per frame.
    return list(infos)
self.sample_id_list值赋给变量sample_id_list
infos = executor.map(process_single_scene, sample_id_list)
executor.map(function, iterable):该方法会按顺序迭代 iterable 中的每个元素,并将每个元素作为参数传递给 function 进行处理
将读到train.txt里的数据集标号挨个给到process_single_scene()执行一次
def process_single_scene(sample_idx):
    """Build the info dict for a single frame.

    This is the worker nested inside get_infos(); it reads `self` and
    `has_label` from the enclosing scope.

    Args:
        sample_idx: Sample id, forwarded to self.get_label().

    Returns:
        dict with key 'point_cloud' and, when has_label is true, key 'annos'.
    """
    print('%s sample_idx: %s' % (self.split, sample_idx))
    info = {}
    # Point cloud info: feature dimension and frame id.
    pc_info = {'num_features': 4, 'lidar_idx': sample_idx}
    info['point_cloud'] = pc_info

    if has_label:
        # Objects parsed from the label file of this frame.
        obj_list = self.get_label(sample_idx)
        annotations = {}
        # Only name/size/location/heading are kept (no truncated, occluded, alpha, bbox).
        annotations['name'] = np.array([obj.cls_type for obj in obj_list])  # 1-dimension
        # Each row is stored as [l, h, w]; reshape keeps shape (0, 3) for an empty label file.
        annotations['dimensions'] = np.array([[obj.l, obj.h, obj.w] for obj in obj_list]).reshape(-1, 3)
        # np.concatenate rejects an empty list, so an empty label file gets an explicit (0, 3) array.
        locations = [obj.loc.reshape(1, 3) for obj in obj_list]
        annotations['location'] = np.concatenate(locations) if locations else np.zeros((0, 3), dtype=np.float32)
        annotations['rotation_y'] = np.array([obj.ry for obj in obj_list])  # 1-dimension

        # 'DontCare' entries get index -1. The slices below take the first
        # num_objects rows, i.e. they assume 'DontCare' rows come last.
        num_objects = len([obj.cls_type for obj in obj_list if obj.cls_type != 'DontCare'])
        num_gt = len(annotations['name'])
        index = list(range(num_objects)) + [-1] * (num_gt - num_objects)
        annotations['index'] = np.array(index, dtype=np.int32)

        loc = annotations['location'][:num_objects]
        dims = annotations['dimensions'][:num_objects]
        rots = annotations['rotation_y'][:num_objects]

        # No calibration step: labels exported by labelCloud in the "Untransformed"
        # format are used as lidar coordinates directly (self.get_calib is skipped).
        loc_lidar = loc
        l, h, w = dims[:, 0:1], dims[:, 1:2], dims[:, 2:3]
        # The location is used as the box center as-is (no bottom-center shift by h / 2).
        # Box layout: [x, y, z, l, w, h, heading]; see https://zhuanlan.zhihu.com/p/152120636
        gt_boxes_lidar = np.concatenate([loc_lidar, l, w, h, (np.pi / 2 - rots[..., np.newaxis])], axis=1)
        annotations['gt_boxes_lidar'] = gt_boxes_lidar

        info['annos'] = annotations

    return info
获取对应索引标签文件的路径传给get_objects_from_label()
def get_label(self, idx):
    """Load the labelled objects of one sample.

    Args:
        idx: Sample id; the label file is <root_split_path>/label_2/<idx>.txt.

    Returns:
        Whatever object3d_custom.get_objects_from_label() returns for that file.

    Raises:
        AssertionError: If the label file does not exist.
    """
    label_file = self.root_split_path / 'label_2' / ('%s.txt' % idx)
    print("label_file=", label_file)
    # Name the missing path so a bad index or a misplaced file is easy to spot.
    assert label_file.exists(), 'Label file not found: %s' % label_file
    return object3d_custom.get_objects_from_label(label_file)
打开标签文件,把标签文件的每一行作为一个字符串保存在lines[],遍历lines[]的每一个元素,传入Object3d类中实例化,在Object3d类中将该行的每个字段解析出来作为Object3d的属性,遍历完成后生成一个objects列表,其中包含每个object的所有信息
object3d_custom.py
def get_objects_from_label(label_file):
    """Parse a label file into a list of Object3d, one per non-blank line.

    Args:
        label_file: Path of the label text file.

    Returns:
        list of Object3d built from the lines of the file.
    """
    with open(label_file, 'r') as f:
        lines = f.readlines()  # one string per line of the label file, newline included
    print("lines=", lines)
    # A blank line (e.g. a trailing empty line) has no fields to parse, so skip it.
    objects = [Object3d(line) for line in lines if line.strip()]
    return objects
class Object3d(object):
    """One labelled object parsed from a single line of a label file."""

    def __init__(self, line):
        """Parse the whitespace-separated fields of one label line.

        Args:
            line: Raw label line; needs at least 15 fields, with an optional
                16th field holding the score.
        """
        # split() without arguments strips the line ends and tolerates tabs or
        # repeated spaces, which split(' ') would turn into empty fields.
        label = line.split()
        self.src = line
        self.cls_type = label[0]
        self.cls_id = cls_type_to_id(self.cls_type)
        self.truncation = float(label[1])
        self.occlusion = float(label[2])  # 0:fully visible 1:partly occluded 2:largely occluded 3:unknown
        self.alpha = float(label[3])
        self.box2d = np.array((float(label[4]), float(label[5]), float(label[6]), float(label[7])), dtype=np.float32)
        self.h = float(label[8])
        self.w = float(label[9])
        self.l = float(label[10])
        self.loc = np.array((float(label[11]), float(label[12]), float(label[13])), dtype=np.float32)
        # Euclidean norm of the location vector.
        self.dis_to_cam = np.linalg.norm(self.loc)
        self.ry = float(label[14])
        # The score is only present on 16-field lines; -1.0 marks "no score".
        self.score = float(label[15]) if len(label) == 16 else -1.0
        self.level_str = None
        self.level = self.get_custom_obj_level()
annotations{}通过遍历objects中的元素给对应的键赋值,形成最终的可使用的数据集合
以上分析了代码中的train的pkl生成过程,test,val原理相同,都是通过不同参数调用get_infos()形成,有了pkl,openpcdet框架就可以方便的调用数据进行训练了。写的有些乱,欢迎道友们一起沟通交流,未完待续
源码来自:
1.https://github.com/open-mmlab/OpenPCDet
2.https://github.com/OrangeSodahub/CRLFnet/blob/master/src/site_model/src/LidCamFusion/OpenPCDet/README.md(自定义纯点云数据集构建)