I. Introduction
This post follows VirConv's method for generating virtual points:
https://github.com/hailanyi/VirConv/tree/master/tools/PENet
It uses a few small tricks: it bypasses evaluation entirely and generates the virtual points directly from pretrained weights. This post describes how to use a variety of depth-completion methods to generate virtual points that assist 3D object detection.
II. Modifying the inference script
Different depth-completion repos require different code changes, and some are genuinely hard to modify, so unfortunately there is no single universal patch. The overall recipe is the same everywhere, though. The key realization is that if all we want is to generate virtual points, then ground-truth labels, evaluation functions and the like no longer matter and can simply be removed. The two things that actually matter are how to read our own dataset and how to correctly project the depth map into the 3D LiDAR coordinate frame.
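To make that projection concrete up front: it is the standard pinhole back-projection from pixel coordinates plus depth into the rectified camera frame, followed by a rect-to-LiDAR transform. A minimal sketch, assuming a calibration_kitti.Calibration-style object (as in OpenPCDet/VirConv) with fu, fv, cu, cv attributes and a rect_to_lidar() method; the function name depth_map_to_lidar is my own:

import numpy as np

def depth_map_to_lidar(depth, calib):
    # depth: (H, W) dense depth map in the rectified camera frame.
    # Returns (N, 3) points in the LiDAR frame, one per valid (depth > 0) pixel.
    h, w = depth.shape
    vv, uu = np.meshgrid(np.arange(h), np.arange(w), indexing='ij')
    z = depth.reshape(-1)
    valid = z > 0
    u, v, z = uu.reshape(-1)[valid], vv.reshape(-1)[valid], z[valid]
    # Pinhole back-projection: x = (u - cu) * z / fu, y = (v - cv) * z / fv
    x = (u - calib.cu) * z / calib.fu
    y = (v - calib.cv) * z / calib.fv
    pts_rect = np.stack([x, y, z], axis=1)   # rectified camera frame
    return calib.rect_to_lidar(pts_rect)     # LiDAR frame

If your Calibration class provides img_to_rect() (OpenPCDet's does, and it additionally applies the small stereo-baseline offsets tx, ty from the projection matrix), prefer it over the two hand-rolled lines above.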
Below we take LRRU, a current state-of-the-art method, as the example. The concrete steps:
1. Rewriting the DataLoader
We write our own DataLoader for the KITTI 3D object detection dataset to replace the one that reads KITTI-DC. The main changes are in how the data is read: the calib files, the LiDAR point clouds, and the images. (Not that the original loader is bad; it is just wrapped too tightly, so I swapped in one I had already written.)
The dispatch point:
if self.arg.dataset == 'kitti':   # our newly added my_loader
    rgb, dep = self.my_loader[index]
    gt = None
    K = None
    paths = None
else:                             # the original reading path
    dep, gt, K, rgb, paths = self.__getraw__(index)
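How arg.dataset and arg.detpath get set depends on the repo's configuration system; a minimal sketch of the corresponding command-line options (the option names here are my own):

import argparse

parser = argparse.ArgumentParser()
# 'kitti-dc' keeps the original KITTI depth-completion pipeline;
# 'kitti' switches to the KITTI 3D-detection data read by MyLoader below.
parser.add_argument('--dataset', default='kitti-dc',
                    choices=['kitti-dc', 'kitti'])
# Root of the detection split: <detpath>/{image_2, velodyne, calib}
parser.add_argument('--detpath', default='')
arg = parser.parse_args()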
Below is the complete code, adapted from VirConv's approach:
class KittiDepthSemi(data.Dataset):
    """A data loader for the KITTI dataset."""

    def __init__(self, split, args, arg):
        self.args = args
        self.split = split
        self.arg = arg
        if arg.dataset == 'kitti-dc':
            self.paths = get_kittipaths(split, args)
        elif arg.dataset == 'kitti':
            self.my_loader = MyLoader(arg.detpath)
        self.transforms = kittitransforms
        self.ipfill = fill_in_fast

    def __getraw__(self, index):
        dep = read_depth(self.paths['dep'][index]) if \
            (self.paths['dep'][index] is not None) else None
        gt = read_depth(self.paths['gt'][index]) if \
            self.paths['gt'][index] is not None else None
        rgb = read_rgb(self.paths['rgb'][index]) if \
            (self.paths['rgb'][index] is not None) else None
        if self.paths['K'][index] is not None:
            if self.split == 'train' or (self.split == 'val' and self.args.val == 'full'):
                calib = read_calib_file(self.paths['K'][index])
                K_cam = None
                if 'image_02' in self.paths['rgb'][index]:
                    K_cam = np.reshape(calib['P_rect_02'], (3, 4))
                elif 'image_03' in self.paths['rgb'][index]:
                    K_cam = np.reshape(calib['P_rect_03'], (3, 4))
                K = [K_cam[0, 0], K_cam[1, 1], K_cam[0, 2], K_cam[1, 2]]
            else:
                f_calib = open(self.paths['K'][index], 'r')
                K_cam = f_calib.readline().split(' ')
                f_calib.close()
                K = [float(K_cam[0]), float(K_cam[4]), float(K_cam[2]),
                     float(K_cam[5])]
        else:
            K = None
        return dep, gt, K, rgb, self.paths['dep'][index]

    def __getitem__(self, index):
        if self.arg.dataset == 'kitti':
            rgb, dep = self.my_loader[index]
            gt = None
            K = None
            paths = None
        else:
            dep, gt, K, rgb, paths = self.__getraw__(index)
        dep, gt, K, rgb = self.transforms(self.split, self.args, dep, gt, K, rgb)

        dep_np = dep.numpy().squeeze(0)
        dep_clear, _ = outlier_removal(dep_np)
        dep_clear = np.expand_dims(dep_clear, 0)
        dep_clear_torch = torch.from_numpy(dep_clear)

        # ip_basic fill
        dep_np = dep.numpy().squeeze(0)
        dep_np_ip = np.copy(dep_np)
        dep_ip = self.ipfill(dep_np_ip, max_depth=100.0,
                             extrapolate=True, blur_type='gaussian')
        dep_ip_torch = torch.from_numpy(dep_ip)
        dep_ip_torch = dep_ip_torch.to(dtype=torch.float32)

        # gt = dep_clear_torch if gt is None else gt
        candidates = {'dep': dep, 'dep_clear': dep_clear_torch, 'gt': gt,
                      'rgb': rgb, 'ip': dep_ip_torch}
        items = {
            key: val
            for key, val in candidates.items() if val is not None
        }
        if (self.args.debug_dp or self.args.test) and self.arg.dataset == 'kitti-dc':
            items['d_path'] = paths
        return items

    def __len__(self):
        if self.args.toy_test:
            return self.args.toy_test_number
        else:
            try:
                return len(self.paths['gt'])
            except AttributeError:
                return len(self.my_loader)
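With the class in place, wiring it into an inference loop looks roughly like this (a sketch: batch_size=1 sidesteps collation issues since in 'kitti' mode the gt/K/path entries are filtered out, and the sample-dict keys the network expects vary per repo):

from torch.utils.data import DataLoader

# arg.dataset == 'kitti'; arg.detpath points at the KITTI detection split
dataset = KittiDepthSemi('test', args, arg)
loader = DataLoader(dataset, batch_size=1, shuffle=False, num_workers=4)

for idx, batch in enumerate(loader):
    samplef = {key: val.cuda() for key, val in batch.items()}
    outputf = net(samplef)   # predicted dense depth map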
And MyLoader itself:
class MyLoader():
    def __init__(self, root_path=''):
        self.root_path = root_path
        self.file_list = self.include_all_files()

    def include_all_files(self):
        velo_path = os.path.join(self.root_path, 'velodyne')
        all_files = os.listdir(velo_path)
        all_files.sort()
        all_files = [x[0:6] for x in all_files]
        return all_files

    def __len__(self):
        return len(self.file_list)

    def __getitem__(self, item):
        file_idx = self.file_list[item]
        # Paths of the basic KITTI files for this frame
        file_image_path = os.path.join(self.root_path, 'image_2', file_idx + '.png')
        file_velo_path = os.path.join(self.root_path, 'velodyne', file_idx + '.bin')
        file_calib = os.path.join(self.root_path, 'calib', file_idx + '.txt')
        # Returns a Calibration object initialized with P2, P3, Tr_velo2cam and R0
        calib = calibration_kitti.Calibration(file_calib)
        # Read the point cloud and the image
        points = np.fromfile(str(file_velo_path), dtype=np.float32).reshape(-1, 4)
        # points = layer_point_discard(points, 0.9)
        image = np.array(io.imread(file_image_path), dtype=np.int32)
        # Crop to the input resolution the network expects
        image = image[:352, :1216]
        # Build the RGB / sparse-depth input pair from the point cloud
        rgb, depth = load_depth_input(calib, image, points)
        return rgb, depth
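load_depth_input is my own helper rather than anything shipped with LRRU: it projects the LiDAR points into the image plane to form the sparse depth map the network expects. A sketch (the real normalization and ToTensor steps should match the transforms the depth-completion repo was trained with):

import numpy as np
import torch

def load_depth_input(calib, image, points):
    # Project LiDAR points into the image plane to build a sparse depth map
    h, w = image.shape[0], image.shape[1]
    pts_rect = calib.lidar_to_rect(points[:, 0:3])
    pts_img, pts_depth = calib.rect_to_img(pts_rect)   # (N, 2) pixels, (N,) depths
    cols = np.round(pts_img[:, 0]).astype(np.int32)
    rows = np.round(pts_img[:, 1]).astype(np.int32)
    keep = (cols >= 0) & (cols < w) & (rows >= 0) & (rows < h) & (pts_depth > 0)
    depth = np.zeros((h, w), dtype=np.float32)
    depth[rows[keep], cols[keep]] = pts_depth[keep]    # sparse depth map
    rgb = torch.from_numpy(image.transpose(2, 0, 1).astype(np.float32))  # (3, H, W)
    dep = torch.from_numpy(depth).unsqueeze(0)         # (1, H, W)
    return rgb, dep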
2. Projecting and saving virtual points
This part is actually quite simple. We only need to find where the predicted depth map comes out, which is usually obvious, e.g.:
outputf = net(samplef)
outputf holds the predicted depth map. We then use the calib we just loaded to project it into 3D space:
def save_depth_as_points(depth, idx, root_path, args):
    file_idx = str(idx).zfill(6)
    # Paths of the image, point cloud and calib for this frame;
    # the result is saved as a .npy file alongside them
    file_image_path = os.path.join(root_path, 'image_2', file_idx + '.png')
    file_velo_path = os.path.join(root_path, 'velodyne', file_idx + '.bin')
    file_calib = os.path.join(root_path, 'calib', file_idx + '.txt')
    calib = calibration_kitti.Calibration(file_calib)

    # Change 1 (the single most important fix!): correct the calib.
    # LRRU crops args.top_crop rows off the top of the image, so the
    # principal point cv must be shifted accordingly; without this the
    # final projection is simply wrong.
    calib.cv -= args.top_crop

    # Read the LiDAR file
    lidar = np.fromfile(str(file_velo_path), dtype=np.float32).reshape(-1, 4)
    # Read the image
    image = np.array(io.imread(file_image_path), dtype=np.int32)
    # Change 2: keep the [:352, :1216] crop exactly as in the loader.
    # Do NOT change it to e.g. [100:352] or [:252]; doing so makes the
    # image projection and the depth projection no longer align.
    image = image[:352, :1216]

    # # Optionally save the RGB crop
    # paths_rgb = os.path.join(root_path, 'velodyne_depth')
    # out_path_img = os.path.join(paths_rgb, file_idx + '_rgb.png')
    # cv2.imwrite(out_path_img, image)

    # Transform the LiDAR points into the rectified camera frame
    pts_rect = calib.lidar_to_rect(lidar[:, 0:3])
    # Keep only the points inside the camera FOV
    fov_flag = get_fov_flag(pts_rect, image.shape, calib)
    lidar = lidar[fov_flag]
    # lidar = layer_point_discard(lidar, 0.7)

    # Output directory for the completed-depth (virtual) points
    paths = os.path.join(root_path, 'velodyne_depth_lrru')
    if not os.path.exists(paths):
        os.makedirs(paths)
    out_path = os.path.join(paths, file_idx + '.npy')

    # (H, W, 1) depth map as a NumPy array
    depth = depth.squeeze(0).permute(1, 2, 0).cpu().detach().numpy()

    # # Optionally dump the predicted depth map as a 16-bit PNG
    # out_path_img = os.path.join(paths, file_idx + '.png')
    # img = np.squeeze(depth)
    # img = (img * 256).astype('uint16')
    # cv2.imwrite(out_path_img, img)

    # Convert the depth map into 3D points (with RGB) in the LiDAR frame
    final_points, new_lidar, new_p = depth2pointsrgbp(depth, image, calib, lidar)
    # Save as float16 to keep the files small
    final_points = final_points.astype(np.float16)
    new_lidar = new_lidar.astype(np.float16)
    # vision(final_points)
    # vision(new_lidar)
    np.save(out_path, final_points)
    return len(final_points)
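Two helpers used above are not reproduced in this post: get_fov_flag is the usual OpenPCDet FOV filter, and depth2pointsrgbp is VirConv's depth-to-points conversion. The sketch below shows the intent, reusing the earlier depth_map_to_lidar; the exact output layout beyond x, y, z, r, g, b (and whatever new_p carries) is an assumption on my part, since VirConv's real implementation also tags points as real vs. virtual:

def get_fov_flag(pts_rect, img_shape, calib):
    # Keep only points that project inside the image (standard OpenPCDet logic)
    pts_img, pts_rect_depth = calib.rect_to_img(pts_rect)
    in_img = (pts_img[:, 0] >= 0) & (pts_img[:, 0] < img_shape[1]) & \
             (pts_img[:, 1] >= 0) & (pts_img[:, 1] < img_shape[0])
    return in_img & (pts_rect_depth >= 0)

def depth2pointsrgbp(depth, image, calib, lidar):
    # Sketch: back-project every valid depth pixel and attach its RGB value
    depth = depth[:, :, 0]                                  # (H, W, 1) -> (H, W)
    pts_lidar = depth_map_to_lidar(depth, calib)            # virtual xyz, LiDAR frame
    h, w = depth.shape
    vv, uu = np.meshgrid(np.arange(h), np.arange(w), indexing='ij')
    valid = depth.reshape(-1) > 0
    colors = image[vv.reshape(-1)[valid], uu.reshape(-1)[valid]] / 255.0
    virtual = np.concatenate([pts_lidar, colors], axis=1)   # x, y, z, r, g, b
    return virtual, lidar, None                             # new_p unused in this sketch

The call site in the inference loop is then something like save_depth_as_points(outputf, idx, arg.detpath, args) for each predicted frame.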
The final visualized result:
[Figure: visualization of the generated virtual points]
3. Full code
The complete code for generating virtual points with LRRU, TWISE, DySPN, PENet, CompletionFormer, and other depth-completion methods will be open-sourced once my paper is accepted.