A quick note on coordinate transforms for the ScanNet / VKITTI2 / KITTI / nuScenes datasets

I've been wrestling with coordinate transforms recently, and every dataset provides its poses in a different convention. Below I write down the four datasets I use and the corresponding transforms, for my own future reference. (The final correctness check is always whether the accumulated point cloud comes out right.)

All the code used in this post, together with the four datasets, is packaged and uploaded here:
链接:https://pan.baidu.com/s/1LARmVn1kMHNl-1CXSyB8HA 提取码:2023

Feel free to download. 🙂

1. ScanNet

I downloaded this ScanNet data from the link provided by NICE-SLAM: https://github.com/cvg/nice-slam

ScanNet directory layout:

└─frames
    ├─color
    ├─depth
    ├─intrinsic
    └─pose/0.txt

(figure)

Coordinate system (everything below uses this convention):

(figure)
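
The intrinsics are hard-coded in the script below, but they can presumably also be read from the intrinsic folder in the layout above. A minimal sketch, assuming the export keeps ScanNet's usual 4×4 intrinsic_depth.txt:

# a sketch: load the depth intrinsics instead of hard-coding them
# (assumes frames/intrinsic/intrinsic_depth.txt is the usual ScanNet 4x4 matrix)
import numpy as np

K = np.loadtxt('D:/DESKTOP/0716/Demo/frames/intrinsic/intrinsic_depth.txt')[:3, :3]
fj, fi = K[0, 0], K[1, 1]   # fx, fy in the i/j notation used below
cj, ci = K[0, 2], K[1, 2]   # cx, cy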

1.1 Full coordinate-transform code

Code (tf_scannet.py):

# ⭐tf_scannet.py
import numpy as np
import cv2
from scipy.spatial.transform import Rotation as R

# hyperparameters ----------------------------------
ci, cj, fi, fj = 242.6836, 318.9054, 578.729797, 577.59

SCENE_DIR = 'D:/DESKTOP/0716/Demo/frames/'
# --------------------------------------------------

def get_rgbd(idx):
    rgb = cv2.cvtColor(cv2.imread(SCENE_DIR + '/color/' + str(idx) + '.jpg'), cv2.COLOR_RGB2BGR)
    depth = cv2.imread(SCENE_DIR + '/depth/' + str(idx) + '.png', cv2.IMREAD_UNCHANGED) / 1000.0
    return rgb, depth


def get_transform(idx):
    transform = np.loadtxt(SCENE_DIR + '/pose/{}.txt'.format(idx))
    return transform

def transform(x, source, target, idx=0):

    if source == "depth" and target == "cam":
        ci, cj, fi, fj = 242.6836, 318.9054, 578.729797, 577.59

        # x.shape = (H, W)
        i, j = np.nonzero(x)
        k = x[i, j]

        '''
        cam_kitti_y|   cam_kitti_z
                   |  /
                   | /
                   |/
                   ----------- cam_kitti_x
        '''
        cam_kitti_x = (j - cj)/fj * k
        cam_kitti_y = -(i - ci)/fi * k
        cam_kitti_z = k

        '''       1
             2    |   
              \   |  
               \  | 
                \ |
                  --------- 0
        '''
        pc_cam = np.stack([cam_kitti_x, cam_kitti_y, cam_kitti_z]).T
        y = pc_cam

    # need idx
    elif source == "cam" and target == "world":
        # x.shape = (N, 3)
        transpose = get_transform(idx)   # camera-to-world pose from pose/{idx}.txt
        transpose[:3, 1] *= -1           # flip the y/z columns to match the camera convention above
        transpose[:3, 2] *= -1
        pc_world = np.matmul(transpose[:3, :3], x.T).T + transpose[:3, -1]
        y = pc_world


    elif source == "depth" and target == "world":
        y = transform(x, 'depth', 'cam')
        y = transform(y, 'cam', 'world', idx)

    return y

Point-cloud accumulation:

import numpy as np
import cv2
import open3d as o3d

from tf_scannet import get_rgbd, transform, get_transform

def get_single_pc_cam(idx):
    rgb, depth = get_rgbd(idx)
    rgb = cv2.resize(rgb, (640, 480))
    pc_cam = transform(depth, "depth", "cam")
    pc_cam[:, 2] *= -1
    colors = rgb[depth>0, :]
    pc_cam_o3d = o3d.geometry.PointCloud()
    pc_cam_o3d.points = o3d.utility.Vector3dVector(pc_cam)
    pc_cam_o3d.colors = o3d.utility.Vector3dVector(colors/255.0)
    return pc_cam_o3d


def pc_cam_to_world(pc_cam, idx):
    pc_world = transform(pc_cam, "cam", "world", idx)
    return pc_world    



pc_accumulate = o3d.geometry.PointCloud()

for idx in range(0, 500, 10):
    pc_current = get_single_pc_cam(idx)
    pc_current_points = np.asarray(pc_current.points)
    pc_current.points = o3d.utility.Vector3dVector(pc_cam_to_world(pc_current_points, idx))
    pc_accumulate += pc_current


o3d.visualization.draw_geometries([pc_accumulate])
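
If you want to keep the accumulated cloud instead of only viewing it, Open3D can write it straight to disk (the filename is just a placeholder):

o3d.io.write_point_cloud('scannet_accumulated.ply', pc_accumulate)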

1.2 Result

(figure)

2. VKITTI2

2.1 Reading the data

The raw VKITTI2 pose data looks like this:

(figure)

I rewrite it into the same per-frame 0.txt format as above and then apply the pose transform; a hedged conversion sketch follows.
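
A minimal conversion sketch. It assumes the usual VKITTI2 textgt layout of extrinsic.txt (a header row, then per line: frame, cameraID, followed by the 16 row-major entries of the 4×4 world-to-camera matrix) and picks camera 0; the paths are placeholders for my own folder layout:

# convert_vkitti2_pose.py -- a sketch: split extrinsic.txt into per-frame pose txt files
import numpy as np

EXTRINSIC_TXT = 'D:/DESKTOP/0701/TrackData/vkitti2_textgt/Scene01/clone/extrinsic.txt'
OUT_DIR = 'D:/DESKTOP/0701/TrackData/VKITTI2_Scene01/pose/'   # must already exist

data = np.loadtxt(EXTRINSIC_TXT, skiprows=1)   # skip the header row
for row in data:
    frame, camera_id = int(row[0]), int(row[1])
    if camera_id != 0:                          # keep only camera 0
        continue
    pose = row[2:18].reshape(4, 4)              # world -> camera, as stored by VKITTI2
    np.savetxt(OUT_DIR + '{}.txt'.format(frame), pose)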

2.2 Full coordinate-transform code

This one is a bit bizarre: the translation has to be divided by 10 before anything lines up. (My guess: VKITTI2 stores depth in centimetres, and the loader below divides by 1000 instead of 100, so the back-projected cloud ends up at 1/10 of metric scale and the translation has to be scaled to match.)

Code (tf_vkitti2.py):

# tf_vkitti2.py
import numpy as np
import cv2
from scipy.spatial.transform import Rotation as R

# hyperparameters ----------------------------------
ci, cj, fi, fj = 187, 620.5, 725.0087, 725.0087
SCENE_ID = 1
SCENE_DIR = 'D:/DESKTOP/0701/TrackData/VKITTI2_Scene'
ISEG_DIR = 'D:/DESKTOP/0701/TrackData/VKITTI2_Scene' + str(SCENE_ID).zfill(2) + '/iseg/'
# --------------------------------------------------

def get_rgbd(idx):
    rgb = cv2.cvtColor(cv2.imread(SCENE_DIR + str(SCENE_ID).zfill(2) + '/rgb/rgb_' + str(idx).zfill(5) + '.jpg'), cv2.COLOR_RGB2BGR)
    depth = cv2.imread(SCENE_DIR + str(SCENE_ID).zfill(2) + '/depth/depth_' + str(idx).zfill(5) + '.png', cv2.IMREAD_UNCHANGED) / 1000.0
    return rgb, depth

def get_iseg(idx):
    return cv2.imread(ISEG_DIR+'instancegt_'+ str(idx).zfill(5) + '.png')

def inverse_pose(pose):
    """
    pose: 4x4 transform; returns its inverse.
    """
    inv_pose = np.eye(4)
    rvec = pose[:3, :3]
    tvec = pose[:3, 3].reshape(3, 1)
    inv_rvec = rvec.T               # R^T
    t = -np.dot(inv_rvec, tvec)     # -R^T * t
    inv_pose[:3, :3] = inv_rvec
    inv_pose[:3, [3]] = t
    return inv_pose

def rmat_to_quad(mat):
    r = R.from_matrix(mat)
    quat = r.as_quat()
    return quat

def get_transform(idx):
    transform = np.loadtxt(SCENE_DIR + str(SCENE_ID).zfill(2) + '/pose/{}.txt'.format(idx))
    transform = inverse_pose(transform)   # the txt stores world->camera, so invert to get camera->world
    transform[:3, -1] /= 10               # see the note above: scale the translation to match the depth scaling
    return transform

def transform(x, source, target, idx=0):

    if source == "depth" and target == "cam":
        # x.shape = (H, W)
        i, j = np.nonzero(x)
        k = x[i, j]

        '''
        cam_kitti_y|   cam_kitti_z
                   |  /
                   | /
                   |/
                   ----------- cam_kitti_x
        '''
        cam_kitti_x = (j - cj)/fj * k
        cam_kitti_y = -(i - ci)/fi * k
        cam_kitti_z = k

        '''       1
             2    |   
              \   |  
               \  | 
                \ |
                  --------- 0
        '''
        pc_cam = np.stack([cam_kitti_x, cam_kitti_y, cam_kitti_z]).T
        y = pc_cam

    # need idx
    elif source == "cam" and target == "world":
        # x.shape = (N, 3); lift to homogeneous coordinates
        pc_cam_h = np.ones((x.shape[0], 4))
        pc_cam_h[:, :3] = x
        T_w_cam = get_transform(idx)

        y = np.dot(T_w_cam, pc_cam_h.T).T[:, :3]
        y[:, 0] = -y[:, 0]   # flip x to match the camera convention used above


    elif source == "depth" and target == "world":
        y = transform(x, 'depth', 'cam')
        y = transform(y, 'cam', 'world', idx)

    return y

Point-cloud accumulation:

from tf_vkitti2 import *
import open3d as o3d




pcd_total = o3d.geometry.PointCloud()

for idx in range(0, 82, 2):
    rgb, depth = get_rgbd(idx)
    depth[depth>20] = 0
    i,j = np.nonzero(depth)
    pc = transform(depth, 'depth', 'world', idx)
    colors = rgb[i,j,:]
    # create the Open3D point cloud object
    pcd = o3d.geometry.PointCloud()
    pcd.points = o3d.utility.Vector3dVector(pc)
    pcd.colors = o3d.utility.Vector3dVector(colors/255.0)
    pcd_total += pcd


# visualize the point cloud
o3d.visualization.draw_geometries([pcd_total])

2.3 Result

(figure)

3. KITTI

3.1 Reading the data

KITTI does not ship per-frame pose txt files, so I read the poses with the pykitti library. After downloading the dataset, the pose-reading code is:

import pykitti
import matplotlib.pyplot as plt
import numpy as np

basedir = 'E:/DATASETS/KITTI/2011_09_26_drive_0018/'
date = '2011_09_26'
drive = '0018'


dataset = pykitti.raw(basedir, date, drive, frames=range(47, 85, 1))


def get_pose(idx):
    oxt = dataset.oxts[idx]
    T_w_imu = oxt.T_w_imu                   # imu -> world (absolute pose); T_w_imu.dot(np.array([0, 0, 0, 1])) gives the vehicle position, handy when plotting the trajectory
    T_velo_imu = dataset.calib.T_velo_imu   # imu -> velodyne extrinsic
    T_w_velo = np.matmul(T_w_imu, np.linalg.inv(T_velo_imu))   # velodyne -> world

    return T_w_velo


# save the poses
for i in range(38):
     np.savetxt('D:/DESKTOP/0714/2011_09_26_drive_0018_nice-slam/pose/{}.txt'.format(i), get_pose(i))
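
As the comment above notes, T_w_imu gives the vehicle pose in the world frame, so a quick trajectory sanity check is easy (reusing the dataset object and the matplotlib import above):

# plot the x-y trajectory of the ego vehicle in the world frame
traj = np.array([oxt.T_w_imu[:3, 3] for oxt in dataset.oxts])
plt.plot(traj[:, 0], traj[:, 1])
plt.axis('equal')
plt.show()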

3.2 Full coordinate-transform code

This also yields a pose for each frame. I then project the lidar point cloud into a sparse depth map (or get dense depth directly with CompletionFormer), and accumulate the point cloud from RGB-D + pose. The transform code (tf_kitti.py) is as follows; a hedged sketch of the sparse-depth projection step comes right after it:

import numpy as np
import cv2
from scipy.spatial.transform import Rotation as R

# hyperparameters ----------------------------------
ci, cj, fi, fj = 187, 620.5, 725.0087, 725.0087

SCENE_DIR = 'D:/DESKTOP/DATASETS/KITTI_2011_09_26_drive_0001/'
# --------------------------------------------------

def get_rgbd(idx):
    rgb = cv2.cvtColor(cv2.imread(SCENE_DIR + '/color/' + str(idx) + '.png'), cv2.COLOR_RGB2BGR)
    depth = cv2.imread(SCENE_DIR + '/depth/' + str(idx) + '.png', cv2.IMREAD_UNCHANGED) / 1000.0
    return rgb, depth


def get_transform(idx):
    transform = np.loadtxt(SCENE_DIR + '/pose/{}.txt'.format(idx))
    return transform

def transform(x, source, target, idx=0):

    if source == "depth" and target == "cam":
        ci, cj, fi, fj = 187, 620.5, 725.0087, 725.0087

        # x.shape = (H, W)
        i, j = np.nonzero(x)
        k = x[i, j]

        '''
        cam_kitti_y|   cam_kitti_z
                   |  /
                   | /
                   |/
                   ----------- cam_kitti_x
        '''
        cam_kitti_x = (j - cj)/fj * k
        cam_kitti_y = -(i - ci)/fi * k
        cam_kitti_z = k

        '''       1
             2    |   
              \   |  
               \  | 
                \ |
                  --------- 0
        '''
        pc_cam = np.stack([cam_kitti_x, cam_kitti_y, cam_kitti_z]).T
        y = pc_cam

    # need idx
    elif source == "cam" and target == "world":
        # x.shape = (N, 3)
        pc_cam = x
        transpose = get_transform(idx)
        pc_cam = np.concatenate([pc_cam, np.ones((pc_cam.shape[0], 1))], axis=-1)

        pc_world = np.matmul(transpose, pc_cam.T).T[:, :3]
        y = pc_world


    elif source == "depth" and target == "world":
        y = transform(x, 'depth', 'cam')
        y = transform(y, 'cam', 'world', idx)

    return y
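
Below is a hedged sketch of the "project the lidar into a sparse depth map" step mentioned above. The camera choice (cam2, the left colour camera), the millimetre depth encoding and the output path are my own assumptions, not something fixed by the dataset:

# make_sparse_depth.py -- project velodyne points into the camera and save a 16-bit sparse depth PNG
import numpy as np
import cv2
import pykitti

dataset = pykitti.raw('E:/DATASETS/KITTI/2011_09_26_drive_0018/', '2011_09_26', '0018',
                      frames=range(47, 85, 1))

def save_sparse_depth(idx, out_path):
    velo = dataset.get_velo(idx)                   # (N, 4): x, y, z, reflectance
    velo[:, 3] = 1.0                               # reuse the last column as the homogeneous 1
    cam = dataset.calib.T_cam2_velo @ velo.T       # velodyne frame -> rectified cam2 frame, (4, N)
    z = cam[2, :]
    keep = z > 0.5                                 # drop points behind / too close to the camera
    uv = dataset.calib.K_cam2 @ cam[:3, keep]      # project with the cam2 intrinsics
    u = np.round(uv[0, :] / uv[2, :]).astype(int)
    v = np.round(uv[1, :] / uv[2, :]).astype(int)
    z = z[keep]

    H, W = np.array(dataset.get_cam2(idx)).shape[:2]
    inside = (u >= 0) & (u < W) & (v >= 0) & (v < H)
    depth = np.zeros((H, W), dtype=np.uint16)
    depth[v[inside], u[inside]] = (z[inside] * 1000.0).astype(np.uint16)  # millimetres, matching the /1000.0 loader above
    cv2.imwrite(out_path, depth)

save_sparse_depth(0, 'sparse_depth_0.png')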

Point-cloud accumulation:

import sys
sys.path.append('D:/DESKTOP/0627/')
from tf_kitti import *
import cv2
import open3d as o3d

def get_rgb_world_pc(idx):
    rgb, depth = get_rgbd(idx)
    depth[:150, :] = 0        # zero out image regions we don't want to back-project
    depth[:300, 400:850] = 0

    rgb = rgb[depth > 0] / 255.0

    pc_world = transform(depth, 'depth', 'world', idx)

    pc_o3d = o3d.geometry.PointCloud(o3d.utility.Vector3dVector(pc_world))
    pc_o3d.colors = o3d.utility.Vector3dVector(rgb)

    return pc_o3d


pc_rgb_o3d = get_rgb_world_pc(6)
for i in range(6, 95, 1):
    pc_rgb_o3d += get_rgb_world_pc(i)


o3d.visualization.draw_geometries([pc_rgb_o3d])

3.3 Result

(figure)

4. nuScenes

4.1 Code for reading data from nuScenes

This part is hacked together from the nuScenes-devkit visualization code. It saves out the RGB image, the sparse lidar depth map, and all the intrinsics/extrinsics, so they can be used directly afterwards.

The code below is also in the read_nuScenes.ipynb file on the Baidu pan link above.

%matplotlib inline
from nuscenes.nuscenes import NuScenes
from nuscenes.utils.data_classes import LidarPointCloud, RadarPointCloud, Box
import os.path as osp
from PIL import Image
import numpy as np
from pyquaternion import Quaternion
from nuscenes.utils.geometry_utils import view_points, box_in_image, BoxVisibility, transform_matrix
import copy

nusc = NuScenes(version='v1.0-trainval', dataroot='/data/wuke/zips/trainval', verbose=True) 
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
def token_to_ipt(my_sample, global_pc=None):

    sample_record = nusc.get('sample', my_sample['token'])
    camera_channel = 'CAM_FRONT'
    pointsensor_channel = 'LIDAR_TOP'

    calibs = {}

    pointsensor_token = sample_record['data'][pointsensor_channel]
    camera_token = sample_record['data'][camera_channel]
    cam = nusc.get('sample_data', camera_token)
    pointsensor = nusc.get('sample_data', pointsensor_token)
    poserecord = nusc.get('ego_pose', cam['ego_pose_token'])
    R, T = Quaternion(poserecord['rotation']).rotation_matrix, np.array(poserecord['translation']).T
    pcl_path = osp.join(nusc.dataroot, pointsensor['filename'])
    pc = LidarPointCloud.from_file(pcl_path)
    im = Image.open(osp.join(nusc.dataroot, cam['filename']))
    img = np.array(im)


    # Points live in the point sensor frame. So they need to be transformed via global to the image plane.
    # First step: transform the pointcloud to the ego vehicle frame for the timestamp of the sweep.
    cs_record = nusc.get('calibrated_sensor', pointsensor['calibrated_sensor_token'])
    pc.rotate(Quaternion(cs_record['rotation']).rotation_matrix)
    pc.translate(np.array(cs_record['translation']))

    # save the extrinsic - - - - - - - - - - - - - - - - - - - - -
    calib_lidar2ego = np.eye(4) 
    calib_lidar2ego[:3, :3] = Quaternion(cs_record['rotation']).rotation_matrix
    calib_lidar2ego[:3, -1] = np.array(cs_record['translation']).reshape(3)
    calibs['calib_lidar2ego'] = calib_lidar2ego
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    # Second step: transform from ego to the global frame.
    poserecord = nusc.get('ego_pose', pointsensor['ego_pose_token'])
    # save the extrinsic - - - - - - - - - - - - - - - - - - - - -
    calib_ego2world = np.eye(4) 
    calib_ego2world[:3, :3] = Quaternion(poserecord['rotation']).rotation_matrix
    calib_ego2world[:3, -1] = np.array(poserecord['translation']).reshape(3)
    calibs['calib_ego2world'] = calib_ego2world
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    # The ego_pose is the output of a lidar map-based localization algorithm described in our paper. The localization is 2-dimensional in the x-y plane.
    pc.rotate(Quaternion(poserecord['rotation']).rotation_matrix)
    pc.translate(np.array(poserecord['translation']))

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
    if not (global_pc is None):
        pc_world = copy.deepcopy(pc.points)
        global_pc = np.concatenate([global_pc, pc_world.T], axis=0)

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 

    # Third step: transform from global into the ego vehicle frame for the timestamp of the image. 
    poserecord = nusc.get('ego_pose', cam['ego_pose_token'])
    pc.translate(-np.array(poserecord['translation']))
    pc.rotate(Quaternion(poserecord['rotation']).rotation_matrix.T)


    # Fourth step: transform from ego into the camera.
    cs_record = nusc.get('calibrated_sensor', cam['calibrated_sensor_token'])
    # save the extrinsic - - - - - - - - - - - - - - - - - - - - -
    calib_cam2ego = np.eye(4) 
    calib_cam2ego[:3, :3] = Quaternion(cs_record['rotation']).rotation_matrix
    calib_cam2ego[:3, -1] = np.array(cs_record['translation']).reshape(3)
    calibs['calib_cam2ego'] = calib_cam2ego
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    pc.translate(-np.array(cs_record['translation']))
    pc.rotate(Quaternion(cs_record['rotation']).rotation_matrix.T)


    # Fifth step: actually take a "picture" of the point cloud.
    # Grab the depths (camera frame z axis points away from the camera).
    depths = pc.points[2, :]   # depths.shape = (N,); camera-frame z is the depth
    raw_depth = pc.points[2]
    coloring = depths
    # Take the actual picture (matrix multiplication with camera-matrix + renormalization).
    points = view_points(pc.points[:3, :], np.array(cs_record['camera_intrinsic']), normalize=True)   # points.shape = (3, N); the z row is normalized to 1

    np.savetxt('./intrinsic.txt', np.array(cs_record['camera_intrinsic']))


    # TEMP TO DELETE ==================
    # points[2, :] = depths
    # temp_0, temp_1 = copy.deepcopy(points[0, :]), copy.deepcopy(points[1, :])
    # points[0, :], points[1, :] = temp_1, -temp_0
    # =================================

    min_dist = 0
    mask = np.ones(depths.shape[0], dtype=bool)
    mask = np.logical_and(mask, depths > min_dist)
    mask = np.logical_and(mask, points[0, :] > 1)
    mask = np.logical_and(mask, points[0, :] < im.size[0] - 1)
    mask = np.logical_and(mask, points[1, :] > 1)
    mask = np.logical_and(mask, points[1, :] < im.size[1] - 1)

    # put the metric depth back into the z row (view_points normalized it to 1 above)
    points[2, :] = raw_depth

    points = points[:, mask]

    # assemble the 4x4 pose matrix (ego -> world at the camera timestamp) that gets saved as pose/{i}.txt
    T = T[:, np.newaxis]

    transcation = np.concatenate([R, T], axis=1)
    transcation = np.concatenate([transcation, np.array([0, 0, 0, 1])[np.newaxis, :]], axis=0)
    transcation = np.around(transcation, 6)

    H,W,C = img.shape

    # points is (3, N) with continuous pixel coordinates; rasterize it into an (H, W) sparse depth image
    points[:2, :] = np.around(points[:2, :])
    temp_depth = np.zeros((H, W, 1))   # pixels without a lidar return stay 0
    for i in range(len(points[0])):
        temp_depth[min(H - 1, int(points[1][i])), min(W - 1, int(points[0][i]))] = points[2, i]

    depth = (temp_depth * 1000.0).astype(np.uint16)[..., 0]

    return img, depth, transcation, calibs
import cv2
import os
import numpy as np

def transcation_to_txt(i, transcation, scene_id):
    # write the 4x4 pose as a plain-text file, one row per line
    with open('/home/wuke/nuScenes/scene_{}/pose/'.format(scene_id) + str(i) + '.txt', 'w') as file:
        rows = [' '.join(str(v) for v in row) for row in transcation]
        file.write('\n'.join(rows))

def get_sample_id_list(scene_id):
    scene = nusc.scene[scene_id]
    sample_tokens = nusc.field2token('sample', 'scene_token', scene['token'])

    sample_list = []
    for sample_token in sample_tokens:
        sample = nusc.get('sample', sample_token)
        sample_list.append(sample)
    return sample_list

def main(scene_id = 0):

    # scene_id = 0
    # global_pc = np.empty((0, 4))

    sample_list = get_sample_id_list(scene_id)

    for i in range(len(sample_list)): # sample_id_list:
        # my_sample = nusc.sample[i]
        my_sample = sample_list[i]
        img, depth, transcation, calibs = token_to_ipt(my_sample)
        # calibs is a dict: {'calib_lidar2ego': ..., 'calib_cam2ego': ..., 'calib_ego2world': ...}

        cv2.imwrite('/home/wuke/nuScenes/scene_{}/color/'.format(scene_id)+ str(i) + '.jpg', cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        # depth = cv2.resize(depth,(640,480))
        cv2.imwrite('/home/wuke/nuScenes/scene_{}/depth/'.format(scene_id)+str(i) + '.png', depth, [int(cv2.IMWRITE_PNG_COMPRESSION), 1])
        transcation_to_txt(i, transcation, scene_id)

        # save the extrinsics
        for key in list(calibs.keys()):
            np.savetxt('/home/wuke/nuScenes/scene_{}/{}/{}.txt'.format(scene_id, key, i), calibs[key])

    # np.save('/home/wuke/nuScenes/scene_{}/global_pc.npy'.format(scene_id), global_pc)


if __name__ == '__main__':

    for out in [68]:
        # out -= 1
        os.makedirs('/home/wuke/nuScenes/scene_{}/'.format(out), exist_ok=True)
        os.makedirs('/home/wuke/nuScenes/scene_{}/depth'.format(out), exist_ok=True)
        os.makedirs('/home/wuke/nuScenes/scene_{}/color'.format(out), exist_ok=True)
        os.makedirs('/home/wuke/nuScenes/scene_{}/pose'.format(out), exist_ok=True)
        # folders for the per-frame extrinsics
        os.makedirs('/home/wuke/nuScenes/scene_{}/calib_lidar2ego'.format(out), exist_ok=True)
        os.makedirs('/home/wuke/nuScenes/scene_{}/calib_cam2ego'.format(out), exist_ok=True)
        os.makedirs('/home/wuke/nuScenes/scene_{}/calib_ego2world'.format(out), exist_ok=True)

        main(out)

4.2 A walkthrough of the nuScenes-devkit API

  • The relevant devkit code:
pointsensor_token = sample_record['data'][pointsensor_channel]
camera_token = sample_record['data'][camera_channel]
cam = nusc.get('sample_data', camera_token)
pointsensor = nusc.get('sample_data', pointsensor_token)


# ① lidar frame → ego-vehicle frame
cs_record = nusc.get('calibrated_sensor', pointsensor['calibrated_sensor_token'])
pc.rotate(Quaternion(cs_record['rotation']).rotation_matrix)
pc.translate(np.array(cs_record['translation']))

# ② ego-vehicle frame → world frame
poserecord = nusc.get('ego_pose', pointsensor['ego_pose_token'])
# The ego_pose is the output of a lidar map-based localization algorithm described in our paper. The localization is 2-dimensional in the x-y plane.
pc.rotate(Quaternion(poserecord['rotation']).rotation_matrix)
pc.translate(np.array(poserecord['translation']))

# ③ world frame → ego-vehicle frame (at the camera timestamp)
poserecord = nusc.get('ego_pose', cam['ego_pose_token'])
pc.translate(-np.array(poserecord['translation']))
pc.rotate(Quaternion(poserecord['rotation']).rotation_matrix.T)

# ④ ego-vehicle frame → camera frame
cs_record = nusc.get('calibrated_sensor', cam['calibrated_sensor_token'])
pc.translate(-np.array(cs_record['translation']))
pc.rotate(Quaternion(cs_record['rotation']).rotation_matrix.T)

# ⑤ camera frame → pixel coordinates (u, v, depth)
points = view_points(pc.points[:3, :], np.array(cs_record['camera_intrinsic']), normalize=True)   # points.shape = (3, N)
  • The point-cloud class:

    def translate(self, x: np.ndarray) -> None:
         """
         Applies a translation to the point cloud.
         :param x: <np.float: 3, 1>. Translation in x, y, z.
         """
         for i in range(3):
             self.points[i, :] = self.points[i, :] + x[i]
    
    def rotate(self, rot_matrix: np.ndarray) -> None:
         """
         Applies a rotation.
         :param rot_matrix: <np.float: 3, 3>. Rotation matrix.
         """
         self.points[:3, :] = np.dot(rot_matrix, self.points[:3, :])
    
  • What we need now is pixel → camera → ego vehicle → world, i.e. steps ⑤, ④ and ② (the first two inverted).

    Hmm, I'll just assume all of the later transforms follow the devkit's original rotate-then-translate pattern, and save one small matrix per transform.
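
Under that assumption the per-frame matrices saved in 4.1 simply compose, so the cam → world step in the next section boils down to one matrix product. A small self-contained sketch (the paths and the random stand-in points are placeholders):

import numpy as np

# compose the two saved extrinsics into a single cam -> world transform for one frame (frame 0 of scene 68 here)
calib_cam2ego = np.loadtxt('/home/wuke/nuScenes/scene_68/calib_cam2ego/0.txt')
calib_ego2world = np.loadtxt('/home/wuke/nuScenes/scene_68/calib_ego2world/0.txt')
T_cam2world = calib_ego2world @ calib_cam2ego

pc_cam = np.random.rand(100, 3)                                     # stand-in for real camera-frame points
pc_world = (T_cam2world[:3, :3] @ pc_cam.T).T + T_cam2world[:3, 3]  # same result as the two-step version below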

4.3 Full coordinate-transform code

  • tf_nuScenes.py

# tf_nuScenes.py
import numpy as np
import cv2
import copy

'''
intrinsic:
     _         _
    | fv  0  cv |
    | 0  fu  cu |
    |_0   0   1_|

pixel_coord:
     _ _ _ _ v
    |
    |
    | u

cam_coord:
     /z
    /
   /_ _ _ _ x
   |
   |
   | y
'''

def transform(x_raw, source, target, intrinsic=None, calibs=None):

    if source == "cam" and target == "pixel":
        '''
        take the first three components of each point, i.e. x.shape = (N, 3)
        '''
        x = copy.deepcopy(x_raw)

        points = x.T  # (3, N)

        fu, fv, cu, cv = intrinsic['fu'], intrinsic['fv'], intrinsic['cu'], intrinsic['cv']

        view = np.eye(4)
        view[0, 0], view[1, 1], view[0, 2], view[1, 2] = fv, fu, cv, cu

        points = np.concatenate((points, np.ones((1, points.shape[1]))), axis=0)  # (4, N)

        points = np.dot(view, points)
        points = points[:3, :]
        points = points / points[2, :].reshape(1, -1)

        y = points

    elif source == "depth" and target == "cam":
        '''
        x.shape = (H, W)
        '''
        fu, fv, cu, cv = intrinsic['fu'], intrinsic['fv'], intrinsic['cu'], intrinsic['cv']

        x = copy.deepcopy(x_raw)
        u, v = np.nonzero(x)
        z_cam = x[u, v]  # (N,)

        x_cam = (v - cv) * z_cam / fv  # (N,)
        y_cam = (u - cu) * z_cam / fu  # (N,)

        y = np.concatenate([x_cam[:, np.newaxis], y_cam[:, np.newaxis], z_cam[:, np.newaxis]], axis=-1)  # (N, 3)

    elif source == "pixel" and target == "cam":
        '''
        x.shape = (N, 3)
        '''
        x = copy.deepcopy(x_raw)
        fu, fv, cu, cv = intrinsic['fu'], intrinsic['fv'], intrinsic['cu'], intrinsic['cv']
        u, v, z_cam = x[:, 0], x[:, 1], x[:, 2]

        x_cam = (v - cv) * z_cam / fv  # (N,)
        y_cam = (u - cu) * z_cam / fu  # (N,)

        y = np.concatenate([x_cam[:, np.newaxis], y_cam[:, np.newaxis], z_cam[:, np.newaxis]], axis=-1)  # (N, 3)

    elif source == "cam" and target == "world":
        '''
        x.shape = (N, 3)
        '''
        x = copy.deepcopy(x_raw)

        # STEP 1: camera frame -> ego-vehicle frame (rotate, then translate)
        calib_cam2ego = calibs['calib_cam2ego']
        x_ego = np.dot(calib_cam2ego[:3, :3], x.T).T + calib_cam2ego[:3, -1].reshape(1, 3)  # (N, 3)

        # STEP 2: ego-vehicle frame -> world frame
        calib_ego2world = calibs['calib_ego2world']
        x_world = np.dot(calib_ego2world[:3, :3], x_ego.T).T + calib_ego2world[:3, -1].reshape(1, 3)  # (N, 3)

        y = x_world

    elif source == "depth" and target == "world":
        x = transform(x_raw, source="depth", target="cam", intrinsic=intrinsic)
        y = transform(x, source="cam", target="world", calibs=calibs)

    return y
  • Visualization:
import os
import numpy as np
import cv2
import open3d as o3d

import sys
sys.path.append('D:/DESKTOP/1014/')
from tf_nuScenes import *


def get_single_frame_data(idx):
    DATA_PATH = 'D:/DESKTOP/1014/DATA/OSDF_nuScenes_Scene68/'
    rgb = cv2.imread(DATA_PATH + '/color/{}.jpg'.format(idx))
    sparse_depth = cv2.imread(DATA_PATH + '/depth/{}.png'.format(idx), cv2.IMREAD_UNCHANGED) / 1000.0
    calib_cam2ego = np.loadtxt(DATA_PATH + '/calib_cam2ego/{}.txt'.format(idx))
    calib_ego2world = np.loadtxt(DATA_PATH + '/calib_ego2world/{}.txt'.format(idx))

    return rgb, sparse_depth, {'calib_cam2ego': calib_cam2ego, 'calib_ego2world': calib_ego2world}

intrinsic= {'fu': 1.266417203046554050e+03, 'fv': 1.266417203046554050e+03, 'cu': 4.915070657929475715e+02, 'cv': 8.162670197447984037e+02}

pcd_all = o3d.geometry.PointCloud()

for idx in range(40):
    rgb, sparse_depth, calibs = get_single_frame_data(idx)
    rgb = cv2.cvtColor(rgb, cv2.COLOR_BGR2RGB)
    mask = sparse_depth>0
    colors = rgb[mask]
    pc_world = transform(sparse_depth, 'depth', 'world', intrinsic=intrinsic, calibs=calibs)
    pcd_world = o3d.geometry.PointCloud()
    pcd_world.points = o3d.utility.Vector3dVector(pc_world.reshape(-1, 3))
    pcd_world.colors = o3d.utility.Vector3dVector(colors.reshape(-1, 3)/255.0)
    pcd_all += pcd_world

o3d.visualization.draw_geometries([pcd_all])
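
For longer sequences the accumulated cloud gets heavy; it can optionally be thinned out with a voxel filter before the draw_geometries call above (the voxel size here is just a guess):

pcd_all = pcd_all.voxel_down_sample(voxel_size=0.2)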

4.4 Result

(figure)

5. Converting the pose formats of the three datasets (ScanNet / VKITTI2 / KITTI) into each other

I need this because VKITTI2 has to be converted into ScanNet's pose format before it can be fed into NICE-SLAM.

What this section implements is, for example: my dataset is VKITTI2, and I want to read it with the ScanNet-style loader and still get a correct accumulated point cloud.

5.1 Theory

  • ScanNet

    • Pose as loaded directly from the txt:

    $$\begin{bmatrix} r_{11} & r_{12} & r_{13} & t_{1} \\ r_{21} & r_{22} & r_{23} & t_{2} \\ r_{31} & r_{32} & r_{33} & t_{3} \\ 0 & 0 & 0 & 1 \end{bmatrix}$$

    • How it is applied:

      $$pc\_cam = \begin{pmatrix} x_{cam} & y_{cam} & z_{cam} \end{pmatrix}^T \qquad pc\_world = \begin{pmatrix} r_{11}x_{cam} - r_{12}y_{cam} - r_{13}z_{cam} + t_1 \\ r_{21}x_{cam} - r_{22}y_{cam} - r_{23}z_{cam} + t_2 \\ r_{31}x_{cam} - r_{32}y_{cam} - r_{33}z_{cam} + t_3 \end{pmatrix}$$

  • VKITTI2

    • Pose as loaded directly from the txt:

    $$\begin{bmatrix} r_{11} & r_{12} & r_{13} & t_{1} \\ r_{21} & r_{22} & r_{23} & t_{2} \\ r_{31} & r_{32} & r_{33} & t_{3} \\ 0 & 0 & 0 & 1 \end{bmatrix}$$

    • How it is applied:

      $$pc\_cam = \begin{pmatrix} x_{cam} & y_{cam} & z_{cam} \end{pmatrix}^T \qquad pc\_world = \begin{pmatrix} -r_{11}x_{cam} - r_{21}y_{cam} - r_{31}z_{cam} + 0.1(r_{11}t_1 + r_{21}t_2 + r_{31}t_3) \\ r_{12}x_{cam} + r_{22}y_{cam} + r_{32}z_{cam} - 0.1(r_{12}t_1 + r_{22}t_2 + r_{32}t_3) \\ r_{13}x_{cam} + r_{23}y_{cam} + r_{33}z_{cam} - 0.1(r_{13}t_1 + r_{23}t_2 + r_{33}t_3) \end{pmatrix}$$

  • KITTI

    • Pose as read and saved via pykitti

      • Code:

        def get_transpose(idx):
            # cam3 -> velo
            T_velo_cam3 = np.linalg.inv(dataset.calib.T_cam3_velo)

            # velo -> world
            oxt = oxts_list[idx]            # oxts_list = dataset.oxts
            T_w_imu = oxt.T_w_imu
            T_velo_imu = dataset.calib.T_velo_imu
            T_imu_velo = inverse_pose(T_velo_imu)
            T_w_velo = np.dot(T_w_imu, T_imu_velo)

            transpose = np.dot(T_w_velo, T_velo_cam3)

            # once this is saved, KITTI only needs pc_world = np.matmul(transpose, pc_cam.T).T[:, :3]

            return transpose
        
      • Written as:

        $$\begin{pmatrix} r_{11} & r_{12} & r_{13} & t_{1} \\ r_{21} & r_{22} & r_{23} & t_{2} \\ r_{31} & r_{32} & r_{33} & t_{3} \\ 0 & 0 & 0 & 1 \end{pmatrix}$$

    • How it is applied:

      $$pc\_cam = \begin{pmatrix} x_{cam} & y_{cam} & z_{cam} \end{pmatrix}^T \qquad pc\_world = \begin{pmatrix} r_{11}x_{cam} + r_{12}y_{cam} + r_{13}z_{cam} + t_1 \\ r_{21}x_{cam} + r_{22}y_{cam} + r_{23}z_{cam} + t_2 \\ r_{31}x_{cam} + r_{32}y_{cam} + r_{33}z_{cam} + t_3 \end{pmatrix}$$

5.2 Code

The pose is what takes points from the camera frame to the world frame; here is the conversion code:

# tf_exchange.py


import numpy as np
import copy


def exchange(x, source, target):
    ''' 
    x.shape = (4, 4)
    '''
    if source == 'vkitti2' and target == 'scannet':
        TR = np.eye(4)

        x_cp = copy.deepcopy(x)

        TR[0,0], TR[0,1], TR[0,2] = -x_cp[0,0], x_cp[1,0], x_cp[2,0]
        TR[0,-1] = 0.1 * (x_cp[0,0]*x_cp[0,-1] + x_cp[1,0]*x_cp[1,-1] + x_cp[2,0]*x_cp[2,-1])


        TR[1,0], TR[1,1], TR[1,2] = x_cp[0,1], -x_cp[1,1], -x_cp[2,1]
        TR[1,-1] = -0.1 * (x_cp[0,1]*x_cp[0,-1] + x_cp[1,1]*x_cp[1,-1] + x_cp[2,1]*x_cp[2,-1])

        TR[2,0], TR[2,1], TR[2,2] = x_cp[0,2], -x_cp[1,2], -x_cp[2,2]
        TR[2,-1] = -0.1 * (x_cp[0,2]*x_cp[0,-1] + x_cp[1,2]*x_cp[1,-1] + x_cp[2,2]*x_cp[2,-1])

        y = TR


    elif source == 'scannet' and target == 'vkitti2':
        TR = np.eye(4)

        TR[0,0], TR[0,1], TR[0,2] = -x[0,0], x[1,0], x[2,0]
        TR[1,0], TR[1,1], TR[1,2] = x[0,1], -x[1,1], -x[2,1]
        TR[2,0], TR[2,1], TR[2,2] = x[0,2], -x[1,2], -x[2,2]

        x_cp = copy.deepcopy(x)
        x_cp[0, -1] *= -1
        TR[:3, -1] = -10 * np.matmul(np.linalg.inv(TR[:3,:3].T), x_cp[:3, -1])

        y = TR

    elif source == 'kitti' and target == 'scannet':
        TR = copy.deepcopy(x)
        TR[:3, 1] *= -1
        TR[:3, 2] *= -1
        y = TR         

    elif source == 'scannet' and target == 'kitti':
        TR = copy.deepcopy(x)
        TR[:3, 1] *= -1
        TR[:3, 2] *= -1
        y = TR  

    elif source == 'vkitti2' and target == 'kitti':
        x = exchange(x, 'vkitti2', 'scannet')
        y = exchange(x, 'scannet', 'kitti')

    elif source == 'kitti' and target == 'vkitti2':
        x = exchange(x, 'kitti', 'scannet')
        y = exchange(x, 'scannet', 'vkitti2')            

    return y
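
A minimal usage sketch (the paths are placeholders; the input is a per-frame VKITTI2 pose txt like the ones produced in section 2.1):

import numpy as np
from tf_exchange import exchange

# rewrite one VKITTI2 pose in ScanNet's convention so the ScanNet-style loader can consume it
pose_vkitti2 = np.loadtxt('D:/DESKTOP/0701/TrackData/VKITTI2_Scene01/pose/0.txt')
pose_scannet = exchange(pose_vkitti2, 'vkitti2', 'scannet')
np.savetxt('scannet_style_pose/0.txt', pose_scannet)   # target folder must already exist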