最近在和坐标变换斗争,不同数据集的位姿给的也是千奇百怪,我列写了我用到的三个数据集以及相应的变换方式,以便自己日后查找。(最终检验是否正确以累计点云为准)
文中用到的所有代码,以及四个数据集我都打包上传到了:
链接:https://pan.baidu.com/s/1LARmVn1kMHNl-1CXSyB8HA 提取码:2023欢迎下载~🙂
文章目录
一、ScanNet
这个ScanNet数据我是从NICE-SLAM的链接下载的: https://github.com/cvg/nice-slam
ScanNet的数据集格式:
└─frames
├─color
├─depth
├─intrinsic
└─pose/0.txt
坐标系介绍: (下面的都是这个坐标系)
1.1 完整的坐标变换代码
代码tf_scannet.py
:
# ⭐tf_scannet.py
import numpy as np
import cv2
from scipy.spatial.transform import Rotation as R
# hyperparameters ----------------------------------
ci, cj, fi, fj = 242.6836, 318.9054, 578.729797, 577.59
SCENE_DIR = 'D:/DESKTOP/0716/Demo/frames/'
# --------------------------------------------------
def get_rgbd(idx):
rgb = cv2.cvtColor(cv2.imread(SCENE_DIR + '/color/' + str(idx) + '.jpg'), cv2.COLOR_RGB2BGR)
depth = cv2.imread(SCENE_DIR + '/depth/' + str(idx) + '.png', cv2.IMREAD_UNCHANGED) / 1000.0
return rgb, depth
def get_transform(idx):
transform = np.loadtxt(SCENE_DIR + '/pose/{}.txt'.format(idx))
return transform
def transform(x, source, target, idx=0):
if source == "depth" and target == "cam":
ci, cj, fi, fj = 242.6836, 318.9054, 578.729797, 577.59
# x.shape = (H, W)
i, j = np.nonzero(x)
k = x[i, j]
'''
cam_kitti_y| cam_kitti_z
| /
| /
|/
----------- cam_kitti_x
'''
cam_kitti_x = (j - cj)/fj * k
cam_kitti_y = -(i - ci)/fi * k
cam_kitti_z = k
''' 1
2 |
\ |
\ |
\ |
--------- 0
'''
pc_cam = np.stack([cam_kitti_x, cam_kitti_y, cam_kitti_z]).T
y = pc_cam
# need idx
elif source == "cam" and target == "world":
# x.shape = (N, 3)
transpose = get_transform(idx)
transpose[:3, 1] *= -1
transpose[:3, 2] *= -1
pc_world = np.matmul(transpose[:3, :3], pc_cam.T).T + transpose[:3, -1]
y = pc_world
elif source == "depth" and target == "world":
y = transform(x, 'depth', 'cam')
y = transform(y, 'cam', 'world', idx)
return y
累计点云:
import numpy as np
import cv2
import open3d as o3d
from tf_scannet import get_rgbd, transform, get_transform
def get_single_pc_cam(idx):
rgb, depth = get_rgbd(idx)
rgb = cv2.resize(rgb, (640, 480))
pc_cam = transform(depth, "depth", "cam")
pc_cam[:, 2] *= -1
colors = rgb[depth>0, :]
pc_cam_o3d = o3d.geometry.PointCloud()
pc_cam_o3d.points = o3d.utility.Vector3dVector(pc_cam)
pc_cam_o3d.colors = o3d.utility.Vector3dVector(colors/255.0)
return pc_cam_o3d
def pc_cam_to_world(pc_cam, idx):
pc_world = transform(pc_cam, "cam", "world", idx)
return pc_world
pc_accumulate = o3d.geometry.PointCloud()
for idx in range(0, 500, 10):
pc_current = get_single_pc_cam(idx)
pc_current_points = np.asarray(pc_current.points)
pc_current.points = o3d.utility.Vector3dVector(pc_cam_to_world(pc_current_points, idx))
pc_accumulate += pc_current
o3d.visualization.draw_geometries([pc_accumulate])
1.2 效果
二、VKITTI2
2.1 读数据
VKITTI2的原始数据是这样的:
我把它转写成0.txt那种,然后做位姿变换。
2.2 完整的坐标变换代码
这个很诡异,他的平移矩阵要除10才可以,很抽象。
代码(tf_vkitti2.py
):
# tf_vkitti2.py
import numpy as np
import cv2
from scipy.spatial.transform import Rotation as R
# hyperparameters ----------------------------------
ci, cj, fi, fj = 187, 620.5, 725.0087, 725.0087
SCENE_ID = 1
SCENE_DIR = 'D:/DESKTOP/0701/TrackData/VKITTI2_Scene'
ISEG_DIR = 'D:/DESKTOP/0701/TrackData/VKITTI2_Scene' + str(SCENE_ID).zfill(2) + '/iseg/'
# --------------------------------------------------
def get_rgbd(idx):
rgb = cv2.cvtColor(cv2.imread(SCENE_DIR + str(SCENE_ID).zfill(2) + '/rgb/rgb_' + str(idx).zfill(5) + '.jpg'), cv2.COLOR_RGB2BGR)
depth = cv2.imread(SCENE_DIR + str(SCENE_ID).zfill(2) + '/depth/depth_' + str(idx).zfill(5) + '.png', cv2.IMREAD_UNCHANGED) / 1000.0
return rgb, depth
def get_iseg(idx):
return cv2.imread(ISEG_DIR+'instancegt_'+ str(idx).zfill(5) + '.png')
def inverse_pose(pose):
"""
pose: 4x4,
"""
inv_pose = np.eye(4)
rvec = pose[:3,:3]
tvec = pose[:3,3].reshape(3,1)
inv_rvec = rvec.T # r^T
t = - np.dot(inv_rvec, tvec) # -r^T * t
inv_pose[:3, :3] = inv_rvec
inv_pose[:3, [3]] = t
return inv_pose
def rmat_to_quad(mat):
r = R.from_matrix(mat)
quat = r.as_quat()
return quat
def get_transform(idx):
transform = np.loadtxt('D:/DESKTOP/0701/TrackData/VKITTI2_Scene' + str(SCENE_ID).zfill(2) + '/pose/{}.txt'.format(idx))
transform = inverse_pose(transform)
transform[:3, -1] /= 10
return transform
def transform(x, source, target, idx=0):
if source == "depth" and target == "cam":
# x.shape = (H, W)
i, j = np.nonzero(x)
k = x[i, j]
'''
cam_kitti_y| cam_kitti_z
| /
| /
|/
----------- cam_kitti_x
'''
cam_kitti_x = (j - cj)/fj * k
cam_kitti_y = -(i - ci)/fi * k
cam_kitti_z = k
''' 1
2 |
\ |
\ |
\ |
--------- 0
'''
pc_cam = np.stack([cam_kitti_x, cam_kitti_y, cam_kitti_z]).T
y = pc_cam
# need idx
elif source == "cam" and target == "world":
# x.shape = (N, 3)
velo_points = np.ones((x.shape[0], 4))
velo_points[:,:3] = x
velo_points = velo_points.T
T_w_velo = get_transform(idx)
y = np.dot(T_w_velo, velo_points).T[:, :3]
y[:, 0] = -y[:, 0]
elif source == "depth" and target == "world":
y = transform(x, 'depth', 'cam')
y = transform(y, 'cam', 'world', idx)
return y
累计点云:
from tf_vkitti2 import *
import open3d as o3d
pcd_total = o3d.geometry.PointCloud()
for idx in range(0, 82, 2):
rgb, depth = get_rgbd(idx)
depth[depth>20] = 0
i,j = np.nonzero(depth)
pc = transform(depth, 'depth', 'world', idx)
colors = rgb[i,j,:]
# 创建Open3D的点云对象
pcd = o3d.geometry.PointCloud()
pcd.points = o3d.utility.Vector3dVector(pc)
pcd.colors = o3d.utility.Vector3dVector(colors/255.0)
pcd_total += pcd
# 可视化点云
o3d.visualization.draw_geometries([pcd_total])
2.3 效果
三、KITTI
3.1 读数据
KITTI本身没有那种txt的位姿,我是借助pykitti
库读取的位姿,下载好数据集后,读取位姿的代码如下:
import pykitti
import matplotlib.pyplot as plt
import numpy as np
basedir = 'E:/DATASETS/KITTI/2011_09_26_drive_0018/'
date = '2011_09_26'
drive = '0018'
dataset = pykitti.raw(basedir, date, drive, frames=range(47, 85, 1))
def get_pose(idx):
oxts = dataset.oxts
velos = dataset.velo
oxt = dataset.oxts[idx]
T_w_imu = oxt.T_w_imu # 这个就是绝对坐标系的RT矩阵
T_velo_imu = dataset.calib.T_velo_imu
# 这个就是从imu坐标系到世界坐标系下的变换,如果想知道车此时的位置,就可以用 T_w_imu.dot(np.array(0,0,0,1)) 来得到. 这一点在求车辆的轨迹的时候就会用到.
T_w_velo = np.matmul(T_w_imu, np.linalg.inv(T_velo_imu))
return T_w_velo
# 存pose
for i in range(38):
np.savetxt('D:/DESKTOP/0714/2011_09_26_drive_0018_nice-slam/pose/{}.txt'.format(i), get_pose(i))
3.2 完整的坐标变换代码
然后也能拿到一个位姿,之后我将点云投影成稀疏深度(或者借助CompletionFormer直接拿到稠密深度),然后根据RGBD+pose做点云累计,代码如下:
import numpy as np
import cv2
from scipy.spatial.transform import Rotation as R
# hyperparameters ----------------------------------
ci, cj, fi, fj = 187, 620.5, 725.0087, 725.0087
SCENE_DIR = 'D:/DESKTOP/DATASETS/KITTI_2011_09_26_drive_0001/'
# --------------------------------------------------
def get_rgbd(idx):
rgb = cv2.cvtColor(cv2.imread(SCENE_DIR + '/color/' + str(idx) + '.png'), cv2.COLOR_RGB2BGR)
depth = cv2.imread(SCENE_DIR + '/depth/' + str(idx) + '.png', cv2.IMREAD_UNCHANGED) / 1000.0
return rgb, depth
def get_transform(idx):
transform = np.loadtxt(SCENE_DIR + '/pose/{}.txt'.format(idx))
return transform
def transform(x, source, target, idx=0):
if source == "depth" and target == "cam":
ci, cj, fi, fj = 187, 620.5, 725.0087, 725.0087
# x.shape = (H, W)
i, j = np.nonzero(x)
k = x[i, j]
'''
cam_kitti_y| cam_kitti_z
| /
| /
|/
----------- cam_kitti_x
'''
cam_kitti_x = (j - cj)/fj * k
cam_kitti_y = -(i - ci)/fi * k
cam_kitti_z = k
''' 1
2 |
\ |
\ |
\ |
--------- 0
'''
pc_cam = np.stack([cam_kitti_x, cam_kitti_y, cam_kitti_z]).T
y = pc_cam
# need idx
elif source == "cam" and target == "world":
# x.shape = (N, 3)
pc_cam = x
transpose = get_transform(idx)
pc_cam = np.concatenate([pc_cam, np.ones((pc_cam.shape[0], 1))], axis=-1)
pc_world = np.matmul(transpose, pc_cam.T).T[:, :3]
y = pc_world
elif source == "depth" and target == "world":
y = transform(x, 'depth', 'cam')
y = transform(y, 'cam', 'world', idx)
return y
累计点云如下:
import sys
sys.path.append('D:/DESKTOP/0627/')
from tf_kitti import *
import cv2
import open3d as o3d
import cv2
def get_rgb_world_pc(idx):
rgb, depth = get_rgbd(idx)
depth[:150,:] = 0
depth[:300,400:850] = 0
rgb = rgb[depth > 0] / 255.0
pc_world = transform(depth, 'depth', 'world', idx)
pc_o3d = o3d.geometry.PointCloud(o3d.utility.Vector3dVector(pc_world))
pc_o3d.colors = o3d.utility.Vector3dVector(rgb)
return pc_o3d
pc_rgb_o3d = get_rgb_world_pc(6)
for i in range(6, 95, 1):
pc_rgb_o3d += get_rgb_world_pc(i)
o3d.visualization.draw_geometries([pc_rgb_o3d])
3.3 效果
四、nuScenes
4.1 从nuScenes读数据的代码
这一段是我扒开可视化然后魔改的,作用是把 (RGB, 稀疏雷达版深度图,各种外参内参) 保存下来,直接用就行。
下面这些我放到百度网盘的read_nuScenes.ipynb
文件里了。
%matplotlib inline
from nuscenes.nuscenes import NuScenes
from nuscenes.utils.data_classes import LidarPointCloud, RadarPointCloud, Box
import os.path as osp
from PIL import Image
import numpy as np
from pyquaternion import Quaternion
from nuscenes.utils.geometry_utils import view_points, box_in_image, BoxVisibility, transform_matrix
import copy
nusc = NuScenes(version='v1.0-trainval', dataroot='/data/wuke/zips/trainval', verbose=True)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
def token_to_ipt(my_sample, global_pc=None):
sample_record = nusc.get('sample', my_sample['token'])
camera_channel = 'CAM_FRONT'
pointsensor_channel = 'LIDAR_TOP'
calibs = {}
pointsensor_token = sample_record['data'][pointsensor_channel]
camera_token = sample_record['data'][camera_channel]
cam = nusc.get('sample_data', camera_token)
pointsensor = nusc.get('sample_data', pointsensor_token)
pose = nusc.get('ego_pose', cam['ego_pose_token'])
poserecord = nusc.get('ego_pose', cam['ego_pose_token'])
R, T = Quaternion(poserecord['rotation']).rotation_matrix, np.array(poserecord['translation']).T
pcl_path = osp.join(nusc.dataroot, pointsensor['filename'])
pc = LidarPointCloud.from_file(pcl_path)
im = Image.open(osp.join(nusc.dataroot, cam['filename']))
img = np.array(im)
# Points live in the point sensor frame. So they need to be transformed via global to the image plane.
# First step: transform the pointcloud to the ego vehicle frame for the timestamp of the sweep.
cs_record = nusc.get('calibrated_sensor', pointsensor['calibrated_sensor_token'])
pc.rotate(Quaternion(cs_record['rotation']).rotation_matrix)
pc.translate(np.array(cs_record['translation']))
# 存外参数 - - - - - - - - - - - - - - - - - - - - - - - -
calib_lidar2ego = np.eye(4)
calib_lidar2ego[:3, :3] = Quaternion(cs_record['rotation']).rotation_matrix
calib_lidar2ego[:3, -1] = np.array(cs_record['translation']).reshape(3)
calibs['calib_lidar2ego'] = calib_lidar2ego
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Second step: transform from ego to the global frame.
poserecord = nusc.get('ego_pose', pointsensor['ego_pose_token'])
# 存外参数 - - - - - - - - - - - - - - - - - - - - - - - -
calib_ego2world = np.eye(4)
calib_ego2world[:3, :3] = Quaternion(poserecord['rotation']).rotation_matrix
calib_ego2world[:3, -1] = np.array(poserecord['translation']).reshape(3)
calibs['calib_ego2world'] = calib_ego2world
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# The ego_pose is the output of a lidar map-based localization algorithm described in our paper. The localization is 2-dimensional in the x-y plane.
pc.rotate(Quaternion(poserecord['rotation']).rotation_matrix)
pc.translate(np.array(poserecord['translation']))
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
if not (global_pc is None):
pc_world = copy.deepcopy(pc.points)
global_pc = np.concatenate([global_pc, pc_world.T], axis=0)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Third step: transform from global into the ego vehicle frame for the timestamp of the image.
poserecord = nusc.get('ego_pose', cam['ego_pose_token'])
pc.translate(-np.array(poserecord['translation']))
pc.rotate(Quaternion(poserecord['rotation']).rotation_matrix.T)
# Fourth step: transform from ego into the camera.
cs_record = nusc.get('calibrated_sensor', cam['calibrated_sensor_token'])
# 存外参数 - - - - - - - - - - - - - - - - - - - - - - - -
calib_cam2ego = np.eye(4)
calib_cam2ego[:3, :3] = Quaternion(cs_record['rotation']).rotation_matrix
calib_cam2ego[:3, -1] = np.array(cs_record['translation']).reshape(3)
calibs['calib_cam2ego'] = calib_cam2ego
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
pc.translate(-np.array(cs_record['translation']))
pc.rotate(Quaternion(cs_record['rotation']).rotation_matrix.T)
# Fifth step: actually take a "picture" of the point cloud.
# Grab the depths (camera frame z axis points away from the camera).
depths = pc.points[2, :] # depths.shape = (1,34784)
raw_depth = pc.points[2]
gt_from = 'lidarseg'
# semantic_table = getattr(nusc, gt_from)
coloring = depths
# Take the actual picture (matrix multiplication with camera-matrix + renormalization).
points = view_points(pc.points[:3, :], np.array(cs_record['camera_intrinsic']), normalize=True) # depths.shape = (3,34784)
np.savetxt('./intrinsic.txt', np.array(cs_record['camera_intrinsic']))
# TEMP TO DELETE ==================
# points[2, :] = depths
# temp_0, temp_1 = copy.deepcopy(points[0, :]), copy.deepcopy(points[1, :])
# points[0, :], points[1, :] = temp_1, -temp_0
# =================================
min_dist = 0
mask = np.ones(depths.shape[0], dtype=bool)
mask = np.logical_and(mask, depths > min_dist)
mask = np.logical_and(mask, points[0, :] > 1)
mask = np.logical_and(mask, points[0, :] < im.size[0] - 1)
mask = np.logical_and(mask, points[1, :] > 1)
mask = np.logical_and(mask, points[1, :] < im.size[1] - 1)
# TEMP TO DELETE =========================================
points[2, :] = raw_depth
# ========================================================
points = points[:, mask]
# 返回变换矩阵
T = T[:,np.newaxis]
transcation = np.concatenate([R,T],axis=1)
transcation = np.concatenate([transcation,np.array([0,0,0,1])
[np.newaxis,:]], axis=0)
transcation = np.around(transcation, 6)
H,W,C = img.shape
# points是(3, 3053)且都是离散的数字,需要将points变成(1,1600,900)的图。
points[:2,:] = np.around(points[:2,:])
temp_depth = np.zeros((H,W,1)) #没有的存成200
for i in range(len(points[0])):
temp_depth[ min(H, int(points[1][i])), min(W, int(points[0][i]))] = points[2,i]
depth = (temp_depth * 1000.0).astype(np.uint16)[..., 0]
return img, depth, transcation, calibs
import cv2
import os
import numpy as np
def transcation_to_txt(i, transcation, scene_id):
file = open('/home/wuke/nuScenes/scene_{}/pose/'.format(scene_id)+str(i) + '.txt', 'w')
text = ''
text = str(transcation[0][0]) + ' ' + str(transcation[0][1]) + ' ' + str(transcation[0][2]) + ' ' + str(transcation[0][3]) + '\n'
text += str(transcation[1][0]) + ' ' + str(transcation[1][1]) + ' ' + str(transcation[1][2]) + ' ' + str(transcation[1][3]) + '\n'
text += str(transcation[2][0]) + ' ' + str(transcation[2][1]) + ' ' + str(transcation[2][2]) + ' ' + str(transcation[2][3]) + '\n'
text += str(transcation[3][0]) + ' ' + str(transcation[3][1]) + ' ' + str(transcation[3][2]) + ' ' + str(transcation[3][3])
file.write(text)
def get_sample_id_list(scene_id):
scene = nusc.scene[scene_id]
sample_tokens = nusc.field2token('sample', 'scene_token', scene['token'])
sample_list = []
for sample_token in sample_tokens:
sample = nusc.get('sample', sample_token)
sample_list.append(sample)
return sample_list
def main(scene_id = 0):
# scene_id = 0
# global_pc = np.empty((0, 4))
sample_list = get_sample_id_list(scene_id)
for i in range(len(sample_list)): # sample_id_list:
# my_sample = nusc.sample[i]
my_sample = sample_list[i]
img, depth, transcation, calibs = token_to_ipt(my_sample)
# calibs是一个字典,{'calib_lidar2ego':, 'calib_cam2ego':, 'calib_ego2world':}
cv2.imwrite('/home/wuke/nuScenes/scene_{}/color/'.format(scene_id)+ str(i) + '.jpg', cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
# depth = cv2.resize(depth,(640,480))
cv2.imwrite('/home/wuke/nuScenes/scene_{}/depth/'.format(scene_id)+str(i) + '.png', depth, [int(cv2.IMWRITE_PNG_COMPRESSION), 1])
transcation_to_txt(i, transcation, scene_id)
# 存外参捏
for key in list(calibs.keys()):
np.savetxt('/home/wuke/nuScenes/scene_{}/{}/{}.txt'.format(scene_id, key, i), calibs[key])
# np.save('/home/wuke/nuScenes/scene_{}/global_pc.npy'.format(scene_id), global_pc)
if __name__ == '__main__':
for out in [68]:
# out -= 1
os.makedirs('/home/wuke/nuScenes/scene_{}/'.format(out), exist_ok=True)
os.makedirs('/home/wuke/nuScenes/scene_{}/depth'.format(out), exist_ok=True)
os.makedirs('/home/wuke/nuScenes/scene_{}/color'.format(out), exist_ok=True)
os.makedirs('/home/wuke/nuScenes/scene_{}/pose'.format(out), exist_ok=True)
# 存一下小外参
os.makedirs('/home/wuke/nuScenes/scene_{}/calib_lidar2ego'.format(out), exist_ok=True)
os.makedirs('/home/wuke/nuScenes/scene_{}/calib_cam2ego'.format(out), exist_ok=True)
os.makedirs('/home/wuke/nuScenes/scene_{}/calib_ego2world'.format(out), exist_ok=True)
main(out)
4.2 nuScenes-devkit 的API解析
- 正经代码
pointsensor_token = sample_record['data'][pointsensor_channel]
camera_token = sample_record['data'][camera_channel]
cam = nusc.get('sample_data', camera_token)
pointsensor = nusc.get('sample_data', pointsensor_token)
# ① 雷达坐标系 → 小车坐标系
cs_record = nusc.get('calibrated_sensor', pointsensor['calibrated_sensor_token'])
pc.rotate(Quaternion(cs_record['rotation']).rotation_matrix)
pc.translate(np.array(cs_record['translation']))
# ② 小车坐标系 → 世界坐标系
poserecord = nusc.get('ego_pose', pointsensor['ego_pose_token'])
# The ego_pose is the output of a lidar map-based localization algorithm described in our paper. The localization is 2-dimensional in the x-y plane.
pc.rotate(Quaternion(poserecord['rotation']).rotation_matrix)
pc.translate(np.array(poserecord['translation']))
# ③ 世界坐标系 → 小车坐标系
poserecord = nusc.get('ego_pose', cam['ego_pose_token'])
pc.translate(-np.array(poserecord['translation']))
pc.rotate(Quaternion(poserecord['rotation']).rotation_matrix.T)
# ④ 小车坐标系 → 相机坐标系
cs_record = nusc.get('calibrated_sensor', cam['calibrated_sensor_token'])
pc.translate(-np.array(cs_record['translation']))
pc.rotate(Quaternion(cs_record['rotation']).rotation_matrix.T)
# ⑤ 相机坐标系 → 像素坐标系(u, v, depth)
points = view_points(pc.points[:3, :], np.array(cs_record['camera_intrinsic']), normalize=True) # depths.shape = (3,34784)
-
pc类:
def translate(self, x: np.ndarray) -> None: """ Applies a translation to the point cloud. :param x: <np.float: 3, 1>. Translation in x, y, z. """ for i in range(3): self.points[i, :] = self.points[i, :] + x[i] def rotate(self, rot_matrix: np.ndarray) -> None: """ Applies a rotation. :param rot_matrix: <np.float: 3, 3>. Rotation matrix. """ self.points[:3, :] = np.dot(rot_matrix, self.points[:3, :])
-
现在需要的是:像素→相机→小车→世界,即需要⑤④②
emmm,假设后面的变换全部都是按照人家原本的先旋转后平移的小策略进行,然后存一个各个变换的小矩阵。
4.3 完整的坐标变换代码
-
tf_nuScenes.py
import numpy as np import cv2 import copy ''' intrinsic: _ _ | fv 0 cv | | 0 fu cu | |_0 0 1_| pixel_coord: _ _ _ _v | | |u cam_coord: /z / /_ _ _ _ x | | |y world_coord:
def transform(x_raw, source, target, intrinsic=None, calibs=None):
if source == "cam" and target == "pixel":
'''
取points的前面三个值,即(N, 3)
x.shape = (N, 3)
'''
x = copy.deepcopy(x_raw)
points = x.T # (3, N)
fu, fv, cu, cv = intrinsic['fu'], intrinsic['fv'], intrinsic['cu'], intrinsic['cv']
view = np.eye(4)
view[0,0], view[1,1], view[0,2], view[1,2] = fv, fu, cv, cu
points = np.concatenate((points, np.ones((1, points.shape[1]))), axis=0) # (4, N)
points = np.dot(view, points)
points = points[:3, :]
points = points / points[2, :].reshape(1, -1)
y = points
elif source == "depth" and target == "cam":
'''
x.shape = (H, W)
'''
fu, fv, cu, cv = intrinsic['fu'], intrinsic['fv'], intrinsic['cu'], intrinsic['cv']
x = copy.deepcopy(x_raw)
u, v = np.nonzero(x)
z_cam = x[u, v] # (N ,)
x_cam = (v - cv) * z_cam / fv # (N ,)
y_cam = (u - cu) * z_cam / fu # (N ,)
y = np.concatenate([x_cam[:,np.newaxis], y_cam[:,np.newaxis], z_cam[:,np.newaxis]], axis=-1) # (N, 3)
elif source == "pixel" and target == "cam":
'''
x.shape = (N, 3)
'''
x = copy.deepcopy(x_raw)
fu, fv, cu, cv = intrinsic['fu'], intrinsic['fv'], intrinsic['cu'], intrinsic['cv']
u, v, z_cam = x[:, 0], x[:, 1], x[:, 2]
x_cam = (v - cv) * z_cam / fv # (N ,)
y_cam = (u - cu) * z_cam / fu # (N ,)
y = np.concatenate([x_cam[:,np.newaxis], y_cam[:,np.newaxis], z_cam[:,np.newaxis]], axis=-1) # (N, 3)
elif source == "cam" and target == "world":
'''
x.shape = (N, 3)
'''
x = copy.deepcopy(x_raw)
# STEP 1 相机坐标系转小车坐标系(先旋转后平移)
calib_cam2ego = calibs['calib_cam2ego']
x_ego = np.dot(calib_cam2ego[:3,:3], x.T).T + calib_cam2ego[:3,-1].reshape(1, 3) # (N, 3)
# STEP 2 小车坐标系转世界坐标系
calib_ego2world = calibs['calib_ego2world']
x_world = np.dot(calib_ego2world[:3,:3], x_ego.T).T + calib_ego2world[:3,-1].reshape(1, 3) # (N, 3)
y = x_world
elif source == "depth" and target == "world":
x = transform(x_raw, source="depth", target="cam", intrinsic=intrinsic)
y = transform(x, source="cam", target="world", calibs=calibs)
return y
可视化
import os
import numpy as np
import cv2
import open3d as o3d
import sys
sys.path.append('D:/DESKTOP/1014/')
from tf_nuScenes import *
def get_single_frame_data(idx):
DATA_PATH = 'D:/DESKTOP/1014/DATA/OSDF_nuScenes_Scene68/'
rgb = cv2.imread(DATA_PATH + '/color/{}.jpg'.format(idx))
sparse_depth = cv2.imread(DATA_PATH + '/depth/{}.png'.format(idx), cv2.IMREAD_UNCHANGED) / 1000.0
calib_cam2ego = np.loadtxt(DATA_PATH + '/calib_cam2ego/{}.txt'.format(idx))
calid_ego2world = np.loadtxt(DATA_PATH + '/calib_ego2world/{}.txt'.format(idx))
return rgb, sparse_depth, {'calib_cam2ego': calib_cam2ego, 'calib_ego2world': calid_ego2world}
intrinsic= {'fu': 1.266417203046554050e+03, 'fv': 1.266417203046554050e+03, 'cu': 4.915070657929475715e+02, 'cv': 8.162670197447984037e+02}
pcd_all = o3d.geometry.PointCloud()
for idx in range(40):
rgb, sparse_depth, calibs = get_single_frame_data(idx)
rgb = cv2.cvtColor(rgb, cv2.COLOR_BGR2RGB)
mask = sparse_depth>0
colors = rgb[mask]
pc_world = transform(sparse_depth, 'depth', 'world', intrinsic=intrinsic, calibs=calibs)
pcd_world = o3d.geometry.PointCloud()
pcd_world.points = o3d.utility.Vector3dVector(pc_world.reshape(-1, 3))
pcd_world.colors = o3d.utility.Vector3dVector(colors.reshape(-1, 3)/255.0)
pcd_all += pcd_world
o3d.visualization.draw_geometries([pcd_all])
4.4 效果
五、三种数据集的pose格式相互转换
因为我需要把VKITTI2转换成ScanNet的pose格式才能丢到NICE-SLAM里面跑。
这一节实现的功能就是比如:我的数据集是VKITTI2,我想用ScanNet的读数据方式读VKITTI2并且做点云累计。
5.1 理论介绍
-
ScanNet
- txt中直接load的pose:
[ r 11 r 12 r 13 t 1 r 21 r 22 r 23 t 2 r 31 r 32 r 33 t 3 0 0 0 1 ] \begin{bmatrix} r_{11} & r_{12} & r_{13} & t_{1} \\ r_{21} & r_{22} & r_{23} & t_{2} \\ r_{31} & r_{32} & r_{33} & t_{3} \\ 0 & 0 & 0 &1 \end{bmatrix} r11r21r310r12r22r320r13r23r330t1t2t31
-
操作方式:
p c _ c a m = ( x c a m y c a m z c a m ) T p c _ w o r l d = ( r 11 x c a m − r 12 y c a m − r 13 z c a m + t 1 r 21 x c a m − r 22 y c a m − r 23 z c a m + t 2 r 31 x c a m − r 32 y c a m − r 33 z c a m + t 3 ) pc\_cam = \begin{pmatrix}x_{cam}&y_{cam}&z_{cam} \end{pmatrix}^T\\ pc\_world = \begin{pmatrix} r_{11}x_{cam} - r_{12}y_{cam} - r_{13}z_{cam} + t_1 \\ r_{21}x_{cam} - r_{22}y_{cam} - r_{23}z_{cam} + t_2 \\ r_{31}x_{cam} - r_{32}y_{cam} - r_{33}z_{cam} + t_3 \end{pmatrix} pc_cam=(xcamycamzcam)Tpc_world= r11xcam−r12ycam−r13zcam+t1r21xcam−r22ycam−r23zcam+t2r31xcam−r32ycam−r33zcam+t3
-
VKITTI2
- txt中直接load的pose:
[ r 11 r 12 r 13 t 1 r 21 r 21 r 23 t 2 r 31 r 32 r 33 t 3 0 0 0 1 ] \begin{bmatrix} r_{11} & r_{12} & r_{13} & t_{1} \\ r_{21} & r_{21} & r_{23} & t_{2} \\ r_{31} & r_{32} & r_{33} & t_{3} \\ 0 & 0 & 0 &1 \end{bmatrix} r11r21r310r12r21r320r13r23r330t1t2t31
-
操作方式
p c _ c a m = ( x c a m y c a m z c a m ) T p c _ w o r l d = ( − r 11 x c a m − r 21 y c a m − r 31 z c a m + 0.1 ( r 11 t 1 + r 21 t 2 + r 31 t 3 ) r 12 x c a m + r 22 y c a m + r 32 z c a m − 0.1 ( r 12 t 1 + r 22 t 2 + r 32 t 3 ) r 13 x c a m + r 23 y c a m + r 33 z c a m − 0.1 ( r 13 t 1 + r 23 t 2 + r 33 t 3 ) ) pc\_cam = \begin{pmatrix}x_{cam}&y_{cam}&z_{cam}\end{pmatrix}^T\\ \\ pc\_world = \begin{pmatrix} -r_{11}x_{cam} - r_{21}y_{cam} - r_{31}z_{cam} + 0.1(r_{11}t_1+r_{21}t_2+r_{31}t_3) \\ r_{12}x_{cam} + r_{22}y_{cam} + r_{32}z_{cam} - 0.1(r_{12}t_1+r_{22}t_2+r_{32}t_3) \\ r_{13}x_{cam} + r_{23}y_{cam} + r_{33}z_{cam} - 0.1(r_{13}t_1+r_{23}t_2+r_{33}t_3) \end{pmatrix} pc_cam=(xcamycamzcam)Tpc_world= −r11xcam−r21ycam−r31zcam+0.1(r11t1+r21t2+r31t3)r12xcam+r22ycam+r32zcam−0.1(r12t1+r22t2+r32t3)r13xcam+r23ycam+r33zcam−0.1(r13t1+r23t2+r33t3)
-
KITTI
-
pykitti中读取并且保存的pose
-
代码:
def get_transpose(idx): # cam to velo T_velo_cam3 = np.linalg.inv(dataset.calib.T_cam3_velo) # velo to world oxt = oxts_list[idx] T_w_imu = oxt.T_w_imu T_velo_imu = dataset.calib.T_velo_imu T_imu_velo = inverse_pose(T_velo_imu) T_w_velo = np.dot(T_w_imu, T_imu_velo) transpose = np.dot(T_w_velo, T_velo_cam3) # 保存好以后,KITTI只需要 pc_world = np.matmul(transpose, pc_cam.T).T[:, :3] 即可 return transpose
-
记作:
( r 11 r 12 r 13 t 1 r 21 r 21 r 23 t 2 r 31 r 32 r 33 t 3 0 0 0 1 ) \begin{pmatrix} r_{11} & r_{12} & r_{13} & t_{1} \\ r_{21} & r_{21} & r_{23} & t_{2} \\ r_{31} & r_{32} & r_{33} & t_{3} \\ 0 & 0 & 0 &1 \end{pmatrix} r11r21r310r12r21r320r13r23r330t1t2t31
-
-
操作方式
p c _ c a m = ( x c a m y c a m z c a m ) T p c _ w o r l d = ( r 11 x c a m + r 12 y c a m + r 13 z c a m + t 1 r 21 x c a m + r 22 y c a m + r 23 z c a m + t 2 r 31 x c a m + r 32 y c a m + r 33 z c a m + t 3 ) pc\_cam = \begin{pmatrix}x_{cam}&y_{cam}&z_{cam} \end{pmatrix}^T\\ pc\_world = \begin{pmatrix} r_{11}x_{cam} + r_{12}y_{cam} + r_{13}z_{cam} + t_1 \\ r_{21}x_{cam} + r_{22}y_{cam} + r_{23}z_{cam} + t_2 \\ r_{31}x_{cam} + r_{32}y_{cam} + r_{33}z_{cam} + t_3 \end{pmatrix} pc_cam=(xcamycamzcam)Tpc_world= r11xcam+r12ycam+r13zcam+t1r21xcam+r22ycam+r23zcam+t2r31xcam+r32ycam+r33zcam+t3
-
5.2 代码
pose是用来做cam系到world系坐标变换的,这里直接放代码了:
# tf_exchange.py
import numpy as np
import copy
def exchange(x, source, target):
'''
x.shape = (4, 4)
'''
if source == 'vkitti2' and target == 'scannet':
TR = np.eye(4)
x_cp = copy.deepcopy(x)
TR[0,0], TR[0,1], TR[0,2] = -x_cp[0,0], x_cp[1,0], x_cp[2,0]
TR[0,-1] = 0.1 * (x_cp[0,0]*x_cp[0,-1] + x_cp[1,0]*x_cp[1,-1] + x_cp[2,0]*x_cp[2,-1])
TR[1,0], TR[1,1], TR[1,2] = x_cp[0,1], -x_cp[1,1], -x_cp[2,1]
TR[1,-1] = -0.1 * (x_cp[0,1]*x_cp[0,-1] + x_cp[1,1]*x_cp[1,-1] + x_cp[2,1]*x_cp[2,-1])
TR[2,0], TR[2,1], TR[2,2] = x_cp[0,2], -x_cp[1,2], -x_cp[2,2]
TR[2,-1] = -0.1 * (x_cp[0,2]*x_cp[0,-1] + x_cp[1,2]*x_cp[1,-1] + x_cp[2,2]*x_cp[2,-1])
y = TR
elif source == 'scannet' and target == 'vkitti2':
TR = np.eye(4)
TR[0,0], TR[0,1], TR[0,2] = -x[0,0], x[1,0], x[2,0]
TR[1,0], TR[1,1], TR[1,2] = x[0,1], -x[1,1], -x[2,1]
TR[2,0], TR[2,1], TR[2,2] = x[0,2], -x[1,2], -x[2,2]
x_cp = copy.deepcopy(x)
x_cp[0, -1] *= -1
TR[:3, -1] = -10 * np.matmul(np.linalg.inv(TR[:3,:3].T), x_cp[:3, -1])
y = TR
elif source == 'kitti' and target == 'scannet':
TR = copy.deepcopy(x)
TR[:3, 1] *= -1
TR[:3, 2] *= -1
y = TR
elif source == 'scannet' and target == 'kitti':
TR = copy.deepcopy(x)
TR[:3, 1] *= -1
TR[:3, 2] *= -1
y = TR
elif source == 'vkitti2' and target == 'kitti':
x = exchange(x, 'vkitti2', 'scannet')
y = exchange(x, 'scannet', 'kitti')
elif source == 'kitti' and target == 'vkitti2':
x = exchange(x, 'kitti', 'scannet')
y = exchange(x, 'scannet', 'vkitti2')
return y