import torch
"""
x_bounds: Forward direction in the ego-car.
y_bounds: Sides
z_bounds: Height
"""
# Each bounds list is read below as [lower bound, upper bound, step].
x_bounds = [-50.0, 50.0, 0.5]
y_bounds = [-50.0, 50.0, 0.5]
z_bounds = [-10.0, 10.0, 20.0]
# Same triple that is later unpacked into torch.arange as D_BOUND.
d_bounds = [2.0, 50.0, 1.0]

_bev_axes = (x_bounds, y_bounds, z_bounds)
# Cell size along x, y and z.
bev_resolution = torch.tensor([step for _, _, step in _bev_axes])
# Centre of the first cell along each axis (lower bound plus half a step).
bev_start_position = torch.tensor(
    [low + step / 2.0 for low, _, step in _bev_axes]
)
# Number of cells along each axis; the float quotient is truncated to int64.
bev_dimension = torch.tensor(
    [(high - low) / step for low, high, step in _bev_axes],
    dtype=torch.long,
)
tensor([ 0.5000, 0.5000, 20.0000]):x、y、z方向上的分辨率,即每个栅格(像素)所代表的实际长度。
tensor([-49.7500, -49.7500, 0.0000]):x、y、z方向上的起始位置,即第一个栅格中心的坐标。
tensor([200, 200, 1]):鸟瞰图在x、y、z方向上的空间维度(栅格数量);第一个数即鸟瞰图的宽度(像素数量)。
# 60 evenly spaced sample positions between pixel column 0 and 479.
x_grid = torch.linspace(0, 480 - 1, steps=60, dtype=torch.float)
# Value of x_grid:
# tensor([ 0.0000, 8.1186, 16.2373, 24.3559, 32.4746, 40.5932, 48.7119,
# 56.8305, 64.9492, 73.0678, 81.1864, 89.3051, 97.4237, 105.5424,
# 113.6610, 121.7797, 129.8983, 138.0169, 146.1356, 154.2542, 162.3729,
# 170.4915, 178.6102, 186.7288, 194.8474, 202.9661, 211.0847, 219.2034,
# 227.3220, 235.4407, 243.5593, 251.6779, 259.7966, 267.9153, 276.0339,
# 284.1526, 292.2712, 300.3899, 308.5085, 316.6271, 324.7458, 332.8644,
# 340.9831, 349.1017, 357.2204, 365.3390, 373.4576, 381.5763, 389.6949,
# 397.8136, 405.9322, 414.0508, 422.1695, 430.2881, 438.4068, 446.5254,
# 454.6441, 462.7627, 470.8813, 479.0000])

# Same data with two leading singleton axes, shape (1, 1, 60).
# The result is only inspected, not stored.
x_grid[None, None, :]
# Value of the (1, 1, 60) view:
# tensor([[[ 0.0000, 8.1186, 16.2373, 24.3559, 32.4746, 40.5932, 48.7119,
# 56.8305, 64.9492, 73.0678, 81.1864, 89.3051, 97.4237, 105.5424,
# 113.6610, 121.7797, 129.8983, 138.0169, 146.1356, 154.2542, 162.3729,
# 170.4915, 178.6102, 186.7288, 194.8474, 202.9661, 211.0847, 219.2034,
# 227.3220, 235.4407, 243.5593, 251.6779, 259.7966, 267.9153, 276.0339,
# 284.1526, 292.2712, 300.3899, 308.5085, 316.6271, 324.7458, 332.8644,
# 340.9831, 349.1017, 357.2204, 365.3390, 373.4576, 381.5763, 389.6949,
# 397.8136, 405.9322, 414.0508, 422.1695, 430.2881, 438.4068, 446.5254,
# 454.6441, 462.7627, 470.8813, 479.0000]]])

# Expand the singleton axes: 48 blocks, each with 24 rows of 60 columns.
# The result is only inspected, not stored.
# NOTE(review): the frustum code later in this file expands to
# 224 // 8 = 28 rows; the 24 used here looks like an illustrative size only.
x_grid[None, None, :].expand(48, 24, -1)
import torch
# Image size as (height, width) and the encoder's spatial downsampling factor.
FINAL_DIM = (224, 480)
encoder_downsample = 8
D_BOUND = [2.0, 50.0, 1.0]

h, w = FINAL_DIM
downsampled_h = h // encoder_downsample
downsampled_w = w // encoder_downsample

# Depth grid: one slice per value of arange(start, stop, step), repeated over
# every cell of the downsampled image.
depth_grid = torch.arange(D_BOUND[0], D_BOUND[1], D_BOUND[2], dtype=torch.float)
n_depth_slices = depth_grid.numel()
depth_grid = depth_grid[:, None, None].expand(
    n_depth_slices, downsampled_h, downsampled_w
)

# x and y grids: pixel coordinates sampled evenly over the full image and
# repeated for every depth slice.
x_grid = torch.linspace(0, w - 1, downsampled_w, dtype=torch.float)
x_grid = x_grid[None, None, :].expand(n_depth_slices, downsampled_h, -1)
y_grid = torch.linspace(0, h - 1, downsampled_h, dtype=torch.float)
y_grid = y_grid[None, :, None].expand(n_depth_slices, -1, downsampled_w)

# Shape (n_depth_slices, downsampled_h, downsampled_w, 3); last axis is (x, y, depth).
frustum = torch.stack([x_grid, y_grid, depth_grid], dim=-1)
import torch
# Image size as (height, width) and the encoder's spatial downsampling factor.
FINAL_DIM = (224, 480)
encoder_downsample = 8
D_BOUND = [2.0, 50.0, 1.0]
h, w = FINAL_DIM
downsampled_h = h // encoder_downsample
downsampled_w = w // encoder_downsample

# Depth grid: print the 1-D depth values before they are broadcast.
depth_start, depth_stop, depth_step = D_BOUND
depth_grid = torch.arange(depth_start, depth_stop, depth_step, dtype=torch.float)
print(depth_grid)
n_depth_slices = depth_grid.shape[0]
_grid_shape = (n_depth_slices, downsampled_h, downsampled_w)
depth_grid = depth_grid.reshape(n_depth_slices, 1, 1).expand(*_grid_shape)

# x and y grids: print the 1-D sample positions, then broadcast each one to
# the full (depth, height, width) grid.
x_grid = torch.linspace(0, w - 1, downsampled_w, dtype=torch.float)
print(x_grid)
x_grid = x_grid.reshape(1, 1, downsampled_w).expand(*_grid_shape)
y_grid = torch.linspace(0, h - 1, downsampled_h, dtype=torch.float)
print(y_grid)
y_grid = y_grid.reshape(1, downsampled_h, 1).expand(*_grid_shape)

# Last axis is (x, y, depth).
frustum = torch.stack([x_grid, y_grid, depth_grid], dim=-1)
# The far corner: last depth slice, last row, last column.
print(frustum[-1, -1, -1])
import matplotlib.pyplot as plt
# Convert the frustum to a NumPy array.
frustum_np = frustum.detach().cpu().numpy()
# One row per frustum point; the columns are x, y and depth.
_xs, _ys, _depths = frustum_np.reshape(-1, 3).T
# Create the 3D scatter plot.
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(_xs, _ys, _depths, s=1)
# Label the axes.
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_zlabel('depth')
# Display the figure.
plt.show()
# Add two leading singleton axes and one trailing singleton axis:
# (D, H, W, 3) -> (1, 1, D, H, W, 3, 1).
points = frustum.unsqueeze(0).unsqueeze(0).unsqueeze(-1)
# Scale the pixel (x, y) coordinates by depth and keep depth as the third
# coordinate; axis 5 is the coordinate axis.
points = torch.cat((points[:, :, :, :, :, :2] * points[:, :, :, :, :, 2:3], points[:, :, :, :, :, 2:3]), 5)
# Per-axis cell size, first-cell centre and cell count of the BEV grid.
bev_resolution = torch.tensor([row[2] for row in [x_bounds, y_bounds, z_bounds]])
bev_start_position = torch.tensor([row[0] + row[2] / 2.0 for row in [x_bounds, y_bounds, z_bounds]])
bev_dimension = torch.tensor([(row[1] - row[0]) / row[2] for row in [x_bounds, y_bounds, z_bounds]],dtype=torch.long)
# Literal copy of the bev_resolution values; not used by the lines below.
resolution = [ 0.5000, 0.5000, 20.0000]
# Drop the trailing singleton axis before combining with the (3,) BEV tensors.
# Left in place, (..., 3, 1) broadcasts against (3,) to (..., 3, 3) and the
# flattened result has three times too many rows (241920 instead of 80640).
# bev_start_position - bev_resolution / 2 is the lower edge of the first cell,
# so the quotient is a per-axis cell index.
geometry = (points[0].squeeze(-1) - (bev_start_position - bev_resolution / 2)) / bev_resolution
# One row of integer (x, y, z) cell indices per frustum point.
geometry = geometry.view(-1, 3).long()
print(geometry.size())
# Expected output:
# torch.Size([80640, 3])
import numpy as np
import matplotlib.pyplot as plt
# 4x4 homogeneous transform: 45-degree rotation about z (0.707 approximates
# cos/sin of 45 degrees) and translation vector [1, 2, 3].
SE3 = np.array([
    [0.707, -0.707, 0, 1],
    [0.707, 0.707, 0, 2],
    [0, 0, 1, 3],
    [0, 0, 0, 1],
])
# Append a column of ones: N x 3 points become N x 4 homogeneous coordinates.
_ones_column = np.ones((geometry.shape[0], 1))
homogeneous_geometry = np.concatenate((geometry, _ones_column), axis=1)
# Left-multiply the 4 x N column-vector form by SE3, transpose back to N x 4
# and drop the homogeneous column to get the N x 3 transformed points.
result = (SE3 @ homogeneous_geometry.T).T[:, :3]
# Split into x, y and z coordinates.
x, y, z = result[:, 0], result[:, 1], result[:, 2]
# Draw the scatter plot.
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(x, y, z, s=1, c='b', marker='.')
plt.show()
# Collapse each (x, y, z) index triple into one integer:
# x * (dim_y * dim_z) + y * dim_z + z.
_stride_x = bev_dimension[1] * bev_dimension[2]
_stride_y = bev_dimension[2]
ranks = geometry[:, 0] * _stride_x + geometry[:, 1] * _stride_y + geometry[:, 2]
ranks = ranks.detach().cpu().numpy()
# Colour every transformed point by its rank.
ax.scatter(x, y, z, s=1, c=ranks, marker='.')
具体来说,geometry是一个形状为(N, 3)的整型张量,每行表示一个点在BEV体素网格中的(x, y, z)索引;bev_dimension是一个形状为(3,)的张量,表示Bird's Eye View(BEV)网格在x、y、z三个方向上的栅格数量。将每个点的三个索引按 x * (bev_dimension[1] * bev_dimension[2]) + y * bev_dimension[2] + z 进行线性组合,得到一个长度为N的一维数组ranks,其中每个元素是该点所在体素展平后的一维编号。这样就把三维网格索引映射到一维空间,落在同一体素内的点具有相同的rank,方便进行排序、计算和存储。
# Collapse each (x, y, z) index triple into one integer:
# x * (dim_y * dim_z) + y * dim_z + z.
_stride_x = bev_dimension[1] * bev_dimension[2]
_stride_y = bev_dimension[2]
ranks = geometry[:, 0] * _stride_x + geometry[:, 1] * _stride_y + geometry[:, 2]
# Permutation that puts the ranks in ascending order.
ranks_indices = ranks.argsort()
# Sorted ranks as a NumPy array.
ranks = ranks[ranks_indices]
ranks = ranks.detach().cpu().numpy()
散点的颜色按照ranks的取值显示。
# Keep one point per distinct rank: the last point of every run of equal
# values in the sorted ranks.
#
# `ranks` was sorted and converted to a NumPy array above, so bring it back as
# a tensor; torch.as_tensor also accepts a tensor unchanged.
_sorted_ranks = torch.as_tensor(ranks, device=geometry.device)
# `ranks` was sorted with ranks_indices but `geometry` was not; apply the same
# permutation so that row i of geometry belongs to _sorted_ranks[i].
geometry = geometry[ranks_indices]
mask = torch.ones(geometry.shape[0], device=geometry.device, dtype=torch.bool)
# True where the next rank differs; the final element always stays True.
mask[:-1] = _sorted_ranks[1:] != _sorted_ranks[:-1]
geometry = geometry[mask]
ranks = _sorted_ranks[mask]
geometry = geometry.detach().cpu().numpy()
ranks = ranks.detach().cpu().numpy()
# The original ended with `ranks = ranks[ranks_indices]`. After masking, ranks
# is shorter than ranks_indices, so that indexing is out of range; it is dropped.
BEV特征图中的每个像素表示空间中的一个小区域。
逐元素相乘:
import torch
x1 = torch.randn(32, 1, 48, 28, 60)
x2 = torch.randn(32, 64, 1, 28, 60)
# Expand both operands to the common shape [32, 64, 48, 28, 60]:
# x1 along its singleton axis 1, x2 along its singleton axis 2.
_common_shape = (32, 64, 48, 28, 60)
x1_broadcasted = x1.expand(*_common_shape)
x2_broadcasted = x2.expand(*_common_shape)
# Element-wise product.
result = torch.mul(x1_broadcasted, x2_broadcasted)
print(result.shape)  # torch.Size([32, 64, 48, 28, 60])
最终代码:
import torch
import numpy as np
import matplotlib.pyplot as plt
# Image size as (height, width) and the encoder's spatial downsampling factor.
FINAL_DIM = (224, 480)
encoder_downsample = 8
h, w = FINAL_DIM
downsampled_h = h // encoder_downsample
downsampled_w = w // encoder_downsample

# Depth grid: one slice per value of arange(2, 50, 1), repeated over every
# cell of the downsampled image.
D_BOUND = [2.0, 50.0, 1.0]
depth_grid = torch.arange(*D_BOUND, dtype=torch.float)
n_depth_slices = depth_grid.shape[0]
depth_grid = depth_grid[:, None, None].expand(-1, downsampled_h, downsampled_w)

# x and y grids: pixel coordinates sampled evenly over the full image and
# repeated for every depth slice.
x_grid = torch.linspace(0, w - 1, downsampled_w, dtype=torch.float)
x_grid = x_grid[None, None, :].expand(n_depth_slices, downsampled_h, -1)
y_grid = torch.linspace(0, h - 1, downsampled_h, dtype=torch.float)
y_grid = y_grid[None, :, None].expand(n_depth_slices, -1, downsampled_w)

# Dimension (n_depth_slices, downsampled_h, downsampled_w, 3)
# containing data points in the image: left-right, top-bottom, depth
frustum = torch.stack([x_grid, y_grid, depth_grid], dim=-1)

# Trailing singleton axis so every point is a 3x1 column for the matmul below.
points = frustum[..., None]

# Camera to ego reference frame.
# Example extrinsics: 45-degree rotation about z (0.707 approximates cos/sin
# of 45 degrees), translation vector [0, 0, 0].
extrinsics_np = np.array([
    [0.707, -0.707, 0, 0],
    [0.707, 0.707, 0, 0],
    [0, 0, 1, 0],
    [0, 0, 0, 1],
])
extrinsics = torch.from_numpy(extrinsics_np.astype(np.float32))
intrinsics = torch.tensor([
    [1260.8474446004698, 0.0, 807.968244525554],
    [0.0, 1260.8474446004698, 495.3344268742088],
    [0.0, 0.0, 1.0],
])
rotation = extrinsics[:3, :3]
translation = extrinsics[:3, 3]

# Scale pixel (x, y) by depth and keep depth as the third coordinate
# (axis 3 is the coordinate axis of the 5-D tensor).
points = torch.cat(
    (points[..., :2, :] * points[..., 2:3, :], points[..., 2:3, :]),
    dim=3,
)
# Inverse intrinsics followed by the rotation, folded into a single 3x3 matrix.
combined_transformation = rotation @ torch.inverse(intrinsics)
points = (combined_transformation[None, None, None, :, :] @ points).squeeze(-1)
# Shape after the translation: torch.Size([48, 28, 60, 3]).
points += translation[None, None, None, :]
# The 3 dimensions in the ego reference frame are: (forward, sides, height)

# Left panel: the raw frustum, flattened to one (N, 3) row per point.
result = frustum.reshape(-1, 3)
# Column 1 goes to the plot's z axis and column 2 to its y axis.
x, z, y = result[:, 0], result[:, 1], result[:, 2]
# Draw the scatter plot.
fig = plt.figure()
ax = fig.add_subplot(121, projection='3d')
ax.scatter(x, y, z, s=1, c='b', marker='.')

# Right panel: the transformed points, flattened the same way.
result1 = points.reshape(-1, 3)
# Same column-to-axis assignment as the left panel.
x1, z1, y1 = result1[:, 0], result1[:, 1], result1[:, 2]
# Draw the scatter plot.
ax1 = fig.add_subplot(122, projection='3d')
ax1.scatter(x1, y1, z1, s=1, c='b', marker='.')
plt.show()