BEV感知的

m0_46973820

已于 2023-06-16 17:01:22 修改

阅读量101

点赞数

文章标签： python 深度学习 开发语言

于 2023-06-13 23:04:33 首次发布

本文链接：https://blog.csdn.net/m0_46973820/article/details/131139494

版权

import torch

# Extent of the BEV grid along each axis, written as [lower, upper, step]:
#   x_bounds: forward direction in the ego-car
#   y_bounds: sides
#   z_bounds: height
x_bounds = [-50.0, 50.0, 0.5]
y_bounds = [-50.0, 50.0, 0.5]
z_bounds = [-10.0, 10.0, 20.0]
# Depth range as [start, stop, step]; not used by the computations below.
d_bounds = [2.0, 50.0, 1.0]

# Regroup the three [lower, upper, step] triples column-wise.
_lower, _upper, _step = (list(column) for column in zip(x_bounds, y_bounds, z_bounds))

# Size of one grid cell along x, y and z.
bev_resolution = torch.tensor(_step)
# Lower bound shifted by half a cell, i.e. the centre of the first cell.
bev_start_position = torch.tensor(
    [lo + step / 2.0 for lo, step in zip(_lower, _step)]
)
# Number of cells along each axis (the float quotient is cast to int64).
bev_dimension = torch.tensor(
    [(hi - lo) / step for lo, hi, step in zip(_lower, _upper, _step)],
    dtype=torch.long,
)

tensor([ 0.5000, 0.5000, 20.0000])：x、y、z 方向上的分辨率，即每个像素点所代表的实际长度。
tensor([-49.7500, -49.7500, 0.0000])：x、y、z 方向上的起始位置（下界加上半个分辨率，即第一个网格单元的中心）。
tensor([200, 200, 1])：x、y、z 方向上的鸟瞰图空间维度，第一个数即鸟瞰图的宽度（像素数量）。

# 60 evenly spaced x pixel coordinates covering columns 0 .. 479.
_image_width, _n_columns = 480, 60
x_grid = torch.linspace(0, _image_width - 1, _n_columns, dtype=torch.float)
# x_grid is 1-D with 60 entries (step ~ 8.1186):
#   tensor([  0.0000,   8.1186,  16.2373,  24.3559, ...,
#           454.6441, 462.7627, 470.8813, 479.0000])

# Viewed as (1, 1, 60) the same values sit on the last axis:
#   tensor([[[  0.0000,   8.1186,  16.2373, ..., 470.8813, 479.0000]]])
_row_view = x_grid.view(1, 1, 60)

# Expanding repeats that row without copying: 48 blocks, each with 24 rows
# of 60 columns. The results here are only for inspection and are discarded.
_row_view.expand(48, 24, 60)

import torch

# Image size as (height, width) and the encoder's downsampling factor.
FINAL_DIM = (224, 480)
encoder_downsample = 8
# Depth candidates as [start, stop, step] for torch.arange.
D_BOUND = [2.0, 50.0, 1.0]
h, w = FINAL_DIM
downsampled_h = h // encoder_downsample
downsampled_w = w // encoder_downsample

# Depth grid: one value per depth slice, repeated over the feature map.
depth_grid = torch.arange(D_BOUND[0], D_BOUND[1], D_BOUND[2], dtype=torch.float)
n_depth_slices = depth_grid.shape[0]
_grid_shape = (n_depth_slices, downsampled_h, downsampled_w)
depth_grid = depth_grid[:, None, None].expand(_grid_shape)

# x and y grids: pixel coordinates sampled at the downsampled resolution,
# laid out on the width axis (x) and the height axis (y) respectively.
x_grid = torch.linspace(0, w - 1, downsampled_w, dtype=torch.float)
x_grid = x_grid[None, None, :].expand(_grid_shape)
y_grid = torch.linspace(0, h - 1, downsampled_h, dtype=torch.float)
y_grid = y_grid[None, :, None].expand(_grid_shape)

# Shape (n_depth_slices, downsampled_h, downsampled_w, 3); last axis is (x, y, depth).
frustum = torch.stack([x_grid, y_grid, depth_grid], dim=-1)

import torch

# Image size as (height, width) and the encoder's downsampling factor.
FINAL_DIM = (224, 480)
encoder_downsample = 8
# Depth candidates as [start, stop, step] for torch.arange.
D_BOUND = [2.0, 50.0, 1.0]
h, w = FINAL_DIM
downsampled_h = h // encoder_downsample
downsampled_w = w // encoder_downsample

# Depth grid: print the 1-D depth values, then repeat them over the feature map.
depth_grid = torch.arange(D_BOUND[0], D_BOUND[1], D_BOUND[2], dtype=torch.float)
print(depth_grid)
n_depth_slices = depth_grid.shape[0]
_grid_shape = (n_depth_slices, downsampled_h, downsampled_w)
depth_grid = depth_grid[:, None, None].expand(_grid_shape)

# x and y grids: print the 1-D pixel coordinates before expanding them.
x_grid = torch.linspace(0, w - 1, downsampled_w, dtype=torch.float)
print(x_grid)
x_grid = x_grid[None, None, :].expand(_grid_shape)
y_grid = torch.linspace(0, h - 1, downsampled_h, dtype=torch.float)
print(y_grid)
y_grid = y_grid[None, :, None].expand(_grid_shape)

# Shape (n_depth_slices, downsampled_h, downsampled_w, 3); last axis is (x, y, depth).
frustum = torch.stack([x_grid, y_grid, depth_grid], dim=-1)

# The last entry pairs the largest x, y and depth values.
print(frustum[-1, -1, -1, :])

import matplotlib.pyplot as plt

# Convert the frustum to a NumPy array.
frustum_np = frustum.detach().cpu().numpy()

# Build a 3-D scatter plot with one marker per frustum entry.
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
_xs, _ys, _depths = (frustum_np[..., channel].flatten() for channel in range(3))
ax.scatter(_xs, _ys, _depths, s=1)

# Label the axes.
for _set_label, _text in (
    (ax.set_xlabel, 'x'),
    (ax.set_ylabel, 'y'),
    (ax.set_zlabel, 'depth'),
):
    _set_label(_text)

# Show the figure.
plt.show()

# Add two leading singleton dims and one trailing singleton dim:
# (D, H, W, 3) -> (1, 1, D, H, W, 3, 1).
points = frustum.unsqueeze(0).unsqueeze(0).unsqueeze(-1)
# Multiply the pixel (x, y) coordinates by depth; depth itself is kept as-is.
points = torch.cat((points[:, :, :, :, :, :2] * points[:, :, :, :, :, 2:3], points[:, :, :, :, :, 2:3]), 5)

_axis_bounds = [x_bounds, y_bounds, z_bounds]
bev_resolution = torch.tensor([row[2] for row in _axis_bounds])
bev_start_position = torch.tensor([row[0] + row[2] / 2.0 for row in _axis_bounds])
bev_dimension = torch.tensor([(row[1] - row[0]) / row[2] for row in _axis_bounds], dtype=torch.long)
resolution = [0.5000, 0.5000, 20.0000]

# Drop the trailing singleton dim BEFORE combining with the (3,) offset and
# resolution vectors. Without this, the (..., 3, 1) tensor broadcasts against
# (3,) into (..., 3, 3), which triples the row count and mixes the axes.
_coords = points[0].squeeze(-1)
# Shift by the lower grid bound (start position minus half a cell) and divide
# by the cell size to obtain fractional grid indices per axis.
geometry = (_coords - (bev_start_position - bev_resolution / 2)) / bev_resolution
# One row of integer (x, y, z) grid indices per frustum point.
geometry = geometry.reshape(-1, 3).long()
print(geometry.size())
# Expected output: torch.Size([80640, 3])   (48 * 28 * 60 points)

import numpy as np
import matplotlib.pyplot as plt


# 4x4 rigid transform: 45-degree rotation about z (0.707 approximates
# cos/sin of 45 degrees) with translation vector [1, 2, 3].
SE3 = np.array([
    [0.707, -0.707, 0, 1],
    [0.707, 0.707, 0, 2],
    [0, 0, 1, 3],
    [0, 0, 0, 1],
])

# Append a column of ones: (N, 3) -> (N, 4) homogeneous coordinates.
_ones_column = np.ones((geometry.shape[0], 1))
homogeneous_geometry = np.concatenate((geometry, _ones_column), axis=1)
# Apply SE3 to every point (as 4 x N columns), transpose back to N x 4 and
# drop the homogeneous column to get the transformed (N, 3) coordinates.
_transformed = np.dot(SE3, homogeneous_geometry.T)
result = _transformed.T[:, :3]

# Split into x, y, z coordinate columns.
x, y, z = result[:, 0], result[:, 1], result[:, 2]

# Draw the scatter plot.
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(x, y, z, s=1, c='b', marker='.')
plt.show()

# Flatten each (x, y, z) grid index into a single integer:
#   rank = x * (dim_y * dim_z) + y * dim_z + z
_stride_x = bev_dimension[1] * bev_dimension[2]
_stride_y = bev_dimension[2]
_flat_index = geometry[:, 0] * _stride_x + geometry[:, 1] * _stride_y + geometry[:, 2]
ranks = _flat_index.detach().cpu().numpy()
# Colour every point by its rank.
ax.scatter(x, y, z, s=1, c=ranks, marker='.')

具体来说，geometry 是一个形状为 (N, 3) 的整型张量，每行表示一个点在 BEV 网格中的 (x, y, z) 索引；bev_dimension 是一个形状为 (3,) 的张量，表示 Bird's Eye View（BEV）网格在 x、y、z 三个方向上的格子数（长、宽、高）。将每个点的三个索引按 x * (D_y * D_z) + y * D_z + z 进行线性组合，得到一个长度为 N 的一维数组 ranks，其中每个元素是该点所在格子的展平（一维）索引，落在同一格子内的点具有相同的 rank。这样可以将三维空间中的点映射到一维空间中，方便后续的排序、去重以及计算和存储。

# Flatten each (x, y, z) grid index into a single integer:
#   rank = x * (dim_y * dim_z) + y * dim_z + z
_stride_x = bev_dimension[1] * bev_dimension[2]
_stride_y = bev_dimension[2]
ranks = geometry[:, 0] * _stride_x + geometry[:, 1] * _stride_y + geometry[:, 2]
# Permutation that puts the ranks in ascending order.
ranks_indices = ranks.argsort()
# Sorted ranks, converted to a NumPy array.
_sorted_ranks = ranks[ranks_indices]
ranks = _sorted_ranks.detach().cpu().numpy()

颜色按照ranks显示

# `ranks` may reach this point as a NumPy array (the preceding snippet ends
# with `.numpy()`); normalise it to a tensor on geometry's device so the
# mask assignment and the later `.detach()` work for either input type.
ranks = torch.as_tensor(ranks, device=geometry.device)

# Keep a row when the next rank differs, i.e. the last row of every run of
# equal ranks; the final row is always kept.
# NOTE(review): this assumes `ranks` is sorted and that `geometry` rows are
# in the same order as `ranks`. The preceding snippet sorts `ranks` only --
# confirm `geometry` is reordered with the same indices before this step.
mask = torch.ones(geometry.shape[0], device=geometry.device, dtype=torch.bool)
mask[:-1] = ranks[1:] != ranks[:-1]
geometry = geometry[mask]
ranks = ranks[mask]

# Hand both results back as NumPy arrays.
geometry = geometry.detach().cpu().numpy()
ranks = ranks.detach().cpu().numpy()

# Reorder `ranks` using the index array `ranks_indices`.
# NOTE(review): this looks like a repeated excerpt of the earlier sorting step
# (`ranks[ranks_indices]`). If it actually runs after the de-duplication
# snippet, `ranks` has already been filtered and converted, so the indices
# would no longer match -- confirm this line is illustrative only.
ranks = ranks[ranks_indices]

BEV特征：其中每个像素表示空间中的一个小区域。

逐元素相乘

import torch

x1 = torch.randn(32, 1, 48, 28, 60)
x2 = torch.randn(32, 64, 1, 28, 60)

# Common shape [32, 64, 48, 28, 60]: dim 1 comes from x2, dim 2 from x1.
_full_shape = (x1.shape[0], x2.shape[1], x1.shape[2], x1.shape[3], x1.shape[4])

# Expand each operand's singleton dimension to the common shape.
x1_broadcasted = x1.expand(_full_shape)
x2_broadcasted = x2.expand(_full_shape)

# Element-wise product.
result = torch.mul(x1_broadcasted, x2_broadcasted)

print(result.shape)  # torch.Size([32, 64, 48, 28, 60])

最终代码：

import torch
import numpy as np
import matplotlib.pyplot as plt

# Image size as (height, width) and the encoder's downsampling factor.
FINAL_DIM = (224, 480)
encoder_downsample = 8
h, w = FINAL_DIM
downsampled_h = h // encoder_downsample
downsampled_w = w // encoder_downsample

# Depth grid: candidates from [start, stop, step], repeated over the feature map.
D_BOUND = [2.0, 50.0, 1.0]
depth_grid = torch.arange(D_BOUND[0], D_BOUND[1], D_BOUND[2], dtype=torch.float)
n_depth_slices = depth_grid.shape[0]
_grid_shape = (n_depth_slices, downsampled_h, downsampled_w)
depth_grid = depth_grid[:, None, None].expand(_grid_shape)

# x and y grids: pixel coordinates sampled at the downsampled resolution.
x_grid = torch.linspace(0, w - 1, downsampled_w, dtype=torch.float)
x_grid = x_grid[None, None, :].expand(_grid_shape)
y_grid = torch.linspace(0, h - 1, downsampled_h, dtype=torch.float)
y_grid = y_grid[None, :, None].expand(_grid_shape)

# Dimension (n_depth_slices, downsampled_h, downsampled_w, 3)
# containing data points in the image: left-right, top-bottom, depth
frustum = torch.stack([x_grid, y_grid, depth_grid], dim=-1)
# Add a dummy trailing dimension so each point is a 3x1 column vector.
points = frustum[..., None]

# Camera to ego reference frame.
# Extrinsics: 45-degree rotation (0.707 approximates cos/sin of 45 degrees)
# with translation vector [0, 0, 0].
extrinsics_np = np.array([
    [0.707, -0.707, 0, 0],
    [0.707, 0.707, 0, 0],
    [0, 0, 1, 0],
    [0, 0, 0, 1],
])
extrinsics = torch.from_numpy(extrinsics_np.astype(np.float32))
intrinsics = torch.tensor([
    [1260.8474446004698, 0.0, 807.968244525554],
    [0.0, 1260.8474446004698, 495.3344268742088],
    [0.0, 0.0, 1.0],
])
rotation = extrinsics[:3, :3]
translation = extrinsics[:3, 3]

# Multiply pixel (x, y) by depth; depth itself is kept unchanged.
_pixel_xy = points[..., :2, :]
_depth = points[..., 2:3, :]
points = torch.cat((_pixel_xy * _depth, _depth), dim=3)

# Apply rotation @ inverse(intrinsics) to every point, then drop the dummy dim.
combined_transformation = torch.matmul(rotation, torch.inverse(intrinsics))
points = torch.matmul(combined_transformation.view(1, 1, 1, 3, 3), points).squeeze(-1)
# Add the translation; result shape is torch.Size([48, 28, 60, 3]).
points = points + translation.view(1, 1, 1, 3)
# The 3 dimensions in the ego reference frame are: (forward, sides, height)


# Marker settings shared by both scatter plots.
_marker_style = dict(s=1, c='b', marker='.')

# Flatten the frustum into an (N, 3) table of points.
result = frustum.reshape(-1, 3)
# Columns are read as x, z, y: column 1 goes to the plot's z axis and
# column 2 to the plot's y axis.
x, z, y = result.unbind(dim=1)
# Left panel: the frustum points.
fig = plt.figure()
ax = fig.add_subplot(121, projection='3d')
ax.scatter(x, y, z, **_marker_style)

# Flatten the transformed points into an (N, 3) table.
result1 = points.reshape(-1, 3)
# Same column-to-axis assignment as for the left panel.
x1, z1, y1 = result1.unbind(dim=1)
# Right panel: the transformed points.
ax1 = fig.add_subplot(122, projection='3d')
ax1.scatter(x1, y1, z1, **_marker_style)
plt.show()