3D Bounding Box Estimation Using Deep Learning and Geometry 代码笔记
代码链接: https://github.com/skhadem/3D-BoundingBox
论文笔记链接:https://blog.csdn.net/wuchaohuo724/article/details/116081473
1. 网络结构
先看以下网络结构
2. 代码实现
从上图可以看到，网络在同一个 feature map 之后接了三个分支：
- Dimensions Regression
- Orientation Regression
- Confidences Regression
代码比较简单明了。
class Model(nn.Module):
    """Three fully-connected heads stacked on a shared feature extractor.

    The module passed as ``features`` is applied first and its output is
    flattened to ``512 * 7 * 7`` values per row. Three independent heads read
    that flattened vector:

    * ``orientation`` -- ``bins * 2`` values, reshaped to ``(-1, bins, 2)`` and
      L2-normalised along the last axis (one 2-vector per bin).
    * ``confidence``  -- ``bins`` raw scores (no Softmax/Sigmoid is applied here).
    * ``dimension``   -- 3 values.

    Args:
        features: module applied to the input before the heads. ``forward``
            calls it unconditionally, so the ``None`` default is only a
            placeholder.
        bins: number of orientation bins.
        w: stored as ``self.w``; it is not read anywhere inside this class.
    """

    # Size of the flattened backbone output consumed by every head.
    _FLAT_FEATURES = 512 * 7 * 7

    @staticmethod
    def _make_head(in_features, hidden, out_features):
        """Return a (Linear -> ReLU -> Dropout) x 2 -> Linear stack."""
        return nn.Sequential(
            nn.Linear(in_features, hidden),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(hidden, hidden),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(hidden, out_features),
        )

    def __init__(self, features=None, bins=2, w = 0.4):
        super().__init__()
        self.bins = bins
        self.w = w
        self.features = features

        flat = self._FLAT_FEATURES
        # Two outputs per bin: the 2-vector that forward() normalises.
        self.orientation = self._make_head(flat, 256, bins * 2)
        # One raw score per bin.
        self.confidence = self._make_head(flat, 256, bins)
        # Three box dimensions.
        self.dimension = self._make_head(flat, 512, 3)

    def forward(self, x):
        """Return ``(orientation, confidence, dimension)`` for input ``x``."""
        feature_map = self.features(x)  # 512 x 7 x 7
        flat = feature_map.view(-1, self._FLAT_FEATURES)

        angle_vectors = self.orientation(flat).view(-1, self.bins, 2)
        # Scale each per-bin 2-vector to unit length.
        angle_vectors = F.normalize(angle_vectors, dim=2)

        bin_scores = self.confidence(flat)
        box_dims = self.dimension(flat)
        return angle_vectors, bin_scores, box_dims
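三个分支都是比较直接的全连接结构。下面来看看 Loss function：总损失为 loss = alpha * dim_loss + conf_loss + w * orient_loss，其中 OrientationLoss 的定义紧跟在下面这段代码之后。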
# Excerpt from a training step. `orient`, `conf`, `dim`, `truth_orient`,
# `truth_conf`, `truth_dim`, `alpha` and `w` are defined outside this excerpt
# (presumably `orient`/`conf`/`dim` are the model's three outputs -- confirm
# against the training loop).

# Loss callables: cross-entropy for the bin scores, MSE for the dimensions,
# and the OrientationLoss function for the per-bin orientation vectors.
conf_loss_func = nn.CrossEntropyLoss().cuda()
dim_loss_func = nn.MSELoss().cuda()
orient_loss_func = OrientationLoss
# OrientationLoss takes the un-reduced truth_conf (it takes the max along
# dim 1 itself), so it must run before truth_conf is rebound below.
orient_loss = orient_loss_func(orient, truth_orient, truth_conf)
dim_loss = dim_loss_func(dim, truth_dim)
# Rebind truth_conf to the index of its maximum along dim 1; that index
# tensor is what the cross-entropy loss receives as its target.
truth_conf = torch.max(truth_conf, dim=1)[1]
conf_loss = conf_loss_func(conf, truth_conf)
# Orientation term: bin classification loss plus the w-weighted angle loss.
loss_theta = conf_loss + w * orient_loss
# Total loss: alpha-weighted dimension loss plus the orientation term.
loss = alpha * dim_loss + loss_theta
def OrientationLoss(orient_batch, orientGT_batch, confGT_batch):
    """Return the negated mean cosine of the angle error in the ground-truth bin.

    For every sample, the bin with the largest value in ``confGT_batch`` (along
    dim 1) is selected. The 2-vectors of that bin are taken from both the
    prediction and the ground truth and converted to angles with ``atan2``,
    where component 1 is the y argument and component 0 is the x argument.
    The result is ``-mean(cos(gt_angle - predicted_angle))``: -1 for a perfect
    match, +1 when every prediction points the opposite way.

    Args:
        orient_batch: predicted vectors, indexed as ``[sample, bin, component]``.
        orientGT_batch: ground-truth vectors, indexed the same way.
        confGT_batch: per-sample, per-bin ground-truth scores.

    Returns:
        A scalar tensor holding the loss.
    """
    n_samples = orient_batch.shape[0]
    rows = torch.arange(n_samples)
    gt_bin = confGT_batch.max(dim=1).indices

    # Keep only the vectors belonging to each sample's ground-truth bin.
    gt_vec = orientGT_batch[rows, gt_bin]
    pred_vec = orient_batch[rows, gt_bin]

    gt_angle = torch.atan2(gt_vec[:, 1], gt_vec[:, 0])
    pred_angle = torch.atan2(pred_vec[:, 1], pred_vec[:, 0])

    return -1 * torch.cos(gt_angle - pred_angle).mean()
Reference
- https://github.com/skhadem/3D-BoundingBox