PointNet网络模型代码解析
论文地址:https://arxiv.org/pdf/1612.00593
参考代码地址:https://github.com/fxia22/pointnet.pytorch;
T-Net3d
首先数据输入为n*3,然后接一个T-net;
根据论文中的介绍:第一个变换网络是一个迷你PointNet,它以原始点云为输入,回归到3×3矩阵。它由每个点上的共享MLP(64,128,1024)网络(层输出大小为64,128,1024)、跨点的最大池化和两个输出大小为512,256的全连接层组成。输出矩阵初始化为单位矩阵。除最后一层外,所有层都包括ReLU和批归一化。
class TNet3d(nn.Module):
    """Spatial transformer network that regresses a 3x3 alignment matrix
    from a raw point cloud of shape (batch, 3, n_points).

    Mini-PointNet as described in the paper: a shared per-point MLP
    (64, 128, 1024), a max-pool across points, then fully connected layers
    (512, 256) down to the 9 matrix entries. The prediction is biased
    towards the identity matrix by adding a flattened identity offset.
    """

    def __init__(self):
        # Initialize the nn.Module machinery so layers get registered.
        super(TNet3d, self).__init__()
        # Shared MLP realized as 1x1 Conv1d layers: lifts every point
        # independently from 3 -> 64 -> 128 -> 1024 dimensions.
        self.conv1 = torch.nn.Conv1d(3, 64, 1)
        self.conv2 = torch.nn.Conv1d(64, 128, 1)
        self.conv3 = torch.nn.Conv1d(128, 1024, 1)
        # Fully connected head: reduces the 1024-d global feature down
        # to the 9 entries of the predicted 3x3 transform.
        self.fc1 = nn.Linear(1024, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 9)
        self.relu = nn.ReLU()
        # One BatchNorm per conv/fc layer (all layers except the last fc).
        self.bn1 = nn.BatchNorm1d(64)
        self.bn2 = nn.BatchNorm1d(128)
        self.bn3 = nn.BatchNorm1d(1024)
        self.bn4 = nn.BatchNorm1d(512)
        self.bn5 = nn.BatchNorm1d(256)

    def forward(self, x):
        """Map a (batch, 3, n_points) cloud to a (batch, 3, 3) transform."""
        batch_size = x.size()[0]
        # Per-point feature lifting through the shared MLP.
        x = F.relu(self.bn1(self.conv1(x)))
        x = F.relu(self.bn2(self.conv2(x)))
        x = F.relu(self.bn3(self.conv3(x)))
        # Symmetric max-pool over the point dimension -> one global feature.
        x = torch.max(x, 2, keepdim=True)[0]
        x = x.view(-1, 1024)
        # Regression head down to the 9 matrix values.
        x = F.relu(self.bn4(self.fc1(x)))
        x = F.relu(self.bn5(self.fc2(x)))
        x = self.fc3(x)
        # Flattened 3x3 identity, one row per batch element; adding it makes
        # the network predict the identity transform at initialization.
        identity = Variable(torch.from_numpy(np.array([1, 0, 0, 0, 1, 0, 0, 0, 1]).astype(np.float32))).view(1, 9).repeat(batch_size, 1)
        # The offset must live on the same device as the prediction.
        if x.is_cuda:
            identity = identity.cuda()
        x = x + identity
        return x.view(-1, 3, 3)
T-Netkd
这个类和前面的T-Net3d结构完全一样,只是利用k作为变量控制输入输出的维度
class TNetkd(nn.Module):
    """Feature-space transformer network: regresses a k x k alignment matrix.

    Same mini-PointNet architecture as TNet3d, generalized so the input
    feature dimension k is a constructor parameter (default 64, matching the
    first feature layer of PointNetFeat).
    """

    def __init__(self, k=64):
        super(TNetkd, self).__init__()
        # Shared per-point MLP: k -> 64 -> 128 -> 1024, as 1x1 convolutions.
        self.conv1 = torch.nn.Conv1d(k, 64, 1)
        self.conv2 = torch.nn.Conv1d(64, 128, 1)
        self.conv3 = torch.nn.Conv1d(128, 1024, 1)
        # Fully connected head down to the k*k matrix entries.
        self.fc1 = nn.Linear(1024, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, k * k)
        self.relu = nn.ReLU()
        # BatchNorm on every layer except the final fc.
        self.bn1 = nn.BatchNorm1d(64)
        self.bn2 = nn.BatchNorm1d(128)
        self.bn3 = nn.BatchNorm1d(1024)
        self.bn4 = nn.BatchNorm1d(512)
        self.bn5 = nn.BatchNorm1d(256)
        self.k = k

    def forward(self, x):
        """Map (batch, k, n_points) features to a (batch, k, k) transform."""
        batchsize = x.size()[0]
        x = F.relu(self.bn1(self.conv1(x)))
        x = F.relu(self.bn2(self.conv2(x)))
        x = F.relu(self.bn3(self.conv3(x)))
        # BUG FIX: the max-pool over the point dimension was missing here.
        # Without it the view below folds points into the batch dimension,
        # and the identity offset fails to broadcast whenever n_points > 1.
        # This matches TNet3d and the reference implementation.
        x = torch.max(x, 2, keepdim=True)[0]
        x = x.view(-1, 1024)
        x = F.relu(self.bn4(self.fc1(x)))
        x = F.relu(self.bn5(self.fc2(x)))
        x = self.fc3(x)
        # Flattened k x k identity, repeated per batch element, so the
        # network starts out predicting the identity transform.
        iden = Variable(torch.from_numpy(np.eye(self.k).flatten().astype(np.float32))).view(1, self.k * self.k).repeat(batchsize, 1)
        # Keep the offset on the same device as the prediction.
        if x.is_cuda:
            iden = iden.cuda()
        x = x + iden
        x = x.view(-1, self.k, self.k)
        return x
FeatNet
PointNet中的特征提取层,结合了两个T-Net网络,进行数据升维,提取global feature
class PointNetFeat(nn.Module):
    """PointNet feature extractor.

    Applies the input transform (TNet3d), a shared per-point MLP, optionally
    a feature-space transform (TNetkd), and a max-pool that aggregates all
    points into a 1024-d global feature. With global_feat=False the global
    feature is tiled and concatenated back onto the per-point features for
    the segmentation branch.
    """

    def __init__(self, global_feat=True, feature_transform=False):
        super(PointNetFeat, self).__init__()
        # Input (spatial) transformer network predicting a 3x3 matrix.
        self.stn = TNet3d()
        # Shared MLP: 3 -> 64 -> 128 -> 1024, realized as 1x1 convolutions.
        self.conv1 = torch.nn.Conv1d(3, 64, 1)
        self.conv2 = torch.nn.Conv1d(64, 128, 1)
        self.conv3 = torch.nn.Conv1d(128, 1024, 1)
        self.bn1 = nn.BatchNorm1d(64)
        self.bn2 = nn.BatchNorm1d(128)
        self.bn3 = nn.BatchNorm1d(1024)
        self.global_feat = global_feat
        self.feature_transform = feature_transform
        if self.feature_transform:
            # Optional 64x64 transformer applied in feature space.
            self.fstn = TNetkd(64)

    def forward(self, x):
        """Extract features from a (batch, 3, n_points) cloud.

        Returns (features, trans, trans_feat) where trans is the 3x3 input
        transform and trans_feat the 64x64 feature transform (or None).
        """
        n_pts = x.size()[2]  # number of points per cloud
        # Predict the 3x3 input transform and apply it; bmm needs the point
        # dimension first, hence the transposes around it.
        trans = self.stn(x)
        x = torch.bmm(x.transpose(2, 1), trans).transpose(2, 1)
        # First stage of per-point feature lifting.
        x = F.relu(self.bn1(self.conv1(x)))
        if self.feature_transform:
            # Align the 64-d per-point features with a predicted transform.
            trans_feat = self.fstn(x)
            x = torch.bmm(x.transpose(2, 1), trans_feat).transpose(2, 1)
        else:
            trans_feat = None
        # Keep the per-point features around for the segmentation branch.
        pointfeat = x
        x = F.relu(self.bn2(self.conv2(x)))
        x = self.bn3(self.conv3(x))
        # Symmetric max-pool aggregates all points into one global feature.
        x = torch.max(x, 2, keepdim=True)[0]
        x = x.view(-1, 1024)
        if self.global_feat:
            return x, trans, trans_feat
        # Segmentation: tile the global feature and stack it on each point.
        x = x.view(-1, 1024, 1).repeat(1, 1, n_pts)
        return torch.cat([x, pointfeat], 1), trans, trans_feat
PointNetCls
特征设计完成后,根据论文就可以写出PointNet中的分类网络结构,论文中分类网络在得到global feature之后接了几个全连接层,就输出结果了,仔细想想是不是差了点什么,没错就是local feature,这也是PointNet++改进的地方,后续跟进PointNet++的讲解;
在类分数预测之前,在输出维度为256的最后一个完全连接层上使用保持比为0.7的丢弃;批量归一化的衰减率从0.5开始,逐渐增加到0.99;使用初始学习率为0.001、动量为0.9、批量大小为32的adam优化器,学习率每20个时期除以2;
class PointNetCls(nn.Module):
    """PointNet classification head: global feature -> k class log-probs."""

    def __init__(self, k=2, feature_transform=False):
        super(PointNetCls, self).__init__()
        self.feature_transform = feature_transform
        # Feature extractor returning only the 1024-d global feature.
        self.feat = PointNetFeat(global_feat=True, feature_transform=feature_transform)
        # Classifier MLP: 1024 -> 512 -> 256 -> k.
        self.fc1 = nn.Linear(1024, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, k)
        # NOTE(review): the paper reports a keep-ratio of 0.7 for dropout,
        # while the reference code uses p=0.3 — kept as-is pending testing.
        self.dropout = nn.Dropout(p=0.3)
        self.bn1 = nn.BatchNorm1d(512)
        self.bn2 = nn.BatchNorm1d(256)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Classify (batch, 3, n_points) clouds.

        Returns (log_probs, trans, trans_feat): per-class log-probabilities
        plus both transform matrices (used for the regularization loss).
        """
        x, trans, trans_feat = self.feat(x)
        x = F.relu(self.bn1(self.fc1(x)))
        x = F.relu(self.bn2(self.dropout(self.fc2(x))))
        x = self.fc3(x)
        return F.log_softmax(x, dim=1), trans, trans_feat
网络结构可视化
利用神经网络可视化工具torchview画出T-Net网络结构: