PointNet分类代码注释

最新推荐文章于 2024-06-14 16:59:57 发布

哪来那么多热情^^

最新推荐文章于 2024-06-14 16:59:57 发布

阅读量377

点赞数 3

本文链接：https://blog.csdn.net/weixin_49305813/article/details/111085511

版权

写在前面，看了激光点云的一些传统聚类方法之后，最近在学习基于深度学习的点云处理方法，看了不少论文和原理，下面是Pointnet的一些内容，同时记录学习过程。

一、论文

论文原文：link1
论文翻译：link2
对论文以及Pointnet的理解网上也有很多不错的资源：link3

二、代码

代码地址：link4
数据集下载(百度云)：link5

网络结构如下：
（此处为图片：网络结构图）

源码结构图如下：

（此处为图片：源码结构图）

看分类代码的分支，用到的其实就是 transform_nets.py 和 pointnet_cls.py，外加训练、测试和预测的脚本。

分类网络的理解（T-net和这个差不多）：
在这里插入图片描述

transform_nets.py

生成T-net的代码

# A highlighted block
import tensorflow as tf
import numpy as np
import sys
import os
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(BASE_DIR)
sys.path.append(os.path.join(BASE_DIR, '../utils'))
import tf_util

# B x N x 3 means: B point clouds, each with N points, each point given by its xyz coordinates.
def input_transform_net(point_cloud, is_training, bn_decay=None, K=3):
    """Input (XYZ) T-Net: predict one 3 x K alignment matrix per point cloud.

    Args:
        point_cloud: tensor of shape B x N x 3 (B clouds, N points each,
            xyz coordinates). B and N are read from the static shape.
        is_training: forwarded to every tf_util layer.
        bn_decay: batch-norm decay forwarded to every tf_util layer.
        K: number of columns of the predicted matrix. Only K == 3 is
            supported because the identity offset added to the biases is
            hard-coded as a flattened 3 x 3 matrix.

    Returns:
        Tensor of shape B x 3 x K. The final weights and biases are
        initialised to zero and the flattened identity is added to the
        biases, so the prediction is the identity matrix at initialisation.

    Raises:
        AssertionError: if K != 3.
    """
    batch_size = point_cloud.get_shape()[0].value
    num_point = point_cloud.get_shape()[1].value

    # NOTE(review): the shape comments below assume the tf_util helpers
    # behave like standard VALID convolutions / pooling - confirm in tf_util.

    # Append a trailing channel axis: B x N x 3 -> B x N x 3 x 1.
    input_image = tf.expand_dims(point_cloud, -1)
    # The [1,3] kernel spans the whole xyz row: B x N x 3 x 1 -> B x N x 1 x 64.
    net = tf_util.conv2d(input_image, 64, [1,3],
                         padding='VALID', stride=[1,1],
                         bn=True, is_training=is_training,
                         scope='tconv1', bn_decay=bn_decay)
    # B x N x 1 x 64 -> B x N x 1 x 128
    net = tf_util.conv2d(net, 128, [1,1],
                         padding='VALID', stride=[1,1],
                         bn=True, is_training=is_training,
                         scope='tconv2', bn_decay=bn_decay)
    # B x N x 1 x 128 -> B x N x 1 x 1024
    net = tf_util.conv2d(net, 1024, [1,1],
                         padding='VALID', stride=[1,1],
                         bn=True, is_training=is_training,
                         scope='tconv3', bn_decay=bn_decay)

    # Max-pool over the point axis: B x N x 1 x 1024 -> B x 1 x 1 x 1024.
    net = tf_util.max_pool2d(net, [num_point,1],
                             padding='VALID', scope='tmaxpool')

    # Flatten the pooled global feature: B x 1 x 1 x 1024 -> B x 1024.
    net = tf.reshape(net, [batch_size, -1])
    # B x 1024 -> B x 512
    net = tf_util.fully_connected(net, 512, bn=True, is_training=is_training,
                                  scope='tfc1', bn_decay=bn_decay)
    # B x 512 -> B x 256
    net = tf_util.fully_connected(net, 256, bn=True, is_training=is_training,
                                  scope='tfc2', bn_decay=bn_decay)

    # Regress the 3 x 3 transform from the 256-d feature.
    with tf.variable_scope('transform_XYZ'):
        assert K == 3, 'input_transform_net only supports K == 3'
        weights = tf.get_variable('weights', [256, 3*K],
                                  initializer=tf.constant_initializer(0.0),
                                  dtype=tf.float32)
        biases = tf.get_variable('biases', [3*K],
                                 initializer=tf.constant_initializer(0.0),
                                 dtype=tf.float32)
        # Offset the zero-initialised biases by the flattened identity.
        # Explicit addition is used instead of `biases += ...`: in-place `+=`
        # on a tf.Variable is deprecated / unsupported, whereas `+` always
        # yields a plain tensor.
        identity = tf.constant([1,0,0,0,1,0,0,0,1], dtype=tf.float32)
        biases = biases + identity
        transform = tf.matmul(net, weights)
        transform = tf.nn.bias_add(transform, biases)

    # B x 9 -> B x 3 x 3
    transform = tf.reshape(transform, [batch_size, 3, K])
    return transform



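# Builds the K x K feature-space transform (T-Net); it is a learned general matrix (pushed towards orthogonality by get_loss), and the structure largely mirrors input_transform_net above.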
def feature_transform_net(inputs, is_training, bn_decay=None, K=64):
    """Feature T-Net: predict one K x K feature-space transform per cloud.

    Args:
        inputs: per-point feature tensor of shape B x N x 1 x K. B and N are
            read from the static shape.
        is_training: forwarded to every tf_util layer.
        bn_decay: batch-norm decay forwarded to every tf_util layer.
        K: side length of the predicted square matrix.

    Returns:
        Tensor of shape B x K x K. The final weights and biases are
        initialised to zero and the flattened K x K identity is added to the
        biases, so the prediction is the identity matrix at initialisation.
    """
    batch_size = inputs.get_shape()[0].value
    num_point = inputs.get_shape()[1].value

    # Per-point feature extraction with 1x1 convolutions (64 -> 128 -> 1024
    # output channels).
    net = tf_util.conv2d(inputs, 64, [1,1],
                         padding='VALID', stride=[1,1],
                         bn=True, is_training=is_training,
                         scope='tconv1', bn_decay=bn_decay)
    net = tf_util.conv2d(net, 128, [1,1],
                         padding='VALID', stride=[1,1],
                         bn=True, is_training=is_training,
                         scope='tconv2', bn_decay=bn_decay)
    net = tf_util.conv2d(net, 1024, [1,1],
                         padding='VALID', stride=[1,1],
                         bn=True, is_training=is_training,
                         scope='tconv3', bn_decay=bn_decay)
    # Max-pool over the point axis to obtain one global feature per cloud.
    net = tf_util.max_pool2d(net, [num_point,1],
                             padding='VALID', scope='tmaxpool')

    # Flatten the pooled feature to B x (remaining dims), then 512 -> 256.
    net = tf.reshape(net, [batch_size, -1])
    net = tf_util.fully_connected(net, 512, bn=True, is_training=is_training,
                                  scope='tfc1', bn_decay=bn_decay)
    net = tf_util.fully_connected(net, 256, bn=True, is_training=is_training,
                                  scope='tfc2', bn_decay=bn_decay)

    # Regress the K*K matrix entries from the 256-d feature.
    with tf.variable_scope('transform_feat'):
        weights = tf.get_variable('weights', [256, K*K],
                                  initializer=tf.constant_initializer(0.0),
                                  dtype=tf.float32)
        biases = tf.get_variable('biases', [K*K],
                                 initializer=tf.constant_initializer(0.0),
                                 dtype=tf.float32)
        # Offset the zero-initialised biases by the flattened identity.
        # Explicit addition is used instead of `biases += ...`: in-place `+=`
        # on a tf.Variable is deprecated / unsupported, whereas `+` always
        # yields a plain tensor.
        identity = tf.constant(np.eye(K).flatten(), dtype=tf.float32)
        biases = biases + identity
        transform = tf.matmul(net, weights)
        transform = tf.nn.bias_add(transform, biases)

    # B x (K*K) -> B x K x K
    transform = tf.reshape(transform, [batch_size, K, K])
    return transform

pointnet_cls.py

# A highlighted block
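# This file implements the classification network. Its output is B x 40: one unnormalised score (logit) per class for every sample - softmax is only applied inside the loss. The architecture is defined in get_model() and the loss in get_loss().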
import tensorflow as tf
import numpy as np
import math
import sys
import os
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(BASE_DIR)
sys.path.append(os.path.join(BASE_DIR, '../utils'))
# 定义函数
import tf_util
# 引入旋转矩阵T-net
from transform_nets import input_transform_net, feature_transform_net

def placeholder_inputs(batch_size, num_point):
    """Create the graph input placeholders for one batch.

    Args:
        batch_size: number of point clouds per batch (first dimension of
            both placeholders).
        num_point: number of points per cloud.

    Returns:
        A tuple (pointclouds_pl, labels_pl): a float32 placeholder of shape
        (batch_size, num_point, 3) and an int32 placeholder of shape
        (batch_size,).
    """
    pointclouds_pl = tf.placeholder(tf.float32, shape=(batch_size, num_point, 3))
    # The trailing comma matters: `(batch_size)` is just a parenthesised
    # scalar, so batch_size=None would turn into shape=None (unknown rank)
    # instead of a rank-1 shape with an unknown length.
    labels_pl = tf.placeholder(tf.int32, shape=(batch_size,))
    return pointclouds_pl, labels_pl


def get_model(point_cloud, is_training, bn_decay=None, num_classes=40):
    """Build the PointNet classification network.

    Args:
        point_cloud: tensor of shape B x N x 3. B and N are read from the
            static shape.
        is_training: forwarded to every tf_util layer (batch norm, dropout).
        bn_decay: batch-norm decay forwarded to the tf_util layers.
        num_classes: width of the final fully connected layer. Defaults to
            40, the value that was previously hard-coded.

    Returns:
        A tuple (net, end_points). `net` has shape B x num_classes and holds
        unnormalised class scores: the last layer has no activation, so
        these are logits, not probabilities. `end_points` is a dict whose
        'transform' entry is the B x 64 x 64 feature transform, which
        get_loss() reads for its regularisation term.
    """
    batch_size = point_cloud.get_shape()[0].value
    num_point = point_cloud.get_shape()[1].value
    end_points = {}

    # NOTE(review): the shape comments below assume the tf_util helpers
    # behave like standard VALID convolutions / pooling - confirm in tf_util.

    # Predict the 3 x 3 input transform; its variables live under the
    # 'transform_net1' scope.
    with tf.variable_scope('transform_net1'):
        transform = input_transform_net(point_cloud, is_training, bn_decay, K=3)

    # Batched matmul: the leading axis is treated as the batch and the last
    # two axes are multiplied as matrices, (B x N x 3) @ (B x 3 x 3).
    point_cloud_transformed = tf.matmul(point_cloud, transform)

    # Append a trailing channel axis: B x N x 3 -> B x N x 3 x 1.
    input_image = tf.expand_dims(point_cloud_transformed, -1)

    # The [1,3] kernel spans the whole xyz row: B x N x 3 x 1 -> B x N x 1 x 64.
    net = tf_util.conv2d(input_image, 64, [1,3],
                         padding='VALID', stride=[1,1],
                         bn=True, is_training=is_training,
                         scope='conv1', bn_decay=bn_decay)

    # B x N x 1 x 64 -> B x N x 1 x 64
    net = tf_util.conv2d(net, 64, [1,1],
                         padding='VALID', stride=[1,1],
                         bn=True, is_training=is_training,
                         scope='conv2', bn_decay=bn_decay)

    # Predict the 64 x 64 feature transform under the 'transform_net2' scope
    # and expose it through end_points.
    with tf.variable_scope('transform_net2'):
        transform = feature_transform_net(net, is_training, bn_decay, K=64)
    end_points['transform'] = transform

    # Drop the size-1 axis 2 (B x N x 1 x 64 -> B x N x 64) so the features
    # can be multiplied by the B x 64 x 64 transform ...
    net_transformed = tf.matmul(tf.squeeze(net, axis=[2]), transform)
    # ... then restore that axis: B x N x 64 -> B x N x 1 x 64.
    net_transformed = tf.expand_dims(net_transformed, [2])

    # B x N x 1 x 64 -> B x N x 1 x 64
    net = tf_util.conv2d(net_transformed, 64, [1,1],
                         padding='VALID', stride=[1,1],
                         bn=True, is_training=is_training,
                         scope='conv3', bn_decay=bn_decay)
    # B x N x 1 x 64 -> B x N x 1 x 128
    net = tf_util.conv2d(net, 128, [1,1],
                         padding='VALID', stride=[1,1],
                         bn=True, is_training=is_training,
                         scope='conv4', bn_decay=bn_decay)
    # B x N x 1 x 128 -> B x N x 1 x 1024
    net = tf_util.conv2d(net, 1024, [1,1],
                         padding='VALID', stride=[1,1],
                         bn=True, is_training=is_training,
                         scope='conv5', bn_decay=bn_decay)

    # Symmetric function: max pooling over the point axis,
    # B x N x 1 x 1024 -> B x 1 x 1 x 1024.
    net = tf_util.max_pool2d(net, [num_point,1],
                             padding='VALID', scope='maxpool')
    # Flatten the global feature: B x 1 x 1 x 1024 -> B x 1024.
    net = tf.reshape(net, [batch_size, -1])
    # B x 1024 -> B x 512
    net = tf_util.fully_connected(net, 512, bn=True, is_training=is_training,
                                  scope='fc1', bn_decay=bn_decay)

    # Dropout between the fully connected layers to reduce over-fitting.
    # NOTE(review): keep_prob=0.7 presumably means each unit is kept with
    # probability 0.7 (dropped with 0.3) while training - confirm in
    # tf_util.dropout.
    net = tf_util.dropout(net, keep_prob=0.7, is_training=is_training,
                          scope='dp1')
    # B x 512 -> B x 256
    net = tf_util.fully_connected(net, 256, bn=True, is_training=is_training,
                                  scope='fc2', bn_decay=bn_decay)
    net = tf_util.dropout(net, keep_prob=0.7, is_training=is_training,
                          scope='dp2')

    # Final layer without activation: B x 256 -> B x num_classes logits.
    net = tf_util.fully_connected(net, num_classes, activation_fn=None, scope='fc3')

    return net, end_points


# Loss definition; `pred` is the logits tensor (`net`) returned by get_model().
def get_loss(pred, label, end_points, reg_weight=0.001):
    """Classification loss plus an orthogonality penalty on the feature transform.

    Args:
        pred: B x NUM_CLASSES logits.
        label: B integer class ids (sparse labels, one id per sample).
        end_points: dict whose 'transform' entry is the B x K x K feature
            transform.
        reg_weight: multiplier applied to the orthogonality penalty.

    Returns:
        Scalar tensor: classify_loss + mat_diff_loss * reg_weight.

    Side effects:
        Registers two scalar summaries, 'classify loss' and 'mat loss'.
    """
    # Softmax cross-entropy against sparse labels gives one value per
    # sample; the classification term is the mean over the batch.
    per_sample_xent = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=pred, labels=label)
    classify_loss = tf.reduce_mean(per_sample_xent)
    tf.summary.scalar('classify loss', classify_loss)

    # Enforce the transformation as orthogonal matrix: for an orthogonal A,
    # A * A^T equals the identity, so the penalty measures how far
    # A * A^T is from I.
    feat_transform = end_points['transform']  # B x K x K
    k = feat_transform.get_shape()[1].value
    # perm=[0,2,1] swaps the two matrix axes and leaves the batch axis in
    # place, i.e. it transposes every matrix of the batch.
    feat_transform_t = tf.transpose(feat_transform, perm=[0, 2, 1])
    gram = tf.matmul(feat_transform, feat_transform_t)  # B x K x K
    identity = tf.constant(np.eye(k), dtype=tf.float32)
    # tf.nn.l2_loss returns half the sum of squared entries (no square root).
    mat_diff_loss = tf.nn.l2_loss(gram - identity)
    tf.summary.scalar('mat loss', mat_diff_loss)

    weighted_penalty = mat_diff_loss * reg_weight
    return classify_loss + weighted_penalty


if __name__ == '__main__':
    # Smoke test: build the classifier once in a fresh graph on an all-zero
    # 32 x 1024 x 3 batch and print what get_model() returns.
    smoke_graph = tf.Graph()
    with smoke_graph.as_default():
        dummy_clouds = tf.zeros((32, 1024, 3))
        training_flag = tf.constant(True)
        print(get_model(dummy_clouds, training_flag))

哪来那么多热情^^

关注

3
点赞
踩
6

收藏

觉得还不错? 一键收藏
0
评论
PointNet分类代码注释

写在前面，看了激光点云的一些传统聚类方法之后，最近在学习基于深度学习的点云处理方法，看了不少论文和原理，下面是Pointnet的一些内容，同时记录学习过程。一、论文论文原文：link1论文翻译：link2对论文以及Pointnet的理解网上也有很多不错的资源：link3二、代码代码地址：link4数据集下载(百度云)：link5...
复制链接

扫一扫