【论文】pointnet代码解读

1 引言

PointNet论文的解读已在前文中总结,本文简要讲述PointNet的复现过程,并对源码及其架构进行总结。

2 论文复现

2.1 环境配置

源码:PointNet
组件版本:tensorflowGPU1.13.1+cuda10.0.1+cudnn7.4.2
配置流程:TensorflowGPU安装

2.2 运行

1 从Anaconda Prompt中激活TensorflowGPU环境
在这里插入图片描述
2 进入源码根目录之下,运行train.py
在这里插入图片描述
3 在dump文件夹之下查看错误训练结果
在这里插入图片描述上述错误图片命名格式为错误序号_预测值_pred_真实值

4 进入part_seg文件夹,运行sh download_data.sh下载ShapeNetPart数据以及HDF5数据
5 运行python train.py进行训练,结束后运行test.py进行验证。
6 进入test_result查看部分分割结果
在这里插入图片描述
每一组分割目标以序号区分;每组中,log文件为分割结果描述,diff为分割有误的点,gt为ground truth(真实值),pred为预测值。可使用pointcloud compare软件打开这些obj文件。
在这里插入图片描述
7 同样的方法,进入sem_seg,下载数据并进行训练与测试,在log文件夹之下的dump中查看语义分割的结果。
在这里插入图片描述

3 源码解读

主要是对models文件夹中的三个文件进行解读。

3.1 pointnet_cls.py 点云分类

import tensorflow as tf
import numpy as np
import math
import sys
import os
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(BASE_DIR)
sys.path.append(os.path.join(BASE_DIR, '../utils'))
import tf_util
from transform_nets import input_transform_net, feature_transform_net

def placeholder_inputs(batch_size, num_point):
    """Create the input placeholders for the classification network.

    Args:
        batch_size: Number of point clouds per batch.
        num_point: Number of points in each cloud.

    Returns:
        A ``(pointclouds_pl, labels_pl)`` tuple: a float32 placeholder of
        shape ``(batch_size, num_point, 3)`` and an int32 placeholder of
        shape ``(batch_size,)``.
    """
    pointclouds_pl = tf.placeholder(tf.float32, shape=(batch_size, num_point, 3))
    # The trailing comma matters: ``(batch_size)`` is a plain int, not a
    # tuple, so ``batch_size=None`` would silently produce a placeholder of
    # unknown rank instead of a rank-1 label vector.
    labels_pl = tf.placeholder(tf.int32, shape=(batch_size,))
    return pointclouds_pl, labels_pl


def get_model(point_cloud, is_training, bn_decay=None):
    """Build the PointNet classification graph (input BxNx3, output Bx40).

    Shape notes in the comments assume ``tf_util`` layers behave like
    standard NHWC convolution / pooling wrappers.

    Args:
        point_cloud: Input tensor of shape BxNx3. Batch size and point count
            are read from its static shape.
        is_training: Training flag forwarded to batch-norm and dropout.
        bn_decay: Batch-norm decay forwarded to the ``tf_util`` layers.

    Returns:
        ``(net, end_points)``: the Bx40 class scores (no activation on the
        last layer) and a dict whose ``'transform'`` entry is the feature
        transform matrix consumed by ``get_loss``.
    """
    static_shape = point_cloud.get_shape()
    batch_size = static_shape[0].value
    num_point = static_shape[1].value
    end_points = {}

    def pointwise_conv(inputs, channels, kernel, scope):
        # Every convolution in this model shares these settings.
        return tf_util.conv2d(inputs, channels, kernel,
                              padding='VALID', stride=[1,1],
                              bn=True, is_training=is_training,
                              scope=scope, bn_decay=bn_decay)

    # Input T-Net: predicts a Bx3x3 matrix, applied as [B,N,3] x [B,3,3].
    with tf.variable_scope('transform_net1'):
        input_transform = input_transform_net(point_cloud, is_training, bn_decay, K=3)
    aligned_points = tf.matmul(point_cloud, input_transform)

    # Add a trailing channel axis ([B,N,3,1]) so 2-D convolutions apply.
    net = tf.expand_dims(aligned_points, -1)

    # conv1 uses a [1,3] kernel that spans the xyz axis -> [B,N,1,64];
    # conv2 is a per-point [1,1] convolution -> [B,N,1,64].
    net = pointwise_conv(net, 64, [1,3], 'conv1')
    net = pointwise_conv(net, 64, [1,1], 'conv2')

    # Feature T-Net: predicts a Bx64x64 matrix for the 64-d point features.
    with tf.variable_scope('transform_net2'):
        feature_transform = feature_transform_net(net, is_training, bn_decay, K=64)

    # Kept so the loss can regularise the feature transform.
    end_points['transform'] = feature_transform

    # Drop the singleton axis, apply the transform, then restore the axis:
    # [B,N,64] x [B,64,64] -> [B,N,64] -> [B,N,1,64].
    squeezed = tf.squeeze(net, axis=[2])
    net = tf.expand_dims(tf.matmul(squeezed, feature_transform), [2])

    # Per-point MLP 64 -> 128 -> 1024, ending at [B,N,1,1024].
    for channels, scope in ((64, 'conv3'), (128, 'conv4'), (1024, 'conv5')):
        net = pointwise_conv(net, channels, [1,1], scope)

    # Symmetric function: max pooling over all N points -> [B,1,1,1024].
    net = tf_util.max_pool2d(net, [num_point,1],
                             padding='VALID', scope='maxpool')

    # Flatten to [B,1024] for the fully connected head.
    net = tf.reshape(net, [batch_size, -1])

    # Two FC + dropout stages (512, 256), then the 40-way output layer.
    for width, fc_scope, dp_scope in ((512, 'fc1', 'dp1'), (256, 'fc2', 'dp2')):
        net = tf_util.fully_connected(net, width, bn=True, is_training=is_training,
                                      scope=fc_scope, bn_decay=bn_decay)
        net = tf_util.dropout(net, keep_prob=0.7, is_training=is_training,
                              scope=dp_scope)
    net = tf_util.fully_connected(net, 40, activation_fn=None, scope='fc3')

    return net, end_points


def get_loss(pred, label, end_points, reg_weight=0.001):
    """Classification loss plus an orthogonality penalty on the transform.

    Args:
        pred: Class scores, B x NUM_CLASSES.
        label: Integer class labels, shape B.
        end_points: Dict holding the BxKxK matrix under ``'transform'``.
        reg_weight: Weight of the transform regularisation term.

    Returns:
        Scalar tensor ``classify_loss + mat_diff_loss * reg_weight``.
    """
    # Mean softmax cross-entropy between the scores and the true labels.
    per_example = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=pred, labels=label)
    classify_loss = tf.reduce_mean(per_example)
    # Recorded for TensorBoard.
    tf.summary.scalar('classify loss', classify_loss)

    # Enforce the transformation as orthogonal matrix: penalise the
    # difference between A * A^T and the identity.
    transform = end_points['transform'] # BxKxK
    K = transform.get_shape()[1].value
    gram = tf.matmul(transform, tf.transpose(transform, perm=[0,2,1]))
    identity = tf.constant(np.eye(K), dtype=tf.float32)
    mat_diff_loss = tf.nn.l2_loss(gram - identity)
    tf.summary.scalar('mat loss', mat_diff_loss)

    # Total loss: classification term plus weighted transform term.
    return classify_loss + mat_diff_loss * reg_weight


if __name__=='__main__':
    # Smoke test: build the graph once on an all-zero batch and print the
    # (net, end_points) pair returned by get_model.
    with tf.Graph().as_default():
        dummy_cloud = tf.zeros((32,1024,3))
        print(get_model(dummy_cloud, tf.constant(True)))

3.2 pointnet_seg.py 分割

import tensorflow as tf
import numpy as np
import math
import sys
import os
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(BASE_DIR)
sys.path.append(os.path.join(BASE_DIR, '../utils'))
import tf_util
from transform_nets import input_transform_net, feature_transform_net

def placeholder_inputs(batch_size, num_point):
    """Create the input placeholders for the segmentation network.

    Returns:
        ``(pointclouds_pl, labels_pl)``: a float32 placeholder of shape
        ``(batch_size, num_point, 3)`` and an int32 placeholder of shape
        ``(batch_size, num_point)`` (one label per point).
    """
    cloud_shape = (batch_size, num_point, 3)
    label_shape = (batch_size, num_point)
    pointclouds_pl = tf.placeholder(tf.float32, shape=cloud_shape)
    labels_pl = tf.placeholder(tf.int32, shape=label_shape)
    return pointclouds_pl, labels_pl


def get_model(point_cloud, is_training, bn_decay=None):
    """Build the PointNet segmentation graph (input BxNx3, output BxNx50).

    Shape notes in the comments assume ``tf_util`` layers behave like
    standard NHWC convolution / pooling wrappers.

    Args:
        point_cloud: Input tensor of shape BxNx3. Batch size and point count
            are read from its static shape.
        is_training: Training flag forwarded to batch-norm.
        bn_decay: Batch-norm decay forwarded to the ``tf_util`` layers.

    Returns:
        ``(net, end_points)``: per-point scores of shape BxNx50 (no
        activation on the last layer) and a dict whose ``'transform'`` entry
        is the feature transform matrix consumed by ``get_loss``.
    """
    batch_size = point_cloud.get_shape()[0].value
    num_point = point_cloud.get_shape()[1].value
    end_points = {}

    def pointwise_conv(inputs, channels, kernel, scope):
        # Shared settings of every batch-normalised convolution below.
        return tf_util.conv2d(inputs, channels, kernel,
                              padding='VALID', stride=[1,1],
                              bn=True, is_training=is_training,
                              scope=scope, bn_decay=bn_decay)

    # Input T-Net: predicts a Bx3x3 matrix, applied as [B,N,3] x [B,3,3].
    with tf.variable_scope('transform_net1') as sc:
        transform = input_transform_net(point_cloud, is_training, bn_decay, K=3)
    point_cloud_transformed = tf.matmul(point_cloud, transform)

    # Add a trailing channel axis ([B,N,3,1]) so 2-D convolutions apply.
    input_image = tf.expand_dims(point_cloud_transformed, -1)

    # conv1 spans the xyz axis -> [B,N,1,64]; conv2 is per-point -> [B,N,1,64].
    net = pointwise_conv(input_image, 64, [1,3], 'conv1')
    net = pointwise_conv(net, 64, [1,1], 'conv2')

    # Feature T-Net: predicts a Bx64x64 matrix for the 64-d point features.
    with tf.variable_scope('transform_net2') as sc:
        transform = feature_transform_net(net, is_training, bn_decay, K=64)

    # Kept so the loss can regularise the feature transform.
    end_points['transform'] = transform

    # [B,N,64] x [B,64,64] -> [B,N,64], then back to [B,N,1,64]. These
    # per-point (local) features are reused in the concatenation below.
    net_transformed = tf.matmul(tf.squeeze(net, axis=[2]), transform)
    point_feat = tf.expand_dims(net_transformed, [2])
    print(point_feat)

    # Per-point MLP 64 -> 128 -> 1024, ending at [B,N,1,1024].
    net = pointwise_conv(point_feat, 64, [1,1], 'conv3')
    net = pointwise_conv(net, 128, [1,1], 'conv4')
    net = pointwise_conv(net, 1024, [1,1], 'conv5')

    # Max pooling over all N points gives the global feature [B,1,1,1024].
    global_feat = tf_util.max_pool2d(net, [num_point,1],
                                     padding='VALID', scope='maxpool')
    print(global_feat)

    # Repeat the global feature once per point: [B,N,1,1024].
    global_feat_expand = tf.tile(global_feat, [1, num_point, 1, 1])

    # Concatenate local (64) and global (1024) features along the channel
    # axis -> [B,N,1,1088]. TF >= 1.0 takes the values first and the axis
    # second; the old ``tf.concat(3, [...])`` order no longer works.
    concat_feat = tf.concat([point_feat, global_feat_expand], axis=3)
    print(concat_feat)

    # Segmentation head: 1088 -> 512 -> 256 -> 128 -> 128 per point.
    net = pointwise_conv(concat_feat, 512, [1,1], 'conv6')
    net = pointwise_conv(net, 256, [1,1], 'conv7')
    net = pointwise_conv(net, 128, [1,1], 'conv8')
    net = pointwise_conv(net, 128, [1,1], 'conv9')

    # Output layer: 50 scores per point, no batch-norm and no activation.
    net = tf_util.conv2d(net, 50, [1,1],
                         padding='VALID', stride=[1,1], activation_fn=None,
                         scope='conv10')

    # Drop the singleton axis -> [B,N,50].
    net = tf.squeeze(net, [2]) # BxNxC

    return net, end_points


def get_loss(pred, label, end_points, reg_weight=0.001):
    """Per-point segmentation loss plus an orthogonality penalty.

    Args:
        pred: Per-point scores, BxNxC.
        label: Per-point integer labels, BxN.
        end_points: Dict holding the BxKxK matrix under ``'transform'``.
        reg_weight: Weight of the transform regularisation term.

    Returns:
        Scalar tensor ``classify_loss + mat_diff_loss * reg_weight``.
    """
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=pred, labels=label)
    classify_loss = tf.reduce_mean(loss)
    # ``tf.scalar_summary`` was removed in TF 1.0; ``tf.summary.scalar`` is
    # its replacement and matches the classification model's loss.
    tf.summary.scalar('classify loss', classify_loss)

    # Enforce the transformation as orthogonal matrix: penalise the
    # difference between A * A^T and the identity.
    transform = end_points['transform'] # BxKxK
    K = transform.get_shape()[1].value
    mat_diff = tf.matmul(transform, tf.transpose(transform, perm=[0,2,1]))
    mat_diff -= tf.constant(np.eye(K), dtype=tf.float32)
    mat_diff_loss = tf.nn.l2_loss(mat_diff)
    tf.summary.scalar('mat_loss', mat_diff_loss)

    return classify_loss + mat_diff_loss * reg_weight


if __name__=='__main__':
    # Smoke test: build the graph once on an all-zero batch and print the
    # (net, end_points) pair returned by get_model.
    with tf.Graph().as_default():
        dummy_cloud = tf.zeros((32,1024,3))
        print(get_model(dummy_cloud, tf.constant(True)))

3.3 transform_nets.py 点云以及特征转换

import tensorflow as tf
import numpy as np
import sys
import os
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(BASE_DIR)
sys.path.append(os.path.join(BASE_DIR, '../utils'))
import tf_util

def input_transform_net(point_cloud, is_training, bn_decay=None, K=3):
    """Input (XYZ) transform net (T-Net).

    Shape notes in the comments assume ``tf_util`` layers behave like
    standard NHWC convolution / pooling wrappers.

    Args:
        point_cloud: Input tensor of shape BxNx3 with static B and N.
        is_training: Training flag forwarded to batch-norm.
        bn_decay: Batch-norm decay forwarded to the ``tf_util`` layers.
        K: Number of output columns; only ``K == 3`` is accepted.

    Returns:
        Transformation tensor of shape ``[B, 3, K]``.
    """
    batch_size = point_cloud.get_shape()[0].value
    num_point = point_cloud.get_shape()[1].value

    def conv_bn(inputs, channels, kernel, scope):
        # Shared settings of the three feature-extraction convolutions.
        return tf_util.conv2d(inputs, channels, kernel,
                              padding='VALID', stride=[1,1],
                              bn=True, is_training=is_training,
                              scope=scope, bn_decay=bn_decay)

    # Add a trailing channel axis ([B,N,3,1]) so 2-D convolutions apply.
    net = tf.expand_dims(point_cloud, -1)

    # [1,3] kernel over xyz -> [B,N,1,64], then per-point 128 and 1024.
    net = conv_bn(net, 64, [1,3], 'tconv1')
    net = conv_bn(net, 128, [1,1], 'tconv2')
    net = conv_bn(net, 1024, [1,1], 'tconv3')

    # Max pooling over all N points -> [B,1,1,1024].
    net = tf_util.max_pool2d(net, [num_point,1],
                             padding='VALID', scope='tmaxpool')

    # Flatten to [B,1024].
    net = tf.reshape(net, [batch_size, -1])

    # Fully connected layers: 1024 -> 512 -> 256.
    for width, scope in ((512, 'tfc1'), (256, 'tfc2')):
        net = tf_util.fully_connected(net, width, bn=True, is_training=is_training,
                                      scope=scope, bn_decay=bn_decay)

    # Final linear layer: [B,256] x [256,3*K] + b -> [B,3*K]. Weights and
    # biases are zero-initialised and the flattened 3x3 identity is added to
    # the bias, so the initial prediction is the identity transform.
    with tf.variable_scope('transform_XYZ'):
        assert(K==3)
        weights = tf.get_variable('weights', [256, 3*K],
                                  initializer=tf.constant_initializer(0.0),
                                  dtype=tf.float32)
        biases = tf.get_variable('biases', [3*K],
                                 initializer=tf.constant_initializer(0.0),
                                 dtype=tf.float32)
        identity_bias = biases + tf.constant([1,0,0,0,1,0,0,0,1], dtype=tf.float32)
        flat_transform = tf.nn.bias_add(tf.matmul(net, weights), identity_bias)

    # [B,3*K] -> [B,3,K].
    return tf.reshape(flat_transform, [batch_size, 3, K])


def feature_transform_net(inputs, is_training, bn_decay=None, K=64):
    """Feature transform net (T-Net for K-dimensional point features).

    Shape notes in the comments assume ``tf_util`` layers behave like
    standard NHWC convolution / pooling wrappers.

    Args:
        inputs: Feature tensor of shape BxNx1xK with static B and N.
        is_training: Training flag forwarded to batch-norm.
        bn_decay: Batch-norm decay forwarded to the ``tf_util`` layers.
        K: Feature dimension; the result is a KxK matrix per cloud.

    Returns:
        Transformation tensor of shape ``[B, K, K]``.
    """
    batch_size = inputs.get_shape()[0].value
    num_point = inputs.get_shape()[1].value

    # Per-point convolutions: [B,N,1,K] -> 64 -> 128 -> 1024 channels.
    net = inputs
    for channels, scope in ((64, 'tconv1'), (128, 'tconv2'), (1024, 'tconv3')):
        net = tf_util.conv2d(net, channels, [1,1],
                             padding='VALID', stride=[1,1],
                             bn=True, is_training=is_training,
                             scope=scope, bn_decay=bn_decay)

    # Max pooling over all N points -> [B,1,1,1024].
    net = tf_util.max_pool2d(net, [num_point,1],
                             padding='VALID', scope='tmaxpool')

    # Flatten to [B,1024].
    net = tf.reshape(net, [batch_size, -1])

    # Fully connected layers: 1024 -> 512 -> 256.
    for width, scope in ((512, 'tfc1'), (256, 'tfc2')):
        net = tf_util.fully_connected(net, width, bn=True, is_training=is_training,
                                      scope=scope, bn_decay=bn_decay)

    # Final linear layer: [B,256] x [256,K*K] + b -> [B,K*K]. Weights and
    # biases are zero-initialised and the flattened KxK identity is added to
    # the bias, so the initial prediction is the identity transform.
    with tf.variable_scope('transform_feat'):
        weights = tf.get_variable('weights', [256, K*K],
                                  initializer=tf.constant_initializer(0.0),
                                  dtype=tf.float32)
        biases = tf.get_variable('biases', [K*K],
                                 initializer=tf.constant_initializer(0.0),
                                 dtype=tf.float32)
        identity_bias = biases + tf.constant(np.eye(K).flatten(), dtype=tf.float32)
        flat_transform = tf.nn.bias_add(tf.matmul(net, weights), identity_bias)

    # [B,K*K] -> [B,K,K].
    return tf.reshape(flat_transform, [batch_size, K, K])

3.4 主目录下的train.py

import argparse
import math
import h5py
import numpy as np
import tensorflow as tf
import socket
import importlib
import os
import sys

# Directory that contains this file, derived from the file's absolute path.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))

# Make modules next to this file importable. sys.path changes only affect
# the current process; they are not persistent.
sys.path.append(BASE_DIR)

# Also make the models/ and utils/ sub-directories of BASE_DIR importable.
sys.path.append(os.path.join(BASE_DIR, 'models'))
sys.path.append(os.path.join(BASE_DIR, 'utils'))


import provider
import tf_util


# Command-line interface of the training script.
parser = argparse.ArgumentParser()

# Run configuration and training hyper-parameters. Each help string states
# the option's real default.
parser.add_argument('--gpu', type=int, default=0, help='GPU to use [default: GPU 0]')
parser.add_argument('--model', default='pointnet_cls', help='Model name: pointnet_cls or pointnet_cls_basic [default: pointnet_cls]')
parser.add_argument('--log_dir', default='log', help='Log dir [default: log]')
parser.add_argument('--num_point', type=int, default=1024, help='Point Number [256/512/1024/2048] [default: 1024]')
parser.add_argument('--max_epoch', type=int, default=250, help='Epoch to run [default: 250]')
parser.add_argument('--batch_size', type=int, default=32, help='Batch Size during training [default: 32]')
parser.add_argument('--learning_rate', type=float, default=0.001, help='Initial learning rate [default: 0.001]')
# Fixed help text: this value is the momentum passed to the momentum
# optimizer, not a learning rate.
parser.add_argument('--momentum', type=float, default=0.9, help='Momentum for the momentum optimizer [default: 0.9]')
parser.add_argument('--optimizer', default='adam', help='adam or momentum [default: adam]')
parser.add_argument('--decay_step', type=int, default=200000, help='Decay step for lr decay [default: 200000]')
# Fixed help text: the documented default now matches the actual default.
parser.add_argument('--decay_rate', type=float, default=0.7, help='Decay rate for lr decay [default: 0.7]')

# Parse the command line. FLAGS is the resulting namespace; each declared
# option becomes one of its attributes.
FLAGS = parser.parse_args()

# Unpack the parsed options into module-level constants.
BATCH_SIZE = FLAGS.batch_size
NUM_POINT = FLAGS.num_point
MAX_EPOCH = FLAGS.max_epoch
BASE_LEARNING_RATE = FLAGS.learning_rate
GPU_INDEX = FLAGS.gpu
MOMENTUM = FLAGS.momentum
OPTIMIZER = FLAGS.optimizer
DECAY_STEP = FLAGS.decay_step
DECAY_RATE = FLAGS.decay_rate

# Import the network module named by --model (pointnet_cls by default); it
# is found through the models directory appended to sys.path.
MODEL = importlib.import_module(FLAGS.model) # import network module

# Path of the model's source file, used only for the backup copy below.
MODEL_FILE = os.path.join(BASE_DIR, 'models', FLAGS.model+'.py')

# Directory that receives logs, backups and checkpoints.
LOG_DIR = FLAGS.log_dir

# Create the log directory if it does not exist yet.
if not os.path.exists(LOG_DIR): os.mkdir(LOG_DIR)

# Copy the model definition and this training script into the log directory.
# NOTE(review): this depends on a `cp` shell command being available and on
# train.py being in the current working directory; failures are not checked.
os.system('cp %s %s' % (MODEL_FILE, LOG_DIR)) # bkp of model def
os.system('cp train.py %s' % (LOG_DIR)) # bkp of train procedure

# Open the training log file and write the parsed options as its first line.
LOG_FOUT = open(os.path.join(LOG_DIR, 'log_train.txt'), 'w')
LOG_FOUT.write(str(FLAGS)+'\n')

# Maximum number of points and number of classes.
MAX_NUM_POINT = 2048
NUM_CLASSES = 40

# Schedule constants for the batch-norm decay computed in get_bn_decay
# (these are not learning-rate parameters).
BN_INIT_DECAY = 0.5
BN_DECAY_DECAY_RATE = 0.5
BN_DECAY_DECAY_STEP = float(DECAY_STEP)
BN_DECAY_CLIP = 0.99

# Host name of the current machine.
HOSTNAME = socket.gethostname()

# Lists of training and test data files, read through the project's
# provider module from the ModelNet40 HDF5 data directory.
# ModelNet40 official train/test split
TRAIN_FILES = provider.getDataFiles( \
    os.path.join(BASE_DIR, 'data/modelnet40_ply_hdf5_2048/train_files.txt'))
TEST_FILES = provider.getDataFiles(\
    os.path.join(BASE_DIR, 'data/modelnet40_ply_hdf5_2048/test_files.txt'))

#
def log_string(out_str):
    """Write ``out_str`` as one line to the training log and echo it to stdout."""
    line = out_str + '\n'
    LOG_FOUT.write(line)
    # Flush immediately so the log file stays current during long runs.
    LOG_FOUT.flush()
    print(out_str)


def get_learning_rate(batch):
    """Return the staircase-decayed learning rate tensor for step ``batch``.

    The decay is driven by the number of samples seen so far
    (``batch * BATCH_SIZE``) and the result is never allowed below 1e-5.
    """
    samples_seen = batch * BATCH_SIZE  # Current index into the dataset.
    decayed = tf.train.exponential_decay(
        BASE_LEARNING_RATE,  # Base learning rate.
        samples_seen,
        DECAY_STEP,          # Decay step.
        DECAY_RATE,          # Decay rate.
        staircase=True)
    # Clip the learning rate from below.
    return tf.maximum(decayed, 0.00001)

def get_bn_decay(batch):
    """Return the batch-norm decay tensor for step ``batch``.

    A momentum value starts at ``BN_INIT_DECAY`` and decays in staircase
    fashion with the number of samples seen; the returned decay is
    ``1 - momentum`` capped at ``BN_DECAY_CLIP``.
    """
    samples_seen = batch*BATCH_SIZE
    bn_momentum = tf.train.exponential_decay(
        BN_INIT_DECAY,
        samples_seen,
        BN_DECAY_DECAY_STEP,
        BN_DECAY_DECAY_RATE,
        staircase=True)
    return tf.minimum(BN_DECAY_CLIP, 1 - bn_momentum)

def train():
    """Build the training graph, then alternate training and evaluation.

    Runs ``MAX_EPOCH`` epochs; each epoch calls ``train_one_epoch`` followed
    by ``eval_one_epoch``. A checkpoint is written to ``LOG_DIR`` every 10th
    epoch (epoch 0, 10, 20, ...).

    Raises:
        ValueError: If ``OPTIMIZER`` is neither ``'momentum'`` nor ``'adam'``.
    """
    # Build everything inside a fresh default graph.
    with tf.Graph().as_default():

        # Place the model on the GPU selected by --gpu.
        with tf.device('/gpu:'+str(GPU_INDEX)):

            # Placeholders for the point clouds and their labels.
            pointclouds_pl, labels_pl = MODEL.placeholder_inputs(BATCH_SIZE, NUM_POINT)

            # Scalar boolean placeholder selecting training vs. evaluation.
            is_training_pl = tf.placeholder(tf.bool, shape=())
            print(is_training_pl)

            # Note the global_step=batch parameter to minimize.
            # That tells the optimizer to helpfully increment the 'batch' parameter for you every time it trains.
            # 'batch' therefore acts as the global step counter that drives
            # both the batch-norm decay and the learning-rate schedules.
            batch = tf.Variable(0)
            bn_decay = get_bn_decay(batch)

            # Record the batch-norm decay for TensorBoard.
            tf.summary.scalar('bn_decay', bn_decay)

            # Get model and loss
            pred, end_points = MODEL.get_model(pointclouds_pl, is_training_pl, bn_decay=bn_decay)
            loss = MODEL.get_loss(pred, labels_pl, end_points)
            tf.summary.scalar('loss', loss)

            # A prediction is correct when the index of the highest score in
            # its row equals the true label.
            correct = tf.equal(tf.argmax(pred, 1), tf.to_int64(labels_pl))
            accuracy = tf.reduce_sum(tf.cast(correct, tf.float32)) / float(BATCH_SIZE)
            tf.summary.scalar('accuracy', accuracy)

            # Get training operator
            learning_rate = get_learning_rate(batch)
            tf.summary.scalar('learning_rate', learning_rate)

            # Select the optimizer. An unknown name used to leave
            # 'optimizer' unbound and fail later with an obscure error, so
            # reject it explicitly.
            if OPTIMIZER == 'momentum':
                optimizer = tf.train.MomentumOptimizer(learning_rate, momentum=MOMENTUM)
            elif OPTIMIZER == 'adam':
                optimizer = tf.train.AdamOptimizer(learning_rate)
            else:
                raise ValueError(
                    "Unknown optimizer %r: expected 'adam' or 'momentum'" % (OPTIMIZER,))

            # One training step; also increments 'batch'.
            train_op = optimizer.minimize(loss, global_step=batch)

            # Add ops to save and restore all the variables.
            saver = tf.train.Saver()

        # Create a session
        config = tf.ConfigProto()
        # Grow GPU memory usage on demand.
        config.gpu_options.allow_growth = True
        # Allow soft device placement.
        config.allow_soft_placement = True
        # Do not log device placement.
        config.log_device_placement = False
        sess = tf.Session(config=config)

        # Add summary writers
        #merged = tf.merge_all_summaries()
        # Single op that evaluates every summary registered above.
        merged = tf.summary.merge_all()

        # Event-file writers for the train and test runs; the train writer
        # also stores the graph.
        train_writer = tf.summary.FileWriter(os.path.join(LOG_DIR, 'train'),
                                  sess.graph)
        test_writer = tf.summary.FileWriter(os.path.join(LOG_DIR, 'test'))

        # Init variables
        init = tf.global_variables_initializer()
        # To fix the bug introduced in TF 0.12.1 as in
        # http://stackoverflow.com/questions/41543774/invalidargumenterror-for-tensor-bool-tensorflow-0-12-1
        #sess.run(init)
        sess.run(init, {is_training_pl: True})

        # Tensors and ops needed by the per-epoch helpers.
        ops = {'pointclouds_pl': pointclouds_pl,
               'labels_pl': labels_pl,
               'is_training_pl': is_training_pl,
               'pred': pred,
               'loss': loss,
               'train_op': train_op,
               'merged': merged,
               'step': batch}

        # Main loop: one training pass and one evaluation pass per epoch.
        for epoch in range(MAX_EPOCH):
            log_string('**** EPOCH %03d ****' % (epoch))
            sys.stdout.flush()

            train_one_epoch(sess, ops, train_writer)
            eval_one_epoch(sess, ops, test_writer)

            # Save the variables to disk.
            if epoch % 10 == 0:
                save_path = saver.save(sess, os.path.join(LOG_DIR, "model.ckpt"))
                log_string("Model saved in file: %s" % save_path)



def train_one_epoch(sess, ops, train_writer):
    """Run one training pass over every training file.

    Args:
        sess: Session in which the graph is run.
        ops: dict mapping from string to tf ops.
        train_writer: Summary writer that receives one summary per batch.

    For each file, the mean loss and accuracy over its full batches are
    logged. Samples beyond the last full batch of a file are skipped.
    """
    is_training = True

    # Shuffle train files
    train_file_idxs = np.arange(0, len(TRAIN_FILES))
    np.random.shuffle(train_file_idxs)

    for fn, file_idx in enumerate(train_file_idxs):
        log_string('----' + str(fn) + '-----')
        current_data, current_label = provider.loadDataFile(TRAIN_FILES[file_idx])
        # Keep only the first NUM_POINT points of every cloud.
        current_data = current_data[:,0:NUM_POINT,:]
        current_data, current_label, _ = provider.shuffle_data(current_data, np.squeeze(current_label))
        current_label = np.squeeze(current_label)

        num_batches = current_data.shape[0] // BATCH_SIZE

        total_correct = 0
        total_seen = 0
        loss_sum = 0

        for batch_idx in range(num_batches):
            start_idx = batch_idx * BATCH_SIZE
            end_idx = (batch_idx+1) * BATCH_SIZE
            batch_labels = current_label[start_idx:end_idx]

            # Augment batched point clouds by rotation and jittering
            rotated_data = provider.rotate_point_cloud(current_data[start_idx:end_idx, :, :])
            jittered_data = provider.jitter_point_cloud(rotated_data)

            feed_dict = {ops['pointclouds_pl']: jittered_data,
                         ops['labels_pl']: batch_labels,
                         ops['is_training_pl']: is_training,}
            fetches = [ops['merged'], ops['step'],
                       ops['train_op'], ops['loss'], ops['pred']]
            summary, step, _, loss_val, pred_val = sess.run(fetches, feed_dict=feed_dict)
            train_writer.add_summary(summary, step)

            # Predicted class = index of the highest score in each row.
            predicted = np.argmax(pred_val, 1)
            total_correct += np.sum(predicted == batch_labels)
            total_seen += BATCH_SIZE
            loss_sum += loss_val

        log_string('mean loss: %f' % (loss_sum / float(num_batches)))
        log_string('accuracy: %f' % (total_correct / float(total_seen)))

        
def eval_one_epoch(sess, ops, test_writer):
    """Evaluate the current model on every test file and log the metrics.

    Args:
        sess: Session in which the graph is run.
        ops: dict mapping from string to tf ops.
        test_writer: Summary writer for the test run.
            NOTE(review): it is never written to here; the merged summary is
            fetched but discarded.

    Logs the mean loss, the overall accuracy and the mean per-class accuracy
    over all full batches. Samples beyond the last full batch of a file are
    skipped.
    """
    is_training = False
    total_correct = 0
    total_seen = 0
    loss_sum = 0
    total_seen_class = [0 for _ in range(NUM_CLASSES)]
    total_correct_class = [0 for _ in range(NUM_CLASSES)]

    for fn in range(len(TEST_FILES)):
        log_string('----' + str(fn) + '-----')
        current_data, current_label = provider.loadDataFile(TEST_FILES[fn])
        # Keep only the first NUM_POINT points of every cloud.
        current_data = current_data[:,0:NUM_POINT,:]
        current_label = np.squeeze(current_label)

        file_size = current_data.shape[0]
        num_batches = file_size // BATCH_SIZE

        for batch_idx in range(num_batches):
            start_idx = batch_idx * BATCH_SIZE
            end_idx = (batch_idx+1) * BATCH_SIZE

            feed_dict = {ops['pointclouds_pl']: current_data[start_idx:end_idx, :, :],
                         ops['labels_pl']: current_label[start_idx:end_idx],
                         ops['is_training_pl']: is_training}
            summary, step, loss_val, pred_val = sess.run([ops['merged'], ops['step'],
                ops['loss'], ops['pred']], feed_dict=feed_dict)
            pred_val = np.argmax(pred_val, 1)
            correct = np.sum(pred_val == current_label[start_idx:end_idx])
            total_correct += correct
            total_seen += BATCH_SIZE
            # loss_val is a batch mean; weight it by the batch size so the
            # final division by total_seen gives a per-sample mean.
            loss_sum += (loss_val*BATCH_SIZE)
            # Per-class counters for the class-averaged accuracy.
            for i in range(start_idx, end_idx):
                l = current_label[i]
                total_seen_class[l] += 1
                total_correct_class[l] += (pred_val[i-start_idx] == l)

    log_string('eval mean loss: %f' % (loss_sum / float(total_seen)))
    log_string('eval accuracy: %f'% (total_correct / float(total_seen)))
    # The np.float alias was removed in NumPy 1.24; builtin float is the
    # same dtype and works on every NumPy version.
    log_string('eval avg class acc: %f' % (np.mean(np.array(total_correct_class)/np.array(total_seen_class,dtype=float))))
         


if __name__ == "__main__":
    # Close the log file even when training aborts with an exception.
    try:
        train()
    finally:
        LOG_FOUT.close()

  • 1
    点赞
  • 13
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值