1 引言
PointNet论文解读已在前文中总结,本文简要讲述PointNet的复现过程,并对源码及其架构加以总结。
2 论文复现
2.1 环境配置
源码:PointNet
组件版本:tensorflowGPU1.13.1+cuda10.0.1+cudnn7.4.2
配置流程:TensorflowGPU安装
2.2 运行
1 从Anaconda Prompt中激活TensorflowGPU环境
2 进入源码根目录之下,运行train.py
3 在dump文件夹之下查看错误训练结果
上述错误图片命名格式为错误序号_预测值_pred_真实值
4 进入part_seg文件夹,运行sh download_data.sh
下载ShapeNetPart数据以及HDF5数据
5 运行python train.py进行训练,结束后运行test.py进行验证。
6 进入test_result查看部分分割结果
每一组分割目标以序号区别,每组中,log文件为分割结果描述,diff为分割有误的点,gt为groundtruth真实值,pred为预测值。使用pointcloud compare软件打开这些obj文件。
7 同样的方法,进入sem_seg,下载数据并进行训练与测试,在log文件夹之下的dump中查看语义分割的结果。
3 源码解读
主要是对models文件夹中的三个文件进行解读。
3.1 pointnet_cls.py 点云分类
import tensorflow as tf
import numpy as np
import math
import sys
import os
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(BASE_DIR)
sys.path.append(os.path.join(BASE_DIR, '../utils'))
import tf_util
from transform_nets import input_transform_net, feature_transform_net
def placeholder_inputs(batch_size, num_point):
    """Create the input placeholders for the classification network.

    Args:
        batch_size: number of point clouds per batch.
        num_point: number of points in each cloud.

    Returns:
        (pointclouds_pl, labels_pl): a float32 placeholder of shape
        (batch_size, num_point, 3) and an int32 placeholder of shape
        (batch_size,).
    """
    pointclouds_pl = tf.placeholder(tf.float32, shape=(batch_size, num_point, 3))
    # `(batch_size)` is just a parenthesised scalar, not a tuple; spell out
    # the rank-1 shape explicitly.
    labels_pl = tf.placeholder(tf.int32, shape=(batch_size,))
    return pointclouds_pl, labels_pl
def get_model(point_cloud, is_training, bn_decay=None):
    """Classification PointNet, input is BxNx3, output Bx40.

    Args:
        point_cloud: BxNx3 point cloud tensor with a static batch size and
            point count (both are read from the static shape).
        is_training: forwarded to every batch-norm and dropout layer.
        bn_decay: forwarded to every batch-normalised layer.

    Returns:
        (net, end_points): the output of the final 40-unit fully connected
        layer, and a dict whose 'transform' entry holds the feature
        transform produced by the second T-Net (used by get_loss).
    """
    batch_size = point_cloud.get_shape()[0].value
    num_point = point_cloud.get_shape()[1].value
    end_points = {}

    # Keyword arguments shared by every batch-normalised convolution below.
    conv_args = dict(padding='VALID', stride=[1, 1], bn=True,
                     is_training=is_training, bn_decay=bn_decay)

    # First T-Net: predict a per-cloud transform and apply it to the points.
    with tf.variable_scope('transform_net1'):
        transform = input_transform_net(point_cloud, is_training, bn_decay, K=3)
    point_cloud_transformed = tf.matmul(point_cloud, transform)
    # Add a trailing channel axis so the points can be fed to conv2d.
    input_image = tf.expand_dims(point_cloud_transformed, -1)

    # Per-point MLP (64, 64); the [1,3] kernel spans the three coordinates.
    net = tf_util.conv2d(input_image, 64, [1, 3], scope='conv1', **conv_args)
    net = tf_util.conv2d(net, 64, [1, 1], scope='conv2', **conv_args)

    # Second T-Net: predict a 64x64 feature transform. It is stored in
    # end_points so that the loss can regularise it.
    with tf.variable_scope('transform_net2'):
        transform = feature_transform_net(net, is_training, bn_decay, K=64)
    end_points['transform'] = transform
    # Drop the singleton axis for the batched matmul, then restore it.
    net_transformed = tf.matmul(tf.squeeze(net, axis=[2]), transform)
    net_transformed = tf.expand_dims(net_transformed, [2])

    # Per-point MLP (64, 128, 1024).
    net = net_transformed
    for scope, channels in (('conv3', 64), ('conv4', 128), ('conv5', 1024)):
        net = tf_util.conv2d(net, channels, [1, 1], scope=scope, **conv_args)

    # Symmetric function: max pooling over all points of a cloud.
    net = tf_util.max_pool2d(net, [num_point, 1],
                             padding='VALID', scope='maxpool')
    # Flatten the pooled global feature to one row per cloud.
    net = tf.reshape(net, [batch_size, -1])

    # Classifier head: FC 512 -> dropout -> FC 256 -> dropout -> FC 40.
    for fc_scope, dp_scope, units in (('fc1', 'dp1', 512), ('fc2', 'dp2', 256)):
        net = tf_util.fully_connected(net, units, bn=True, is_training=is_training,
                                      scope=fc_scope, bn_decay=bn_decay)
        net = tf_util.dropout(net, keep_prob=0.7, is_training=is_training,
                              scope=dp_scope)
    net = tf_util.fully_connected(net, 40, activation_fn=None, scope='fc3')

    return net, end_points
def get_loss(pred, label, end_points, reg_weight=0.001):
    """Classification loss plus a regulariser on the feature transform.

    Args:
        pred: B*NUM_CLASSES logits.
        label: B integer class labels.
        end_points: dict containing the BxKxK feature transform under
            'transform'.
        reg_weight: weight of the transform regularisation term.

    Returns:
        Scalar tensor: mean cross entropy + reg_weight * transform loss.
    """
    # Softmax cross entropy against the sparse labels, averaged over the batch.
    per_example_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=pred, labels=label)
    classify_loss = tf.reduce_mean(per_example_loss)
    tf.summary.scalar('classify loss', classify_loss)

    # Enforce the transformation as orthogonal matrix: penalise the L2
    # distance between A * A^T and the identity.
    transform = end_points['transform']  # BxKxK
    K = transform.get_shape()[1].value
    gram = tf.matmul(transform, tf.transpose(transform, perm=[0, 2, 1]))
    identity = tf.constant(np.eye(K), dtype=tf.float32)
    mat_diff_loss = tf.nn.l2_loss(gram - identity)
    tf.summary.scalar('mat loss', mat_diff_loss)

    # Total loss = classification term + weighted transform term.
    return classify_loss + mat_diff_loss * reg_weight
if __name__=='__main__':
    # Smoke test: build the graph once on an all-zero batch and print the
    # (logits, end_points) pair returned by get_model.
    with tf.Graph().as_default():
        dummy_batch = tf.zeros((32, 1024, 3))
        print(get_model(dummy_batch, tf.constant(True)))
3.2 pointnet_seg.py 分割
import tensorflow as tf
import numpy as np
import math
import sys
import os
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(BASE_DIR)
sys.path.append(os.path.join(BASE_DIR, '../utils'))
import tf_util
from transform_nets import input_transform_net, feature_transform_net
def placeholder_inputs(batch_size, num_point):
    """Create the input placeholders for the segmentation network.

    Returns:
        (pointclouds_pl, labels_pl): a float32 placeholder of shape
        (batch_size, num_point, 3) and an int32 placeholder of shape
        (batch_size, num_point), i.e. one label per point.
    """
    cloud_shape = (batch_size, num_point, 3)
    label_shape = (batch_size, num_point)
    pointclouds_pl = tf.placeholder(tf.float32, shape=cloud_shape)
    labels_pl = tf.placeholder(tf.int32, shape=label_shape)
    return pointclouds_pl, labels_pl
def get_model(point_cloud, is_training, bn_decay=None):
    """Segmentation PointNet, input is BxNx3, output BxNx50.

    Args:
        point_cloud: BxNx3 point cloud tensor with a static point count
            (read from the static shape).
        is_training: forwarded to every batch-normalised layer.
        bn_decay: forwarded to every batch-normalised layer.

    Returns:
        (net, end_points): per-point outputs of the final 50-channel
        convolution with the singleton axis squeezed out, and a dict whose
        'transform' entry holds the feature transform produced by the
        second T-Net (used by get_loss).
    """
    num_point = point_cloud.get_shape()[1].value
    end_points = {}

    # Keyword arguments shared by every batch-normalised convolution below.
    conv_args = dict(padding='VALID', stride=[1, 1], bn=True,
                     is_training=is_training, bn_decay=bn_decay)

    # First T-Net: predict a per-cloud transform and apply it to the points.
    with tf.variable_scope('transform_net1'):
        transform = input_transform_net(point_cloud, is_training, bn_decay, K=3)
    point_cloud_transformed = tf.matmul(point_cloud, transform)
    # Add a trailing channel axis so the points can be fed to conv2d.
    input_image = tf.expand_dims(point_cloud_transformed, -1)

    # Per-point MLP (64, 64); the [1,3] kernel spans the three coordinates.
    net = tf_util.conv2d(input_image, 64, [1, 3], scope='conv1', **conv_args)
    net = tf_util.conv2d(net, 64, [1, 1], scope='conv2', **conv_args)

    # Second T-Net: predict a 64x64 feature transform. It is stored in
    # end_points so that the loss can regularise it.
    with tf.variable_scope('transform_net2'):
        transform = feature_transform_net(net, is_training, bn_decay, K=64)
    end_points['transform'] = transform
    # Drop the singleton axis for the batched matmul, then restore it.
    net_transformed = tf.matmul(tf.squeeze(net, axis=[2]), transform)
    # Per-point (local) features, reused below for the concatenation.
    point_feat = tf.expand_dims(net_transformed, [2])
    print(point_feat)  # debug output kept from the original

    # Per-point MLP (64, 128, 1024).
    net = tf_util.conv2d(point_feat, 64, [1, 1], scope='conv3', **conv_args)
    net = tf_util.conv2d(net, 128, [1, 1], scope='conv4', **conv_args)
    net = tf_util.conv2d(net, 1024, [1, 1], scope='conv5', **conv_args)

    # Symmetric function: max pooling over all points gives the global feature.
    global_feat = tf_util.max_pool2d(net, [num_point, 1],
                                     padding='VALID', scope='maxpool')
    print(global_feat)  # debug output kept from the original

    # Repeat the global feature for every point and append it to the
    # per-point features along the channel axis (64 + 1024 = 1088 channels).
    global_feat_expand = tf.tile(global_feat, [1, num_point, 1, 1])
    # TF >= 1.0 expects tf.concat(values, axis); the old (axis, values)
    # argument order fails on the TF 1.x API used by the rest of this code.
    concat_feat = tf.concat([point_feat, global_feat_expand], axis=3)
    print(concat_feat)  # debug output kept from the original

    # Segmentation head: per-point MLP (512, 256, 128, 128) followed by a
    # 50-channel output layer without activation or batch norm.
    net = tf_util.conv2d(concat_feat, 512, [1, 1], scope='conv6', **conv_args)
    net = tf_util.conv2d(net, 256, [1, 1], scope='conv7', **conv_args)
    net = tf_util.conv2d(net, 128, [1, 1], scope='conv8', **conv_args)
    net = tf_util.conv2d(net, 128, [1, 1], scope='conv9', **conv_args)
    net = tf_util.conv2d(net, 50, [1, 1],
                         padding='VALID', stride=[1, 1], activation_fn=None,
                         scope='conv10')
    # Remove the singleton axis.
    net = tf.squeeze(net, [2])  # BxNxC

    return net, end_points
def get_loss(pred, label, end_points, reg_weight=0.001):
    """Per-point classification loss plus a regulariser on the feature transform.

    Args:
        pred: BxNxC logits.
        label: BxN integer labels.
        end_points: dict containing the BxKxK feature transform under
            'transform'.
        reg_weight: weight of the transform regularisation term.

    Returns:
        Scalar tensor: mean cross entropy + reg_weight * transform loss.
    """
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=pred, labels=label)
    classify_loss = tf.reduce_mean(loss)
    # tf.scalar_summary was removed in TF 1.0; tf.summary.scalar is the
    # replacement and is what the classification model already uses.
    tf.summary.scalar('classify loss', classify_loss)

    # Enforce the transformation as orthogonal matrix: penalise the L2
    # distance between A * A^T and the identity.
    transform = end_points['transform']  # BxKxK
    K = transform.get_shape()[1].value
    mat_diff = tf.matmul(transform, tf.transpose(transform, perm=[0, 2, 1]))
    mat_diff -= tf.constant(np.eye(K), dtype=tf.float32)
    mat_diff_loss = tf.nn.l2_loss(mat_diff)
    tf.summary.scalar('mat_loss', mat_diff_loss)

    return classify_loss + mat_diff_loss * reg_weight
if __name__=='__main__':
    # Smoke test: build the graph once on an all-zero batch and print the
    # (net, end_points) pair returned by get_model.
    with tf.Graph().as_default():
        dummy_batch = tf.zeros((32, 1024, 3))
        print(get_model(dummy_batch, tf.constant(True)))
3.3 transform_nets.py 点云以及特征转换
import tensorflow as tf
import numpy as np
import sys
import os
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(BASE_DIR)
sys.path.append(os.path.join(BASE_DIR, '../utils'))
import tf_util
def input_transform_net(point_cloud, is_training, bn_decay=None, K=3):
    """Input (XYZ) Transform Net, input is BxNx3 gray image.

    Args:
        point_cloud: BxNx3 tensor with static batch size and point count
            (both are read from the static shape).
        is_training: forwarded to every batch-normalised layer.
        bn_decay: forwarded to every batch-normalised layer.
        K: output width of the transform; must be 3.

    Returns:
        Transformation matrix of size 3xK for each cloud, i.e. a tensor
        reshaped to [batch_size, 3, K].
    """
    batch_size = point_cloud.get_shape()[0].value
    num_point = point_cloud.get_shape()[1].value

    # Keyword arguments shared by every convolution below.
    conv_args = dict(padding='VALID', stride=[1, 1], bn=True,
                     is_training=is_training, bn_decay=bn_decay)

    # Add a trailing channel axis so the points can be fed to conv2d.
    input_image = tf.expand_dims(point_cloud, -1)
    # Per-point MLP (64, 128, 1024); the [1,3] kernel spans the coordinates.
    net = tf_util.conv2d(input_image, 64, [1, 3], scope='tconv1', **conv_args)
    net = tf_util.conv2d(net, 128, [1, 1], scope='tconv2', **conv_args)
    net = tf_util.conv2d(net, 1024, [1, 1], scope='tconv3', **conv_args)
    # Max pool over all points of a cloud, then flatten to one row per cloud.
    net = tf_util.max_pool2d(net, [num_point, 1],
                             padding='VALID', scope='tmaxpool')
    net = tf.reshape(net, [batch_size, -1])

    net = tf_util.fully_connected(net, 512, bn=True, is_training=is_training,
                                  scope='tfc1', bn_decay=bn_decay)
    net = tf_util.fully_connected(net, 256, bn=True, is_training=is_training,
                                  scope='tfc2', bn_decay=bn_decay)

    # Final linear layer: net * weights + biases -> [B, 3*K].
    with tf.variable_scope('transform_XYZ'):
        # The identity offset below is hard-coded for a 3x3 matrix.
        assert K == 3
        weights = tf.get_variable('weights', [256, 3*K],
                                  initializer=tf.constant_initializer(0.0),
                                  dtype=tf.float32)
        biases = tf.get_variable('biases', [3*K],
                                 initializer=tf.constant_initializer(0.0),
                                 dtype=tf.float32)
        # Zero-initialised weights plus an identity offset make the predicted
        # transform start out as the identity matrix. Use an explicit add
        # rather than the deprecated in-place `+=` on a tf.Variable; this
        # builds the same add op.
        biases = biases + tf.constant([1,0,0,0,1,0,0,0,1], dtype=tf.float32)
        transform = tf.matmul(net, weights)
        transform = tf.nn.bias_add(transform, biases)

    transform = tf.reshape(transform, [batch_size, 3, K])
    return transform
def feature_transform_net(inputs, is_training, bn_decay=None, K=64):
    """Feature Transform Net, input is BxNx1xK.

    Args:
        inputs: BxNx1xK per-point feature tensor with static batch size and
            point count (both are read from the static shape).
        is_training: forwarded to every batch-normalised layer.
        bn_decay: forwarded to every batch-normalised layer.
        K: side length of the predicted transform.

    Returns:
        Transformation matrix of size KxK for each cloud, i.e. a tensor
        reshaped to [batch_size, K, K].
    """
    batch_size = inputs.get_shape()[0].value
    num_point = inputs.get_shape()[1].value

    # Keyword arguments shared by every convolution below.
    conv_args = dict(padding='VALID', stride=[1, 1], bn=True,
                     is_training=is_training, bn_decay=bn_decay)

    # Per-point MLP (64, 128, 1024) built from 1x1 convolutions; the third
    # layer takes the 128-channel output of the second.
    net = tf_util.conv2d(inputs, 64, [1, 1], scope='tconv1', **conv_args)
    net = tf_util.conv2d(net, 128, [1, 1], scope='tconv2', **conv_args)
    net = tf_util.conv2d(net, 1024, [1, 1], scope='tconv3', **conv_args)
    # Max pool over all points of a cloud, then flatten to one row per cloud.
    net = tf_util.max_pool2d(net, [num_point, 1],
                             padding='VALID', scope='tmaxpool')
    net = tf.reshape(net, [batch_size, -1])

    net = tf_util.fully_connected(net, 512, bn=True, is_training=is_training,
                                  scope='tfc1', bn_decay=bn_decay)
    net = tf_util.fully_connected(net, 256, bn=True, is_training=is_training,
                                  scope='tfc2', bn_decay=bn_decay)

    # Final linear layer: net * weights + biases -> [B, K*K].
    with tf.variable_scope('transform_feat'):
        weights = tf.get_variable('weights', [256, K*K],
                                  initializer=tf.constant_initializer(0.0),
                                  dtype=tf.float32)
        biases = tf.get_variable('biases', [K*K],
                                 initializer=tf.constant_initializer(0.0),
                                 dtype=tf.float32)
        # Zero-initialised weights plus a flattened identity offset make the
        # predicted transform start out as the identity matrix. Use an
        # explicit add rather than the deprecated in-place `+=` on a
        # tf.Variable; this builds the same add op.
        biases = biases + tf.constant(np.eye(K).flatten(), dtype=tf.float32)
        transform = tf.matmul(net, weights)
        transform = tf.nn.bias_add(transform, biases)

    transform = tf.reshape(transform, [batch_size, K, K])
    return transform
3.4 主目录下的train.py
import argparse
import math
import h5py
import numpy as np
import tensorflow as tf
import socket
import importlib
import os
import sys
# 通过文件的绝对路径 获取该文件所在目录的名称
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
# 将BASE_DIR永久添加至搜索路径集
sys.path.append(BASE_DIR)
# 通过将models与utils和BASE_DIR拼接 获取这两个目录 并将其永久添加至搜索路径集
sys.path.append(os.path.join(BASE_DIR, 'models'))
sys.path.append(os.path.join(BASE_DIR, 'utils'))
import provider
import tf_util
# Command-line interface.
parser = argparse.ArgumentParser()
parser.add_argument('--gpu', type=int, default=0, help='GPU to use [default: GPU 0]')
parser.add_argument('--model', default='pointnet_cls', help='Model name: pointnet_cls or pointnet_cls_basic [default: pointnet_cls]')
parser.add_argument('--log_dir', default='log', help='Log dir [default: log]')
parser.add_argument('--num_point', type=int, default=1024, help='Point Number [256/512/1024/2048] [default: 1024]')
parser.add_argument('--max_epoch', type=int, default=250, help='Epoch to run [default: 250]')
parser.add_argument('--batch_size', type=int, default=32, help='Batch Size during training [default: 32]')
parser.add_argument('--learning_rate', type=float, default=0.001, help='Initial learning rate [default: 0.001]')
# The help text used to be a copy of the --learning_rate description.
parser.add_argument('--momentum', type=float, default=0.9, help='Momentum for the momentum optimizer [default: 0.9]')
parser.add_argument('--optimizer', default='adam', help='adam or momentum [default: adam]')
parser.add_argument('--decay_step', type=int, default=200000, help='Decay step for lr decay [default: 200000]')
# The help text used to advertise 0.8 although the actual default is 0.7.
parser.add_argument('--decay_rate', type=float, default=0.7, help='Decay rate for lr decay [default: 0.7]')
# FLAGS is the namespace holding the parsed arguments as attributes.
FLAGS = parser.parse_args()

# Unpack the parsed arguments into module-level constants.
BATCH_SIZE = FLAGS.batch_size
NUM_POINT = FLAGS.num_point
MAX_EPOCH = FLAGS.max_epoch
BASE_LEARNING_RATE = FLAGS.learning_rate
GPU_INDEX = FLAGS.gpu
MOMENTUM = FLAGS.momentum
OPTIMIZER = FLAGS.optimizer
DECAY_STEP = FLAGS.decay_step
DECAY_RATE = FLAGS.decay_rate

# Import the network module by name (found via the 'models' directory that
# was appended to sys.path above).
MODEL = importlib.import_module(FLAGS.model) # import network module
# Path of the model definition file; only used for the backup copy below.
MODEL_FILE = os.path.join(BASE_DIR, 'models', FLAGS.model+'.py')
LOG_DIR = FLAGS.log_dir
# Create the log directory if it does not exist yet.
if not os.path.exists(LOG_DIR): os.mkdir(LOG_DIR)
# Back up the model definition and this training script into the log dir.
# NOTE: relies on a `cp` command being available in the shell.
os.system('cp %s %s' % (MODEL_FILE, LOG_DIR)) # bkp of model def
os.system('cp train.py %s' % (LOG_DIR)) # bkp of train procedure
# Open the training log and record the parsed flags as its first line.
LOG_FOUT = open(os.path.join(LOG_DIR, 'log_train.txt'), 'w')
LOG_FOUT.write(str(FLAGS)+'\n')

# Maximum number of points per cloud and number of classes.
MAX_NUM_POINT = 2048
NUM_CLASSES = 40

# Parameters of the batch-norm decay schedule (consumed by get_bn_decay).
BN_INIT_DECAY = 0.5
BN_DECAY_DECAY_RATE = 0.5
BN_DECAY_DECAY_STEP = float(DECAY_STEP)
BN_DECAY_CLIP = 0.99

# Host name of the machine running the training.
HOSTNAME = socket.gethostname()

# ModelNet40 official train/test split; provider is the project's data
# helper module.
TRAIN_FILES = provider.getDataFiles(
    os.path.join(BASE_DIR, 'data/modelnet40_ply_hdf5_2048/train_files.txt'))
TEST_FILES = provider.getDataFiles(
    os.path.join(BASE_DIR, 'data/modelnet40_ply_hdf5_2048/test_files.txt'))
#
def log_string(out_str):
    """Append out_str as one line to the training log and echo it to stdout.

    The log file is flushed after every call.
    """
    line = out_str + '\n'
    LOG_FOUT.write(line)
    LOG_FOUT.flush()
    print(out_str)
def get_learning_rate(batch):
    """Return the staircase-decayed learning rate tensor for step `batch`.

    The rate starts at BASE_LEARNING_RATE and is multiplied by DECAY_RATE
    every DECAY_STEP samples (batch * BATCH_SIZE counts samples seen); it
    never drops below 0.00001.
    """
    decayed = tf.train.exponential_decay(
        learning_rate=BASE_LEARNING_RATE,   # Base learning rate.
        global_step=batch * BATCH_SIZE,     # Current index into the dataset.
        decay_steps=DECAY_STEP,             # Decay step.
        decay_rate=DECAY_RATE,              # Decay rate.
        staircase=True)
    # CLIP THE LEARNING RATE!
    return tf.maximum(decayed, 0.00001)
def get_bn_decay(batch):
    """Return the batch-norm decay tensor for step `batch`.

    A batch-norm momentum starts at BN_INIT_DECAY and is multiplied by
    BN_DECAY_DECAY_RATE every BN_DECAY_DECAY_STEP samples; the returned
    decay is 1 - momentum, capped at BN_DECAY_CLIP.
    """
    momentum = tf.train.exponential_decay(
        learning_rate=BN_INIT_DECAY,
        global_step=batch*BATCH_SIZE,
        decay_steps=BN_DECAY_DECAY_STEP,
        decay_rate=BN_DECAY_DECAY_RATE,
        staircase=True)
    return tf.minimum(BN_DECAY_CLIP, 1 - momentum)
def train():
    """Build the training graph and run MAX_EPOCH train/eval epochs.

    Each epoch calls train_one_epoch followed by eval_one_epoch; a checkpoint
    is written to LOG_DIR/model.ckpt every 10th epoch (starting with epoch 0).

    Raises:
        ValueError: if OPTIMIZER is neither 'momentum' nor 'adam'.
    """
    with tf.Graph().as_default():
        # Build the model on the selected GPU.
        with tf.device('/gpu:'+str(GPU_INDEX)):
            pointclouds_pl, labels_pl = MODEL.placeholder_inputs(BATCH_SIZE, NUM_POINT)
            # Scalar boolean switching between training and evaluation mode.
            is_training_pl = tf.placeholder(tf.bool, shape=())
            print(is_training_pl)

            # Note the global_step=batch parameter to minimize.
            # That tells the optimizer to helpfully increment the 'batch' parameter for you every time it trains.
            # `batch` is therefore the global step counter that drives both
            # the batch-norm decay and the learning-rate schedules.
            batch = tf.Variable(0)
            bn_decay = get_bn_decay(batch)
            tf.summary.scalar('bn_decay', bn_decay)

            # Get model and loss
            pred, end_points = MODEL.get_model(pointclouds_pl, is_training_pl, bn_decay=bn_decay)
            loss = MODEL.get_loss(pred, labels_pl, end_points)
            tf.summary.scalar('loss', loss)

            # A prediction is correct when the arg-max class equals the label.
            correct = tf.equal(tf.argmax(pred, 1), tf.to_int64(labels_pl))
            accuracy = tf.reduce_sum(tf.cast(correct, tf.float32)) / float(BATCH_SIZE)
            tf.summary.scalar('accuracy', accuracy)

            # Get training operator
            learning_rate = get_learning_rate(batch)
            tf.summary.scalar('learning_rate', learning_rate)
            if OPTIMIZER == 'momentum':
                optimizer = tf.train.MomentumOptimizer(learning_rate, momentum=MOMENTUM)
            elif OPTIMIZER == 'adam':
                optimizer = tf.train.AdamOptimizer(learning_rate)
            else:
                # Fail with a clear message instead of an unbound-variable
                # error at optimizer.minimize below.
                raise ValueError(
                    "Unknown optimizer %r: expected 'adam' or 'momentum'" % (OPTIMIZER,))
            train_op = optimizer.minimize(loss, global_step=batch)

            # Add ops to save and restore all the variables.
            saver = tf.train.Saver()

        # Create a session
        config = tf.ConfigProto()
        # Grow GPU memory usage on demand instead of reserving it all upfront.
        config.gpu_options.allow_growth = True
        # Fall back to another device when an op cannot run on the requested one.
        config.allow_soft_placement = True
        # Do not log the device each op is placed on.
        config.log_device_placement = False
        sess = tf.Session(config=config)

        # Add summary writers
        #merged = tf.merge_all_summaries()
        merged = tf.summary.merge_all()
        train_writer = tf.summary.FileWriter(os.path.join(LOG_DIR, 'train'),
                                             sess.graph)
        test_writer = tf.summary.FileWriter(os.path.join(LOG_DIR, 'test'))

        # Init variables
        init = tf.global_variables_initializer()
        # To fix the bug introduced in TF 0.12.1 as in
        # http://stackoverflow.com/questions/41543774/invalidargumenterror-for-tensor-bool-tensorflow-0-12-1
        #sess.run(init)
        sess.run(init, {is_training_pl: True})

        # Tensors and ops handed to the per-epoch helpers.
        ops = {'pointclouds_pl': pointclouds_pl,
               'labels_pl': labels_pl,
               'is_training_pl': is_training_pl,
               'pred': pred,
               'loss': loss,
               'train_op': train_op,
               'merged': merged,
               'step': batch}

        for epoch in range(MAX_EPOCH):
            log_string('**** EPOCH %03d ****' % (epoch))
            sys.stdout.flush()

            train_one_epoch(sess, ops, train_writer)
            eval_one_epoch(sess, ops, test_writer)

            # Save the variables to disk.
            if epoch % 10 == 0:
                save_path = saver.save(sess, os.path.join(LOG_DIR, "model.ckpt"))
                log_string("Model saved in file: %s" % save_path)
def train_one_epoch(sess, ops, train_writer):
    """Train for one pass over all training files.

    Args:
        sess: session used to run the graph.
        ops: dict mapping from string to tf ops.
        train_writer: summary writer that receives the merged summary of
            every training step.

    The mean loss and accuracy are logged once per file; samples that do not
    fill a complete batch at the end of a file are skipped.
    """
    is_training = True

    # Shuffle train files
    file_order = np.arange(0, len(TRAIN_FILES))
    np.random.shuffle(file_order)

    for fn, file_idx in enumerate(file_order):
        log_string('----' + str(fn) + '-----')
        current_data, current_label = provider.loadDataFile(TRAIN_FILES[file_idx])
        # Keep only the first NUM_POINT points of every cloud.
        current_data = current_data[:, 0:NUM_POINT, :]
        current_data, current_label, _ = provider.shuffle_data(current_data, np.squeeze(current_label))
        current_label = np.squeeze(current_label)

        num_batches = current_data.shape[0] // BATCH_SIZE

        total_correct = 0
        total_seen = 0
        loss_sum = 0

        for batch_idx in range(num_batches):
            start_idx = batch_idx * BATCH_SIZE
            end_idx = start_idx + BATCH_SIZE
            batch_labels = current_label[start_idx:end_idx]

            # Augment batched point clouds by rotation and jittering
            rotated_data = provider.rotate_point_cloud(current_data[start_idx:end_idx, :, :])
            jittered_data = provider.jitter_point_cloud(rotated_data)

            feed_dict = {
                ops['pointclouds_pl']: jittered_data,
                ops['labels_pl']: batch_labels,
                ops['is_training_pl']: is_training,
            }
            fetches = [ops['merged'], ops['step'],
                       ops['train_op'], ops['loss'], ops['pred']]
            summary, step, _, loss_val, pred_val = sess.run(fetches, feed_dict=feed_dict)
            train_writer.add_summary(summary, step)

            # Compare the arg-max class with the ground-truth labels.
            predicted = np.argmax(pred_val, 1)
            total_correct += np.sum(predicted == batch_labels)
            total_seen += BATCH_SIZE
            loss_sum += loss_val

        log_string('mean loss: %f' % (loss_sum / float(num_batches)))
        log_string('accuracy: %f' % (total_correct / float(total_seen)))
def eval_one_epoch(sess, ops, test_writer):
    """Evaluate on all test files and log loss, accuracy and mean class accuracy.

    Args:
        sess: session used to run the graph.
        ops: dict mapping from string to tf ops.
        test_writer: summary writer for the test run; not written to here.

    Samples that do not fill a complete batch at the end of a file are
    skipped. The three metrics are logged once, after all files.
    """
    is_training = False
    total_correct = 0
    total_seen = 0
    loss_sum = 0
    total_seen_class = [0 for _ in range(NUM_CLASSES)]
    total_correct_class = [0 for _ in range(NUM_CLASSES)]

    for fn in range(len(TEST_FILES)):
        log_string('----' + str(fn) + '-----')
        current_data, current_label = provider.loadDataFile(TEST_FILES[fn])
        # Keep only the first NUM_POINT points of every cloud.
        current_data = current_data[:, 0:NUM_POINT, :]
        current_label = np.squeeze(current_label)

        file_size = current_data.shape[0]
        num_batches = file_size // BATCH_SIZE

        for batch_idx in range(num_batches):
            start_idx = batch_idx * BATCH_SIZE
            end_idx = (batch_idx+1) * BATCH_SIZE

            feed_dict = {ops['pointclouds_pl']: current_data[start_idx:end_idx, :, :],
                         ops['labels_pl']: current_label[start_idx:end_idx],
                         ops['is_training_pl']: is_training}
            # The merged summary and step are fetched as in the original but
            # are not used afterwards.
            summary, step, loss_val, pred_val = sess.run([ops['merged'], ops['step'],
                ops['loss'], ops['pred']], feed_dict=feed_dict)
            pred_val = np.argmax(pred_val, 1)
            correct = np.sum(pred_val == current_label[start_idx:end_idx])
            total_correct += correct
            total_seen += BATCH_SIZE
            # loss_val is a per-batch mean; weight it by the batch size.
            loss_sum += (loss_val*BATCH_SIZE)
            # Per-class bookkeeping for the mean class accuracy.
            for i in range(start_idx, end_idx):
                label = current_label[i]
                total_seen_class[label] += 1
                total_correct_class[label] += (pred_val[i-start_idx] == label)

    log_string('eval mean loss: %f' % (loss_sum / float(total_seen)))
    log_string('eval accuracy: %f'% (total_correct / float(total_seen)))
    # np.float was only an alias of the builtin float and has been removed
    # from recent NumPy releases; np.float64 is the same dtype.
    log_string('eval avg class acc: %f' % (np.mean(np.array(total_correct_class)/np.array(total_seen_class,dtype=np.float64))))
if __name__ == "__main__":
    # Close the log file even when training aborts with an exception.
    try:
        train()
    finally:
        LOG_FOUT.close()