参考博客:参考博客
# Training driver: run MAX_EPOCH epochs; every 5th epoch evaluate and
# checkpoint the model whenever whole-scene accuracy beats the best so far.
for epoch in range(MAX_EPOCH):
    log_string('**** EPOCH %03d ****' % (epoch))
    sys.stdout.flush()
    """ 强制刷新缓冲区"""  # bare string used as a comment: "force-flush the stdout buffer"
    train_one_epoch(sess, ops, train_writer)
    if epoch % 5 == 0:
        acc = eval_one_epoch(sess, ops, test_writer)
        # NOTE(review): the batch-level accuracy above is immediately
        # overwritten -- model selection uses only the whole-scene score.
        acc = eval_whole_scene_one_epoch(sess, ops, test_writer)
        if acc > best_acc:
            best_acc = acc
            save_path = saver.save(sess, os.path.join(LOG_DIR, "best_model_epoch_%03d.ckpt" % (epoch)))
            log_string("Model saved in file: %s" % save_path)
构建好图结构后,进行每个epoch的训练;在epoch为5的倍数时进行一次测试。
train_one_epoch()
def get_batch_wdp(dataset, idxs, start_idx, end_idx):
    """Assemble one batch with random per-sample point dropout.

    Same stacking as get_batch, but each sample additionally has a random
    fraction of its points "dropped": dropped points have their coordinates
    and labels overwritten with the sample's first point and their sample
    weights zeroed.

    Returns (batch_data, batch_label, batch_smpw) of shapes
    (B, NUM_POINT, 3), (B, NUM_POINT), (B, NUM_POINT).
    """
    n = end_idx - start_idx
    batch_data = np.zeros((n, NUM_POINT, 3))
    batch_label = np.zeros((n, NUM_POINT), dtype=np.int32)
    batch_smpw = np.zeros((n, NUM_POINT), dtype=np.float32)
    for row, sample_idx in enumerate(idxs[start_idx:end_idx]):
        ps, seg, smpw = dataset[sample_idx]
        batch_data[row, ...] = ps
        batch_label[row, :] = seg
        batch_smpw[row, :] = smpw
        # Dropout ratio drawn uniformly from [0, 0.875) for this sample.
        dropout_ratio = np.random.random() * 0.875
        drop_idx = np.where(np.random.random((ps.shape[0])) <= dropout_ratio)[0]
        # Dropped points collapse onto the first point; their weight becomes 0.
        batch_data[row, drop_idx, :] = batch_data[row, 0, :]
        batch_label[row, drop_idx] = batch_label[row, 0]
        batch_smpw[row, drop_idx] *= 0
    return batch_data, batch_label, batch_smpw
加载scannet_train.pickle文件
- 1201个场景数据集。
- 假定每个场景有n个点云,文件中有1201*n个点的标签。
- 21个类别的初始权值,初始权值计算的依据为某一类点云标签数量占所有点云的比例。
# Axis-aligned bounding box of the scene's point cloud.
coordmax = np.max(point_set,axis=0)  # upper corner (per-axis maximum)
coordmin = np.min(point_set,axis=0)  # lower corner (per-axis minimum)
打乱索引,随机从中选取4个批次的场景数据(batch_size=4),批次数据逐一输入。
取出当前批次点云坐标的最大值coormax,最小值coormin
# Lower corner of the sampling region: pull the upper corner in by the
# sample-volume extent (1.5 x 1.5 x 3.0 m), clamped to stay inside the scene.
smpmin = np.maximum(coordmax-[1.5,1.5,3.0], coordmin)
smpmin[2] = coordmin[2]  # z always starts at the floor of the scene
coordmax为场景包围盒的右上角A点,往里采样,采样体素大小为1.5×1.5×3.0,smpmin是与A点相对的采样体素对角线点B点的坐标。
# Effective sample-volume size: at most 1.5 x 1.5 x 3.0 but never larger than
# the scene; height is forced to the full scene height.
smpsz = np.minimum(coordmax-smpmin,[1.5,1.5,3.0])
smpsz[2] = coordmax[2]-coordmin[2]
实际体素的尺寸大小,对角线点相减,体素大小1.5 * 1.5 * h
# Try up to 10 times to sample a valid 1.5 x 1.5 x H cube from the scene.
for i in range(10):
    # Pick a random scene point as the cube centre.
    curcenter = point_set[np.random.choice(len(semantic_seg),1)[0],:]
    curmin = curcenter-[0.75,0.75,1.5]  # lower corner: centre minus half-extents
    curmax = curcenter+[0.75,0.75,1.5]  # upper corner
    curmin[2] = coordmin[2]  # override z so the cube spans full scene height
    curmax[2] = coordmax[2]
    # Points whose x, y and z all lie inside the cube padded by 0.2 m.
    curchoice = np.sum((point_set>=(curmin-0.2))*(point_set<=(curmax+0.2)),axis=1)==3
    cur_point_set = point_set[curchoice,:]
    cur_semantic_seg = semantic_seg[curchoice]
从当前场景点集中随机取一点作为中心点
# (Snippet repeated from the loop above for exposition.)
curmin = curcenter-[0.75,0.75,1.5]  # cube lower corner: centre minus half-extents
curmax = curcenter+[0.75,0.75,1.5]  # cube upper corner
curmin[2] = coordmin[2]  # z overridden: cube covers the full scene height
curmax[2] = coordmax[2]
以当前点为体素中心,采样一个1.5 * 1.5 * h大小的体素,把此体素记为V1,h为点集包围盒的高度。
# Keep points whose three coordinates all fall inside the cube expanded by 0.2 m.
curchoice = np.sum((point_set>=(curmin-0.2))*(point_set<=(curmax+0.2)),axis=1)==3
cur_point_set = point_set[curchoice,:]
cur_semantic_seg = semantic_seg[curchoice]
把体素V1的大小扩大0.2m(各边边长加0.2m),从当前场景的点云中,把此体素中的点提取出来,记为点集A
# Mask of the selected points that lie inside the cube expanded by only 0.01 m.
mask = np.sum((cur_point_set>=(curmin-0.01))*(cur_point_set<=(curmax+0.01)),axis=1)==3
把体素V1的大小扩大0.01m,记为体素V2,从点集A中提取出位于体素V2中的点。
# Quantize masked points onto a 31 x 31 x 62 grid over the cube...
vidx = np.ceil((cur_point_set[mask,:]-curmin)/(curmax-curmin)*[31.0,31.0,62.0])
# ...then flatten (x, y, z) cell coords into unique linear cell ids.
vidx = np.unique(vidx[:,0]*31.0*62.0+vidx[:,1]*62.0+vidx[:,2])
# Valid sample: >=70% of points carry a non-zero label AND the occupied
# cells cover >=2% of the 31*31*62 grid.
isvalid = np.sum(cur_semantic_seg>0)/len(cur_semantic_seg)>=0.7 and len(vidx)/31.0/31.0/62.0>=0.02
提取出mask索引的点后,各个坐标归一化,(当前坐标点 - 最小坐标)/ (最大坐标 - 最小坐标),再乘 31,31,62。
非零语义标签占总语义标签的比例>=70%
# Resample the cube to exactly self.npoints points (with replacement).
choice = np.random.choice(len(cur_semantic_seg), self.npoints, replace=True)
point_set = cur_point_set[choice,:]
semantic_seg = cur_semantic_seg[choice]
mask = mask[choice]
# Per-point loss weight from the class-frequency table; points outside the
# inner cube (mask == False) end up with weight 0.
sample_weight = self.labelweights[semantic_seg]
sample_weight *= mask
从体素V2中采样8192个点(self.npoints),mask之外的点权值为0
dropout_ratio = np.random.random()*0.875  # dropout ratio drawn from [0, 0.875)
drop_idx = np.where(np.random.random((ps.shape[0]))<=dropout_ratio)[0]
# Dropped points are replaced by the sample's first point (coords and label).
batch_data[i,drop_idx,:] = batch_data[i,0,:]
batch_label[i,drop_idx] = batch_label[i,0]
从8192个点中随机丢弃一些点,丢弃的点的位置坐标用第一个点的坐标代替,丢弃的点的权重设为0。有的点集丢弃比例很大,不知道这个对最终的预测效果影响大不大。
# Augmentation: rotate each cloud by a random angle about the z (up) axis.
aug_data = provider.rotate_point_cloud_z(batch_data) #rotate
数据增强 :围绕Z轴随机旋转一个角度,角度在(0,2pi)里面随机产生。
# One optimizer step: fetch summaries, global step, the train op (value
# discarded), the scalar loss and the per-point logits in a single run.
summary, step, _, loss_val, pred_val = sess.run([ops['merged'], ops['step'],
    ops['train_op'], ops['loss'], ops['pred']],feed_dict=feed_dict)
开始训练一个epoch的网络模型并计算损失,梯度优化,得到预测值pred_val (32,8192,21)
eval_one_epoch
# Reset the per-epoch evaluation accumulators.
total_correct = 0  # correctly predicted points
total_seen = 0     # total evaluated points
loss_sum = 0       # running loss over batches
total_seen_class = [0] * NUM_CLASSES     # per-class point counts
total_correct_class = [0] * NUM_CLASSES  # per-class correct counts
total_correct_vox = 0  # voxel-based correct count
total_seen_vox = 0     # voxel-based total count
total_seen_class_vox = [0] * NUM_CLASSES     # voxel-based per-class totals
total_correct_class_vox = [0] * NUM_CLASSES  # voxel-based per-class correct
total_correct :总正确点云数
total_seen:总点云数
loss_sum:累计总损失
total_seen_class:基于点云的每一类总的点云数
total_correct_class:基于点云的每一类总的正确点云数
total_correct_vox:基于体素的总正确点云数
total_seen_vox:基于体素的总点云数
total_seen_class_vox:基于体素的每一类总的点云数
total_correct_class_vox:基于体素的每一类总的正确点云数
def get_batch(dataset, idxs, start_idx, end_idx):
    """Stack dataset samples [start_idx, end_idx) into dense batch arrays.

    Each dataset item yields (points, labels, weights); no shuffling or
    dropout is applied here (see get_batch_wdp for the dropout variant).

    Returns (batch_data, batch_label, batch_smpw) of shapes
    (B, NUM_POINT, 3), (B, NUM_POINT), (B, NUM_POINT).
    """
    n = end_idx - start_idx
    batch_data = np.zeros((n, NUM_POINT, 3))
    batch_label = np.zeros((n, NUM_POINT), dtype=np.int32)
    batch_smpw = np.zeros((n, NUM_POINT), dtype=np.float32)
    for row, sample_idx in enumerate(idxs[start_idx:end_idx]):
        pts, seg, weights = dataset[sample_idx]
        batch_data[row, ...] = pts
        batch_label[row, :] = seg
        batch_smpw[row, :] = weights
    return batch_data, batch_label, batch_smpw
测试部分直接用testdataset数据,不需要再进行乱序和dropout等操作。
# Same z-axis rotation augmentation as used in training.
aug_data = provider.rotate_point_cloud_z(batch_data)
数据增强:对batch_data进行z轴旋转
# Forward pass only: no train op is fetched, so no weights are updated.
summary, step, loss_val, pred_val = sess.run([ops['merged'], ops['step'],ops['loss'], ops['pred']], feed_dict=feed_dict)
测试部分只需要进行模型训练以及计算损失,无优化过程,该过程为测试模型训练后的性能。
# Histogram of ground-truth labels (bin edges 0..21 -> 21 class bins);
# accumulate into the dataset-wide class-frequency tally.
tmp, _ = np.histogram(batch_label, range(22))
labelweights += tmp
np.histogram()为直方图统计函数,batch_label为待统计数据的数组,range(22)给出22个bin边界,对应21个统计区间。该函数的实质是统计label中每一类的数量。返回两个值:tmp为每类的数量,_为bin边界数组,在这里不需要使用。
并将所有数都统计在labelweight中,记为test_dataset所有的实际类别数量
for l in range(NUM_CLASSES):
    # Per-class seen/correct counts, ignoring zero-weight points.
    total_seen_class[l] += np.sum((batch_label == l) & (batch_smpw > 0))
    total_correct_class[l] += np.sum((pred_val == l) & (batch_label == l) & (batch_smpw > 0))
total_seen_class[l]:统计每类个数且权值大于0
total_correct_class[l]:统计预测每类正确个数且权值大于0
def point_cloud_label_to_surface_voxel_label_fast(point_cloud, label, res=0.0484):
    """Collapse per-point labels into per-voxel labels on a grid of cell size res.

    point_cloud: (N, 3) array of xyz coordinates.
    label: (N,) or (N, K) per-point labels; each occupied voxel keeps the
        label of the first point (in array order) that falls into it.
    res: voxel edge length, in the same units as point_cloud.

    Returns (uvidx, uvlabel, nvox): sorted unique linear voxel ids, the
    representative label per occupied voxel, and the grid dimensions.
    """
    lo = np.min(point_cloud, axis=0)
    hi = np.max(point_cloud, axis=0)
    nvox = np.ceil((hi - lo) / res)            # voxel count along each axis
    cell = np.ceil((point_cloud - lo) / res)   # per-point integer cell coords
    # Flatten (x, y, z) cells to a single linear id: x + y*nx + z*nx*ny.
    linear = cell[:, 0] + cell[:, 1] * nvox[0] + cell[:, 2] * nvox[0] * nvox[1]
    # return_index=True yields the index of the first point in each voxel.
    uvidx, first = np.unique(linear, return_index=True)
    if label.ndim == 1:
        uvlabel = label[first]
    else:
        assert(label.ndim == 2)
        uvlabel = label[first, :]
    return uvidx, uvlabel, nvox
原文解释:
在体素扫描上使用完全卷积神经网络提供基线。它们完全依赖于扫描几何体而不是RGB信息,并以每个体素为基础报告精度。为了进行公平的比较,我们在所有实验中去除了RGB信息,并将点云标记预测转换为体素标记,以每个体素为基础进行报告。
- 作者为了和基于基线的方法进行精度比较,从而使用上面代码将点云label转化为体素label。
point_cloud:point_cloud是点集中权值不为0的点云。
label:是单个测试场景的实际标签和对应预测标签
a.体素的大小放大50倍,即长宽高各放大50倍,L×50=L1,W*50=W1,H×50=H1.
b.测试点云(x,y,z)(N×3)减去最小坐标((x-min(x)=x1,y-min(y)=y1,z-min(z)=z1))后的x1,y1,z1各放大50倍,x1×50=x2, y1×50=y2, z1×50=z2
c. x + y×(场景包围盒的长度放大50倍后的数) + z×(场景包围盒的长度放大50倍后的数)×(场景包围盒的宽度放大50倍后的数),vidx:N×3(如:4722×3)
即:x + y×L1 + z×L1×W1
d. np.unique() 去除数组中的重复数字,并进行排序,return_index = True 返回原列表中第一次出现的索引值
作者在是为了和体素进行比较而将标签全部体素化,正常对点云测试我认为是不需要这个操作的。
测试集test_dataset的测试结果:
平均损失:
体素准确率:
平均类别体素准确率:
点云预测准确率:
平均类别准确率:
加权后平均类别准确率:
eval_whole_scene_one_epoch
作者为了验证密度适应的合理性,合成了类似于以下的扫描网络场景的虚拟扫描,并根据这些数据评估网络。介绍了如何生成虚拟扫描的补充材料。这类验证也不是必须的。