代码地址:
https://github.com/yanx27/Pointnet_Pointnet2_pytorch/blob/master/data_utils/indoor3d_util.py
S3DIS数据集共有6个area
每个area有多个房间
每个房间内每个物体由单独的txt文件描述,类别名为文件名中第一个下划线之前的部分(例如 chair_1.txt 的类别为 chair)
额外的元数据文件:
anno_paths.txt
class_names.txt
这两个文件从github上下载,放在脚本的运行目录下,分别是每个房间Annotations目录的路径列表和数据集的类别名列表。
代码:
import os
import sys
import glob
import numpy as np
# Dataset root and the folder that receives one .npy file per room.
DATA_PATH = '/home/zhy/Dataset/S3DIS/Stanford3dDataset_v1.2_Aligned_Version/'
output_folder = '/home/zhy/Dataset/S3DIS/S3DIS_npy/'

# One annotation directory per line, joined onto DATA_PATH below. Blank lines
# are skipped because joining '' would yield DATA_PATH itself.
with open('anno_paths.txt') as anno_file:
    anno_paths = [line.rstrip() for line in anno_file if line.strip()]
anno_paths = [os.path.join(DATA_PATH, p) for p in anno_paths]

# Class names, one per line; a name's line index is its integer label.
with open('class_names.txt') as class_file:
    g_classes = [x.rstrip() for x in class_file]
g_class2label = {cls: i for i, cls in enumerate(g_classes)}

# makedirs also creates missing parent folders and tolerates an existing one.
os.makedirs(output_folder, exist_ok=True)
def collect_point_label(anno_path, out_filename, file_format='txt'):
    """Merge all per-object annotation files of one room into a single file.

    Every ``*.txt`` file directly under ``anno_path`` is loaded as a numeric
    table. A label column is appended, taken from the part of the file name
    before the first underscore; names missing from the class table fall back
    to ``'clutter'``. The tables are stacked and the first three columns are
    shifted so that their per-column minimum becomes zero.

    Args:
        anno_path: path to annotations. e.g. Area_1/office_2/Annotations/
        out_filename: path to save collected points and labels
            (each line is XYZRGBL).
        file_format: 'txt' or 'numpy', determines what file format to save.

    Returns:
        None

    Raises:
        ValueError: if ``anno_path`` contains no ``*.txt`` file.
        SystemExit: if ``file_format`` is neither 'txt' nor 'numpy'.

    Note:
        The points are shifted before saving; the most negative point is now
        at the origin.
    """
    # Reject a bad format before any file is read, and exit with a non-zero
    # status so the failure is visible to the calling shell.
    if file_format not in ('txt', 'numpy'):
        print('ERROR!! Unknown file format: %s, please use txt or numpy.' % \
            (file_format))
        sys.exit(1)

    points_list = []
    # glob order depends on the filesystem; sorting makes the row order of
    # the output reproducible.
    for f in sorted(glob.glob(os.path.join(anno_path, '*.txt'))):
        cls = os.path.basename(f).split('_')[0]
        if cls not in g_class2label:  # note: in some room there is 'staris' class..
            cls = 'clutter'
        # ndmin=2 keeps a one-point file two-dimensional instead of a flat
        # vector, which would break the row count and the concatenation.
        points = np.loadtxt(f, ndmin=2)
        labels = np.full((points.shape[0], 1), float(g_class2label[cls]))
        points_list.append(np.concatenate([points, labels], 1))  # Nx7

    if not points_list:
        raise ValueError('no annotation files (*.txt) found in %s' % anno_path)

    data_label = np.concatenate(points_list, 0)
    data_label[:, 0:3] -= data_label[:, 0:3].min(axis=0)

    if file_format == 'txt':
        # The context manager closes the file even if a write fails.
        with open(out_filename, 'w') as fout:
            fout.writelines(
                '%f %f %f %d %d %d %d\n' % tuple(row[:7]) for row in data_label
            )
    else:
        np.save(out_filename, data_label)
# Note: there is an extra character in the v1.2 data in Area_5/hallway_6. It's fixed manually.
# Convert every listed room. A failure in one room is reported together with
# its cause and the remaining rooms are still processed.
for anno_path in anno_paths:
    print(anno_path)
    try:
        # normpath drops a trailing separator so the last two directory
        # levels above Annotations are always <area>/<room>.
        room_dir = os.path.dirname(os.path.normpath(anno_path))
        area_name = os.path.basename(os.path.dirname(room_dir))
        room_name = os.path.basename(room_dir)
        out_filename = area_name + '_' + room_name + '.npy'  # Area_1_hallway_1.npy
        print(out_filename)
        collect_point_label(anno_path, os.path.join(output_folder, out_filename), 'numpy')
    except Exception as err:
        # Exception (not a bare except) lets Ctrl-C and SystemExit propagate.
        print(anno_path, 'ERROR!!', err)
1、读出每个房间的Annotations目录路径
anno_paths = [line.rstrip() for line in open('anno_paths.txt')]
2、列出每个房间内所有物体的txt文件(每个物体一个文件)
glob.glob(os.path.join(anno_path, '*.txt'))
3、由文件名前缀得到类别,再把类别转换成该物体每个点的label
labels = np.ones((points.shape[0],1)) * g_class2label[cls]
4、每个房间合并成一个.npy文件
points_list.append(np.concatenate([points, labels], 1))