本文参考: 用modelnet40_ply_hdf5_2048数据集制作txt点云数据集(抽取特定类别)_好好学习嘻嘻的博客-CSDN博客
最近要做 modelnet10 的分类问题,发现网上没有 modelnet10 的 hdf5 文件资源,自己又不会用 off 文件制作,那就使用 modelnet40_ply_hdf5_2048 制作我们的 modelnet10_ply_hdf5_2048 文件吧。只看到有人使用 modelnet40_ply_hdf5_2048 制作了每个点云的 txt 文档,至于具体如何制作 hdf5 文件,没有找到其他资料。本文简单地制作了一下,如有不对的地方,欢迎指正。
modelnet40_ply_hdf5_2048数据集下载地址为:https://shapenet.cs.stanford.edu/media/modelnet40_ply_hdf5_2048.zip
打开和创建 h5py 文件
新建一个 hdf5 文件,文件名是 tutorial.h5,以写模式打开,描述为 test file。注意:下面的 open_file(带 title 参数)是 PyTables 的接口;在 h5py 中对应的写法是 h5py.File('tutorial.h5', 'w')。
f = open_file('tutorial.h5',mode='w',title='test file')
HDF5 文件通常像标准 Python 文件对象一样工作。它们支持 r / w / a 等标准模式,并且在不再使用时应关闭。但是,显然没有“text”与“binary”模式的概念。
文件名可以是字节字符串或 unicode 字符串。有效的 mode 如下:
| mode | 说明 |
|---|---|
| r | 只读,文件必须存在(h5py 3.0 起的默认值) |
| r+ | 读 / 写,文件必须存在 |
| w | 创建文件,已经存在的文件会被覆盖掉 |
| w- / x | 创建文件,文件如果已经存在则出错 |
| a | 打开已经存在的文件进行读 / 写,如果不存在则创建一个新文件读 / 写(h5py 3.0 之前的默认值) |
在当前目录下会生成一个 tutorial.h5 文件
import os
import sys
import numpy as np
import h5py
# Directory containing this script; the data paths below are built from it.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
# Add BASE_DIR to the import path without discarding the existing entries:
# emptying sys.path would make every later (including lazy) import fail.
if BASE_DIR not in sys.path:
    sys.path.append(BASE_DIR)
print(sys.path)
print(BASE_DIR)
# Class labels to keep when extracting the subset.
# Alternative selection kept from the original: (1, 2, 3, 4, 5, 6, 7, 8, 9)
# NOTE(review): under the usual ModelNet40 shape_names.txt ordering the ten
# standard ModelNet10 categories would be (1, 2, 8, 12, 14, 22, 23, 30, 33, 35);
# confirm that the indices below are the intended classes. The loops in this
# script write the labels unchanged, i.e. they are not remapped to 0..9.
label2sub_class = (2, 4, 8, 12, 14, 19, 28, 30, 33, 38)
def getDataFiles(list_filename):
    """Return the entries of a list file, one per line.

    Args:
        list_filename: Path of a text file holding one entry per line.

    Returns:
        A list with one string per line, with trailing whitespace (including
        the newline) removed. Blank lines are kept as empty strings.
    """
    # The context manager closes the handle promptly instead of leaving it
    # open until the garbage collector gets to it.
    with open(list_filename) as list_file:
        return [line.rstrip() for line in list_file]
def load_h5(h5_filename):
    """Read the 'data', 'label' and 'normal' datasets of an HDF5 file.

    Args:
        h5_filename: Path of the HDF5 file to read.

    Returns:
        A ``(data, label, normal)`` tuple; each element is the full content
        of the dataset with the same name, read into memory.
    """
    # Open read-only explicitly (the default mode differs between h5py
    # versions) and close the file as soon as the datasets have been read.
    with h5py.File(h5_filename, 'r') as h5f:
        data = h5f['data'][:]
        label = h5f['label'][:]
        normal = h5f['normal'][:]
    return data, label, normal
def loadDataFile(filename):
    """Load one dataset file; thin alias that delegates to ``load_h5``.

    Args:
        filename: Path of the HDF5 file to read.

    Returns:
        Whatever ``load_h5`` returns for ``filename``.
    """
    loaded = load_h5(filename)
    return loaded
# Write a subset to disk.
def write_data2h5(file_name, label, h5data, h5normal):
    """Write labels, points and normals to a new HDF5 file.

    An existing file at ``file_name`` is overwritten.

    Args:
        file_name: Destination path of the HDF5 file.
        label: Values stored as the 'label' dataset.
        h5data: Values stored as the 'data' dataset.
        h5normal: Values stored as the 'normal' dataset.
    """
    # Create the destination folder on demand so the first write does not
    # fail merely because the output directory has not been made yet.
    out_dir = os.path.dirname(file_name)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    # The context manager closes the file even if one of the writes raises.
    with h5py.File(file_name, 'w') as h5f:
        h5f['label'] = label
        h5f['data'] = h5data
        h5f['normal'] = h5normal
# h5f.create_dataset(label, data=h5data)
# The entries of train_files.txt / test_files.txt are already written as
# '.../data/modelnet40_ply_hdf5_2048/...', so they are used here unmodified.
TRAIN_FILES = getDataFiles(os.path.join(BASE_DIR, 'data/modelnet40_ply_hdf5_2048/train_files.txt'))
TEST_FILES = getDataFiles(os.path.join(BASE_DIR, 'data/modelnet40_ply_hdf5_2048/test_files.txt'))
# Output locations. modelnet40_ply_hdf5_2048 ships five train files and two
# test files, so the subset is written with the same layout under
# <BASE_DIR>/data/mydata/.
# Built with os.path.join, like the input paths above, instead of
# concatenating '/'-separated strings by hand.
_SUBSET_DIR = os.path.join(BASE_DIR, 'data', 'mydata')
filename_train0 = os.path.join(_SUBSET_DIR, 'ply_data_train0.h5')
filename_train1 = os.path.join(_SUBSET_DIR, 'ply_data_train1.h5')
filename_train2 = os.path.join(_SUBSET_DIR, 'ply_data_train2.h5')
filename_train3 = os.path.join(_SUBSET_DIR, 'ply_data_train3.h5')
filename_train4 = os.path.join(_SUBSET_DIR, 'ply_data_train4.h5')
filename_test0 = os.path.join(_SUBSET_DIR, 'ply_data_test0.h5')
filename_test1 = os.path.join(_SUBSET_DIR, 'ply_data_test1.h5')
# Number of listed input files.
file_length2train = len(TRAIN_FILES)
file_length2test = len(TEST_FILES)
# Accumulators for the samples selected from the file currently processed.
label_list2sub_class = []
data_list2sub_class = []
normal_list2sub_class = []
# Filter every training file down to the classes in label2sub_class and
# write the result to the output file with the same index.
# zip() stops at the shorter sequence, so input files beyond the five
# prepared output paths are ignored.
train_outputs = (
    filename_train0,
    filename_train1,
    filename_train2,
    filename_train3,
    filename_train4,
)
for src_path, dst_path in zip(TRAIN_FILES, train_outputs):
    current_data, current_label, current_normal = loadDataFile(src_path)
    # Flatten the labels to 1-D. reshape(-1) is used rather than squeeze so
    # that a file holding a single sample still yields a length-1 array.
    current_label = np.reshape(current_label, -1)
    # Boolean mask of the samples whose label belongs to the wanted classes.
    keep = np.isin(current_label, label2sub_class)
    write_data2h5(dst_path, current_label[keep], current_data[keep], current_normal[keep])
# Filter every test file down to the classes in label2sub_class and write
# the result to the output file with the same index.
# zip() stops at the shorter sequence, so a third or later input file is
# ignored instead of overwriting the second output file.
test_outputs = (filename_test0, filename_test1)
for src_path, dst_path in zip(TEST_FILES, test_outputs):
    current_data, current_label, current_normal = loadDataFile(src_path)
    # Flatten the labels to 1-D. reshape(-1) is used rather than squeeze so
    # that a file holding a single sample still yields a length-1 array.
    current_label = np.reshape(current_label, -1)
    # Boolean mask of the samples whose label belongs to the wanted classes.
    keep = np.isin(current_label, label2sub_class)
    write_data2h5(dst_path, current_label[keep], current_data[keep], current_normal[keep])
上面的代码比较冗长,有点 low,但便于理解,本人就不删了;下面是简洁的版本。
import os
import sys
import numpy as np
import h5py
# Directory containing this script; the data paths below are built from it.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
# Add BASE_DIR to the import path without discarding the existing entries:
# emptying sys.path would make every later (including lazy) import fail.
if BASE_DIR not in sys.path:
    sys.path.append(BASE_DIR)
print(sys.path)
print(BASE_DIR)
# Class labels to keep when extracting the subset.
# Alternative selection kept from the original: (1, 2, 3, 4, 5, 6, 7, 8, 9)
# NOTE(review): under the usual ModelNet40 shape_names.txt ordering the ten
# standard ModelNet10 categories would be (1, 2, 8, 12, 14, 22, 23, 30, 33, 35);
# confirm that the indices below are the intended classes. The loops in this
# script write the labels unchanged, i.e. they are not remapped to 0..9.
label2sub_class = (2, 4, 8, 12, 14, 19, 28, 30, 33, 38)
def getDataFiles(list_filename):
    """Return the entries of a list file, one per line.

    Args:
        list_filename: Path of a text file holding one entry per line.

    Returns:
        A list with one string per line, with trailing whitespace (including
        the newline) removed. Blank lines are kept as empty strings.
    """
    # The context manager closes the handle promptly instead of leaving it
    # open until the garbage collector gets to it.
    with open(list_filename) as list_file:
        return [line.rstrip() for line in list_file]
def load_h5(h5_filename):
    """Read the 'data', 'label' and 'normal' datasets of an HDF5 file.

    Args:
        h5_filename: Path of the HDF5 file to read.

    Returns:
        A ``(data, label, normal)`` tuple; each element is the full content
        of the dataset with the same name, read into memory.
    """
    # Open read-only explicitly (the default mode differs between h5py
    # versions) and close the file as soon as the datasets have been read.
    with h5py.File(h5_filename, 'r') as h5f:
        data = h5f['data'][:]
        label = h5f['label'][:]
        normal = h5f['normal'][:]
    return data, label, normal
def loadDataFile(filename):
    """Load one dataset file; thin alias that delegates to ``load_h5``.

    Args:
        filename: Path of the HDF5 file to read.

    Returns:
        Whatever ``load_h5`` returns for ``filename``.
    """
    loaded = load_h5(filename)
    return loaded
# Write a subset to disk.
def write_data2h5(file_name, label, h5data, h5normal):
    """Write labels, points and normals to a new HDF5 file.

    An existing file at ``file_name`` is overwritten.

    Args:
        file_name: Destination path of the HDF5 file.
        label: Values stored as the 'label' dataset.
        h5data: Values stored as the 'data' dataset.
        h5normal: Values stored as the 'normal' dataset.
    """
    # Create the destination folder on demand so the first write does not
    # fail merely because the output directory has not been made yet.
    out_dir = os.path.dirname(file_name)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    # The context manager closes the file even if one of the writes raises.
    with h5py.File(file_name, 'w') as h5f:
        h5f['label'] = label
        h5f['data'] = h5data
        h5f['normal'] = h5normal
# h5f.create_dataset(label, data=h5data)
# The entries of train_files.txt / test_files.txt are already written as
# '.../data/modelnet40_ply_hdf5_2048/...', so they are used here unmodified.
TRAIN_FILES = getDataFiles(os.path.join(BASE_DIR, 'data/modelnet40_ply_hdf5_2048/train_files.txt'))
TEST_FILES = getDataFiles(os.path.join(BASE_DIR, 'data/modelnet40_ply_hdf5_2048/test_files.txt'))
# Output locations. modelnet40_ply_hdf5_2048 ships five train files and two
# test files, so the subset is written with the same layout under
# <BASE_DIR>/data/mydata/.
# Built with os.path.join, like the input paths above, instead of
# concatenating '/'-separated strings by hand.
_SUBSET_DIR = os.path.join(BASE_DIR, 'data', 'mydata')
filename_train0 = os.path.join(_SUBSET_DIR, 'ply_data_train0.h5')
filename_train1 = os.path.join(_SUBSET_DIR, 'ply_data_train1.h5')
filename_train2 = os.path.join(_SUBSET_DIR, 'ply_data_train2.h5')
filename_train3 = os.path.join(_SUBSET_DIR, 'ply_data_train3.h5')
filename_train4 = os.path.join(_SUBSET_DIR, 'ply_data_train4.h5')
filename_test0 = os.path.join(_SUBSET_DIR, 'ply_data_test0.h5')
filename_test1 = os.path.join(_SUBSET_DIR, 'ply_data_test1.h5')
# Number of listed input files.
file_length2train = len(TRAIN_FILES)
file_length2test = len(TEST_FILES)
# Accumulators for the samples selected from the file currently processed.
label_list2sub_class = []
data_list2sub_class = []
normal_list2sub_class = []
# Filter every training file down to the classes in label2sub_class and
# write the result to the output file with the same index.
# zip() stops at the shorter sequence, so input files beyond the five
# prepared output paths are ignored.
train_outputs = (
    filename_train0,
    filename_train1,
    filename_train2,
    filename_train3,
    filename_train4,
)
for src_path, dst_path in zip(TRAIN_FILES, train_outputs):
    current_data, current_label, current_normal = loadDataFile(src_path)
    # Flatten the labels to 1-D. reshape(-1) is used rather than squeeze so
    # that a file holding a single sample still yields a length-1 array.
    current_label = np.reshape(current_label, -1)
    # Boolean mask of the samples whose label belongs to the wanted classes.
    keep = np.isin(current_label, label2sub_class)
    write_data2h5(dst_path, current_label[keep], current_data[keep], current_normal[keep])
# Filter every test file down to the classes in label2sub_class and write
# the result to the output file with the same index.
# zip() stops at the shorter sequence, so a third or later input file is
# ignored instead of overwriting the second output file.
test_outputs = (filename_test0, filename_test1)
for src_path, dst_path in zip(TEST_FILES, test_outputs):
    current_data, current_label, current_normal = loadDataFile(src_path)
    # Flatten the labels to 1-D. reshape(-1) is used rather than squeeze so
    # that a file holding a single sample still yields a length-1 array.
    current_label = np.reshape(current_label, -1)
    # Boolean mask of the samples whose label belongs to the wanted classes.
    keep = np.isin(current_label, label2sub_class)
    write_data2h5(dst_path, current_label[keep], current_data[keep], current_normal[keep])