import os
import random

import h5py
import numpy as np
from PIL import Image
# Parallel lists pairing each input image directory with the HDF5 data file
# Caffe will read and the text list file that points at that HDF5 file.
IMAGE_DIR = ['image_train', 'image_test']
HDF5_FILE = ['hdf5_train.h5', 'hdf5_test.h5']
LIST_FILE = ['list_train.txt', 'list_test.txt']

# Label-key -> (class, position) pairs used as the two-element label vector.
LABELS = {
    'A_0': (0, 0), 'B_0': (1, 0),
    'A_1': (0, 1), 'B_1': (1, 1),
    'A_2': (0, 2), 'B_2': (1, 2),
}
# Convert every image in each directory into one HDF5 file (data + labels)
# and write a list file containing the absolute path to that HDF5 file,
# which is the format Caffe's HDF5Data layer expects.
print('\nplease wait...')
for kk, image_dir in enumerate(IMAGE_DIR):
    # NOTE(review): the original source had a placeholder (`...`) here;
    # assumes every entry in image_dir is an image file -- TODO confirm.
    file_list = [os.path.join(image_dir, name)
                 for name in sorted(os.listdir(image_dir))]
    random.shuffle(file_list)
    # One grayscale channel, fixed 32x96 image size; two-element label vector.
    datas = np.zeros((len(file_list), 1, 32, 96))
    labels = np.zeros((len(file_list), 2))
    for ii, _file in enumerate(file_list):
        # Normalize 8-bit pixel values into [0, 1).
        datas[ii, :, :, :] = \
            np.array(Image.open(_file)).astype(np.float32) / 256
        # NOTE(review): original placeholder -- assumes filenames look like
        # 'A_0_xxx.png' so the first two '_'-separated parts form the
        # LABELS key -- TODO confirm against the actual file naming scheme.
        kind_index = '_'.join(os.path.basename(_file).split('_')[:2])
        # np.int was removed from NumPy; plain int is equivalent here.
        labels[ii, :] = np.array(LABELS[kind_index]).astype(int)
    # `with` closes the files automatically; no explicit close() needed.
    with h5py.File(HDF5_FILE[kk], 'w') as f:
        f['data'] = datas
        f['labels'] = labels
    with open(LIST_FILE[kk], 'w') as f:
        f.write(os.path.abspath(HDF5_FILE[kk]) + '\n')
print('\ndone...')
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
# NOTE (translated): Caffe requires each HDF5 file to be at most 2 GB, so if
# the dataset is large, generate several HDF5 files instead of one.
# (Empirically: 50k images totalling ~30 MB produced a 1.8 GB HDF5 file.)