import numpy
import h5py
def bytes_to_uint32(a):
    """Combine the first four items of ``a`` into one big-endian integer.

    ``a[0]`` is treated as the most significant byte and ``a[3]`` as the
    least significant one. Indexing a Python 3 ``bytes`` object yields ints,
    so the result of a binary ``read(4)`` can be passed in directly.

    Args:
        a: Indexable sequence with at least four integer items.

    Returns:
        The integer ``a[0] * 2**24 + a[1] * 2**16 + a[2] * 2**8 + a[3]``.
    """
    value = 0
    for position in range(4):
        # Move the accumulated bytes up one place, then append the next byte.
        value = value * 256 + a[position]
    return value
def read_image_data(filename):
    """Read a binary image file: a 16-byte header followed by one byte per pixel.

    The header consists of four 4-byte fields decoded with
    ``bytes_to_uint32``: a magic number (consumed but not checked), the
    number of images, the number of rows and the number of columns. The pixel
    bytes follow, stored image by image and row by row.

    Args:
        filename: Path of the binary image file to read.

    Returns:
        A writable float64 ``numpy.ndarray`` of shape
        ``(images, rows, columns)`` holding the raw byte values (0-255).

    Raises:
        ValueError: If the file contains fewer pixel bytes than the header
            announces.
    """
    # The context manager closes the file even when parsing fails part-way.
    with open(filename, "rb") as infile:
        bytes_to_uint32(infile.read(4))  # magic number: skipped, not validated
        image_number = bytes_to_uint32(infile.read(4))
        rows_number = bytes_to_uint32(infile.read(4))
        columns_number = bytes_to_uint32(infile.read(4))
        expected_size = image_number * rows_number * columns_number
        raw = infile.read(expected_size)

    if len(raw) != expected_size:
        raise ValueError(
            f"{filename}: header announces {expected_size} pixel bytes "
            f"but only {len(raw)} could be read"
        )

    # A single vectorised conversion replaces the per-pixel Python loop.
    # astype() copies into a writable float64 array, which is the dtype the
    # previous numpy.empty() based implementation returned.
    pixels = (
        numpy.frombuffer(raw, dtype=numpy.uint8)
        .reshape(image_number, rows_number, columns_number)
        .astype(numpy.float64)
    )
    print(pixels.shape)
    return pixels
def read_label_data(filename):
    """Read a binary label file: an 8-byte header followed by one byte per label.

    The header consists of two 4-byte fields decoded with
    ``bytes_to_uint32``: a magic number (consumed but not checked) and the
    number of labels. The label bytes follow in order.

    Args:
        filename: Path of the binary label file to read.

    Returns:
        A writable one-dimensional float64 ``numpy.ndarray`` with one entry
        per label, holding the raw byte values.

    Raises:
        ValueError: If the file contains fewer label bytes than the header
            announces.
    """
    # The context manager closes the file even when parsing fails part-way.
    with open(filename, "rb") as infile:
        bytes_to_uint32(infile.read(4))  # magic number: skipped, not validated
        label_number = bytes_to_uint32(infile.read(4))
        raw = infile.read(label_number)

    if len(raw) != label_number:
        raise ValueError(
            f"{filename}: header announces {label_number} labels "
            f"but only {len(raw)} could be read"
        )

    # Vectorised conversion instead of an element-by-element copy; astype()
    # yields a writable float64 array, matching the dtype numpy.empty() gave.
    label = numpy.frombuffer(raw, dtype=numpy.uint8).astype(numpy.float64)
    print(label.shape)
    return label
def write_hdf5(filename):
    """Pack the four image/label input files into a single HDF5 file.

    The input paths are fixed and resolved relative to the current working
    directory (presumably MNIST-style IDX files, judging by their names —
    confirm against the data source). Each input becomes one ``uint8``
    dataset: ``train_image``, ``train_label``, ``test_image`` and
    ``test_label``.

    Args:
        filename: Path of the HDF5 file to create; it is opened in ``'w'``
            mode.
    """
    # (dataset name, reader function, input path), in the order written.
    sources = (
        ("train_image", read_image_data, "train-images.idx3-ubyte"),
        ("train_label", read_label_data, "train-labels.idx1-ubyte"),
        ("test_image", read_image_data, "t10k-images.idx3-ubyte"),
        ("test_label", read_label_data, "t10k-labels.idx1-ubyte"),
    )
    # The context manager closes the HDF5 file even if a reader or
    # create_dataset raises. The output is opened before any input is read,
    # so a failing input can still leave a partially filled file behind.
    with h5py.File(filename, 'w') as outfile:
        for dataset_name, reader, path in sources:
            outfile.create_dataset(dataset_name, dtype="uint8", data=reader(path))
# Run the conversion only when this file is executed as a script, so that
# importing the module does not touch the filesystem.
if __name__ == "__main__":
    write_hdf5("data_set.hdf5")
04-27
03-08
2602
09-06
1156
10-11