将MNIST数据集转换为HDF5格式

import numpy
import h5py

def bytes_to_uint32(a):
    """Combine the first four items of ``a`` into one big-endian integer.

    ``a`` is indexed at positions 0 through 3 (for example a 4-byte ``bytes``
    object, whose items are ints). Item 0 is the most significant byte and
    item 3 the least significant; any items past index 3 are ignored.
    """
    # Positional weight of each byte: 256**3, 256**2, 256**1, 256**0.
    weights = (1 << 24, 1 << 16, 1 << 8, 1)
    return sum(a[index] * weight for index, weight in enumerate(weights))
        

def read_image_data(filename):
    """Read an IDX-style image file into a 3-D NumPy array.

    The file is expected to start with four big-endian unsigned 32-bit
    integers (magic number, image count, row count, column count) followed by
    one unsigned byte per pixel, stored image by image and row by row.

    Args:
        filename: Path of the image file to read.

    Returns:
        A ``float64`` array of shape ``(images, rows, columns)`` holding the
        pixel values. The shape is also printed to stdout.

    Raises:
        ValueError: If the header or the pixel data is shorter than the
            header announces (truncated file).
        OSError: If the file cannot be opened or read.
    """
    header_size = 16
    # ``with`` guarantees the handle is released even if parsing fails.
    with open(filename, "rb") as infile:
        header = infile.read(header_size)
        if len(header) != header_size:
            raise ValueError(
                f"{filename}: expected a {header_size}-byte header, "
                f"got {len(header)} bytes"
            )
        # ">u4" = big-endian uint32; tolist() yields plain Python ints so the
        # size product below cannot overflow a fixed-width integer.
        # The magic number is read but intentionally not validated.
        _magic, image_number, rows_number, columns_number = numpy.frombuffer(
            header, dtype=">u4"
        ).tolist()
        expected_size = image_number * rows_number * columns_number
        raw = infile.read(expected_size)

    if len(raw) != expected_size:
        raise ValueError(
            f"{filename}: expected {expected_size} pixel bytes, got {len(raw)}"
        )

    # One vectorised conversion instead of a per-pixel Python loop. astype()
    # returns a writable float64 copy, matching the dtype this function has
    # always returned.
    pixels = (
        numpy.frombuffer(raw, dtype=numpy.uint8)
        .reshape((image_number, rows_number, columns_number))
        .astype(numpy.float64)
    )

    print(pixels.shape)
    return pixels


def read_label_data(filename):
    """Read an IDX-style label file into a 1-D NumPy array.

    The file is expected to start with two big-endian unsigned 32-bit
    integers (magic number, label count) followed by one unsigned byte per
    label.

    Args:
        filename: Path of the label file to read.

    Returns:
        A ``float64`` array of shape ``(labels,)`` holding the label values.
        The shape is also printed to stdout.

    Raises:
        ValueError: If the header or the label data is shorter than the
            header announces (truncated file).
        OSError: If the file cannot be opened or read.
    """
    header_size = 8
    # ``with`` guarantees the handle is released even if parsing fails.
    with open(filename, "rb") as infile:
        header = infile.read(header_size)
        if len(header) != header_size:
            raise ValueError(
                f"{filename}: expected a {header_size}-byte header, "
                f"got {len(header)} bytes"
            )
        # ">u4" = big-endian uint32; tolist() yields plain Python ints.
        # The magic number is read but intentionally not validated.
        _magic, label_number = numpy.frombuffer(header, dtype=">u4").tolist()
        raw = infile.read(label_number)

    if len(raw) != label_number:
        raise ValueError(
            f"{filename}: expected {label_number} label bytes, got {len(raw)}"
        )

    # One vectorised conversion instead of a per-label Python loop. astype()
    # returns a writable float64 copy, matching the dtype this function has
    # always returned.
    label = numpy.frombuffer(raw, dtype=numpy.uint8).astype(numpy.float64)

    print(label.shape)
    return label

def write_hdf5(filename):
    """Convert the four MNIST IDX files into a single HDF5 file.

    Creates (or overwrites) ``filename`` and stores four ``uint8`` datasets:
    ``train_image``, ``train_label``, ``test_image`` and ``test_label``. The
    source files are opened by their fixed names, so they are resolved
    relative to the current working directory.

    Args:
        filename: Path of the HDF5 file to create.
    """
    # (dataset name, reader function, source file), written in this order.
    sources = (
        ("train_image", read_image_data, "train-images.idx3-ubyte"),
        ("train_label", read_label_data, "train-labels.idx1-ubyte"),
        ("test_image", read_image_data, "t10k-images.idx3-ubyte"),
        ("test_label", read_label_data, "t10k-labels.idx1-ubyte"),
    )
    # The context manager closes the HDF5 file even when a reader or
    # create_dataset raises, so a failed run does not leak the open handle.
    with h5py.File(filename, "w") as outfile:
        for dataset_name, reader, source_path in sources:
            outfile.create_dataset(
                dataset_name, dtype="uint8", data=reader(source_path)
            )


# Script entry point: when run directly (not imported), convert the MNIST
# files found in the current working directory into "data_set.hdf5".
if __name__ == "__main__":
    write_hdf5(filename="data_set.hdf5")

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值