mnist数据集的读写(包含numpy矩阵的读写)

问问问你的心

已于 2023-04-17 09:25:32 修改

阅读量597

点赞数 1

分类专栏：代码 文章标签：numpy 矩阵 python

于 2022-09-11 14:26:11 首次发布

本文链接：https://blog.csdn.net/weixin_43988390/article/details/126804867

版权

代码专栏收录该内容

1 篇文章 0 订阅

订阅专栏

import numpy as np
import struct


def create_matrix(len, rows, cols):
    """Build a stack of small test matrices with predictable cell values.

    Args:
        len: Number of matrices to generate. Note that this parameter name
            shadows the builtin ``len`` inside the function.
        rows: Row count of each matrix.
        cols: Column count of each matrix.

    Returns:
        A NumPy array of dtype ``ubyte`` built from ``len`` matrices of shape
        ``(rows, cols)``. Before the conversion to ``ubyte``, the cell at
        zero-based flat position ``j`` of matrix ``i`` holds
        ``(i + 1) * 10 + (j + 1)``.
    """
    cell_count = rows * cols
    stack = [
        # Fill a flat list first, then fold it into a (rows, cols) matrix.
        np.reshape(
            [(index + 1) * 10 + (cell + 1) for cell in range(cell_count)],
            (rows, cols),
        )
        for index in range(len)
    ]
    return np.array(stack, dtype='ubyte')


# Reading and writing IDX files
# Maps a NumPy dtype name to the element-type code stored in the IDX header.
# 'unit8' is the original (misspelled) key; it is kept so that any existing
# lookup using it keeps working.
type2code_dict = {'uint8': 0x08, 'unit8': 0x08, 'int8': 0x09, 'int16': 0x0B, 'int32': 0x0C, 'float32': 0x0D, 'float64': 0x0E}


def write_matrix(matrix, filename):
    """Write a NumPy array to ``filename`` in IDX format.

    The file consists of a 4-byte header (two zero bytes, one element-type
    code byte taken from ``type2code_dict``, one byte holding the number of
    dimensions), followed by each dimension size as a big-endian 4-byte
    unsigned integer, followed by the array elements in C order and
    big-endian byte order.

    Args:
        matrix: Array (or array-like) to store. Its dtype name must be a key
            of ``type2code_dict``.
        filename: Path of the file to create or overwrite.

    Raises:
        ValueError: If the dtype of ``matrix`` has no IDX type code.
    """
    matrix = np.asarray(matrix)
    dtype_name = matrix.dtype.name
    # Validate before opening the file so a failure never leaves a truncated
    # or mislabeled file behind.
    if dtype_name not in type2code_dict:
        raise ValueError(
            'dtype {!r} cannot be stored in an IDX file; supported: {}'.format(
                dtype_name, sorted(type2code_dict)))
    elem_code = type2code_dict[dtype_name]

    shapes = matrix.shape
    # '>' means big-endian; H = 2-byte zero prefix, B = type code, B = ndim.
    header = struct.pack('>HBB', 0, elem_code, len(shapes))
    # One 4-byte unsigned integer per dimension.
    header += struct.pack('>{}I'.format(len(shapes)), *shapes)
    # Multi-byte elements must be big-endian on disk regardless of the host.
    payload = matrix.astype(matrix.dtype.newbyteorder('>'), copy=False).tobytes()

    with open(filename, 'wb') as f:
        f.write(header)
        f.write(payload)


import cv2
# Maps the element-type code found in an IDX header to a type character that
# both ``struct`` and ``numpy.dtype`` understand.
code2type_dict = {0x08: 'B', 0x09: 'b', 0x0B: 'h', 0x0c: 'i', 0x0D: 'f', 0x0E: 'd'}


def read_matrix(filename):
    """Read an IDX file and return its contents as a NumPy array.

    The header is parsed as two zero bytes, one element-type code byte, one
    byte holding the number of dimensions, and then one big-endian 4-byte
    unsigned integer per dimension. The remaining bytes are decoded as
    big-endian elements of the type selected through ``code2type_dict``.

    Args:
        filename: Path of the IDX file to read.

    Returns:
        A writable array with the shape stored in the header. Its dtype
        matches the element type recorded in the file (``uint8`` for type
        code 0x08), in native byte order.

    Raises:
        KeyError: If the element-type code is not in ``code2type_dict``.
        struct.error: If the file is too short for its header or for the
            amount of data the header announces.
    """
    with open(filename, 'rb') as f:
        data_buff = f.read()

    head_fmt = '>HBB'
    _, elem_code, dimlen = struct.unpack_from(head_fmt, data_buff, 0)
    off_set = struct.calcsize(head_fmt)

    # I = 4-byte unsigned integer, one per dimension.
    dims_fmt = '>{}I'.format(dimlen)
    shapes = struct.unpack_from(dims_fmt, data_buff, off_set)
    off_set += struct.calcsize(dims_fmt)

    big_endian = np.dtype('>' + code2type_dict[elem_code])
    native = big_endian.newbyteorder('=')

    # Plain integer product: stays an int even when there are no dimensions.
    count = 1
    for dim in shapes:
        count *= dim
    if count == 0:
        return np.empty(shapes, dtype=native)

    needed = count * big_endian.itemsize
    if len(data_buff) - off_set < needed:
        raise struct.error(
            'IDX data section needs {} bytes but only {} are present'.format(
                needed, len(data_buff) - off_set))

    # Decode straight from the byte buffer instead of unpacking every element
    # into a Python tuple; astype() then yields a writable native-order copy.
    flat = np.frombuffer(data_buff, dtype=big_endian, count=count, offset=off_set)
    return flat.reshape(shapes).astype(native)


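# Batch generator
# batch_size: number of samples per batch; this is an empirical value, so tune it by experiment
# drop_list: when true, the final batch is discarded if it holds fewer than batch_size samples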
import random
def dataReader(img_file, label_file, batch_size=24, drop_list=False):
    """Create a shuffled mini-batch generator over an image/label IDX pair.

    Both files are loaded with ``read_matrix`` and paired up along their
    first axis into ``(image, label)`` samples, with the label converted to a
    Python ``int``.

    Args:
        img_file: Path of the IDX file holding the images.
        label_file: Path of the IDX file holding the labels.
        batch_size: Number of samples per yielded batch.
        drop_list: When true, a final batch that holds fewer than
            ``batch_size`` samples is discarded; when false it is yielded.

    Returns:
        A zero-argument generator function. Each call reshuffles the samples
        in place and yields lists of ``(image, label)`` tuples.

    Raises:
        ValueError: If the two files hold a different number of entries.
    """
    mnist_matrix = read_matrix(img_file)  # e.g. (60000, 28, 28) per the original note
    mnist_label = read_matrix(label_file)  # e.g. (60000,) per the original note

    if mnist_matrix.shape[0] != mnist_label.shape[0]:
        raise ValueError(
            'image count {} does not match label count {}'.format(
                mnist_matrix.shape[0], mnist_label.shape[0]))

    # Pair each image with its label; the label comes from the loaded label
    # array, not from the label file path.
    buff = [(image, int(label)) for image, label in zip(mnist_matrix, mnist_label)]

    def batch_reader():
        # Shuffle in place so every pass sees a new sample order.
        random.shuffle(buff)
        b = []
        for sample in buff:
            b.append(sample)
            if len(b) == batch_size:
                yield b
                b = []
        # Emit the incomplete tail unless the caller asked to drop it.
        if b and not drop_list:
            yield b
    return batch_reader










from PIL import Image
import matplotlib.pyplot as plt
if __name__ == '__main__':
    # Round-trip a small synthetic stack through the IDX writer and reader.
    # Target directory: D:/IDLE/code/deeplearning/deeplearningFoundation/test
    idx_path = 'D:/IDLE/code/deeplearning/deeplearningFoundation/test/matrix.idx'
    matrix = create_matrix(2, 3, 4)
    print(type(matrix), matrix.shape, '\n', matrix)
    write_matrix(matrix, idx_path)
    matrix2 = read_matrix(idx_path)

    # Load the MNIST test images and labels from their IDX files.
    mnist_matrix = read_matrix('D:/IDLE/code/mnist/t10k-images-idx3-ubyte')
    mnist_label = read_matrix('D:/IDLE/code/mnist/t10k-labels-idx1-ubyte')
    print(type(mnist_matrix), mnist_matrix.shape)

    # Take the first image and enlarge it to 200x200.
    mnist_sample = cv2.resize(mnist_matrix[0], (200, 200))
    # Alternative display through an OpenCV window:
    # cv2.imshow('winname', mnist_sample)
    img = Image.fromarray(mnist_sample)
    plt.imshow(img, 'gray')
    plt.show()

    # Save the image twice: JPEG first (lossy compression), then BMP (raw pixels).
    for target in ('D:/IDLE/code/image/{}.jpg', 'D:/IDLE/code/image/{}.bmp'):
        img.save(target.format('mnist_sample'))

    # Wait for a key press; presumably only meaningful when the cv2.imshow
    # call above is enabled and a window is open.
    cv2.waitKey(0)

    # Usage sketch for the batch reader (kept disabled):
    # data_read = dataReader('img.idx', 'label.idx')
    # for i, data in enumerate(data_read()):
    #     model.train(data)