import numpy as np
import struct
def create_matrix(len, rows, cols):
    """Build a stack of ``len`` sample matrices, each ``rows`` x ``cols``.

    Element ``j`` (0-based, row-major) of matrix ``i`` (0-based) holds
    ``(i + 1) * 10 + (j + 1)``, so each value encodes both the matrix it
    belongs to and its position inside that matrix.

    Args:
        len: Number of matrices in the stack. The name shadows the builtin
            but is kept so existing keyword callers continue to work.
        rows: Number of rows in each matrix.
        cols: Number of columns in each matrix.

    Returns:
        A ``numpy.ndarray`` of dtype ``ubyte`` with shape
        ``(len, rows, cols)``. Values above 255 wrap around modulo 256 when
        cast to ``ubyte``.
    """
    # A non-positive count yields an empty stack that still carries the
    # (rows, cols) trailing dimensions, so the result is always 3-D.
    count = max(len, 0)
    cell_ids = np.arange(1, rows * cols + 1)
    matrix_ids = np.arange(1, count + 1) * 10
    # Broadcasting (count, 1) + (1, rows * cols) fills every matrix at once.
    values = matrix_ids[:, np.newaxis] + cell_ids[np.newaxis, :]
    return values.reshape(count, rows, cols).astype('ubyte')
# Reading and writing IDX files.
# Maps a NumPy dtype name to the element-type code stored in the IDX header.
# 'unit8' is a historical misspelling of 'uint8'; it is kept so any existing
# lookup keeps working, next to the correctly spelled key.
type2code_dict = {'unit8': 0x08, 'uint8': 0x08, 'int8': 0x09, 'int16': 0x0B, 'int32': 0x0C, 'float32': 0x0D, 'float64': 0x0E}


def write_matrix(matrix, filename):
    """Write ``matrix`` to ``filename`` in IDX format.

    File layout, all header fields big-endian:
      * 2 zero bytes, 1 byte element-type code, 1 byte number of dimensions;
      * one 4-byte unsigned integer per dimension;
      * the elements in C (row-major) order, big-endian.

    Args:
        matrix: Array-like whose dtype name is a key of ``type2code_dict``.
        filename: Path of the file to create or overwrite.

    Raises:
        TypeError: If the dtype of ``matrix`` has no IDX type code.
    """
    matrix = np.asarray(matrix)
    dtype_name = matrix.dtype.name
    # Validate before opening the file so a failure leaves no truncated file.
    if dtype_name not in type2code_dict:
        raise TypeError('unsupported dtype for IDX file: {}'.format(dtype_name))

    shapes = matrix.shape
    # '>' means big-endian; H = 2-byte, B = 1-byte, I = 4-byte unsigned.
    header = struct.pack('>HBB', 0, type2code_dict[dtype_name], len(shapes))
    header += struct.pack('>{}I'.format(len(shapes)), *shapes)

    # The payload uses the same byte order as the header. For 1-byte types
    # this is a no-op; for wider types it swaps on little-endian hosts.
    big_endian = matrix.astype(matrix.dtype.newbyteorder('>'), copy=False)

    with open(filename, 'wb') as f:
        f.write(header)
        # tobytes() emits C-order bytes even for non-contiguous inputs.
        f.write(big_endian.tobytes())
import cv2
# Maps the element-type code found in an IDX header to the matching
# struct / NumPy type character.
code2type_dict = {0x08: 'B', 0x09: 'b', 0x0B: 'h', 0x0c: 'i', 0x0D: 'f', 0x0E: 'd'}


def read_matrix(filename):
    """Read an IDX file and return its contents as a NumPy array.

    The header is parsed as: 2 ignored bytes, 1 byte element-type code,
    1 byte number of dimensions, then one big-endian 4-byte unsigned integer
    per dimension. The remaining bytes are decoded as big-endian elements of
    the type selected through ``code2type_dict``.

    Args:
        filename: Path of the IDX file to read.

    Returns:
        A writable ``numpy.ndarray`` in native byte order whose shape is the
        one stored in the header and whose dtype matches the element-type
        code (``uint8`` for code 0x08).

    Raises:
        struct.error: If the file is too short for its header or for the
            number of elements the header announces.
        KeyError: If the element-type code is not in ``code2type_dict``.
    """
    with open(filename, 'rb') as f:
        data_buff = f.read()

    off_set = 0
    # '>' means big-endian; H = 2-byte, B = 1-byte unsigned.
    file_head_fmt = '>HBB'
    _, elem_code, dimlen = struct.unpack_from(file_head_fmt, data_buff, off_set)
    off_set += struct.calcsize(file_head_fmt)

    # I = 4-byte unsigned integer, one per dimension.
    file_head_fmt = '>{}I'.format(dimlen)
    shapes = struct.unpack_from(file_head_fmt, data_buff, off_set)
    off_set += struct.calcsize(file_head_fmt)

    elem_dtype = np.dtype('>' + code2type_dict[elem_code])
    # An explicit int64 accumulator keeps the product an exact integer
    # (an empty shape gives 1).
    elem_count = int(np.prod(shapes, dtype=np.int64))
    if len(data_buff) - off_set < elem_count * elem_dtype.itemsize:
        raise struct.error('IDX payload is shorter than the header announces')

    # Decode the payload in one step instead of unpacking every element into
    # a Python object first.
    matrix = np.frombuffer(data_buff, dtype=elem_dtype, count=elem_count, offset=off_set)
    # astype() copies, so the result is writable and in native byte order.
    return matrix.reshape(shapes).astype(elem_dtype.newbyteorder('='))
# 生成器
# batch_size一组取多少数据,这是经验值,你自己去试
# drop_list 最后一组数据不满batch_size可以舍去
import random
def dataReader(img_file, label_file, batch_size=24, drop_list=False):
    """Create a shuffled mini-batch reader over an IDX image/label pair.

    Both files are loaded once with ``read_matrix``; the samples are paired
    along the first axis as ``(image, int(label))`` tuples.

    Args:
        img_file: Path of the IDX file holding the images.
        label_file: Path of the IDX file holding the labels.
        batch_size: Number of samples per yielded batch.
        drop_list: When true, a final batch with fewer than ``batch_size``
            samples is discarded; when false it is yielded as well.

    Returns:
        A zero-argument function. Each call reshuffles the samples in place
        and returns a generator yielding lists of ``(image, label)`` tuples.

    Raises:
        ValueError: If the two files hold a different number of samples.
    """
    mnist_matrix = read_matrix(img_file)
    mnist_label = read_matrix(label_file)
    if mnist_matrix.shape[0] != mnist_label.shape[0]:
        raise ValueError(
            'image count {} does not match label count {}'.format(
                mnist_matrix.shape[0], mnist_label.shape[0]))

    # Labels come from the decoded label array, not from the path string.
    buff = [(image, int(label)) for image, label in zip(mnist_matrix, mnist_label)]

    def batch_reader():
        # Reshuffle on every pass so each epoch sees a different order.
        random.shuffle(buff)
        batch = []
        for sample in buff:
            batch.append(sample)
            if len(batch) == batch_size:
                yield batch
                batch = []
        # Emit the incomplete tail unless the caller asked to drop it.
        if batch and not drop_list:
            yield batch

    return batch_reader
from PIL import Image
import matplotlib.pyplot as plt
if __name__ == '__main__':
    # Round-trip a small generated stack through the IDX writer and reader.
    demo_stack = create_matrix(2, 3, 4)
    print(type(demo_stack), demo_stack.shape, '\n', demo_stack)
    demo_path = 'D:/IDLE/code/deeplearning/deeplearningFoundation/test/matrix.idx'
    write_matrix(demo_stack, demo_path)
    reloaded_stack = read_matrix(demo_path)

    # Load the MNIST test images and labels from their IDX files.
    test_images = read_matrix('D:/IDLE/code/mnist/t10k-images-idx3-ubyte')
    test_labels = read_matrix('D:/IDLE/code/mnist/t10k-labels-idx1-ubyte')
    print(type(test_images), test_images.shape)

    # Enlarge the first image to 200 x 200 pixels.
    enlarged = cv2.resize(test_images[0], (200, 200))
    # To preview in an OpenCV window instead:
    #     cv2.imshow('winname', enlarged)

    # Show the sample through matplotlib using a grayscale colour map.
    picture = Image.fromarray(enlarged)
    plt.imshow(picture, 'gray')
    plt.show()

    # JPEG is a lossy compressed format.
    picture.save('D:/IDLE/code/image/{}.jpg'.format('mnist_sample'))
    # BMP stores the pixels as they are.
    picture.save('D:/IDLE/code/image/{}.bmp'.format('mnist_sample'))

    # Wait for a key press (0 means no timeout); this is only meaningful
    # when the OpenCV preview window above is enabled.
    cv2.waitKey(0)

    # Sketch of how the batch reader is meant to be used:
    #     data_read = dataReader('img.idx', 'label.idx')
    #     for i, data in enumerate(data_read()):
    #         model.train(data)
# MNIST dataset reading and writing (including reading and writing NumPy matrices)
# First published 2022-09-11 14:26:11