利用numpy读取mnist数据集

最新推荐文章于 2024-06-02 22:06:53 发布

yuanCruise

最新推荐文章于 2024-06-02 22:06:53 发布

阅读量4.7k

点赞数 1

分类专栏： Python 文章标签： Python mnist numpy

本文链接：https://blog.csdn.net/qiu931110/article/details/80113147

版权

Python 专栏收录该内容

23 篇文章 10 订阅

订阅专栏

读取并分析如下四个文件

‘train-images-idx3-ubyte’

‘train-labels-idx1-ubyte’

‘t10k-images-idx3-ubyte’

‘t10k-labels-idx1-ubyte’

# -*- coding: utf-8 -*-
import numpy as np
import os
class Mnist(object):
    """Load the four MNIST IDX files with NumPy into one shuffled dataset.

    The training and test splits are concatenated, shuffled with a fixed
    seed, scaled to ``[0, 1]`` and the labels are one-hot encoded.

    Attributes:
        dataname: Human-readable dataset name.
        dims: Number of pixels per image (28 * 28).
        shape: Per-image shape ``[height, width, channels]``.
        image_size: Image side length in pixels.
        data_dir: Directory that holds the four ``*-ubyte`` files.
        data: ``float64`` array of shape ``(N, rows, cols, 1)`` in ``[0, 1]``.
        data_y: ``float64`` one-hot array of shape ``(N, 10)``.
    """

    # IDX magic numbers. The header bytes ``[0 0 8 3]`` / ``[0 0 8 1]`` read
    # as big-endian 32-bit integers give 2051 (images) and 2049 (labels).
    _IMAGE_MAGIC = 2051
    _LABEL_MAGIC = 2049
    _NUM_CLASSES = 10
    _SHUFFLE_SEED = 666

    def __init__(self, data_dir=None):
        """Set the dataset metadata and load the data eagerly.

        Args:
            data_dir: Directory containing the MNIST files. Defaults to
                ``./data/mnist`` when omitted.
        """
        self.dataname = "Mnist"
        self.dims = 28 * 28
        self.shape = [28, 28, 1]
        self.image_size = 28
        if data_dir is None:
            data_dir = os.path.join("./data", "mnist")
        self.data_dir = data_dir
        self.data, self.data_y = self.load_mnist()

    @staticmethod
    def _read_idx(path, expected_magic, ndim, verbose=False):
        """Read one IDX file and return its payload shaped by the header.

        The header is ``1 + ndim`` big-endian 32-bit integers: the magic
        number followed by the size of each dimension. For the image files
        that is ``[magic, count, rows, cols]`` (16 bytes); for the label
        files it is ``[magic, count]`` (8 bytes).

        Args:
            path: Path of the IDX file.
            expected_magic: Magic number the file must start with.
            ndim: Number of dimension fields that follow the magic number.
            verbose: When true, print the raw header bytes.

        Returns:
            A ``uint8`` array whose shape is taken from the header.

        Raises:
            ValueError: If the file is shorter than its header, the magic
                number is wrong, or the payload size disagrees with the
                dimensions announced in the header.
        """
        header_size = 4 * (1 + ndim)
        # Passing the path lets NumPy open the file in binary mode and close
        # it again; the result is a flat one-dimensional uint8 vector.
        raw = np.fromfile(path, dtype=np.uint8)
        if raw.size < header_size:
            raise ValueError(
                "%s is too short to hold a %d-byte IDX header"
                % (path, header_size)
            )

        header = raw[:header_size]
        if verbose:
            print(header)

        fields = np.frombuffer(header.tobytes(), dtype=">u4")
        magic = int(fields[0])
        dims = tuple(int(size) for size in fields[1:])
        if magic != expected_magic:
            raise ValueError(
                "%s has magic number %d, expected %d"
                % (path, magic, expected_magic)
            )

        payload = raw[header_size:]
        expected_size = int(np.prod(dims))
        # The header doubles as an integrity check: a truncated or partial
        # download shows up as a payload that does not match the counts.
        if payload.size != expected_size:
            raise ValueError(
                "%s holds %d payload bytes, header announces %d"
                % (path, payload.size, expected_size)
            )
        return payload.reshape(dims)

    def load_mnist(self):
        """Read, merge, shuffle and normalise the MNIST train and test sets.

        Reads ``train-images-idx3-ubyte``, ``train-labels-idx1-ubyte``,
        ``t10k-images-idx3-ubyte`` and ``t10k-labels-idx1-ubyte`` from
        ``self.data_dir``. The headers of the two training files are printed.

        Returns:
            A tuple ``(images, labels)``: ``images`` is a ``float64`` array of
            shape ``(N, rows, cols, 1)`` scaled to ``[0, 1]`` and ``labels``
            is a ``float64`` one-hot array of shape ``(N, 10)``.

        Raises:
            ValueError: If a file is malformed or an image file and its
                label file disagree on the number of samples.
        """
        data_dir = self.data_dir

        train_images = self._read_idx(
            os.path.join(data_dir, 'train-images-idx3-ubyte'),
            self._IMAGE_MAGIC, 3, verbose=True)
        train_labels = self._read_idx(
            os.path.join(data_dir, 'train-labels-idx1-ubyte'),
            self._LABEL_MAGIC, 1, verbose=True)
        test_images = self._read_idx(
            os.path.join(data_dir, 't10k-images-idx3-ubyte'),
            self._IMAGE_MAGIC, 3)
        test_labels = self._read_idx(
            os.path.join(data_dir, 't10k-labels-idx1-ubyte'),
            self._LABEL_MAGIC, 1)

        for split, images, labels in (
                ("train", train_images, train_labels),
                ("t10k", test_images, test_labels)):
            if len(images) != len(labels):
                raise ValueError(
                    "%s split has %d images but %d labels"
                    % (split, len(images), len(labels))
                )

        images = np.concatenate((train_images, test_images), axis=0)
        labels = np.concatenate((train_labels, test_labels), axis=0)

        # Shuffle the dataset. One permutation from a fixed seed is applied
        # to both arrays, so images and labels stay aligned and the order is
        # reproducible. A private RandomState leaves the global RNG alone.
        order = np.random.RandomState(self._SHUFFLE_SEED).permutation(
            len(labels))
        # Shuffling while the data is still uint8 avoids copying the much
        # larger float64 array.
        images = images[order]
        labels = labels[order]

        # Convert the labels to one-hot encoding.
        y_vec = np.zeros((len(labels), self._NUM_CLASSES), dtype=np.float64)
        y_vec[np.arange(len(labels)), labels] = 1.0

        # Add the trailing channel axis and scale the pixels to [0, 1].
        return images[..., np.newaxis].astype(np.float64) / 255., y_vec
if __name__ == "__main__":
    # Script entry point: load the dataset once, then expose the image
    # array as ``x`` and the one-hot label array as ``y``.
    mn_object = Mnist()
    x, y = mn_object.data, mn_object.data_y

yuanCruise

关注

1
点赞
踩
7

收藏

觉得还不错? 一键收藏
打赏
0
评论
利用numpy读取mnist数据集

读取并分析如下四个文件‘train-images-idx3-ubyte’‘train-labels-idx1-ubyte’‘t10k-images-idx3-ubyte’‘t10k-labels-idx1-ubyte’#_*_coding:utf-8_*_import numpy as npimport osclass Mnist(object): de...
复制链接

扫一扫