# 首先导入对应的 mnist 模块;如果报错,可以手动添加一个 input_data,然后把数据集下载好,放在名为 data 的文件夹下。
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
#from tensorflow.examples.tutorials.mnist import input_data
import input_data
# Load MNIST through the input_data helper and report what was loaded.
print("Download and Extract MNIST dataset")

# Read the dataset from the 'data' directory, requesting one-hot labels.
mnist = input_data.read_data_sets('data', one_hot=True)

# Report the type of the returned container.
# NOTE: the misspellings in the messages below are kept on purpose so the
# printed output stays exactly as before.
print(" tpye of 'mnist' is %s" % (type(mnist),))

# Report how many examples the training split and the test split hold.
print(" number of trian data is %d" % (mnist.train.num_examples,))
print(" number of test data is %d" % (mnist.test.num_examples,))

# Sample output from one run:
#   Download and Extract MNIST dataset
#   Extracting data/train-images-idx3-ubyte.gz
#   Extracting data/train-labels-idx1-ubyte.gz
#   Extracting data/t10k-images-idx3-ubyte.gz
#   Extracting data/t10k-labels-idx1-ubyte.gz
#    tpye of 'mnist' is <class 'tensorflow.contrib.learn.python.learn.datasets.base.Datasets'>
#    number of trian data is 55000
#    number of test data is 10000
# MNIST 的数据是什么样的?
# Pull the image and label arrays out of both splits and describe them.
print("What does the data of MNIST look like?")
trainimg, trainlabel = mnist.train.images, mnist.train.labels
testimg, testlabel = mnist.test.images, mnist.test.labels

# First report the Python type of every array ...
for _type_msg, _array in (
    (" type of 'trainimg' is %s", trainimg),
    (" type of 'trainlabel' is %s", trainlabel),
    (" type of 'testimg' is %s", testimg),
    (" type of 'testlabel' is %s", testlabel),
):
    print(_type_msg % (type(_array),))

# ... then its shape. The shape is wrapped in a 1-tuple so that "%s"
# formats the whole shape tuple as a single value.
for _shape_msg, _array in (
    (" shape of 'trainimg' is %s", trainimg),
    (" shape of 'trainlabel' is %s", trainlabel),
    (" shape of 'testimg' is %s", testimg),
    (" shape of 'testlabel' is %s", testlabel),
):
    print(_shape_msg % (_array.shape,))

# Sample output from one run:
#   What does the data of MNIST look like?
#    type of 'trainimg' is <class 'numpy.ndarray'>
#    type of 'trainlabel' is <class 'numpy.ndarray'>
#    type of 'testimg' is <class 'numpy.ndarray'>
#    type of 'testlabel' is <class 'numpy.ndarray'>
#    shape of 'trainimg' is (55000, 784)
#    shape of 'trainlabel' is (55000, 10)
#    shape of 'testimg' is (10000, 784)
#    shape of 'testlabel' is (10000, 10)
# 训练数据长什么样?
# How does the training data look like? Display a few randomly chosen
# training images together with their labels.
nsample = 5
# Draw nsample random row indices from [0, trainimg.shape[0]). The indices
# are drawn independently, so the same image may be picked more than once.
randidx = np.random.randint(trainimg.shape[0], size=nsample)
for i in randidx:
    # Reshape the flat pixel row into a 28 by 28 matrix for plotting.
    curr_img = np.reshape(trainimg[i, :], (28, 28))
    # Position of the largest entry in the label row, used as the label.
    curr_label = np.argmax(trainlabel[i, :])
    # Build the caption once; it serves as the figure title and is also
    # echoed to the console.
    caption = str(i) + "th Training Data " + "Label is " + str(curr_label)
    # Render the matrix as a grayscale image in a new figure.
    plt.matshow(curr_img, cmap=plt.get_cmap('gray'))
    plt.title(caption)
    print(caption)
    # NOTE(review): show() is called once per image so each figure is
    # displayed as soon as it is drawn; move it after the loop to display
    # all figures together instead - confirm which is intended.
    plt.show()
# Sample output (one such line is printed per displayed image):
#   23524th Training Data Label is 1
# 批量学习的数据
# Fetch one mini-batch from the training split and describe it.
print("Batch Learning? ")
batch_size = 100
batch_xs, batch_ys = mnist.train.next_batch(batch_size)

# Report the Python type of both halves of the batch ...
for _type_msg, _batch in (
    ("type of 'batch_xs' is %s", batch_xs),
    ("type of 'batch_ys' is %s", batch_ys),
):
    print(_type_msg % (type(_batch),))

# ... then their shapes. The shape is wrapped in a 1-tuple so that "%s"
# formats the whole shape tuple as a single value.
for _shape_msg, _batch in (
    ("shape of 'batch_xs' is %s", batch_xs),
    ("shape of 'batch_ys' is %s", batch_ys),
):
    print(_shape_msg % (_batch.shape,))

# Sample output from one run:
#   Batch Learning?
#   type of 'batch_xs' is <class 'numpy.ndarray'>
#   type of 'batch_ys' is <class 'numpy.ndarray'>
#   shape of 'batch_xs' is (100, 784)
#   shape of 'batch_ys' is (100, 10)