# Keras MNIST手写数字识别数据集

《TensorFlow+keras深度学习人工智能实践应用》读书笔记2

1.创建Keras程序，下载并读取MNIST数据

#导入keras相关模块
import numpy as np
import pandas as pd
from keras.utils import np_utils
np.random.seed(10)

#导入MNIST模块
from keras.datasets import mnist

# Load the MNIST training and test splits (images and labels).
# The original statement was cut off after the first tuple, leaving
# X_test_image and y_test_label (both used later) undefined.
(X_train_image, y_train_label), (X_test_image, y_test_label) = mnist.load_data()

# Print the number of samples in the training and test image sets
print('train data = ',len(X_train_image))
print(' test data = ',len(X_test_image))

2.训练数据由images和labels组成，images是数字图片，labels是对应的数字。

3.查看训练数据中的images和label

#定义plot_image查看数字图像
import matplotlib.pyplot as plt
def plot_image(image):
    """Display a single image in a 2x2-inch figure with the 'binary' colormap.

    Args:
        image: Image data accepted by ``plt.imshow`` (the notes call this
            with one MNIST digit, ``X_train_image[0]``).
    """
    fig = plt.gcf()
    fig.set_size_inches(2, 2)
    plt.imshow(image, cmap='binary')
    plt.show()

# Call plot_image to display the training image at index 0
plot_image(X_train_image[0])

# Show the label at index 0 (a bare expression: its value is only echoed in an interactive session / notebook)
y_train_label[0]

4.查看多项训练数据images和label

#创建plot_images_labels_prediction()函数
import matplotlib.pyplot as plt
def plot_images_labels_prediction(images, labels, prediction, idx, num=10):
    """Plot consecutive images in a 5x5 grid, titled with label and prediction.

    Args:
        images: Indexable collection of images accepted by ``imshow``.
        labels: Indexable collection of labels aligned with ``images``.
        prediction: Indexable collection of predicted values aligned with
            ``images``; pass an empty sequence to leave predictions out of
            the titles.
        idx: Index of the first item to draw.
        num: How many items to draw, starting at ``idx``. Values above 25
            are capped at 25 because the grid has only 5 x 5 cells.
    """
    fig = plt.gcf()
    # Set the figure size
    fig.set_size_inches(12, 14)
    # Cap the item count at 25 so subplot positions stay inside the 5x5 grid
    num = min(num, 25)
    # Use len() instead of truthiness so array-like predictions also work
    has_prediction = len(prediction) > 0
    # Draw `num` images, one per grid cell
    for i in range(num):
        ax = plt.subplot(5, 5, i + 1)  # 5 rows x 5 columns, cells numbered from 1
        ax.imshow(images[idx], cmap='binary')
        title = "label=" + str(labels[idx])
        if has_prediction:
            title += ",predit = " + str(prediction[idx])

        ax.set_title(title, fontsize=10)
        ax.set_xticks([])  # hide axis ticks
        ax.set_yticks([])
        idx += 1
    # Original called the undefined name `pit`; the pyplot alias is `plt`
    plt.show()

# Draw 10 training images starting at index 0; the empty list means no predictions are added to the titles
plot_images_labels_prediction(X_train_image,y_train_label,[],0,10)

5.features图像预处理

1.将28x28的数字图像以reshape转换为一维向量，长度为784，并且转换为float32

2.image的数字标准化

# Print the shapes of the training image array and the training label array
print('x_train_image: ',X_train_image.shape)
print('y_train_label: ',y_train_label.shape)

x_train_image: (60000, 28, 28) y_train_label: (60000,)

# Flatten every image into a 1-D float32 vector. The sample count comes from
# the array itself and -1 lets NumPy infer the vector length, so nothing is
# hard-coded (for 28x28 MNIST images this yields 784 values per image).
x_Train = X_train_image.reshape(X_train_image.shape[0], -1).astype('float32')
x_Test = X_test_image.reshape(X_test_image.shape[0], -1).astype('float32')

# Print the shapes of the flattened arrays (each image is now 784 float values)
print('x_train: ',x_Train.shape)
print('x_test: ',x_Test.shape)
x_train:  (60000, 784)
x_test:  (10000, 784)
# Inspect the raw (unflattened) content of training image 0
X_train_image[0]

array([[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, .....

6.label数据预处理

# Look at the first 5 training labels
y_train_label[:5]

# Convert the labels to one-hot encoded vectors
# NOTE(review): np_utils may be unavailable in recent Keras releases; keras.utils.to_categorical looks like the replacement — confirm against the installed version
y_TrainOneHot = np_utils.to_categorical(y_train_label)
y_TestOneHot = np_utils.to_categorical(y_test_label)

# Look at the first 5 one-hot encoded training labels
y_TrainOneHot[:5]

array([5,0,4,1,9], dtype=uint8)

array([

[0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],

[1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],

[0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],

[0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],

[0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]], dtype=float32)