# -*- coding: utf-8 -*-
"""
Created on Sat Jan 5 11:16:48 2019
@author: Administrator
"""
import numpy as np
# Import the MNIST dataset, a widely used handwritten-digit database
from keras.datasets import mnist
# Import the Sequential model
from keras.models import Sequential
# Import the fully connected Dense layer, the Activation layer, and the Dropout layer
from keras.layers.core import Dense, Dropout, Activation
# Import the RMSprop optimizer
from keras.optimizers import RMSprop
# Import np_utils, mainly for to_categorical, which one-hot encodes class vectors
from keras.utils import np_utils
from keras.optimizers import SGD
# Load the data
(x_train, y_train), (x_test, y_test) = mnist.load_data()
# Print the shapes of the data and labels
# (60000, 28, 28)
print('x_shape:', x_train.shape)
# (60000,)
print('y_shape:', y_train.shape)
# Reshape (60000, 28, 28) -> (60000, 784) and normalize pixel values to [0, 1]
x_train = x_train.reshape(x_train.shape[0], -1) / 255.0
x_test = x_test.reshape(x_test.shape[0], -1) / 255.0
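# Note (optional refinement, not in the original run): the raw pixels are
# uint8, so dividing by 255.0 promotes the arrays to float64. Casting to
# float32 first halves memory use and matches Keras' default float type:
# x_train = x_train.reshape(x_train.shape[0], -1).astype('float32') / 255.0
# x_test = x_test.reshape(x_test.shape[0], -1).astype('float32') / 255.0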
# Convert the labels to one-hot format
y_train = np_utils.to_categorical(y_train, num_classes=10)
y_test = np_utils.to_categorical(y_test, num_classes=10)
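# For example, to_categorical turns an integer label such as 3 into the
# one-hot vector [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]. An optional sanity check:
# print(y_train[0])  # one row of the (60000, 10) one-hot label matrix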
'''
# Alternative: a single-layer model with 784 inputs and 10 output neurons
model = Sequential([
    Dense(units=10, input_dim=784, bias_initializer='one', activation='softmax')
])
# Define the optimizer and loss function, and track accuracy during training
sgd = SGD(lr=0.2)
model.compile(optimizer=sgd, loss='mse', metrics=['accuracy'])
#model.compile(optimizer='adam', loss='mse', metrics=['accuracy'])
# Train the model
model.fit(x_train, y_train, batch_size=32, epochs=10)
# Evaluate the model
loss, accuracy = model.evaluate(x_test, y_test)
print('\ntest loss: ', loss)
print('\naccuracy: ', accuracy)
'''
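# Note on the commented-out baseline above: it trains a single softmax layer
# with an 'mse' loss, which works but converges slowly; for softmax outputs,
# 'categorical_crossentropy' (used below) is the standard choice.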
# Build the Sequential model
model = Sequential()
'''
The model needs to know the shape of its input data, so the first layer of a
Sequential model must be given an argument describing that shape. The later
layers can infer the shapes of their intermediate data automatically, so the
argument does not need to be specified for every layer.
'''
# The input layer has 784 neurons
# The first hidden layer has 512 neurons with ReLU activation and a dropout rate of 0.2
model.add(Dense(512, input_shape=(784,)))
model.add(Activation('relu'))
model.add(Dropout(0.2))
# The second hidden layer has 512 neurons with ReLU activation and a dropout rate of 0.2
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.2))
# The output layer has 10 neurons with softmax activation, giving the class probabilities
model.add(Dense(10))
model.add(Activation('softmax'))
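# Equivalently (a stylistic alternative, not what the run below used), each
# activation can be passed directly to its Dense layer:
# model = Sequential([
#     Dense(512, activation='relu', input_shape=(784,)),
#     Dropout(0.2),
#     Dense(512, activation='relu'),
#     Dropout(0.2),
#     Dense(10, activation='softmax'),
# ])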
# Print a summary of the model
# Total parameter count: 784*512+512 + 512*512+512 + 512*10+10 = 669,706
model.summary()
'''
Configure the learning process.
compile takes three arguments:
1. optimizer: either the name of a predefined optimizer, such as 'rmsprop' or
   'adagrad', or an Optimizer instance, such as RMSprop() here.
2. loss: the objective function the model tries to minimize; either a
   predefined loss such as 'categorical_crossentropy' or 'mse', or a custom
   loss function.
3. metrics: for classification problems this is usually set to
   metrics=['accuracy'].
'''
model.compile(loss='categorical_crossentropy',
              optimizer=RMSprop(),
              metrics=['accuracy'])
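# RMSprop() is used with its defaults; in Keras 2 that means a learning rate
# of 0.001, which could be set explicitly, e.g. RMSprop(lr=0.001).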
'''
Train the model.
batch_size: the number of samples in each gradient-descent batch.
epochs: the number of training epochs (formerly nb_epoch, where nb stands for
    "number of"; the old name is deprecated in Keras 2).
verbose: logging mode; 0 is silent, 1 prints a progress bar, and 2 prints one
    line per epoch.
validation_data: the validation set.
fit returns a History object whose History.history attribute records how the
loss and other metrics change per epoch, including the validation metrics
when a validation set is provided.
'''
# Batch size
batch_size = 128
# Number of classes (kept for reference; to_categorical above uses the literal 10)
nb_classes = 10
# Number of training epochs (nb_epoch was renamed to epochs in Keras 2)
epochs = 20
history = model.fit(x_train, y_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    verbose=1,
                    validation_data=(x_test, y_test))
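# The returned History object records the per-epoch metrics; with this Keras 2
# setup the keys are 'loss', 'acc', 'val_loss', and 'val_acc' (matching the
# log below). An optional check:
# print(history.history['val_acc'])  # validation accuracy for each epoch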
# Compute the model's loss and metrics on the test data, batch by batch
score = model.evaluate(x_test, y_test, verbose=0)
# Report the trained model's performance on the test set
print('Test score:', score[0])
print('Test accuracy:', score[1])
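# An optional follow-up (not part of the original run): the argmax of the
# softmax outputs recovers the predicted digit for individual samples.
# preds = model.predict(x_test[:5])
# print('predicted:', np.argmax(preds, axis=1))
# print('actual:   ', np.argmax(y_test[:5], axis=1))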
Results:
runfile('E:/机器学习/3-MNIST数据集分类.py', wdir='E:/机器学习')
x_shape: (60000, 28, 28)
y_shape: (60000,)
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
dense_7 (Dense) (None, 512) 401920
_________________________________________________________________
activation_4 (Activation) (None, 512) 0
_________________________________________________________________
dropout_3 (Dropout) (None, 512) 0
_________________________________________________________________
dense_8 (Dense) (None, 512) 262656
_________________________________________________________________
activation_5 (Activation) (None, 512) 0
_________________________________________________________________
dropout_4 (Dropout) (None, 512) 0
_________________________________________________________________
dense_9 (Dense) (None, 10) 5130
_________________________________________________________________
activation_6 (Activation) (None, 10) 0
=================================================================
Total params: 669,706
Trainable params: 669,706
Non-trainable params: 0
_________________________________________________________________
Train on 60000 samples, validate on 10000 samples
Epoch 1/20
60000/60000 [==============================] - 2s 41us/step - loss: 0.2442 - acc: 0.9242 - val_loss: 0.1143 - val_acc: 0.9660
Epoch 2/20
60000/60000 [==============================] - 2s 32us/step - loss: 0.1030 - acc: 0.9683 - val_loss: 0.0899 - val_acc: 0.9742
Epoch 3/20
60000/60000 [==============================] - 2s 32us/step - loss: 0.0753 - acc: 0.9771 - val_loss: 0.0860 - val_acc: 0.9756
Epoch 4/20
60000/60000 [==============================] - 2s 32us/step - loss: 0.0598 - acc: 0.9815 - val_loss: 0.0786 - val_acc: 0.9797
Epoch 5/20
60000/60000 [==============================] - 2s 32us/step - loss: 0.0497 - acc: 0.9857 - val_loss: 0.0804 - val_acc: 0.9782
Epoch 6/20
60000/60000 [==============================] - 2s 32us/step - loss: 0.0448 - acc: 0.9863 - val_loss: 0.0815 - val_acc: 0.9802
Epoch 7/20
60000/60000 [==============================] - 2s 32us/step - loss: 0.0373 - acc: 0.9892 - val_loss: 0.0890 - val_acc: 0.9785
Epoch 8/20
60000/60000 [==============================] - 2s 32us/step - loss: 0.0327 - acc: 0.9899 - val_loss: 0.0931 - val_acc: 0.9792
Epoch 9/20
60000/60000 [==============================] - 2s 32us/step - loss: 0.0324 - acc: 0.9906 - val_loss: 0.0745 - val_acc: 0.9839
Epoch 10/20
60000/60000 [==============================] - 2s 32us/step - loss: 0.0288 - acc: 0.9915 - val_loss: 0.0859 - val_acc: 0.9823
Epoch 11/20
60000/60000 [==============================] - 2s 32us/step - loss: 0.0260 - acc: 0.9924 - val_loss: 0.0927 - val_acc: 0.9811
Epoch 12/20
60000/60000 [==============================] - 2s 32us/step - loss: 0.0249 - acc: 0.9929 - val_loss: 0.0872 - val_acc: 0.9843
Epoch 13/20
60000/60000 [==============================] - 2s 32us/step - loss: 0.0247 - acc: 0.9934 - val_loss: 0.0950 - val_acc: 0.9841
Epoch 14/20
60000/60000 [==============================] - 2s 32us/step - loss: 0.0244 - acc: 0.9933 - val_loss: 0.1024 - val_acc: 0.9815
Epoch 15/20
60000/60000 [==============================] - 2s 32us/step - loss: 0.0217 - acc: 0.9940 - val_loss: 0.1075 - val_acc: 0.9814
Epoch 16/20
60000/60000 [==============================] - 2s 32us/step - loss: 0.0216 - acc: 0.9939 - val_loss: 0.1196 - val_acc: 0.9808
Epoch 17/20
60000/60000 [==============================] - 2s 32us/step - loss: 0.0227 - acc: 0.9943 - val_loss: 0.1093 - val_acc: 0.9839
Epoch 18/20
60000/60000 [==============================] - 2s 32us/step - loss: 0.0204 - acc: 0.9949 - val_loss: 0.1049 - val_acc: 0.9844
Epoch 19/20
60000/60000 [==============================] - 2s 32us/step - loss: 0.0184 - acc: 0.9957 - val_loss: 0.1046 - val_acc: 0.9852
Epoch 20/20
60000/60000 [==============================] - 2s 32us/step - loss: 0.0203 - acc: 0.9949 - val_loss: 0.1064 - val_acc: 0.9833
Test score: 0.10640147667875303
Test accuracy: 0.9833