1、加载数据
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import SGD,Adam
%matplotlib inline
# Load the Fashion-MNIST train/test splits through the Keras dataset helper.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.fashion_mnist.load_data()

# Report the shape of every split (features and labels).
for caption, array in (
    ("训练集的特征", x_train),
    ("训练集的标签", y_train),
    ("测试集的特征", x_test),
    ("测试集的标签", y_test),
):
    print(caption, array.shape)
# Recorded output:
# 训练集的特征 (60000, 28, 28)
# 训练集的标签 (60000,)
# 测试集的特征 (10000, 28, 28)
# 测试集的标签 (10000,)
2、数据预处理
# Features: rescale pixel values by 1/255 so they lie in [0, 1].
x_train = x_train / 255.0
x_test = x_test / 255.0
# Print floats with one decimal place and without scientific notation.
np.set_printoptions(precision=1, suppress=True)
# print('x_train[318] = \n', x_train[318])

# Labels: convert the integer class ids to one-hot vectors.
y_train, y_test = (
    tf.keras.utils.to_categorical(labels) for labels in (y_train, y_test)
)
# Show one encoded label and the class id recovered from it.
print('y_train[318] = ', y_train[318])
print(y_train[318].argmax())
3、搭建模型
# Baseline classifier: flatten each 28x28 image, pass it through two 40-unit
# dense layers, and finish with a 10-way softmax output.
model = tf.keras.models.Sequential([
    # Flatten turns the 2-D image into a 1-D vector (28 * 28 = 784 values);
    # it is the usual bridge into fully connected layers.
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    # First hidden layer: 40 units with ReLU.
    tf.keras.layers.Dense(40, activation='relu'),
    # Second hidden layer: 40 units.
    # NOTE(review): softmax is an unusual choice for a hidden layer; it is
    # kept here because this model is the baseline of the experiment.
    tf.keras.layers.Dense(40, activation='softmax'),
    # Output layer: 10 units, one per class, so this size must not change.
    tf.keras.layers.Dense(10, activation='softmax'),
])
# summary() prints the table itself and returns None, so it is called
# directly; wrapping it in print() would append a stray "None" line.
model.summary()
# Recorded output:
# Model: "sequential"
# _________________________________________________________________
# Layer (type) Output Shape Param #
# =================================================================
# flatten (Flatten) (None, 784) 0
# _________________________________________________________________
# dense (Dense) (None, 40) 31400
# _________________________________________________________________
# dense_1 (Dense) (None, 40) 1640
# _________________________________________________________________
# dense_2 (Dense) (None, 10) 410
# =================================================================
# Total params: 33,450
# Trainable params: 33,450
# Non-trainable params: 0
# _________________________________________________________________
参数计算:
- flatten层是将图像拉平,没有参数,参数量:0
- 全连接第一层:784 × 40 + 40 = 31400
- 全连接第二层:40× 40 + 40 = 1640
- 全连接第三层:40× 10 + 10 = 410
- 总参数(每层参数之和):0 + 31400 + 1640 + 410 = 33450
4、模型训练
# Configure training: SGD with learning rate 0.013, mean-squared-error loss on
# the one-hot labels, and accuracy as the reported metric.
# The keyword must be spelled `optimizer`. A misspelled keyword is not applied:
# newer Keras versions reject it as an unknown argument, and older ones may
# silently fall back to the default optimizer.
model.compile(optimizer=SGD(0.013), loss=tf.keras.losses.mse, metrics=['acc'])
# Train for 10 passes over the training set.
model.fit(x_train, y_train, epochs=10)
5、模型评估
# Baseline model: evaluate silently on the test split; the result is
# [loss, accuracy].
model.evaluate(x=x_test, y=y_test, verbose=0)
# Recorded output: [0.021985134263336658, 0.8566]
第一次构建出来的模型准确率约为85%。想要模型变得更好,可以修改模型的结构:更改激活函数为‘tanh’,修改优化器为Adam,学习率改为0.02,将损失函数改为binary_crossentropy,训练20轮。
# Revised classifier: a wider first hidden layer (50 units, ReLU), a 30-unit
# tanh hidden layer, and the same 10-way softmax output.
model_new = tf.keras.models.Sequential([
    # Flatten each 28x28 image into a 784-element vector.
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(50, activation='relu'),
    tf.keras.layers.Dense(30, activation='tanh'),
    # One output unit per class.
    tf.keras.layers.Dense(10, activation='softmax'),
])
# summary() prints the table itself and returns None, so it is called
# directly; wrapping it in print() would append a stray "None" line.
model_new.summary()
# Recorded output:
# Model: "sequential_2"
# _________________________________________________________________
# Layer (type) Output Shape Param #
# =================================================================
# flatten_2 (Flatten) (None, 784) 0
# _________________________________________________________________
# dense_6 (Dense) (None, 50) 39250
# _________________________________________________________________
# dense_7 (Dense) (None, 30) 1530
# _________________________________________________________________
# dense_8 (Dense) (None, 10) 310
# =================================================================
# Total params: 41,090
# Trainable params: 41,090
# Non-trainable params: 0
# _________________________________________________________________
参数计算:
- flatten层是将图像拉平,没有参数,参数量:0
- 全连接第一层:784 × 50 + 50 = 39250
- 全连接第二层:50× 30 + 30 = 1530
- 全连接第三层:30× 10 + 10 = 310
- 总参数(每层参数之和):0 + 39250 + 1530+ 310 = 41090
# Configure training: Adam with learning rate 0.02 and binary cross-entropy.
# The keyword must be spelled `optimizer`. A misspelled keyword is not applied:
# newer Keras versions reject it as an unknown argument, and older ones may
# silently fall back to the default optimizer.
model_new.compile(
    optimizer=Adam(0.02),
    loss=tf.keras.losses.binary_crossentropy,
    # Ask for categorical accuracy explicitly (still reported as 'acc').
    # With a binary cross-entropy loss the bare string 'acc' can, depending on
    # the Keras version, resolve to element-wise binary accuracy, which
    # overstates the score on one-hot 10-class targets.
    metrics=[tf.keras.metrics.CategoricalAccuracy(name='acc')],
)
# Train for 20 passes over the training set.
model_new.fit(x_train, y_train, epochs=20)
# A well-chosen optimizer makes training go better; Adam adapts its step
# sizes, so the learning rate can also be left at its default.
# Evaluate the revised model; the result is [loss, accuracy].
model_new.evaluate(x_test, y_test, verbose=False)
# Recorded output of the original run (made with the misspelled `optimize=`
# keyword and the bare 'acc' metric, so a fresh run will differ):
# [0.06750724445283413, 0.9751694]
- 模型,简单的系统就弄一个小一点的模型,如果样本很均衡(一眼看得出特征),添加深度和广度,3-10层之间
- Adam的效果一般要好,所以直接上Adam
- 损失函数不变,分类的话一般用交叉熵categorical_crossentropy
6、模型预测
# Predict the first 10 test images and compare with the true classes.
np.set_printoptions(precision=1, suppress=True)
pre = model_new.predict(x_test[:10])
print("模型预测的前10:", pre.argmax(axis=1))
print("前10的正确答案:", y_test[:10].argmax(axis=1))

# Predict 5 test images drawn at random from indices 0..999.
pat = np.random.randint(low=0, high=1000, size=5)
# Bare expression: shows the sampled indices only when a notebook displays it.
list(pat)
pre_new = model_new.predict(x_test[pat])
print("随机预测的5个:", pre_new.argmax(axis=1))
print("这5个的正确答案:", y_test[pat].argmax(axis=1))
# Recorded output:
# 模型预测的前10: [9 2 1 1 0 1 4 6 5 7]
# 前10的正确答案: [9 2 1 1 6 1 4 6 5 7]
# 随机预测的5个: [0 7 3 7 9]
# 这5个的正确答案:: [0 7 3 7 9]
在随机预测的样本中有两个预测错误,查看一下这两个样本的信息
# Display the randomly sampled indices (shown by the notebook).
# NOTE(review): the recorded list below comes from a later run and does not
# contain 894 or 844, the two misclassified samples discussed here.
list(pat)
# Recorded output: [142, 789, 713, 184, 375]

# Indices of the two misclassified test samples under inspection. A single
# list drives both the plots and the label lookup, so the printed labels
# always belong to the images shown (indexing with pat[:2] would report
# whatever samples happened to be drawn at random).
wrong_idx = [894, 844]

# Draw both images side by side in the top row of a 2x2 grid.
fig = plt.figure()
ax1 = fig.add_subplot(2, 2, 1)
ax1.matshow(x_test[wrong_idx[0]])
ax2 = fig.add_subplot(2, 2, 2)
ax2.matshow(x_test[wrong_idx[1]])

# Predicted versus true class ids for the two samples.
print("894和844的预测标签", np.argmax(model_new.predict(x_test[wrong_idx]), axis=1))
print("894和844的正确标签", np.argmax(y_test[wrong_idx], axis=1))
# Recorded output of the original run, which indexed with pat[:2] instead of
# samples 894 and 844:
# 894和844的预测标签 [0 7]
# 894和844的正确标签 [0 7]
894和844的预测标签为3和4,正确的标签为0和3:模型预测894号样本为连衣裙,但其实它是一件T恤;模型预测844号样本为外套,但其实它是一件连衣裙。