参考:https://tensorflow.google.cn/tutorials/images/transfer_learning
测试理解数据
- 导入相关包
import os
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
- 使用TensorFlow Datasets下载数据
import tensorflow_datasets as tfds
tfds.disable_progress_bar()

# tfds.load downloads and caches the data and returns tf.data.Dataset objects.
# "cats_vs_dogs" does not define standard splits, so the subsplit syntax is
# used to carve it into 80% / 10% / 10% for train / validation / test.
# Each resulting dataset yields (image, label) pairs: images have variable
# shape with 3 channels, and the label is a scalar.
splits, metadata = tfds.load(
    'cats_vs_dogs',
    split=['train[:80%]', 'train[80%:90%]', 'train[90%:]'],
    as_supervised=True,
    with_info=True,
)
raw_train, raw_validation, raw_test = splits

# Print each dataset object in train / validation / test order.
for subset in (raw_train, raw_validation, raw_test):
    print(subset)
- 展示数据
# The metadata returned by the loader stores the label names;
# int2str converts an integer label into its string name.
get_label_name = metadata.features['label'].int2str

# Show the first two training images, each in its own figure and titled
# with the name of its label.
for sample_image, sample_label in raw_train.take(2):
    plt.figure()
    plt.imshow(sample_image)
    plt.title(get_label_name(sample_label))
- 数据整理
规定图片大小
将像素值转换到[-1,1]范围
IMG_SIZE = 160  # All images will be resized to 160x160


def format_example(image, label):
    """Rescale and resize one (image, label) example.

    The image is cast to float32, divided by 127.5 and shifted by -1
    (which maps values in [0, 255] onto [-1, 1]), then resized to
    IMG_SIZE x IMG_SIZE. The label is returned unchanged.

    Args:
        image: image tensor to preprocess.
        label: label associated with the image.

    Returns:
        A (resized_image, label) tuple.
    """
    rescaled = tf.cast(image, tf.float32) / 127.5 - 1
    resized = tf.image.resize(rescaled, (IMG_SIZE, IMG_SIZE))
    return resized, label
# Use Dataset.map to apply format_example to every element of each split.
train, validation, test = (
    raw_split.map(format_example)
    for raw_split in (raw_train, raw_validation, raw_test)
)
- 打乱(shuffle)数据并分组(batch)
BATCH_SIZE = 32
SHUFFLE_BUFFER_SIZE = 1000

# Only the training split is shuffled; all three splits are batched.
train_batches = (
    train
    .shuffle(buffer_size=SHUFFLE_BUFFER_SIZE)
    .batch(batch_size=BATCH_SIZE)
)
validation_batches = validation.batch(batch_size=BATCH_SIZE)
test_batches = test.batch(batch_size=BATCH_SIZE)
# Inspect the data: pull a single batch out of the training pipeline and
# look at the shape of its image tensor (displayed when run interactively).
image_batch, label_batch = next(iter(train_batches.take(1)))
image_batch.shape
创建模型
基础模型
IMG_SHAPE = (IMG_SIZE, IMG_SIZE, 3)

# Create the base model from the pre-trained MobileNet V2 network.
# include_top=False loads the network without the classification layers at
# the top, which is ideal for feature extraction.
base_model = tf.keras.applications.MobileNetV2(
    input_shape=IMG_SHAPE,
    include_top=False,
    weights='imagenet',
)
这个特征提取器将每个160x160x3的图像转换成一个5x5x1280的特征块。
# Run the sampled image batch through the base model and print the shape of
# the feature block it produces.
feature_batch = base_model(image_batch)
print(feature_batch.shape)
底层特征提取Feature extraction
# Freeze the base feature extractor so its parameters are not updated
# during training.
base_model.trainable = False
# Let's take a look at the base model architecture
base_model.summary()
添加顶层分类器
# Start building the classification head with a global average pooling layer.
global_average_layer = tf.keras.layers.GlobalAveragePooling2D()
# Sanity check:
# feature_batch is the feature block output by the base model; pool it and
# print the resulting shape.
feature_batch_average = global_average_layer(feature_batch)
print(feature_batch_average.shape)
添加全连接层,每张图片输出一个预测结果
# Dense layer with a single output unit: one prediction value per image.
prediction_layer = tf.keras.layers.Dense(1)
# Sanity check: apply the layer to the pooled features and print the shape.
prediction_batch = prediction_layer(feature_batch_average)
print(prediction_batch.shape)
堆叠基础特征提取层、全局平均池化层和全连接层,得到完整模型
# Stack the feature extractor, the pooling layer and the prediction layer
# into a single sequential model, in that order.
model = tf.keras.Sequential(
    layers=[base_model, global_average_layer, prediction_layer]
)
编译模型,设置优化器(含学习率)、损失函数和评估指标
base_learning_rate = 0.0001

# Compile with RMSprop, binary cross-entropy computed on raw logits
# (from_logits=True), and accuracy as the reported metric.
# `learning_rate` is the supported keyword; `lr` is a deprecated alias that
# newer Keras releases warn about or reject.
model.compile(
    optimizer=tf.keras.optimizers.RMSprop(learning_rate=base_learning_rate),
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
    metrics=['accuracy'],
)
model.summary()
可训练变量有两个:全连接层的权重和偏置项
# Number of trainable variables in the model (displayed when run interactively).
len(model.trainable_variables)
训练模型
未训练前的模型准确度
initial_epochs = 10
validation_steps = 20

# Baseline before any training: evaluate on validation_steps batches of the
# validation data and report the loss and accuracy.
loss0, accuracy0 = model.evaluate(validation_batches, steps=validation_steps)
for template, value in (
    ("initial loss: {:.2f}", loss0),
    ("initial accuracy: {:.2f}", accuracy0),
):
    print(template.format(value))
开始训练
# Train on the training batches for initial_epochs epochs, validating on the
# validation batches; the returned History object is kept for plotting.
history = model.fit(train_batches,
epochs=initial_epochs,
validation_data=validation_batches)
模型精度与损失值的变化曲线
# Per-epoch metrics recorded by the initial training run.
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

plt.figure(figsize=(8, 8))

# Top panel: training vs. validation accuracy.
plt.subplot(2, 1, 1)
for curve, curve_label in (
    (acc, 'Training Accuracy'),
    (val_acc, 'Validation Accuracy'),
):
    plt.plot(curve, label=curve_label)
plt.legend(loc='lower right')
plt.ylabel('Accuracy')
# Keep the automatically chosen lower bound but cap the axis at 1.
lower_bound = min(plt.ylim())
plt.ylim([lower_bound, 1])
plt.title('Training and Validation Accuracy')

# Bottom panel: training vs. validation loss.
plt.subplot(2, 1, 2)
for curve, curve_label in (
    (loss, 'Training Loss'),
    (val_loss, 'Validation Loss'),
):
    plt.plot(curve, label=curve_label)
plt.legend(loc='upper right')
plt.ylabel('Cross Entropy')
plt.ylim([0, 1.0])
plt.title('Training and Validation Loss')
plt.xlabel('epoch')
plt.show()
微调
迁移过来的模型虽然没有顶层分类器,但同样遵循这一规律:层数越深,模型提取的特征越具体化。
可以考虑重新训练迁移模型中靠近顶部的部分层的参数,使模型更加适应当前的数据集,从而提高模型对该数据集的特征提取能力。
# Make every layer of the feature extractor trainable first.
base_model.trainable = True

# Report how many layers the base model has
# (the original note records 155 layers).
print("Number of layers in the base model: ", len(base_model.layers))

# Fine-tune from layer index 100 onwards.
fine_tune_at = 100

# Re-freeze the lower layers before index fine_tune_at so they do not take
# part in training.
for frozen_layer in base_model.layers[:fine_tune_at]:
    frozen_layer.trainable = False
# Recompile so the changed trainable flags take effect, using a learning rate
# ten times smaller than in the initial training phase.
# `learning_rate` is the supported keyword; `lr` is a deprecated alias that
# newer Keras releases warn about or reject.
model.compile(
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
    optimizer=tf.keras.optimizers.RMSprop(learning_rate=base_learning_rate / 10),
    metrics=['accuracy'],
)

# Model structure
model.summary()

# Number of trainable variables after unfreezing the upper layers.
# NOTE(review): the original note estimated 2 + (155 - 100 + 1); variables are
# not necessarily one per layer, so confirm the count by running this cell.
len(model.trainable_variables)
# Continue training the model.
fine_tune_epochs = 10
# Initial training epochs + fine-tuning epochs: 10 + 10.
total_epochs = initial_epochs + fine_tune_epochs

# Resume training on train_batches, validating on validation_batches.
# `epochs` is the index of the final epoch, not a count of additional epochs,
# so training runs from `initial_epoch` up to `total_epochs`.
# history.epoch holds the 0-based indices of the epochs already completed, so
# its length is the index of the next epoch to run. Using history.epoch[-1]
# instead would restart at the last completed index and train one epoch more
# than fine_tune_epochs.
history_fine = model.fit(
    train_batches,
    epochs=total_epochs,
    initial_epoch=len(history.epoch),
    validation_data=validation_batches,
)
查看精度和损失值
# Concatenate the metrics of the initial run and the fine-tuning run.
# New lists are built from both History objects rather than extended with
# `+=`: the earlier `acc`/`loss` names alias the lists stored inside
# history.history, so in-place extension would corrupt that record and
# duplicate the fine-tuning data every time this cell is re-run.
acc = history.history['accuracy'] + history_fine.history['accuracy']
val_acc = history.history['val_accuracy'] + history_fine.history['val_accuracy']
loss = history.history['loss'] + history_fine.history['loss']
val_loss = history.history['val_loss'] + history_fine.history['val_loss']

# Draw the charts.
plt.figure(figsize=(8, 8))

# Top panel: accuracy, with a vertical line at the last epoch of the initial
# training phase to mark where fine-tuning starts.
plt.subplot(2, 1, 1)
plt.plot(acc, label='Training Accuracy')
plt.plot(val_acc, label='Validation Accuracy')
plt.ylim([0.8, 1])
plt.plot([initial_epochs - 1, initial_epochs - 1],
         plt.ylim(), label='Start Fine Tuning')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')

# Bottom panel: loss, with the same fine-tuning marker.
plt.subplot(2, 1, 2)
plt.plot(loss, label='Training Loss')
plt.plot(val_loss, label='Validation Loss')
plt.ylim([0, 1.0])
plt.plot([initial_epochs - 1, initial_epochs - 1],
         plt.ylim(), label='Start Fine Tuning')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.xlabel('epoch')
plt.show()