当我们打算对已经训练好的模型例如VGG16进行微调时,所有层都应该从训练有素的权重开始(应当预训练新加入层的权重):例如,你不应该在预先训练好的卷积基础上打一个随机初始化的全连接网络。这是因为由随机初始化的权重触发的大梯度更新将破坏卷积基础中的学习权重。在我们的例子中,这就是为什么我们首先训练顶级分类器,然后才开始微调卷积权重。
我们选择仅微调最后的卷积块而不是整个网络以防止过度拟合,因为整个网络将具有非常大的熵容量并因此具有过度拟合的强烈倾向。低级卷积块学习的特征比较高级的卷积块更加通用,不那么抽象,所以保持前几个块固定(更一般的特征)并且只调整最后一个块(更专业的特征)是明智的。 )。
微调应该以非常慢的学习速率完成,通常使用SGD优化器而不是适应性学习速率优化器,例如RMSProp。这是为了确保更新的幅度保持非常小,以免破坏以前学过的功能。
import numpy as np
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dropout, Flatten, Dense
from keras import applications
# dimensions of our images.
img_width, img_height = 150, 150
top_model_weights_path = 'bottleneck_fc_model.h5'
train_data_dir = 'data/train'
validation_data_dir = 'data/validation'
nb_train_samples = 2000
nb_validation_samples = 800
epochs = 50
batch_size = 16
def save_bottlebeck_features():
datagen = ImageDataGenerator(rescale=1. / 255)
# build the VGG16 network
model = applications.VGG16(include_top=False, weights='imagenet')
generator = datagen.flow_from_directory(
train_data_dir,
target_size=(img_width, img_height),
batch_size=batch_size,
class_mode=None,
shuffle=False)
bottleneck_features_train = model.predict_generator(
generator, nb_train_samples // batch_size)
np.save(open('bottleneck_features_train.npy', 'w'),
bottleneck_features_train)
generator = datagen.flow_from_directory(
validation_data_dir,
target_size=(img_width, img_height),
batch_size=batch_size,
class_mode=None,
shuffle=False)
bottleneck_features_validation = model.predict_generator(
generator, nb_validation_samples // batch_size)
np.save(open('bottleneck_features_validation.npy', 'w'),
bottleneck_features_validation)
def train_top_model():
train_data = np.load(open('bottleneck_features_train.npy'))
train_labels = np.array(
[0] * (nb_train_samples / 2) + [1] * (nb_train_samples / 2))
validation_data = np.load(open('bottleneck_features_validation.npy'))
validation_labels = np.array(
[0] * (nb_validation_samples / 2) + [1] * (nb_validation_samples / 2))
model = Sequential()
model.add(Flatten(input_shape=train_data.shape[1:]))
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='rmsprop',
loss='binary_crossentropy', metrics=['accuracy'])
model.fit(train_data, train_labels,
epochs=epochs,
batch_size=batch_size,
validation_data=(validation_data, validation_labels))
model.save_weights(top_model_weights_path)
save_bottlebeck_features()
train_top_model()
注:下面代码运行时可能存在问题,可能是兼容2.7版本不支持3.6版本,因为提示模型里没有添加方法可能是因为提供的VGG16是一个函数型模型并非连贯型模型只有连贯模型才有增加。方法。
下面第二个代码是笔者的改进代码,但是并未检验。
from keras import applications
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers
from keras.models import Sequential
from keras.layers import Dropout, Flatten, Dense
# path to the model weights files.
weights_path = '../keras/examples/vgg16_weights.h5'
top_model_weights_path = 'fc_model.h5'
# dimensions of our images.
img_width, img_height = 150, 150
train_data_dir = 'cats_and_dogs_small/train'
validation_data_dir = 'cats_and_dogs_small/validation'
nb_train_samples = 2000
nb_validation_samples = 800
epochs = 50
batch_size = 16
# build the VGG16 network
model = applications.VGG16(weights='imagenet', include_top=False)
print('Model loaded.')
# build a classifier model to put on top of the convolutional model
print(model.output_shape[1:])
'''
model.output_shape:
(None, 6, 6, 512)
model.output_shape[1:]
(6, 6, 512)
'''
top_model = Sequential()
top_model.add(Flatten(input_shape=model.output_shape[1:]))
top_model.add(Dense(256, activation='relu'))
top_model.add(Dropout(0.5))
top_model.add(Dense(1, activation='sigmoid'))
# note that it is necessary to start with a fully-trained
# classifier, including the top classifier,
# in order to successfully do fine-tuning
top_model.load_weights(top_model_weights_path)
# add the model on top of the convolutional base
model.add(top_model)
# set the first 25 layers (up to the last conv block)
# to non-trainable (weights will not be updated)
for layer in model.layers[:25]:
layer.trainable = False
# compile the model with a SGD/momentum optimizer
# and a very slow learning rate.
model.compile(loss='binary_crossentropy',
optimizer=optimizers.SGD(lr=1e-4, momentum=0.9),
metrics=['accuracy'])
#数据增强
# prepare data augmentation configuration
train_datagen = ImageDataGenerator(
rescale=1. / 255,
shear_range=0.2,
zoom_range=0.2,
horizontal_flip=True)
test_datagen = ImageDataGenerator(rescale=1. / 255)
train_generator = train_datagen.flow_from_directory(
train_data_dir,
target_size=(img_height, img_width),
batch_size=batch_size,
class_mode='binary')
validation_generator = test_datagen.flow_from_directory(
validation_data_dir,
target_size=(img_height, img_width),
batch_size=batch_size,
class_mode='binary')
# fine-tune the model
model.fit_generator(
train_generator,
samples_per_epoch=nb_train_samples,
epochs=epochs,
validation_data=validation_generator,
nb_val_samples=nb_validation_samples)
修改后:
from keras import applications
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers
from keras.models import Sequential
from keras.layers import Dropout, Flatten, Dense
# path to the model weights files.
weights_path = '../keras/examples/vgg16_weights.h5'
top_model_weights_path = 'fc_model.h5'
# dimensions of our images.
img_width, img_height = 150, 150
train_data_dir = 'cats_and_dogs_small/train'
validation_data_dir = 'cats_and_dogs_small/validation'
nb_train_samples = 2000
nb_validation_samples = 800
epochs = 50
batch_size = 16
# build the VGG16 network
base_model = applications.VGG16(weights='imagenet', include_top=False,input_shape=(200,200,3))
print("befor loading>>>")
base_model.summary()
print('Model loaded.')
# build a classifier model to put on top of the convolutional model
print("model.output_shape:\n",base_model.output_shape)
print("model.output_shape[1:]")
print(base_model.output_shape[1:])
'''
model.output_shape:
(None, 6, 6, 512)
model.output_shape[1:]
(6, 6, 512)
'''
top_model = Sequential()
top_model.add(Flatten(input_shape=base_model.output_shape[1:]))
top_model.add(Dense(256, activation='relu'))
top_model.add(Dropout(0.5))
top_model.add(Dense(1, activation='sigmoid'))
model=Sequential()
# note that it is necessary to start with a fully-trained
# classifier, including the top classifier,
# in order to successfully do fine-tuning
# add the model on top of the convolutional base
model.add(base_model)
model.add(top_model)
print("after adding>>>")
model.summary()
# set the first 25 layers (up to the last conv block)
# to non-trainable (weights will not be updated)
for layer in model.layers[:25]:
layer.trainable = False
# compile the model with a SGD/momentum optimizer
# and a very slow learning rate.
model.compile(loss='binary_crossentropy',
optimizer=optimizers.SGD(lr=1e-4, momentum=0.9),
metrics=['accuracy'])
# prepare data augmentation configuration
train_datagen = ImageDataGenerator(
rescale=1. / 255,
shear_range=0.2,
zoom_range=0.2,
horizontal_flip=True)
test_datagen = ImageDataGenerator(rescale=1. / 255)
train_generator = train_datagen.flow_from_directory(
train_data_dir,
target_size=(img_height, img_width),
batch_size=batch_size,
class_mode='binary')
validation_generator = test_datagen.flow_from_directory(
validation_data_dir,
target_size=(img_height, img_width),
batch_size=batch_size,
class_mode='binary')
# fine-tune the model
model.fit_generator(
train_generator,
samples_per_epoch=nb_train_samples,
epochs=epochs,
validation_data=validation_generator,
nb_val_samples=nb_validation_samples)
来源:Keras官方文档引自Keras博客