学习网站:https://lyhue1991.github.io/eat_tensorflow2_in_30_days/1-2,图片数据建模流程范例.html
cifar2数据集为cifar10数据集的子集,只包括前两种类别airplane和automobile。
训练集有airplane和automobile图片各5000张,测试集有airplane和automobile图片各1000张。
cifar2任务的目标是训练一个模型来对飞机airplane和机动车automobile两种图片进行分类。
我们准备的 Cifar2 数据集按类别分目录存放:train 和 test 目录下各有 airplane 和 automobile 两个子目录,每个子目录中存放对应类别的图片。
本文代码采取tf.keras中的ImageDataGenerator工具构建图片数据生成器
代码如下:
# Imports and dataset locations.
# FIX: the original imported ImageDataGenerator from the standalone `keras`
# package while the rest of the file uses `tensorflow.keras`; mixing the two
# packages can produce incompatible layer/model objects, so everything is
# imported from tensorflow.keras consistently.
import tensorflow as tf
from tensorflow.keras import datasets, layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Root folders of the cifar2 images (one subfolder per class inside each).
train_dir = './data/cifar2/train'
test_dir = './data/cifar2/test'
# Augmentation policy for the training set: scale pixels to [0, 1] and apply
# random rotations, shifts, shears, zooms and horizontal flips.
_augment_kwargs = dict(
    rescale=1. / 255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)
train_datagen = ImageDataGenerator(**_augment_kwargs)
# The test set must not be augmented — only rescaled to [0, 1].
test_datagen = ImageDataGenerator(rescale=1. / 255)
import os
from matplotlib import pyplot as plt
# FIX: import from tensorflow.keras instead of the standalone `keras`
# package, matching the imports used by the rest of the file.
from tensorflow.keras.preprocessing import image

# Visualize the augmentation effect on a single training image.
# Collect every file under train/airplane; sorted() makes the sample choice
# deterministic — os.listdir order depends on the filesystem.
airplane_dir = './data/cifar2/train/airplane'
fnames = [os.path.join(airplane_dir, fname)
          for fname in sorted(os.listdir(airplane_dir))]
print(fnames)
# Load one sample image (index 3, i.e. the fourth file).
img_path = fnames[3]
img = image.load_img(img_path, target_size=(32, 32))
x = image.img_to_array(img)
plt.figure(1, figsize=(10, 8))
plt.subplot(2, 2, 1)
plt.imshow(image.array_to_img(x))
plt.title('original image')
# Show three randomly augmented versions of the same image.
x = x.reshape((1,) + x.shape)  # flow() expects a leading batch dimension
i = 0
for batch in train_datagen.flow(x, batch_size=1):
    plt.subplot(2, 2, i + 2)
    plt.imshow(image.array_to_img(batch[0]))
    plt.title('after augmentation %d' % (i + 1))  # FIX: typo "augumentation"
    i = i + 1
    if i % 3 == 0:  # flow() loops forever; stop after 3 augmented samples
        break
plt.show()
# Build the train/test batch generators straight from the directory layout.
# class_mode='binary' yields scalar 0/1 labels inferred from subfolder names.
def _directory_flow(datagen, directory, shuffle):
    # Thin wrapper so both generators share identical size/batch settings.
    return datagen.flow_from_directory(
        directory,
        target_size=(32, 32),
        batch_size=32,
        shuffle=shuffle,
        class_mode='binary',
    )

train_generator = _directory_flow(train_datagen, train_dir, True)
# Evaluation must keep a fixed order so predictions line up with labels.
test_generator = _directory_flow(test_datagen, test_dir, False)
# Show which integer label was assigned to each class folder.
print(train_generator.class_indices)
# Define a small CNN with the Keras functional API: two conv/pool stages,
# dropout, then a dense head ending in one sigmoid unit (binary output).
tf.keras.backend.clear_session()  # drop any graph state from earlier runs

inputs = layers.Input(shape=(32, 32, 3))
h = layers.Conv2D(32, kernel_size=(3, 3))(inputs)
h = layers.MaxPool2D()(h)
h = layers.Conv2D(64, kernel_size=(5, 5))(h)
h = layers.MaxPool2D()(h)
h = layers.Dropout(rate=0.1)(h)
h = layers.Flatten()(h)
h = layers.Dense(32, activation='relu')(h)
outputs = layers.Dense(1, activation='sigmoid')(h)
model = models.Model(inputs=inputs, outputs=outputs)
model.summary()
# Train the model, logging metrics to a timestamped TensorBoard directory.
import datetime

run_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
logdir = f"./data/keras_model/{run_stamp}"
tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1)

model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss=tf.keras.losses.binary_crossentropy,
    metrics=["accuracy"],
)
history = model.fit(
    train_generator,
    epochs=10,
    validation_data=test_generator,
    callbacks=[tensorboard_callback],
    workers=4,  # parallel data-loading workers
)
# Model inspection. The commented lines are the Jupyter magic equivalents:
# %load_ext tensorboard
# %tensorboard --logdir ./data/keras_model
from tensorboard import notebook

notebook.list()
# Launch TensorBoard pointed at the log directory.
notebook.start("--logdir ./data/keras_model")

# Turn the per-epoch history into a DataFrame indexed 1..n_epochs.
import pandas as pd

dfhistory = pd.DataFrame(history.history)
dfhistory.index = pd.RangeIndex(1, len(dfhistory) + 1, name='epoch')
print(dfhistory)
import matplotlib.pyplot as plt
def plot_metric(history, metric):
    """Plot a training metric and its validation counterpart per epoch.

    Args:
        history: ``History`` object returned by ``model.fit`` (anything with
            a ``history`` dict keyed by metric name works).
        metric: metric name, e.g. ``"loss"`` or ``"accuracy"``.
    """
    train_curve = history.history[metric]
    valid_curve = history.history['val_' + metric]
    xs = range(1, len(train_curve) + 1)
    plt.plot(xs, train_curve, 'bo--')
    plt.plot(xs, valid_curve, 'ro-')
    plt.title('Training and validation ' + metric)
    plt.xlabel("Epochs")
    plt.ylabel(metric)
    plt.legend(["train_" + metric, 'val_' + metric])
    plt.show()
# Plot the learning curves recorded during training.
plot_metric(history, "loss")
plot_metric(history, "accuracy")

# Evaluate on the full (unshuffled) test generator.
val_loss, val_accuracy = model.evaluate(test_generator, workers=4)
print(val_loss, val_accuracy)

# FIX: the original called model.predict(test_generator[10]) — an (x, y)
# tuple, not an image array — and then printed labels[0:10], comparing
# predictions against labels from a *different* batch of images.
# Predict on the images of the first batch and show the matching labels.
x_batch, y_batch = test_generator[0]
pre = model.predict(x_batch)
print("预测值", pre[:10])
print("实际值", y_batch[:10])

# Save in TensorFlow SavedModel format, then reload and re-evaluate to
# confirm the round trip preserves the trained weights.
model.save('./data/tf_model_savedmodel', save_format="tf")
print('export saved model.')
model_loaded = tf.keras.models.load_model('./data/tf_model_savedmodel')
model_loaded.evaluate(test_generator)