本实例使用的数据集共约3700张图片,包含5个种类的花。
flower_photos:daisy(雏菊)/dandelion(蒲公英)/roses(玫瑰)/sunflowers(向日葵)/tulips(郁金香)
import matplotlib.pyplot as plt
import numpy as np
import os
import PIL # Python Imaging Library
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
加载数据,并检测数据是否可用。
import pathlib
# Download location of the flower-photo archive.
dataset_url = 'https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz'
# get_file is given the local name, the download origin and untar=True (unpack
# the archive); its return value is wrapped in a Path object right away.
data_dir = pathlib.Path(
    tf.keras.utils.get_file('flower_photos', origin=dataset_url, untar=True)
)
# Sanity check: number of files matching '<class folder>/<name>.jpg'.
# glob returns a generator, so it is materialised before counting.
jpg_paths = list(data_dir.glob('*/*.jpg'))
image_count = len(jpg_paths)


def _open_samples(class_pattern):
    """Return the files matching *class_pattern* plus two opened samples.

    The result is ``(paths, image_at_0, image_at_100)`` where both images are
    opened with PIL from entries 0 and 100 of the listing, in whatever order
    ``glob`` yields the files.
    """
    paths = list(data_dir.glob(class_pattern))
    first_image = PIL.Image.open(str(paths[0]))
    later_image = PIL.Image.open(str(paths[100]))
    return paths, first_image, later_image


# Spot-check three of the class folders by opening two pictures from each.
roses, roses0, roses1 = _open_samples('roses/*')
daisy, daisy0, daisy1 = _open_samples('daisy/*')
dandelion, dandelion0, dandelion1 = _open_samples('dandelion/*')
对加载器定义一些变量。
# Number of images per batch requested from the dataset loader.
batch_size = 32
# Target height and width (in pixels) passed to the loader as image_size.
img_height, img_width = 180, 180
划分训练集和验证集
# Options shared by both loader calls: hold out 20% of the images, use the
# same seed for both subsets, and apply the image size and batch size
# defined earlier.
_split_options = dict(
    validation_split=0.2,
    seed=123,
    image_size=(img_height, img_width),
    batch_size=batch_size,
)
# Training portion of the split.
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir, subset='training', **_split_options
)
# Validation portion of the split.
val_ds = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir, subset='validation', **_split_options
)

# Let tf.data choose the prefetch buffer size.
AUTOTUNE = tf.data.AUTOTUNE
# Training pipeline: cache, shuffle with a buffer of 1000, then prefetch.
train_ds = train_ds.cache()
train_ds = train_ds.shuffle(1000)
train_ds = train_ds.prefetch(buffer_size=AUTOTUNE)
# Validation pipeline: cache and prefetch only (no shuffling).
val_ds = val_ds.cache()
val_ds = val_ds.prefetch(buffer_size=AUTOTUNE)
标准化输入数据(将像素值由[0,255]缩放到[0,1])
# Stand-alone layer that multiplies its input by 1/255.
normalization_layer = layers.experimental.preprocessing.Rescaling(1. / 255)


def _rescale_images(images, labels):
    """Apply the rescaling layer to the images; pass the labels through."""
    return normalization_layer(images), labels


# Rescaled view of the training data, used here only for inspection.
normalized_ds = train_ds.map(_rescale_images)
# Pull a single batch and keep its first image.
image_batch, labels_batch = next(iter(normalized_ds))
first_image = image_batch[0]
创建模型
# Size of the final Dense layer (one output per flower class).
num_classes = 5

# The network is assembled as a flat list of layers, in order.
# It starts with an in-model rescaling of the input by 1/255; the input shape
# is (img_height, img_width, 3).
model_layers = [
    layers.experimental.preprocessing.Rescaling(
        1. / 255, input_shape=(img_height, img_width, 3)
    ),
]
# Three convolution + max-pooling stages with 16, 32 and 64 filters.
for n_filters in (16, 32, 64):
    model_layers.append(
        layers.Conv2D(n_filters, 3, padding='same', activation='relu')
    )
    model_layers.append(layers.MaxPooling2D())
# Classifier head: flatten, one hidden Dense layer, then a Dense layer with
# no activation that yields num_classes raw scores.
model_layers.append(layers.Flatten())
model_layers.append(layers.Dense(128, activation='relu'))
model_layers.append(layers.Dense(num_classes))

model = Sequential(model_layers)
编译模型
# from_logits=True: the loss is told that the model outputs are raw scores
# (the last Dense layer of the model is created without an activation).
logits_loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
# Adam optimizer, the loss above, and accuracy as the reported metric.
model.compile(
    optimizer='adam',
    loss=logits_loss,
    metrics=['accuracy'],
)
训练模型
# Number of epochs passed to model.fit (also reused when plotting).
epochs = 10
# Fit on the training dataset and evaluate on the validation dataset;
# the returned object's .history dict is read by the plotting code.
history = model.fit(train_ds, validation_data=val_ds, epochs=epochs)
评估训练的结果,并绘图。
# Metric series recorded by model.fit.
metrics_log = history.history
acc = metrics_log['accuracy']
val_acc = metrics_log['val_accuracy']
loss = metrics_log['loss']
val_loss = metrics_log['val_loss']
# X axis: one point per requested epoch.
epochs_range = range(epochs)

# One entry per subplot row:
# (training series, training label, validation series, validation label,
#  legend location, title).
_panels = (
    (acc, 'Training Accuracy', val_acc, 'Validation Accuracy',
     'lower right', 'Training and Validation Accuracy'),
    (loss, 'Training Loss', val_loss, 'Validation Loss',
     'upper right', 'Training and Validation Loss'),
)

plt.figure(figsize=(8, 8))
for row, (train_curve, train_label, val_curve, val_label,
          legend_loc, panel_title) in enumerate(_panels, start=1):
    # Two stacked rows, one column; `row` selects the active subplot.
    plt.subplot(2, 1, row)
    plt.plot(epochs_range, train_curve, label=train_label)
    plt.plot(epochs_range, val_curve, label=val_label)
    plt.legend(loc=legend_loc)
    plt.title(panel_title)
plt.show()