Classifying the Cats vs. Dogs Image Dataset with Keras

Contents
1. Installing Keras
2. Classifying the cats-vs-dogs image dataset with Keras
3. Full code
4. Results

1. Installing Keras
On Windows, first add the USTC (University of Science and Technology of China) mirror channels by running the following commands in the command line:
conda config --add channels https://mirrors.ustc.edu.cn/anaconda/pkgs/main/
conda config --add channels https://mirrors.ustc.edu.cn/anaconda/pkgs/free/
conda config --add channels https://mirrors.ustc.edu.cn/anaconda/cloud/conda-forge/
conda config --add channels https://mirrors.ustc.edu.cn/anaconda/cloud/msys2/
conda config --add channels https://mirrors.ustc.edu.cn/anaconda/cloud/bioconda/
conda config --add channels https://mirrors.ustc.edu.cn/anaconda/cloud/menpo/
conda config --set show_channel_urls yes

Installing Keras in Anaconda3 involves three main steps:
1. Install mingw and libpython
2. Install theano
3. Install keras
If you have not installed Anaconda yet, download and install it first. Then open the Anaconda Prompt from the Windows Start menu and:
Type conda install mingw libpython and press Enter, then type y and press Enter.
Type conda install theano and press Enter, then type y and press Enter.
Type conda install keras and press Enter, then type y and press Enter.
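Once those three installs finish, a quick sanity check is to import Keras from Python and print its version. This is only a minimal sketch and assumes the installation above succeeded (with Theano as the backend, Keras prints a short backend message on import):

import keras              # should print something like "Using Theano backend."
print(keras.__version__)  # confirms Keras is importable and shows the installed version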
2. Classifying the Cats vs. Dogs Image Dataset with Keras
Dataset download link: cats_and_dogs_small_train. Import the Keras libraries we need, read the local cats-and-dogs dataset, and build the model.
import os
import shutil  # for copying files
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from keras.optimizers import RMSprop
from keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt
from keras.preprocessing import image

# Path to the directory with the original (uncompressed) dataset
original_dataset_dir = 'cats_and_dogs_small_train'

# Directory where the smaller dataset will be stored
base_dir = 'cats_and_dogs_small_test'
os.mkdir(base_dir)

# Directories for the training, validation, and test splits
train_dir = os.path.join(base_dir, 'train')
os.mkdir(train_dir)
validation_dir = os.path.join(base_dir, 'validation')
os.mkdir(validation_dir)
test_dir = os.path.join(base_dir, 'test')
os.mkdir(test_dir)

# Directory for cat training images
train_cats_dir = os.path.join(train_dir, 'cats')
os.mkdir(train_cats_dir)

# Directory for dog training images
train_dogs_dir = os.path.join(train_dir, 'dogs')
os.mkdir(train_dogs_dir)

# Directory for cat validation images
validation_cats_dir = os.path.join(validation_dir, 'cats')
os.mkdir(validation_cats_dir)

# Directory for dog validation images
validation_dogs_dir = os.path.join(validation_dir, 'dogs')
os.mkdir(validation_dogs_dir)

# Directory for cat test images
test_cats_dir = os.path.join(test_dir, 'cats')
os.mkdir(test_cats_dir)

# Directory for dog test images
test_dogs_dir = os.path.join(test_dir, 'dogs')
os.mkdir(test_dogs_dir)

# Copy the first 1000 cat images to train_cats_dir
fnames = ['cat.{}.jpg'.format(i) for i in range(1000)]
for fname in fnames:
    src = os.path.join(original_dataset_dir, fname)
    dst = os.path.join(train_cats_dir, fname)
    shutil.copyfile(src, dst)

# Copy the next 500 cat images to validation_cats_dir
fnames = ['cat.{}.jpg'.format(i) for i in range(1000, 1500)]
for fname in fnames:
    src = os.path.join(original_dataset_dir, fname)
    dst = os.path.join(validation_cats_dir, fname)
    shutil.copyfile(src, dst)

# Copy the next 500 cat images to test_cats_dir
fnames = ['cat.{}.jpg'.format(i) for i in range(1500, 2000)]
for fname in fnames:
    src = os.path.join(original_dataset_dir, fname)
    dst = os.path.join(test_cats_dir, fname)
    shutil.copyfile(src, dst)

# Copy the first 1000 dog images to train_dogs_dir
fnames = ['dog.{}.jpg'.format(i) for i in range(1000)]
for fname in fnames:
    src = os.path.join(original_dataset_dir, fname)
    dst = os.path.join(train_dogs_dir, fname)
    shutil.copyfile(src, dst)

# Copy the next 500 dog images to validation_dogs_dir
fnames = ['dog.{}.jpg'.format(i) for i in range(1000, 1500)]
for fname in fnames:
    src = os.path.join(original_dataset_dir, fname)
    dst = os.path.join(validation_dogs_dir, fname)
    shutil.copyfile(src, dst)

# Copy the next 500 dog images to test_dogs_dir
fnames = ['dog.{}.jpg'.format(i) for i in range(1500, 2000)]
for fname in fnames:
    src = os.path.join(original_dataset_dir, fname)
    dst = os.path.join(test_dogs_dir, fname)
    shutil.copyfile(src, dst)

print('total training cat images:', len(os.listdir(train_cats_dir)))
print('total training dog images:', len(os.listdir(train_dogs_dir)))
print('total validation cat images:', len(os.listdir(validation_cats_dir)))
print('total validation dog images:', len(os.listdir(validation_dogs_dir)))
print('total test cat images:', len(os.listdir(test_cats_dir)))
print('total test dog images:', len(os.listdir(test_dogs_dir)))

# Build the model
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu',
                 input_shape=(150, 150, 3)))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

print(model.summary())

model.compile(loss='binary_crossentropy',
              optimizer=RMSprop(lr=1e-4),
              metrics=['acc'])
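For reference, model.summary() here should show the feature maps shrinking from 150×150×3 at the input to 148×148×32, 74×74×32, 72×72×64, 36×36×64, 34×34×128, 17×17×128, 15×15×128, and finally 7×7×128 before Flatten (6272 values), since each Conv2D uses 3×3 valid convolutions and each MaxPooling2D halves the spatial size.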

Before being fed into the network, the data should be turned into appropriately preprocessed floating-point tensors. Right now the data sits on disk as JPEG files, so the steps to get it into the network are roughly: read the image files; decode the JPEG content into grids of RGB pixels; convert these into floating-point tensors; rescale the pixel values (between 0 and 255) to the [0, 1] interval (neural networks prefer small input values). This may seem daunting, but fortunately Keras has utilities that handle these steps automatically. Keras has a module of image-processing helpers located at keras.preprocessing.image. In particular, it contains the class ImageDataGenerator, which lets you quickly set up Python generators that automatically turn image files on disk into batches of preprocessed tensors.
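For intuition, here is a minimal sketch of performing those four steps by hand for a single image, using the keras.preprocessing.image helpers already imported above (the sample file name is only an illustrative assumption; ImageDataGenerator below does the same work for entire directories):

# Hypothetical sample file; any JPEG copied into train_cats_dir above would work.
sample_path = os.path.join(train_cats_dir, 'cat.0.jpg')
img = image.load_img(sample_path, target_size=(150, 150))  # read the JPEG and resize it
x = image.img_to_array(img)                                 # decode to a float array of shape (150, 150, 3)
x = x / 255.0                                               # rescale pixel values from [0, 255] to [0, 1]
print(x.shape, x.min(), x.max())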
train_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    train_dir,               # target directory
    target_size=(150, 150),  # resize the images
    batch_size=20,
    class_mode='binary'
)

validation_generator = test_datagen.flow_from_directory(
    validation_dir,
    target_size=(150, 150),
    batch_size=20,
    class_mode='binary'
)

for data_batch, labels_batch in train_generator:
    print('data batch shape:', data_batch.shape)
    print('labels batch shape:', labels_batch.shape)
    break

hist = model.fit_generator(
    train_generator,
    steps_per_epoch=100,
    epochs=30,
    validation_data=validation_generator,
    validation_steps=50
)

model.save('cats_and_dogs_small_1.h5')

acc = hist.history['acc']
val_acc = hist.history['val_acc']
loss = hist.history['loss']
val_loss = hist.history['val_loss']

epochs = range(len(acc))

plt.plot(epochs, acc, 'bo', label='Training acc')
plt.plot(epochs, val_acc, 'b', label='Validation acc')
plt.title('Training and validation accuracy')
plt.legend()

plt.figure()
plt.plot(epochs, loss, 'bo', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.legend()
plt.show()

Because we have relatively few training samples (2000), overfitting will be our number-one concern. You have already seen several techniques that help mitigate overfitting, such as dropout and weight decay (L2 regularization). We will now introduce a new one, specific to computer vision and used almost universally when processing images with deep learning models: data augmentation. Overfitting happens when there are too few samples to learn from, so we cannot train a model that generalizes to new data. Given infinite data, our model would be exposed to every possible aspect of the data distribution at hand and would never overfit. Data augmentation generates more training data from existing training samples, by "augmenting" the samples with a number of random transformations that yield believable-looking images. The goal is that at training time, the model never sees the exact same picture twice. This helps expose the model to more aspects of the data and generalize better.
datagen = ImageDataGenerator(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Visualize the effect of data augmentation
fnames = [os.path.join(train_cats_dir, fname) for fname in os.listdir(train_cats_dir)]

# Pick one image to augment
img_path = fnames[3]

# Read the image and resize it
img = image.load_img(img_path, target_size=(150, 150))

# Convert it to a NumPy array of shape (150, 150, 3)
x = image.img_to_array(img)

# Reshape it to (1, 150, 150, 3)
x = x.reshape(1, 150, 150, 3)

i = 0
for batch in datagen.flow(x, batch_size=1):
    plt.figure(i)
    imgplot = plt.imshow(image.array_to_img(batch[0]))
    i += 1
    if i % 4 == 0:
        break
plt.show()

# Train a new network using the augmented data
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu',
                 input_shape=(150, 150, 3)))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Flatten())
model.add(Dropout(0.5))
model.add(Dense(512, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy',
              optimizer=RMSprop(lr=1e-4),
              metrics=['acc'])

train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)

test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(150, 150),
    batch_size=32,
    class_mode='binary'
)

validation_generator = test_datagen.flow_from_directory(
    validation_dir,
    target_size=(150, 150),
    batch_size=32,
    class_mode='binary'
)

hist = model.fit_generator(
    train_generator,
    steps_per_epoch=100,
    epochs=100,
    validation_data=validation_generator,
    validation_steps=50,
)

model.save('cats_and_dogs_small_2.h5')

acc = hist.history['acc']
val_acc = hist.history['val_acc']
loss = hist.history['loss']
val_loss = hist.history['val_loss']

epochs = range(len(acc))

plt.plot(epochs, acc, 'bo', label='Training acc')
plt.plot(epochs, val_acc, 'b', label='Validation acc')
plt.title('Training and validation accuracy')
plt.legend()

plt.figure()

plt.plot(epochs, loss, 'bo', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.legend()

plt.show()
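Note that the test split created at the beginning is never evaluated in this post. A minimal sketch of scoring the final model on it, assuming the same older Keras generator API used throughout, would be:

test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(150, 150),
    batch_size=32,
    class_mode='binary'
)
# evaluate_generator returns values in the order of model.metrics_names, i.e. [loss, acc] here.
test_loss, test_acc = model.evaluate_generator(test_generator, steps=50)
print('test acc:', test_acc)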

3. Full Code
import os
import shutil  # for copying files
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from keras.optimizers import RMSprop
from keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt
from keras.preprocessing import image

# Path to the directory with the original (uncompressed) dataset
original_dataset_dir = 'cats_and_dogs_small_train'

# Directory where the smaller dataset will be stored
base_dir = 'cats_and_dogs_small_test'
os.mkdir(base_dir)

# Directories for the training, validation, and test splits
train_dir = os.path.join(base_dir, 'train')
os.mkdir(train_dir)
validation_dir = os.path.join(base_dir, 'validation')
os.mkdir(validation_dir)
test_dir = os.path.join(base_dir, 'test')
os.mkdir(test_dir)

# Directory for cat training images
train_cats_dir = os.path.join(train_dir, 'cats')
os.mkdir(train_cats_dir)

# Directory for dog training images
train_dogs_dir = os.path.join(train_dir, 'dogs')
os.mkdir(train_dogs_dir)

# Directory for cat validation images
validation_cats_dir = os.path.join(validation_dir, 'cats')
os.mkdir(validation_cats_dir)

# Directory for dog validation images
validation_dogs_dir = os.path.join(validation_dir, 'dogs')
os.mkdir(validation_dogs_dir)

# Directory for cat test images
test_cats_dir = os.path.join(test_dir, 'cats')
os.mkdir(test_cats_dir)

# Directory for dog test images
test_dogs_dir = os.path.join(test_dir, 'dogs')
os.mkdir(test_dogs_dir)

# Copy the first 1000 cat images to train_cats_dir
fnames = ['cat.{}.jpg'.format(i) for i in range(1000)]
for fname in fnames:
    src = os.path.join(original_dataset_dir, fname)
    dst = os.path.join(train_cats_dir, fname)
    shutil.copyfile(src, dst)

# Copy the next 500 cat images to validation_cats_dir
fnames = ['cat.{}.jpg'.format(i) for i in range(1000, 1500)]
for fname in fnames:
    src = os.path.join(original_dataset_dir, fname)
    dst = os.path.join(validation_cats_dir, fname)
    shutil.copyfile(src, dst)

# Copy the next 500 cat images to test_cats_dir
fnames = ['cat.{}.jpg'.format(i) for i in range(1500, 2000)]
for fname in fnames:
    src = os.path.join(original_dataset_dir, fname)
    dst = os.path.join(test_cats_dir, fname)
    shutil.copyfile(src, dst)

# Copy the first 1000 dog images to train_dogs_dir
fnames = ['dog.{}.jpg'.format(i) for i in range(1000)]
for fname in fnames:
    src = os.path.join(original_dataset_dir, fname)
    dst = os.path.join(train_dogs_dir, fname)
    shutil.copyfile(src, dst)

# Copy the next 500 dog images to validation_dogs_dir
fnames = ['dog.{}.jpg'.format(i) for i in range(1000, 1500)]
for fname in fnames:
    src = os.path.join(original_dataset_dir, fname)
    dst = os.path.join(validation_dogs_dir, fname)
    shutil.copyfile(src, dst)

# Copy the next 500 dog images to test_dogs_dir
fnames = ['dog.{}.jpg'.format(i) for i in range(1500, 2000)]
for fname in fnames:
    src = os.path.join(original_dataset_dir, fname)
    dst = os.path.join(test_dogs_dir, fname)
    shutil.copyfile(src, dst)

print('total training cat images:', len(os.listdir(train_cats_dir)))
print('total training dog images:', len(os.listdir(train_dogs_dir)))
print('total validation cat images:', len(os.listdir(validation_cats_dir)))
print('total validation dog images:', len(os.listdir(validation_dogs_dir)))
print('total test cat images:', len(os.listdir(test_cats_dir)))
print('total test dog images:', len(os.listdir(test_dogs_dir)))

# Build the model
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu',
                 input_shape=(150, 150, 3)))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

print(model.summary())

model.compile(loss='binary_crossentropy',
              optimizer=RMSprop(lr=1e-4),
              metrics=['acc'])

train_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    train_dir,               # target directory
    target_size=(150, 150),  # resize the images
    batch_size=20,
    class_mode='binary'
)

validation_generator = test_datagen.flow_from_directory(
    validation_dir,
    target_size=(150, 150),
    batch_size=20,
    class_mode='binary'
)

for data_batch, labels_batch in train_generator:
    print('data batch shape:', data_batch.shape)
    print('labels batch shape:', labels_batch.shape)
    break

hist = model.fit_generator(
    train_generator,
    steps_per_epoch=100,
    epochs=30,
    validation_data=validation_generator,
    validation_steps=50
)

model.save('cats_and_dogs_small_1.h5')

acc = hist.history['acc']
val_acc = hist.history['val_acc']
loss = hist.history['loss']
val_loss = hist.history['val_loss']

epochs = range(len(acc))

plt.plot(epochs, acc, 'bo', label='Training acc')
plt.plot(epochs, val_acc, 'b', label='Validation acc')
plt.title('Training and validation accuracy')
plt.legend()

plt.figure()
plt.plot(epochs, loss, 'bo', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.legend()
plt.show()

datagen = ImageDataGenerator(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Visualize the effect of data augmentation
fnames = [os.path.join(train_cats_dir, fname) for fname in os.listdir(train_cats_dir)]

# Pick one image to augment
img_path = fnames[3]

# Read the image and resize it
img = image.load_img(img_path, target_size=(150, 150))

# Convert it to a NumPy array of shape (150, 150, 3)
x = image.img_to_array(img)

# Reshape it to (1, 150, 150, 3)
x = x.reshape(1, 150, 150, 3)

i = 0
for batch in datagen.flow(x, batch_size=1):
    plt.figure(i)
    imgplot = plt.imshow(image.array_to_img(batch[0]))
    i += 1
    if i % 4 == 0:
        break
plt.show()

# Train a new network using the augmented data
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu',
                 input_shape=(150, 150, 3)))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Flatten())
model.add(Dropout(0.5))
model.add(Dense(512, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy',
              optimizer=RMSprop(lr=1e-4),
              metrics=['acc'])

train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)

test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(150, 150),
    batch_size=32,
    class_mode='binary'
)

validation_generator = test_datagen.flow_from_directory(
    validation_dir,
    target_size=(150, 150),
    batch_size=32,
    class_mode='binary'
)

hist = model.fit_generator(
    train_generator,
    steps_per_epoch=100,
    epochs=100,
    validation_data=validation_generator,
    validation_steps=50,
)

model.save('cats_and_dogs_small_2.h5')

acc = hist.history['acc']
val_acc = hist.history['val_acc']
loss = hist.history['loss']
val_loss = hist.history['val_loss']

epochs = range(len(acc))

plt.plot(epochs, acc, 'bo', label='Training acc')
plt.plot(epochs, val_acc, 'b', label='Validation acc')
plt.title('Training and validation accuracy')
plt.legend()

plt.figure()

plt.plot(epochs, loss, 'bo', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.legend()

plt.show()

4. Results
The running results are displayed in the command line, as shown in the figure below.
(Figure: console output of the training run)
