# coding: utf-8
# ## EDA & pre-processing
# In[1]:
import os , shutil

# Source: the full Kaggle dogs-vs-cats training set.
original_dataset_dir = '/home/lkl/.kaggle/competitions/dogs-vs-cats/train'
# Destination: the small subset this script carves out for training.
base_dir = '/home/lkl/.kaggle/competitions/dogs-vs-cats-small'
# makedirs(..., exist_ok=True) instead of os.mkdir: the original raised
# FileExistsError whenever this notebook export was re-run.
os.makedirs(base_dir, exist_ok=True)
# In[2]:
# Top-level train/validation/test directories inside the small subset.
train_dir = os.path.join(base_dir, 'train')
validation_dir = os.path.join(base_dir, 'validation')
test_dir = os.path.join(base_dir, 'test')
# exist_ok=True keeps re-runs from crashing with FileExistsError.
for _split_dir in (train_dir, validation_dir, test_dir):
    os.makedirs(_split_dir, exist_ok=True)
# In[3]:
# One 'cats' and one 'dogs' directory under each split; the six names are
# referenced later by the copy loops and by flow_from_directory.
train_cats_dir = os.path.join(train_dir, 'cats')
train_dogs_dir = os.path.join(train_dir, 'dogs')
validation_cats_dir = os.path.join(validation_dir, 'cats')
validation_dogs_dir = os.path.join(validation_dir, 'dogs')
test_cats_dir = os.path.join(test_dir, 'cats')
test_dogs_dir = os.path.join(test_dir, 'dogs')
# exist_ok=True keeps re-runs from crashing with FileExistsError.
for _class_dir in (train_cats_dir, train_dogs_dir,
                   validation_cats_dir, validation_dogs_dir,
                   test_cats_dir, test_dogs_dir):
    os.makedirs(_class_dir, exist_ok=True)
# In[17]:
# Copy the first 2000 cat images into the subset: images 0-999 -> train,
# 1000-1499 -> validation, 1500-1999 -> test. One parameterized loop replaces
# the three near-identical copy blocks of the original.
for first, last, target_dir in ((0, 1000, train_cats_dir),
                                (1000, 1500, validation_cats_dir),
                                (1500, 2000, test_cats_dir)):
    for i in range(first, last):
        fname = 'cat.{}.jpg'.format(i)
        shutil.copyfile(os.path.join(original_dataset_dir, fname),
                        os.path.join(target_dir, fname))
# In[29]:
# Same split for the dog images: 0-999 -> train, 1000-1499 -> validation,
# 1500-1999 -> test. One parameterized loop replaces the triplicated copies.
for first, last, target_dir in ((0, 1000, train_dogs_dir),
                                (1000, 1500, validation_dogs_dir),
                                (1500, 2000, test_dogs_dir)):
    for i in range(first, last):
        fname = 'dog.{}.jpg'.format(i)
        shutil.copyfile(os.path.join(original_dataset_dir, fname),
                        os.path.join(target_dir, fname))
# In[30]:
len(os.listdir(train_cats_dir))
# ## Build the model
# the small subset has 2000 images per class (cats and dogs) — 1000 train, 500 validation, 500 test per class, 4000 images in total
# In[31]:
from keras import layers
from keras import models
# In[35]:
# Four Conv2D/MaxPooling2D stages followed by a dense classifier with a single
# sigmoid unit for the binary cat-vs-dog decision.
model = models.Sequential([
    # 150x150x3 input — matches the target_size used by the data generators.
    layers.Conv2D(32, (3, 3), activation='relu',
                  input_shape=(150, 150, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(512, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])
model.summary()
# In[38]:
from keras import optimizers
# Binary classification setup: binary cross-entropy loss to match the sigmoid
# output, RMSprop with a small learning rate, accuracy as the metric.
# NOTE(review): `lr` is the legacy argument name; newer Keras/TF spells it
# `learning_rate` — confirm against the installed Keras version.
model.compile(loss = 'binary_crossentropy',
optimizer = optimizers.RMSprop(lr = 1e-4),
metrics = ['acc'])
# In[11]:
from keras.preprocessing.image import ImageDataGenerator
# Generators that stream batches of decoded JPEGs from disk, rescaling pixel
# values from [0, 255] to [0, 1].
train_datagen = ImageDataGenerator(rescale = 1./255)
test_datagen = ImageDataGenerator(rescale = 1./255) ## `1.` makes the scale a float
train_generator = train_datagen.flow_from_directory( ## yields (images, labels) batches
train_dir,
target_size = (150, 150),  # resize to match the model's input_shape
batch_size = 20,
class_mode = 'binary')  # one scalar 0/1 label per image
validation_generator = test_datagen.flow_from_directory(
validation_dir,
target_size = (150, 150),
batch_size = 20,
class_mode = 'binary')
# In[43]:
# Pull a single batch from the generator to confirm the shapes it yields.
# (The generator loops forever, so we take exactly one batch.)
data_batch, labels_batch = next(train_generator)
print('data batch shape :', data_batch.shape)
print('labels batch shape :', labels_batch.shape)
# ## Model Processing
# In[45]:
###开始训练
# Train the model on the streaming generators.
# NOTE(review): fit_generator is deprecated in TF2-era Keras, where model.fit
# accepts generators directly — confirm the installed version before changing.
history = model.fit_generator(
train_generator, ## training data stream
steps_per_epoch = 100, ## 100 batches of batch_size=20 => 2000 images per epoch
epochs = 30, ## 30 passes over the training subset
validation_data = validation_generator, # validation data stream
validation_steps = 50 ## 50 batches x 20 = the full 1000 validation images
)
# 可以观察到在22次训练时几乎没有提升了,而训练集上的准确度达到0.99,这就是标准的过拟合
# ## Save & Load model
# In[3]:
# Persist the trained model (architecture + weights) to an HDF5 file.
model.save('cats_and_dogs_small_1.h5')
# In[6]:
from keras.models import load_model
# Round-trip: reload the saved model from disk.
model = load_model('cats_and_dogs_small_1.h5')
# In[5]:
pwd
# ## Display learning curves of loss and accuracy
# In[51]:
import matplotlib.pyplot as plt

# Plot the per-epoch training/validation accuracy and loss recorded by fit.
metrics = history.history
epoch_axis = range(1, len(metrics['acc']) + 1)  # epochs numbered from 1

# Accuracy curves: dots for training, solid line for validation.
plt.plot(epoch_axis, metrics['acc'], 'bo', label = 'Training acc')
plt.plot(epoch_axis, metrics['val_acc'], 'b', label = 'Validation acc')
plt.title('Training and validation accuracy')
plt.legend()

# Loss curves on a separate figure.
plt.figure()
plt.plot(epoch_axis, metrics['loss'], 'bo', label = 'Training loss')
plt.plot(epoch_axis, metrics['val_loss'], 'b', label = 'Validation loss')
plt.title('Training and validation loss')
plt.legend()
plt.show()
# 过拟合的原因主要是参数太多样本太少,
# --- Trailing blog-page text scraped along with the code (CSDN article
# --- metadata); commented out so the file remains valid Python.
# 基于keras的猫狗图像分类
# 最新推荐文章于 2024-07-06 22:37:37 发布
# 这篇博客介绍了如何使用Keras进行猫狗图像分类。首先,通过数据预处理将原始数据集划分为训练、验证和测试集。然后,构建了一个卷积神经网络模型,并进行了训练。在训练过程中,使用了数据增强来减少过拟合。最后,展示了训练过程中的学习曲线,以及如何保存和加载模型。
# 摘要由CSDN通过智能技术生成