Dogs vs. Cats

Copy the images into training, validation, and test directories.
Opening an absolute path can fail with an OSError: [Errno 22] Invalid argument: error.
Fix: write the absolute path with doubled backslashes.
Also watch the indentation of the for loops.
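For reference, a minimal sketch of three equivalent ways to write such a Windows path (the directory names here are only examples):

```python
import os

p1 = 'E:\\kaggle\\dogs-vs-cats\\train'                        # doubled (escaped) backslashes
p2 = r'E:\kaggle\dogs-vs-cats\train'                          # raw string, backslashes taken literally
p3 = os.path.join('E:\\', 'kaggle', 'dogs-vs-cats', 'train')  # portable path joining
```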

import os, shutil
# original_dataset_dir is where the downloaded (unzipped) dataset lives
# base_dir is where the smaller split dataset will be stored
# The shutil module offers high-level operations on files and collections of files, in particular functions
# that support file copying and removal. For operations on individual files, see also the os module.
original_dataset_dir = 'E:\\kaggle\\dogs-vs-cats\\train'
base_dir = 'E:\\kaggle\\cats_and_dogs_small'
os.mkdir(base_dir)
# directories for the split training, validation, and test sets
train_dir=os.path.join(base_dir,'train')
os.mkdir(train_dir)
validation_dir=os.path.join(base_dir,'validation')
os.mkdir(validation_dir)
test_dir=os.path.join(base_dir,'test')
os.mkdir(test_dir)
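Note that re-running this cell raises FileExistsError, because os.mkdir fails when the directory already exists. A hedged alternative (my suggestion, not from the original code) is os.makedirs with exist_ok:

```python
os.makedirs(base_dir, exist_ok=True)  # creates the directory tree and is silent if it already exists
```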

# directory for cat training images
train_cats_dir = os.path.join(train_dir, 'cats')
os.mkdir(train_cats_dir)
# directory for dog training images
train_dogs_dir = os.path.join(train_dir, 'dogs')
os.mkdir(train_dogs_dir)
# directory for cat validation images
validation_cats_dir = os.path.join(validation_dir, 'cats')
os.mkdir(validation_cats_dir)
# directory for dog validation images
validation_dogs_dir = os.path.join(validation_dir, 'dogs')
os.mkdir(validation_dogs_dir)
# directory for cat test images
test_cats_dir = os.path.join(test_dir, 'cats')
os.mkdir(test_cats_dir)
# directory for dog test images
test_dogs_dir = os.path.join(test_dir, 'dogs')
os.mkdir(test_dogs_dir)
# copy the first 1000 cat images to train_cats_dir
fnames = ['cat.{}.jpg'.format(i) for i in range(1000)]
for fname in fnames:
    src=os.path.join(original_dataset_dir,fname)
    dst=os.path.join(train_cats_dir,fname)
    shutil.copyfile(src, dst)
# copy the next 500 cat images to validation_cats_dir
fnames = ['cat.{}.jpg'.format(i) for i in range(1000,1500)]
for fname in fnames:
    src=os.path.join(original_dataset_dir,fname)
    dst=os.path.join(validation_cats_dir,fname)
    shutil.copyfile(src,dst)
# copy the next 500 cat images to test_cats_dir
fnames = ['cat.{}.jpg'.format(i) for i in range(1500,2000)]
for fname in fnames:
    src=os.path.join(original_dataset_dir,fname)
    dst=os.path.join(test_cats_dir,fname)
    shutil.copyfile(src,dst)
# copy the first 1000 dog images to train_dogs_dir
fnames = ['dog.{}.jpg'.format(i) for i in range(1000)]   
for fname in fnames:
    src=os.path.join(original_dataset_dir,fname)
    dst=os.path.join(train_dogs_dir,fname)
    shutil.copyfile(src,dst)
# copy the next 500 dog images to validation_dogs_dir
fnames = ['dog.{}.jpg'.format(i) for i in range(1000,1500)]   
for fname in fnames:
    src=os.path.join(original_dataset_dir,fname)
    dst=os.path.join(validation_dogs_dir,fname)
    shutil.copyfile(src,dst)
# copy the next 500 dog images to test_dogs_dir
fnames = ['dog.{}.jpg'.format(i) for i in range(1500,2000)]   
for fname in fnames:
    src=os.path.join(original_dataset_dir,fname)
    dst=os.path.join(test_dogs_dir,fname)
    shutil.copyfile(src,dst)
print('total training cat images:', len(os.listdir(train_cats_dir)))  # count the files in each directory
total training cat images: 1000
print('total training dog images:', len(os.listdir(train_dogs_dir)))
total training dog images: 1000
print('total validation cat images:', len(os.listdir(validation_cats_dir)))
total validation cat images: 500
print('total validation dog images:', len(os.listdir(validation_dogs_dir)))
total validation dog images: 500
print('total test cat images:', len(os.listdir(test_cats_dir)))
total test cat images: 500
print('total test dog images:', len(os.listdir(test_dogs_dir)))
total test dog images: 500

Build the model and configure its optimization. Question: why is the model's last layer a single unit while the second-to-last has 512 units? The final Dense(1) layer with a sigmoid activation outputs a single probability (cat vs. dog), which is exactly what the binary_crossentropy loss expects for binary classification; the 512-unit Dense layer is simply a capacity choice for the classifier on top of the convolutional base.

from tensorflow.keras import layers
from tensorflow.keras import models
model = models.Sequential()
model.add(layers.Conv2D(32, (3, 3), activation='relu',
 input_shape=(150, 150, 3)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(128, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(128, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Flatten())
model.add(layers.Dense(512, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))
model.summary()
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
conv2d (Conv2D)              (None, 148, 148, 32)      896       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 74, 74, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 72, 72, 64)        18496     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 36, 36, 64)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 34, 34, 128)       73856     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 17, 17, 128)       0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 15, 15, 128)       147584    
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 7, 7, 128)         0         
_________________________________________________________________
flatten (Flatten)            (None, 6272)              0         
_________________________________________________________________
dense (Dense)                (None, 512)               3211776   
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 513       
=================================================================
Total params: 3,453,121
Trainable params: 3,453,121
Non-trainable params: 0
_________________________________________________________________
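Where do these shapes come from? With the Conv2D default padding='valid', each 3×3 convolution trims 2 pixels from each spatial side, and each 2×2 max pooling halves the side (rounding down): 150 → 148 → 74 → 72 → 36 → 34 → 17 → 15 → 7. Flattening the final (7, 7, 128) feature map gives 7 × 7 × 128 = 6272, and the following Dense layer therefore has 6272 × 512 + 512 = 3,211,776 parameters, matching the summary above.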
from tensorflow.keras import optimizers
model.compile(loss='binary_crossentropy',
              optimizer=optimizers.RMSprop(lr=1e-4),
              metrics=['acc'])

1. Read the image files.
2. Decode the JPEG content into RGB grids of pixels.
3. Convert these pixel grids into floating-point tensors.
4. Rescale the pixel values (between 0 and 255) to the [0, 1] interval.
The image-processing helper module keras.preprocessing.image provides the ImageDataGenerator class for exactly this.
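By hand, those four steps would look roughly like this (a minimal sketch; img_path stands for a hypothetical path to one JPEG file):

```python
from tensorflow.keras.preprocessing import image

# img_path: hypothetical path to a single JPEG file
img = image.load_img(img_path, target_size=(150, 150))  # steps 1-2: read the file and decode the JPEG into an RGB image
x = image.img_to_array(img)                              # step 3: convert the pixel grid to a float32 tensor of shape (150, 150, 3)
x /= 255.0                                               # step 4: rescale pixel values from [0, 255] to [0, 1]
```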

from tensorflow.keras.preprocessing.image import ImageDataGenerator

train_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)  # rescale all images by 1/255

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(150, 150),  # resize all images to 150x150
    batch_size=20,
    class_mode='binary')  # binary labels, to match the binary_crossentropy loss

validation_generator = test_datagen.flow_from_directory(
     validation_dir,
     target_size=(150, 150),
     batch_size=20,
     class_mode='binary')
Found 2000 images belonging to 2 classes.
Found 1000 images belonging to 2 classes.
# each batch contains 20 samples
for data_batch, labels_batch in train_generator:
    print('data batch shape:', data_batch.shape)
    print('labels batch shape:', labels_batch.shape)
    break
data batch shape: (20, 150, 150, 3)
labels batch shape: (20,)
history = model.fit_generator(
     train_generator,
     steps_per_epoch=100,
     epochs=30,
     validation_data=validation_generator,
     validation_steps=50)
WARNING:tensorflow:From <ipython-input-24-2267b24b1105>:6: Model.fit_generator (from tensorflow.python.keras.engine.training) is deprecated and will be removed in a future version.
Instructions for updating:
Please use Model.fit, which supports generators.
WARNING:tensorflow:sample_weight modes were coerced from
  ...
    to  
  ['...']
WARNING:tensorflow:sample_weight modes were coerced from
  ...
    to  
  ['...']
Train for 100 steps, validate for 50 steps
Epoch 1/30
100/100 [==============================] - 41s 411ms/step - loss: 0.6900 - acc: 0.5280 - val_loss: 0.6681 - val_acc: 0.6270

Epoch 30/30
100/100 [==============================] - 43s 430ms/step - loss: 0.0555 - acc: 0.9835 - val_loss: 0.8907 - val_acc: 0.7340
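As the warning above suggests, fit_generator is deprecated in favor of Model.fit, which accepts generators directly; the equivalent call would be (a sketch, not re-run here):

```python
history = model.fit(
    train_generator,
    steps_per_epoch=100,
    epochs=30,
    validation_data=validation_generator,
    validation_steps=50)
```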
model.save('cats_and_dogs_small_1.h5')  # save the model
import matplotlib.pyplot as plt
acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(acc) + 1)
plt.plot(epochs, acc, 'bo', label='Training acc')
plt.plot(epochs, val_acc, 'b', label='Validation acc')
plt.title('Training and validation accuracy')
plt.legend()
plt.figure()
plt.plot(epochs, loss, 'bo', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.legend()
plt.show()

[Figures: training/validation accuracy and loss curves for the baseline model]

datagen = ImageDataGenerator(
     rotation_range=40,       # rotation_range is a value in degrees (0-180), a range within which to randomly rotate images
     width_shift_range=0.2,   # width_shift and height_shift are ranges within which to randomly translate images horizontally or vertically
     height_shift_range=0.2,
     shear_range=0.2,         # shear_range is for randomly applying shearing transformations
     zoom_range=0.2,          # zoom_range is for randomly zooming inside images
     horizontal_flip=True,    # horizontal_flip randomly flips half the images horizontally
     fill_mode='nearest')     # fill_mode is the strategy used to fill newly created pixels

Display a few randomly augmented training images.

from tensorflow.keras.preprocessing import image  # image-preprocessing utilities module
# os.path.join concatenates path components
# os.listdir(path) returns a list of the names of the files and folders in the given directory
fnames = [os.path.join(train_cats_dir, fname) for
          fname in os.listdir(train_cats_dir)]
# choose one image to augment
img_path = fnames[3]
# read the image and resize it
img = image.load_img(img_path, target_size=(150, 150))
# convert it to a Numpy array with shape (150, 150, 3)
x = image.img_to_array(img)
# reshape it to (1, 150, 150, 3)
x = x.reshape((1,) + x.shape)
# generate batches of randomly transformed images; the loop is infinite, so you must break it at some point
i = 0
for batch in datagen.flow(x, batch_size=1):
    plt.figure(i)
    imgplot = plt.imshow(image.array_to_img(batch[0]))
    i += 1
    if i % 4 == 0:
        break
plt.show()
![Augmented image 1](https://img-blog.csdnimg.cn/20210313165209704.png)
![Augmented image 2](https://img-blog.csdnimg.cn/20210313165209701.png)
![Augmented image 3](https://img-blog.csdnimg.cn/20210313165209703.png)
![Augmented image 4](https://img-blog.csdnimg.cn/20210313165209700.png)

Add a dropout layer just before the densely connected classifier.

Train the convolutional neural network using the data-augmentation generators.


```python
# build a network that includes dropout
from tensorflow.keras import layers
from tensorflow.keras import models

model1 = models.Sequential()
model1.add(layers.Conv2D(32,(3,3),activation='relu',input_shape=(150,150,3)))
model1.add(layers.MaxPooling2D((2,2)))
model1.add(layers.Conv2D(64,(3,3),activation = 'relu'))
model1.add(layers.MaxPooling2D((2,2)))
model1.add(layers.Conv2D(128,(3,3),activation = 'relu'))
model1.add(layers.MaxPooling2D((2,2)))
model1.add(layers.Conv2D(128,(3,3),activation = 'relu'))
model1.add(layers.MaxPooling2D((2,2)))
model1.add(layers.Flatten())
model1.add(layers.Dropout(0.5))
model1.add(layers.Dense(512,activation='relu'))
model1.add(layers.Dense(1,activation='sigmoid'))
# configure the model for training
from tensorflow.keras import optimizers

model1.compile(loss='binary_crossentropy',
             optimizer=optimizers.RMSprop(lr=1e-4),
             metrics=['acc'])
train_datagen = ImageDataGenerator(rescale=1./255,
                                   rotation_range=40,       # range within which to randomly rotate images
                                   width_shift_range=0.2,   # range for random horizontal shifts
                                   height_shift_range=0.2,  # range for random vertical shifts
                                   shear_range=0.2,         # range for random shearing transformations
                                   zoom_range=0.2,          # range for random zooming
                                   horizontal_flip=True)    # randomly flip half the images horizontally

test_datagen = ImageDataGenerator(rescale=1./255)
# note: the validation data must not be augmented

train_generator = train_datagen.flow_from_directory(train_dir,  # target directory
                                                    target_size=(150, 150),  # resize all images to 150x150
                                                    batch_size=32,
                                                    class_mode='binary')  # binary labels, to match the loss function

validation_generator = test_datagen.flow_from_directory(validation_dir,
                                                        target_size=(150, 150),  # resize all images to 150x150
                                                        batch_size=32,
                                                        class_mode='binary')  # binary labels, to match the loss function
```

Found 2000 images belonging to 2 classes.
Found 1000 images belonging to 2 classes.

Note that steps_per_epoch = 2000/32 ≈ 63 and validation_steps = 1000/32 ≈ 32.
When steps_per_epoch=100 was used, a warning appeared saying the input data ran out, suggesting the use of repeat().
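A small sketch of that arithmetic (math.ceil rounds up, so the last partial batch is still counted):

```python
import math

batch_size = 32
steps_per_epoch = math.ceil(2000 / batch_size)   # 63: enough steps to see all 2000 training images once
validation_steps = math.ceil(1000 / batch_size)  # 32: enough steps to see all 1000 validation images once
```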

# fit the model using the batch generator
history = model1.fit_generator(train_generator,
                               steps_per_epoch=63,
                               epochs=50,
                               validation_data=validation_generator,
                               validation_steps=32)
model1.save('cats_and_dogs_small_2.h5')  # save the model
WARNING:tensorflow:sample_weight modes were coerced from
  ...
    to  
  ['...']
WARNING:tensorflow:sample_weight modes were coerced from
  ...
    to  
  ['...']
Train for 63 steps, validate for 32 steps
Epoch 1/50
63/63 [==============================] - 38s 604ms/step - loss: 0.6578 - acc: 0.5775 - val_loss: 0.6297 - val_acc: 0.6410

Epoch 50/50
63/63 [==============================] - 40s 636ms/step - loss: 0.4791 - acc: 0.7590 - val_loss: 0.4749 - val_acc: 0.7810
import matplotlib.pyplot as plt

acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs = range(1,len(acc)+1)

plt.plot(epochs,acc,'bo',label='Training acc')
plt.plot(epochs,val_acc,'b',label='Validation acc')
plt.title('Training and validation accuracy')
plt.legend()

plt.figure()

plt.plot(epochs,loss,'bo',label='Training loss')
plt.plot(epochs,val_loss,'b',label='Validation loss')
plt.title('Training and validation loss')
plt.legend()

plt.show()

Using a pretrained convolutional neural network

```python
from tensorflow.keras.applications import VGG16
# extract the convolutional base of VGG16
conv_base = VGG16(weights='imagenet',         # weights specifies the weight checkpoint from which to initialize the model
                  include_top=False,          # include_top specifies whether to include the densely connected classifier on top
                  input_shape=(150, 150, 3))  # input_shape is the shape of the image tensors fed to the network
conv_base.summary()
```
Model: "vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
input_1 (InputLayer)         [(None, 150, 150, 3)]     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 150, 150, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 150, 150, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 75, 75, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 75, 75, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 75, 75, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 37, 37, 128)       0         
_________________________________________________________________
block3_conv1 (Conv2D)        (None, 37, 37, 256)       295168    
_________________________________________________________________
block3_conv2 (Conv2D)        (None, 37, 37, 256)       590080    
_________________________________________________________________
block3_conv3 (Conv2D)        (None, 37, 37, 256)       590080    
_________________________________________________________________
block3_pool (MaxPooling2D)   (None, 18, 18, 256)       0         
_________________________________________________________________
block4_conv1 (Conv2D)        (None, 18, 18, 512)       1180160   
_________________________________________________________________
block4_conv2 (Conv2D)        (None, 18, 18, 512)       2359808   
_________________________________________________________________
block4_conv3 (Conv2D)        (None, 18, 18, 512)       2359808   
_________________________________________________________________
block4_pool (MaxPooling2D)   (None, 9, 9, 512)         0         
_________________________________________________________________
block5_conv1 (Conv2D)        (None, 9, 9, 512)         2359808   
_________________________________________________________________
block5_conv2 (Conv2D)        (None, 9, 9, 512)         2359808   
_________________________________________________________________
block5_conv3 (Conv2D)        (None, 9, 9, 512)         2359808   
_________________________________________________________________
block5_pool (MaxPooling2D)   (None, 4, 4, 512)         0         
=================================================================
Total params: 14,714,688
Trainable params: 14,714,688
Non-trainable params: 0
_________________________________________________________________
import os
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator

base_dir = 'E:\\kaggle\\cats_and_dogs_small'
train_dir = os.path.join(base_dir, 'train') 
validation_dir = os.path.join(base_dir, 'validation') 
test_dir = os.path.join(base_dir, 'test')

datagen = ImageDataGenerator(rescale=1./255)
batch_size = 20

def extract_features(directory, sample_count):
    features = np.zeros(shape=(sample_count, 4, 4, 512))  # the extracted features have shape (samples, 4, 4, 512)
    labels = np.zeros(shape=(sample_count))
    generator = datagen.flow_from_directory(
                directory, 
                target_size=(150, 150), 
                batch_size=batch_size, 
                class_mode='binary')
    # note: these generators yield data indefinitely in a loop, so you must break after every image has been seen once
    i = 0
    for inputs_batch, labels_batch in generator:
        features_batch = conv_base.predict(inputs_batch)
        features[i * batch_size : (i + 1) * batch_size] = features_batch 
        labels[i * batch_size : (i + 1) * batch_size] = labels_batch
        i += 1
        if i * batch_size >= sample_count:
            break
    return features,labels

train_features, train_labels = extract_features(train_dir, 2000) 
validation_features, validation_labels = extract_features(validation_dir, 1000) 
test_features, test_labels = extract_features(test_dir, 1000)
Found 2000 images belonging to 2 classes.
Found 1000 images belonging to 2 classes.
Found 1000 images belonging to 2 classes.

The extracted features have shape (samples, 4, 4, 512). To feed them into a densely connected classifier,
we first have to flatten them to (samples, 8192), since 4 × 4 × 512 = 8192.

train_features = np.reshape(train_features, (2000, 4 * 4 * 512))
validation_features = np.reshape(validation_features, (1000, 4 * 4 * 512))
test_features = np.reshape(test_features, (1000, 4 * 4 * 512))
from tensorflow.keras import models
from tensorflow.keras import layers
from tensorflow.keras import optimizers

model = models.Sequential()
model.add(layers.Dense(256, activation='relu', input_dim=4 * 4 * 512))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(1, activation='sigmoid'))

model.compile(optimizer=optimizers.RMSprop(lr=2e-5),
              loss='binary_crossentropy',
              metrics=['acc'])

history = model.fit(train_features, train_labels,
                     epochs=30,
                     batch_size=20,
                     validation_data=(validation_features, validation_labels))

Train on 2000 samples, validate on 1000 samples
Epoch 1/30
2000/2000 [==============================] - 4s 2ms/sample - loss: 0.6178 - acc: 0.6625 - val_loss: 0.4589 - val_acc: 0.8200

Epoch 30/30
2000/2000 [==============================] - 2s 1ms/sample - loss: 0.0853 - acc: 0.9735 - val_loss: 0.2402 - val_acc: 0.9030
import matplotlib.pyplot as plt
acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(acc) + 1)
plt.plot(epochs, acc, 'bo', label='Training acc')
plt.plot(epochs, val_acc, 'b', label='Validation acc')
plt.title('Training and validation accuracy')
plt.legend()
plt.figure()
plt.plot(epochs, loss, 'bo', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.legend()
plt.show()

Feature extraction with data augmentation

from tensorflow.keras import models
from tensorflow.keras import layers
model = models.Sequential()
model.add(conv_base)
model.add(layers.Flatten())
model.add(layers.Dense(256, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))
model.summary()
Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
vgg16 (Model)                (None, 4, 4, 512)         14714688  
_________________________________________________________________
flatten_4 (Flatten)          (None, 8192)              0         
_________________________________________________________________
dense_10 (Dense)             (None, 256)               2097408   
_________________________________________________________________
dense_11 (Dense)             (None, 1)                 257       
=================================================================
Total params: 16,812,353
Trainable params: 16,812,353
Non-trainable params: 0
_________________________________________________________________
# freeze the convolutional base
print('This is the number of trainable weights '
 'before freezing the conv base:', len(model.trainable_weights))
This is the number of trainable weights before freezing the conv base: 30
conv_base.trainable = False
print('This is the number of trainable weights '
 'after freezing the conv base:', len(model.trainable_weights))
This is the number of trainable weights after freezing the conv base: 4
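One caveat worth noting: in Keras, a change to trainable only takes effect once the model is compiled, so the model must be compiled (or re-compiled) after freezing the base, as is done below.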
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import optimizers
train_datagen = ImageDataGenerator(
             rescale=1./255,
             rotation_range=40,
             width_shift_range=0.2,
             height_shift_range=0.2,
             shear_range=0.2,
             zoom_range=0.2,
             horizontal_flip=True,
             fill_mode='nearest')
# note: the validation data must not be augmented
test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
         train_dir, 
         target_size=(150, 150), 
         batch_size=20,
         class_mode='binary') 
validation_generator = test_datagen.flow_from_directory(
         validation_dir,
         target_size=(150, 150),
         batch_size=20,
         class_mode='binary')
model.compile(loss='binary_crossentropy',
         optimizer=optimizers.RMSprop(lr=2e-5),
         metrics=['acc'])
history = model.fit_generator(
         train_generator,
         steps_per_epoch=64,
         epochs=30,
         validation_data=validation_generator,
         validation_steps=32)
Found 2000 images belonging to 2 classes.
Found 1000 images belonging to 2 classes.
WARNING:tensorflow:sample_weight modes were coerced from
  ...
    to  
  ['...']
WARNING:tensorflow:sample_weight modes were coerced from
  ...
    to  
  ['...']
Train for 64 steps, validate for 32 steps
Epoch 1/30
64/64 [==============================] - 226s 4s/step - loss: 0.6204 - acc: 0.6547 - val_loss: 0.4882 - val_acc: 0.8078

Epoch 30/30
64/64 [==============================] - 224s 4s/step - loss: 0.3012 - acc: 0.8742 - val_loss: 0.2416 - val_acc: 0.9000
Reposted from the book *Deep Learning with Python* by François Chollet, the creator of Keras.