- Python version: Python 3.x
- Platform: Windows
- IDE: Jupyter / Colab / PyCharm
- Please credit the source when reposting: https://blog.csdn.net/Tian121381
- Data download, extraction code: osly
I. Preface
This second major part contains three exercises. The data can be downloaded from the download link above. Try writing the code yourself first, then compare with my reference code. I won't explain the code line by line; if anything is unclear, see the walkthroughs in the earlier chapters, which cover much the same ground. Relevant comments are included in the code. Let's get started!
Note:
The code below is my own and is for reference only.
Exercise 1: Processing the Full Dataset
Problem Description
In this exercise you will train a CNN on the full Cats-v-Dogs dataset. This requires a fair amount of data preprocessing, because the dataset does not come with a training/validation split; you have to create one yourself.
The approach is as follows:
Write a Python function named split_data() that takes a SOURCE directory containing the files, a TRAINING directory that a portion of the files will be copied to, a TESTING directory that receives the remaining portion, and a SPLIT_SIZE that determines the split. The split should be randomized, so that the training set is a random X% of the files and the test set is the rest. For example, if SOURCE is PetImages/Cat and SPLIT_SIZE is 0.9, then 90% of the images in PetImages/Cat will be copied to the TRAINING directory and 10% to the TESTING directory. All images should also be checked, and files of zero length should not be copied.
Finally, train the model.
Reference Code
# Import required packages
import os
import tensorflow as tf
import zipfile
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from shutil import copyfile
import random
# Unzip the data into the target folder
local_dir = '/content/drive/My Drive/Colab Notebooks/DateSet/cats_and_dogs.zip'
zip_ref = zipfile.ZipFile(local_dir, 'r')
zip_ref.extractall('/content/drive/My Drive/Colab Notebooks/tmp')
zip_ref.close()
print(len(os.listdir('/content/drive/My Drive/Colab Notebooks/tmp/PetImages/Cat/')))
print(len(os.listdir('/content/drive/My Drive/Colab Notebooks/tmp/PetImages/Dog/')))
# Create the directories with os.mkdir. cats-v-dogs needs a root directory plus
# train and test subdirectories, each with 'cats' and 'dogs' subdirectories
try:
    os.mkdir('/content/drive/My Drive/Colab Notebooks/tmp/cats-v-dogs')
    os.mkdir('/content/drive/My Drive/Colab Notebooks/tmp/cats-v-dogs/train')
    os.mkdir('/content/drive/My Drive/Colab Notebooks/tmp/cats-v-dogs/test')
    os.mkdir('/content/drive/My Drive/Colab Notebooks/tmp/cats-v-dogs/train/cats')
    os.mkdir('/content/drive/My Drive/Colab Notebooks/tmp/cats-v-dogs/train/dogs')
    os.mkdir('/content/drive/My Drive/Colab Notebooks/tmp/cats-v-dogs/test/cats')
    os.mkdir('/content/drive/My Drive/Colab Notebooks/tmp/cats-v-dogs/test/dogs')
except OSError:
    pass
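As an aside, the try/except chain above can be collapsed. Here is a sketch (my own addition, assuming the same directory layout) using os.makedirs with exist_ok=True, which also creates intermediate directories and silently skips ones that already exist:

base = '/content/drive/My Drive/Colab Notebooks/tmp/cats-v-dogs'
for split in ('train', 'test'):
    for label in ('cats', 'dogs'):
        # exist_ok=True makes the call idempotent, so no try/except is needed
        os.makedirs(os.path.join(base, split, label), exist_ok=True)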
def split_data(SOURCE, TRAINING, TESTING, SPLIT_SIZE):
    files = []
    # Filter out invalid (zero-length) files
    for fname in os.listdir(SOURCE):
        file = os.path.join(SOURCE, fname)
        if os.path.getsize(file) > 0:
            files.append(fname)
        else:
            print(fname + " is zero length, skipping")
    # Sizes of the training and test sets
    train_length = int(len(files) * SPLIT_SIZE)
    test_length = int(len(files) - train_length)
    # random.sample(a, b): randomly pick b elements from a without replacement.
    # Shuffle the whole list so the training and test sets stay disjoint
    shuffled_set = random.sample(files, len(files))
    # Split into training and test sets
    training_set = shuffled_set[0:train_length]
    testing_set = shuffled_set[-test_length:]
    for filename in training_set:
        this_file = os.path.join(SOURCE, filename)
        destination = TRAINING + filename
        copyfile(this_file, destination)
    for filename in testing_set:
        this_file = os.path.join(SOURCE, filename)
        destination = TESTING + filename
        copyfile(this_file, destination)
CAT_SOURCE = '/content/drive/My Drive/Colab Notebooks/tmp/PetImages/Cat/'
TRAINING_CAT = '/content/drive/My Drive/Colab Notebooks/tmp/cats-v-dogs/train/cats/'
TESTING_CAT = '/content/drive/My Drive/Colab Notebooks/tmp/cats-v-dogs/test/cats/'
DOG_SOURCE = '/content/drive/My Drive/Colab Notebooks/tmp/PetImages/Dog/'
TRAINING_DOG = '/content/drive/My Drive/Colab Notebooks/tmp/cats-v-dogs/train/dogs/'
TESTING_DOG = '/content/drive/My Drive/Colab Notebooks/tmp/cats-v-dogs/test/dogs/'
split_size = 0.9
split_data(CAT_SOURCE, TRAINING_CAT, TESTING_CAT, split_size)
split_data(DOG_SOURCE, TRAINING_DOG, TESTING_DOG, split_size)
#666.jpg is zero length, skipping
#11702.jpg is zero length, skipping
# Check the size of each split
print(len(os.listdir('/content/drive/My Drive/Colab Notebooks/tmp/cats-v-dogs/train/cats/')))
print(len(os.listdir('/content/drive/My Drive/Colab Notebooks/tmp/cats-v-dogs/train/dogs/')))
print(len(os.listdir('/content/drive/My Drive/Colab Notebooks/tmp/cats-v-dogs/test/cats/')))
print(len(os.listdir('/content/drive/My Drive/Colab Notebooks/tmp/cats-v-dogs/test/dogs/')))
#11250
#11250
#1250
#1250
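Because the split is random, a quick sanity check (optional, my own addition) can confirm the training and test sets are disjoint:

# The intersection of the two file-name sets should be empty
train_cats = set(os.listdir(TRAINING_CAT))
test_cats = set(os.listdir(TESTING_CAT))
print(len(train_cats & test_cats))  # expect 0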
# Build the model
model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(16, (3,3), activation='relu', input_shape=(150,150,3)),
    tf.keras.layers.MaxPool2D(2,2),
    tf.keras.layers.Conv2D(32, (3,3), activation='relu'),
    tf.keras.layers.MaxPool2D(2,2),
    tf.keras.layers.Conv2D(64, (3,3), activation='relu'),
    tf.keras.layers.MaxPool2D(2,2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])
# Compile (learning_rate replaces the deprecated lr argument)
model.compile(loss='binary_crossentropy', optimizer=RMSprop(learning_rate=0.001), metrics=['acc'])
# Define the image generators. Note that rescale must be passed by keyword:
# the first positional argument of ImageDataGenerator is featurewise_center, not rescale
train_dir = '/content/drive/My Drive/Colab Notebooks/tmp/cats-v-dogs/train/'
train_datagen = ImageDataGenerator(rescale=1/255,
                                   rotation_range=40,
                                   width_shift_range=0.2,
                                   height_shift_range=0.2,
                                   shear_range=0.2,
                                   zoom_range=0.2,
                                   horizontal_flip=True,
                                   fill_mode='nearest')
train_generator = train_datagen.flow_from_directory(train_dir,
                                                    batch_size=100,
                                                    class_mode='binary',
                                                    target_size=(150,150))
validation_dir = '/content/drive/My Drive/Colab Notebooks/tmp/cats-v-dogs/test/'
validation_datagen = ImageDataGenerator(rescale=1/255)
validation_generation = validation_datagen.flow_from_directory(validation_dir,
                                                               batch_size=100,
                                                               class_mode='binary',
                                                               target_size=(150,150))
#Found 22498 images belonging to 2 classes.
#Found 2500 images belonging to 2 classes.
# Start training: 22498 images / batch size 100 ≈ 225 steps per epoch,
# 2500 validation images / 100 = 25 validation steps
history = model.fit_generator(
    train_generator,
    steps_per_epoch=225,
    epochs=15,
    validation_steps=25,
    validation_data=validation_generation)
# Plot the curves
import matplotlib.pyplot as plt
acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(len(acc))
plt.plot(epochs, acc, 'r', label='Training Accuracy')
plt.plot(epochs, val_acc, 'b', label='Validation Accuracy')
plt.title('Training and validation accuracy')
plt.legend()
plt.figure()
plt.plot(epochs, loss, 'r', label='Training Loss')
plt.plot(epochs, val_loss, 'b', label='Validation Loss')
plt.title('Training and validation loss')
plt.legend()
plt.show()
Result:
Optional: Run the Model on New Images
# Run the model on uploaded images
import numpy as np
from google.colab import files
from tensorflow.keras.preprocessing import image
uploaded = files.upload()
for fn in uploaded.keys():
    # Predict each uploaded image
    path = '/content/' + fn
    img = image.load_img(path, target_size=(150, 150))
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)
    images = np.vstack([x])
    classes = model.predict(images, batch_size=10)
    print(classes[0])
    if classes[0] > 0.5:
        print(fn + " is a dog")
    else:
        print(fn + " is a cat")
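The 0/1 mapping behind the 0.5 threshold comes from flow_from_directory, which assigns class indices alphabetically. You can verify it directly (a quick check, assuming train_generator from above is still in scope):

print(train_generator.class_indices)  # expected: {'cats': 0, 'dogs': 1}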
Input image:
Result:
Exercise 2: Transfer Learning
Problem Description
Use the pretrained inception_v3 weights and build on them to train a horses-vs-humans classifier. Stop training once accuracy reaches 99.9%.
Reference Code
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras import Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import os
from tensorflow.keras.applications.inception_v3 import InceptionV3
local_weight = '/content/drive/My Drive/Colab Notebooks/DateSet/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5'
pre_trained_model = InceptionV3(input_shape=(150,150,3),
                                include_top=False,  # drop the fully connected top
                                weights=None)       # random init; real weights loaded below
pre_trained_model.load_weights(local_weight)
# Use the layers as a feature extractor; do not update their weights
for layer in pre_trained_model.layers:
    layer.trainable = False
# Inspect the architecture
pre_trained_model.summary()
"""
Model: "inception_v3"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_1 (InputLayer) [(None, 150, 150, 3) 0
__________________________________________________________________________________________________
conv2d (Conv2D) (None, 74, 74, 32) 864 input_1[0][0]
__________________________________________________________________________________________________
...
...
... (long output truncated)
...
...
concatenate_1 (Concatenate) (None, 3, 3, 768) 0 activation_91[0][0]
activation_92[0][0]
__________________________________________________________________________________________________
activation_93 (Activation) (None, 3, 3, 192) 0 batch_normalization_93[0][0]
__________________________________________________________________________________________________
mixed10 (Concatenate) (None, 3, 3, 2048) 0 activation_85[0][0]
mixed9_1[0][0]
concatenate_1[0][0]
activation_93[0][0]
==================================================================================================
Total params: 21,802,784
Trainable params: 0
Non-trainable params: 21,802,784
"""
last_layer = pre_trained_model.get_layer('mixed7')  # treat 'mixed7' as the last layer
print(last_layer.output_shape)
last_output = last_layer.output
#(None, 7, 7, 768)
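If you want to see the other candidate cut-off points yourself, a small snippet (my own addition) lists each mixed concatenation block and its output shape:

# Inspect the 'mixed' blocks to choose where to cut the network
for layer in pre_trained_model.layers:
    if layer.name.startswith('mixed'):
        print(layer.name, layer.output_shape)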
from tensorflow.keras.optimizers import RMSprop
x = tf.keras.layers.Flatten()(last_output)
x = tf.keras.layers.Dense(1024, activation='relu')(x)
x = tf.keras.layers.Dropout(0.2)(x)
x = tf.keras.layers.Dense(1, activation='sigmoid')(x)
# Assemble the full model
model = Model(pre_trained_model.input, x)
# Compile
model.compile(loss='binary_crossentropy', optimizer=RMSprop(learning_rate=0.001), metrics=['accuracy'])
# Inspect the new model
model.summary()
"""
Model: "model"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_1 (InputLayer) [(None, 150, 150, 3) 0
__________________________________________________________________________________________________
conv2d (Conv2D) (None, 74, 74, 32) 864 input_1[0][0]
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 74, 74, 32) 96 conv2d[0][0]
__________________________________________________________________________________________________
activation (Activation) (None, 74, 74, 32) 0 batch_normalization[0][0]
__________________________________________________________________________________________________
conv2d_1 (Conv2D) (None, 72, 72, 32) 9216 activation[0][0]
__________________________________________________________________________________________________
batch_normalization_1 (BatchNor (None, 72, 72, 32) 96 conv2d_1[0][0]
...
...
... (another long output, truncated)
...
...
activation_69 (Activation) (None, 7, 7, 192) 0 batch_normalization_69[0][0]
__________________________________________________________________________________________________
mixed7 (Concatenate) (None, 7, 7, 768) 0 activation_60[0][0]
activation_63[0][0]
activation_68[0][0]
activation_69[0][0]
__________________________________________________________________________________________________
flatten (Flatten) (None, 37632) 0 mixed7[0][0]
__________________________________________________________________________________________________
dense (Dense) (None, 1024) 38536192 flatten[0][0]
__________________________________________________________________________________________________
dropout (Dropout) (None, 1024) 0 dense[0][0]
__________________________________________________________________________________________________
dense_1 (Dense) (None, 1) 1025 dropout[0][0]
==================================================================================================
Total params: 47,512,481
Trainable params: 38,537,217
Non-trainable params: 8,975,264
"""
# Callback class: stop training once accuracy reaches 99.9%
class myCallbacks(tf.keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs={}):
        # The logs keys match the metric names passed to compile ('accuracy' here)
        if logs.get('accuracy', 0) > 0.999:
            print("Accuracy reached 99.9%, stopping training")
            self.model.stop_training = True
# Data processing
# Extract the data
import zipfile
local_zip = '/content/drive/My Drive/Colab Notebooks/DateSet/horse-or-human.zip'
zip_ref = zipfile.ZipFile(local_zip, 'r')
zip_ref.extractall('/content/drive/My Drive/Colab Notebooks/tmp/training')
local_zip = '/content/drive/My Drive/Colab Notebooks/DateSet/valiation-horse-or-human.zip'
zip_ref = zipfile.ZipFile(local_zip, 'r')
zip_ref.extractall('/content/drive/My Drive/Colab Notebooks/tmp/validation')
zip_ref.close()
# Image root directories
train_dir = '/content/drive/My Drive/Colab Notebooks/tmp/training'
validation_dir = '/content/drive/My Drive/Colab Notebooks/tmp/validation'
# Paths of each subset
train_human = os.path.join(train_dir, 'humans')
train_horses = os.path.join(train_dir, 'horses')
validation_human = os.path.join(validation_dir, 'humans')
validation_horses = os.path.join(validation_dir, 'horses')
# Number of files in each subset
print(len(os.listdir(train_human)))
print(len(os.listdir(train_horses)))
print(len(os.listdir(validation_human)))
print(len(os.listdir(validation_horses)))
#527
#500
#128
#128
# Image generators
train_datagen = ImageDataGenerator(rescale=1/255,
                                   rotation_range=40,
                                   width_shift_range=0.2,
                                   height_shift_range=0.2,
                                   shear_range=0.2,
                                   zoom_range=0.2,
                                   horizontal_flip=True)
validation_datagen = ImageDataGenerator(rescale=1/255)
train_generation = train_datagen.flow_from_directory(train_dir,
                                                     batch_size=20,
                                                     class_mode='binary',
                                                     target_size=(150,150))
# Note: the validation data must go through validation_datagen (rescale only),
# not the augmenting train_datagen
validation_generation = validation_datagen.flow_from_directory(validation_dir,
                                                               batch_size=20,
                                                               class_mode='binary',
                                                               target_size=(150,150))
#Found 1027 images belonging to 2 classes.
#Found 256 images belonging to 2 classes.
# Start training
# Note: 256 validation images / batch 20 ≈ 13 batches, so validation_steps=50 re-iterates the generator
callbacks = myCallbacks()
history = model.fit_generator(train_generation,
                              validation_data=validation_generation,
                              steps_per_epoch=100,
                              epochs=100,
                              validation_steps=50,
                              callbacks=[callbacks])
import matplotlib.pyplot as plt
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(len(acc))
plt.plot(epochs, acc, 'r', label='Training accuracy')
plt.plot(epochs, val_acc, 'b', label='Validation accuracy')
plt.title('Training and validation accuracy')
plt.legend(loc=0)
plt.show()
Result:
Exercise 3: Processing Images Stored in a .csv File
Problem Description
This time the data is not stored as image files but as pixel values in a .csv file. The task is to read this data and train on it.
Approach:
Write code that reads the file passed to the function. The first line contains the column headers, so it should be ignored. Each subsequent line contains 785 comma-separated values between 0 and 255: the first value is the label and the remaining 784 are the pixel values of that image. The function returns two np.arrays, one with all the labels and one with all the images. Hints: if you read a full line as row, then row[0] holds the label and row[1:785] holds the 784 pixel values. Look at np.array_split to turn the 784 pixels into 28x28. You are reading strings but need floats; see np.array().astype for the conversion. A quick illustration of these hints follows below.
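As a quick illustration of the np.array_split hint (a toy example with dummy values, not part of the assignment):

import numpy as np
# One CSV row's 784 pixel values arrive as strings
row_pixels = [str(v) for v in range(784)]
# Split into 28 chunks of 28, then convert strings to floats -> a 28x28 image
image = np.array(np.array_split(row_pixels, 28)).astype('float')
print(image.shape)  # (28, 28)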
Reference Code
import csv
import tensorflow as tf
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator
def get_data(filename):
    with open(filename) as training_file:
        csv_reader = csv.reader(training_file, delimiter=',')
        first_line = True
        temp_images = []
        temp_labels = []
        for row in csv_reader:
            if first_line:
                # The first row holds the column headers; skip it
                first_line = False
            else:
                temp_labels.append(row[0])
                image_data = row[1:785]
                # Split the 784 pixel values into 28 rows of 28
                image_data_as_array = np.array_split(image_data, 28)
                temp_images.append(image_data_as_array)
        images = np.array(temp_images).astype('float')
        labels = np.array(temp_labels).astype('float')
    return images, labels
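For comparison, the same parse can be written more compactly with np.loadtxt (a sketch assuming the same CSV layout; the csv version above follows the exercise's hints):

def get_data_np(filename):
    # skiprows=1 drops the header row; each remaining row is label + 784 pixels
    data = np.loadtxt(filename, delimiter=',', skiprows=1)
    labels = data[:, 0]
    images = data[:, 1:].reshape(-1, 28, 28)
    return images, labels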
# Process the data
# Extract it from the zip
import os
import zipfile
local_zip = '/content/drive/My Drive/Colab Notebooks/DateSet/sign-language-mnist.zip'
zip_ref = zipfile.ZipFile(local_zip, 'r')
zip_ref.extractall('/content/drive/My Drive/Colab Notebooks/tmp/sl_mnist/')
zip_ref.close()
train_file = '/content/drive/My Drive/Colab Notebooks/tmp/sl_mnist/sign_mnist_train.csv'
test_file = '/content/drive/My Drive/Colab Notebooks/tmp/sl_mnist/sign_mnist_test.csv'
train_images, train_labels = get_data(train_file)
test_images, test_labels = get_data(test_file)
print(train_images.shape)
print(train_labels.shape)
print(test_images.shape)
print(test_labels.shape)
#(27455, 28, 28)
#(27455,)
#(7172, 28, 28)
#(7172,)
# Conv2D expects a channel dimension, so add one: (N, 28, 28) -> (N, 28, 28, 1)
train_images = np.expand_dims(train_images, axis=3)
test_images = np.expand_dims(test_images, axis=3)
print(train_images.shape)
print(test_images.shape)
#(27455, 28, 28, 1)
#(7172, 28, 28, 1)
# Image generators
train_datagen = ImageDataGenerator( rescale=1/255,
rotation_range=40,
width_shift_range=0.2,
height_shift_range=0.2,
shear_range=0.2,
zoom_range=0.2,
horizontal_flip=True,
fill_mode='nearest'
)
validation_datagen = ImageDataGenerator(rescale=1/255)
train_generation = train_datagen.flow(
train_images,
train_labels,
batch_size=32)
validation_generation = validation_datagen.flow(
test_images,
test_labels,
batch_size=32)
# Data ready; build the model
model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(32, (3,3), activation='relu', input_shape=(28,28,1)),
    tf.keras.layers.MaxPool2D(2,2),
    tf.keras.layers.Conv2D(64, (3,3), activation='relu'),
    tf.keras.layers.MaxPool2D(2,2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dense(26, activation='softmax')
])
# Compile
model.compile(optimizer="adam",
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
# Start training (steps must be integers, hence the floor division)
history = model.fit_generator(train_generation,
                              steps_per_epoch=len(train_images)//32,
                              epochs=15,
                              validation_data=validation_generation,
                              validation_steps=len(test_images)//32)
# Output the results
acc_loss = model.evaluate(test_images, test_labels)
print("Accuracy:", acc_loss[1], "\nLoss:", acc_loss[0])
#225/225 [==============================] - 0s 2ms/step - loss: 134.8944 - accuracy: 0.7524
#Accuracy: 0.7523703575134277
#Loss: 134.89442443847656
Note that test_images here still hold raw 0-255 pixel values, while training went through rescale=1/255; that mismatch is the likely reason the evaluated loss is so large.
# Plot the results
import matplotlib.pyplot as plt
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(len(acc))
plt.plot(epochs, acc, 'r', label='Training accuracy')
plt.plot(epochs, val_acc, 'b', label='Validation accuracy')
plt.title('Training and validation accuracy')
plt.legend()
plt.figure()
plt.plot(epochs, loss, 'r', label='Training Loss')
plt.plot(epochs, val_loss, 'b', label='Validation Loss')
plt.title('Training and validation loss')
plt.legend()
plt.show()
Result: