ILSVRC(ImageNet大规模视觉识别挑战赛)是一个基于ImageNet数据集的图像分类竞赛
亮点:
首次使用GPU(CPU的20~50倍速度)
使用了ReLU激活函数,而不是Sigmoid(求导麻烦,网络深时出现梯度消失)
使用了LRN局部响应归一化
在全连接层前两层使用了Dropout神经元随机失活,以减少过拟合
在每一层中随机失活几个神经元,可以理解为变相的减少了训练的参数
AlexNet网络结构
花的数据分类:
下载地址:https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz
下载后按照视频里的教程走就行,最后运行一下脚本(在data_set界面按住shift加右键,以powershell方式打开,再输入下图的指令即可),即可按照9:1划分训练集和验证集
代码实战:
AlexNet网络每一层的参数
代码运行如下:
cpu为串行过程,而包含gpu则变为并行过程
验证过程:识别花朵图片:(这里千万不要使用百度上的图片,可能会报错,建议直接从data_set里找一张)
代码如下:
model.py
from tensorflow.keras import layers, models, Model, Sequential
def AlexNet_v1(im_height=224, im_width=224, num_classes=1000):
    """Build the AlexNet-style classifier with the Keras functional API.

    The shape comments below assume the default 224x224 input.

    Args:
        im_height: Height of the input images in pixels.
        im_width: Width of the input images in pixels.
        num_classes: Number of units in the final Dense layer, i.e. the
            number of categories to predict. 1000 is only the default;
            callers pass the class count of their own dataset.

    Returns:
        A Keras model taking float32 tensors of shape
        (None, im_height, im_width, 3) and returning softmax
        probabilities of shape (None, num_classes).
    """
    # TensorFlow tensors use NHWC channel ordering.
    input_image = layers.Input(shape=(im_height, im_width, 3), dtype="float32")  # (None, 224, 224, 3)

    # Convolutional feature extractor.
    feature_layers = [
        # Manual padding: 1 row/col on top/left, 2 on bottom/right.
        layers.ZeroPadding2D(((1, 2), (1, 2))),                                # (None, 227, 227, 3)
        # 48 kernels of 11x11, stride 4: (227 - 11) / 4 + 1 = 55
        layers.Conv2D(48, kernel_size=11, strides=4, activation="relu"),       # (None, 55, 55, 48)
        # 3x3 pooling, stride 2, "valid": (55 - 3) / 2 + 1 = 27
        layers.MaxPool2D(pool_size=3, strides=2),                              # (None, 27, 27, 48)
        # Stride defaults to 1 and padding is "same", so H and W are kept.
        layers.Conv2D(128, kernel_size=5, padding="same", activation="relu"),  # (None, 27, 27, 128)
        # (27 - 3) / 2 + 1 = 13
        layers.MaxPool2D(pool_size=3, strides=2),                              # (None, 13, 13, 128)
        layers.Conv2D(192, kernel_size=3, padding="same", activation="relu"),  # (None, 13, 13, 192)
        layers.Conv2D(192, kernel_size=3, padding="same", activation="relu"),  # (None, 13, 13, 192)
        layers.Conv2D(128, kernel_size=3, padding="same", activation="relu"),  # (None, 13, 13, 128)
        # (13 - 3) / 2 + 1 = 6
        layers.MaxPool2D(pool_size=3, strides=2),                              # (None, 6, 6, 128)
    ]

    # Classifier head: three fully connected layers.
    classifier_layers = [
        layers.Flatten(),                        # (None, 6*6*128), one flat vector per image
        layers.Dropout(0.2),                     # deactivate 20% of units to reduce overfitting
        layers.Dense(2048, activation="relu"),   # (None, 2048)
        layers.Dropout(0.2),                     # deactivate 20% of units to reduce overfitting
        layers.Dense(2048, activation="relu"),   # (None, 2048)
        # Output layer: one unit per class, no activation here because
        # the Softmax layer below turns the scores into probabilities.
        layers.Dense(num_classes),               # (None, num_classes)
        layers.Softmax(),                        # (None, num_classes), rows sum to 1
    ]

    x = input_image
    for layer in feature_layers + classifier_layers:
        x = layer(x)

    # The network maps the input image to the class probability distribution.
    return models.Model(inputs=input_image, outputs=x)
#底下这个模型暂时不考虑
class AlexNet_v2(Model):
    """AlexNet-style classifier written as a subclassed Keras ``Model``.

    The convolutional stack matches ``AlexNet_v1``; the two hidden Dense
    layers here have 1024 and 128 units. The shape comments below assume
    a 224x224x3 input.

    Args:
        num_classes: Number of units in the final Dense layer (the
            number of categories to predict).
    """

    def __init__(self, num_classes=1000):
        super().__init__()

        # Convolutional feature extractor.
        conv_stack = [
            layers.ZeroPadding2D(((1, 2), (1, 2))),                                # (None, 227, 227, 3)
            layers.Conv2D(48, kernel_size=11, strides=4, activation="relu"),       # (None, 55, 55, 48)
            layers.MaxPool2D(pool_size=3, strides=2),                              # (None, 27, 27, 48)
            layers.Conv2D(128, kernel_size=5, padding="same", activation="relu"),  # (None, 27, 27, 128)
            layers.MaxPool2D(pool_size=3, strides=2),                              # (None, 13, 13, 128)
            layers.Conv2D(192, kernel_size=3, padding="same", activation="relu"),  # (None, 13, 13, 192)
            layers.Conv2D(192, kernel_size=3, padding="same", activation="relu"),  # (None, 13, 13, 192)
            layers.Conv2D(128, kernel_size=3, padding="same", activation="relu"),  # (None, 13, 13, 128)
            layers.MaxPool2D(pool_size=3, strides=2),                              # (None, 6, 6, 128)
        ]
        self.features = Sequential(conv_stack)

        self.flatten = layers.Flatten()

        # Fully connected classifier head ending in softmax probabilities.
        dense_head = [
            layers.Dropout(0.2),
            layers.Dense(1024, activation="relu"),  # (None, 1024)
            layers.Dropout(0.2),
            layers.Dense(128, activation="relu"),   # (None, 128)
            layers.Dense(num_classes),              # (None, num_classes)
            layers.Softmax(),
        ]
        self.classifier = Sequential(dense_head)

    def call(self, inputs, **kwargs):
        """Run the forward pass: features -> flatten -> classifier."""
        feature_maps = self.features(inputs)
        flat = self.flatten(feature_maps)
        return self.classifier(flat)
train.py
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt
from model import AlexNet_v1, AlexNet_v2
import tensorflow as tf
import json
import os
#定义训练集和验证集
def main():
    """Train AlexNet_v1 on the flower data set and plot the training curves.

    Reads images from ``<cwd>/../../data_set/flower_data/{train,val}``,
    writes the index-to-class-name mapping to ``class_indices.json``,
    saves the best weights (lowest validation loss) to
    ``./save_weights/myAlex.h5`` and finally shows loss/accuracy plots.

    Raises:
        AssertionError: If the train or val directory does not exist.
    """
    # Locate the data set relative to the current working directory.
    data_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))
    image_path = os.path.join(data_root, "data_set", "flower_data")
    train_dir = os.path.join(image_path, "train")
    validation_dir = os.path.join(image_path, "val")
    for required_dir in (train_dir, validation_dir):
        assert os.path.exists(required_dir), "cannot find {}".format(required_dir)

    # Directory that receives the trained model weights; create it if missing.
    if not os.path.exists("save_weights"):
        os.makedirs("save_weights")

    # Basic training parameters.
    im_height = 224   # input image height
    im_width = 224    # input image width
    batch_size = 32
    epochs = 10       # number of training epochs

    # Pre-processing: scale pixel values from 0-255 down to 0-1; the
    # training generator additionally applies random horizontal flips.
    train_image_generator = ImageDataGenerator(rescale=1. / 255,
                                               horizontal_flip=True)
    validation_image_generator = ImageDataGenerator(rescale=1. / 255)

    # Training-set generator (shuffled, one-hot labels).
    train_data_gen = train_image_generator.flow_from_directory(
        directory=train_dir,
        batch_size=batch_size,
        shuffle=True,
        target_size=(im_height, im_width),
        class_mode='categorical')
    total_train = train_data_gen.n  # number of training samples

    # class_indices maps class name -> index; invert it to index -> name
    # and store it as JSON.
    class_indices = train_data_gen.class_indices
    inverse_dict = {index: name for name, index in class_indices.items()}
    json_str = json.dumps(inverse_dict, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    # Validation-set generator, built the same way but without shuffling.
    val_data_gen = validation_image_generator.flow_from_directory(
        directory=validation_dir,
        batch_size=batch_size,
        shuffle=False,
        target_size=(im_height, im_width),
        class_mode='categorical')
    total_val = val_data_gen.n  # number of validation samples
    print("using {} images for training, {} images for validation.".format(total_train,
                                                                           total_val))

    # Optional: preview five training images in a 1x5 grid.
    # sample_training_images, sample_training_labels = next(train_data_gen)  # labels are one-hot
    #
    # def plotImages(images_arr):
    #     fig, axes = plt.subplots(1, 5, figsize=(20, 20))
    #     axes = axes.flatten()
    #     for img, ax in zip(images_arr, axes):
    #         ax.imshow(img)
    #         ax.axis('off')
    #     plt.tight_layout()
    #     plt.show()
    #
    # plotImages(sample_training_images[:5])

    # Instantiate the network for the 5 flower classes.
    model = AlexNet_v1(im_height=im_height, im_width=im_width, num_classes=5)
    # Alternative when using the subclassed model:
    # model = AlexNet_v2(num_classes=5)
    # model.build((batch_size, 224, 224, 3))
    model.summary()  # print the layer / parameter overview

    # Train with the high-level Keras API. The model already ends in a
    # Softmax layer, hence from_logits=False.
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005),
                  loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
                  metrics=["accuracy"])

    # Checkpoint callback: keep only the weights of the epoch with the
    # best monitored validation loss.
    checkpoint = tf.keras.callbacks.ModelCheckpoint(filepath='./save_weights/myAlex.h5',
                                                    save_best_only=True,
                                                    save_weights_only=True,
                                                    monitor='val_loss')
    callbacks = [checkpoint]

    # Steps per epoch = number of whole batches in the respective set.
    history = model.fit(x=train_data_gen,
                        steps_per_epoch=total_train // batch_size,
                        epochs=epochs,
                        validation_data=val_data_gen,
                        validation_steps=total_val // batch_size,
                        callbacks=callbacks)

    # Per-epoch metrics recorded during training.
    history_dict = history.history
    train_loss = history_dict["loss"]
    train_accuracy = history_dict["accuracy"]
    val_loss = history_dict["val_loss"]
    val_accuracy = history_dict["val_accuracy"]

    # One figure for the loss curves, one for the accuracy curves.
    epoch_axis = range(epochs)
    figures = (
        ('loss', ('train_loss', train_loss), ('val_loss', val_loss)),
        ('accuracy', ('train_accuracy', train_accuracy), ('val_accuracy', val_accuracy)),
    )
    for y_label, *curves in figures:
        plt.figure()
        for curve_label, values in curves:
            plt.plot(epoch_axis, values, label=curve_label)
        plt.legend()
        plt.xlabel('epochs')
        plt.ylabel(y_label)
    plt.show()
#适用于数据集较大的情况
# history = model.fit_generator(generator=train_data_gen,
# steps_per_epoch=total_train // batch_size,
# epochs=epochs,
# validation_data=val_data_gen,
# validation_steps=total_val // batch_size,
# callbacks=callbacks)
# # using keras low level api for training
# loss_object = tf.keras.losses.CategoricalCrossentropy(from_logits=False)
# optimizer = tf.keras.optimizers.Adam(learning_rate=0.0005)
#
# train_loss = tf.keras.metrics.Mean(name='train_loss')
# train_accuracy = tf.keras.metrics.CategoricalAccuracy(name='train_accuracy')
#
# test_loss = tf.keras.metrics.Mean(name='test_loss')
# test_accuracy = tf.keras.metrics.CategoricalAccuracy(name='test_accuracy')
#
#
# @tf.function
# def train_step(images, labels):
# with tf.GradientTape() as tape:
# predictions = model(images, training=True)
# loss = loss_object(labels, predictions)
# gradients = tape.gradient(loss, model.trainable_variables)
# optimizer.apply_gradients(zip(gradients, model.trainable_variables))
#
# train_loss(loss)
# train_accuracy(labels, predictions)
#
#
# @tf.function
# def test_step(images, labels):
# predictions = model(images, training=False)
# t_loss = loss_object(labels, predictions)
#
# test_loss(t_loss)
# test_accuracy(labels, predictions)
#
#
# best_test_loss = float('inf')
# for epoch in range(1, epochs+1):
# train_loss.reset_states() # clear history info
# train_accuracy.reset_states() # clear history info
# test_loss.reset_states() # clear history info
# test_accuracy.reset_states() # clear history info
# for step in range(total_train // batch_size):
# images, labels = next(train_data_gen)
# train_step(images, labels)
#
# for step in range(total_val // batch_size):
# test_images, test_labels = next(val_data_gen)
# test_step(test_images, test_labels)
#
# template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'
# print(template.format(epoch,
# train_loss.result(),
# train_accuracy.result() * 100,
# test_loss.result(),
# test_accuracy.result() * 100))
# if test_loss.result() < best_test_loss:
# model.save_weights("./save_weights/myAlex.ckpt", save_format='tf')
# Run the training only when this file is executed as a script.
if __name__ == '__main__':
    main()
predict.py
import os
import json
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
from model import AlexNet_v1, AlexNet_v2
def main():
    """Classify one flower image with the trained AlexNet_v1 weights.

    Loads ``../tulip.jpg``, resizes it to 224x224, scales it to 0-1 and
    runs it through the network using the weights stored in
    ``./save_weights/myAlex.h5``. Class names come from
    ``./class_indices.json``. The probability of every class is printed
    and the image is shown with the top prediction as its title.

    Raises:
        AssertionError: If the image, the JSON class file or the weights
            file does not exist.
    """
    im_height = 224
    im_width = 224

    # Load the image.
    img_path = "../tulip.jpg"
    assert os.path.exists(img_path), "file: '{}' dose not exist.".format(img_path)
    # Force three RGB channels: grayscale, palette or RGBA files would
    # otherwise produce an array whose channel dimension does not match
    # the (224, 224, 3) model input.
    img = Image.open(img_path).convert("RGB")
    # Resize to the network input size (PIL expects (width, height)).
    img = img.resize((im_width, im_height))
    plt.imshow(img)

    # Scale pixel values to the 0-1 range.
    img = np.array(img) / 255.
    # Add a leading batch dimension; the image is the only batch member.
    img = np.expand_dims(img, 0)

    # Read the index -> class name mapping.
    json_path = './class_indices.json'
    assert os.path.exists(json_path), "file: '{}' dose not exist.".format(json_path)
    with open(json_path, "r") as f:
        class_indict = json.load(f)

    # Build the model and load the trained weights.
    model = AlexNet_v1(num_classes=5)
    weighs_path = "./save_weights/myAlex.h5"
    # Check the weights file itself (the original re-checked img_path here,
    # so a missing weights file was never detected by this assertion).
    assert os.path.exists(weighs_path), "file: '{}' dose not exist.".format(weighs_path)
    model.load_weights(weighs_path)

    # Predict and drop the batch dimension.
    result = np.squeeze(model.predict(img))
    predict_class = np.argmax(result)  # index of the highest probability

    print_res = "class: {}   prob: {:.3}".format(class_indict[str(predict_class)],
                                                 result[predict_class])
    plt.title(print_res)
    # JSON object keys are strings, so look classes up by str(index).
    for i, prob in enumerate(result):
        print("class: {:10}   prob: {:.3}".format(class_indict[str(i)],
                                                  prob))
    plt.show()
# Run the prediction only when this file is executed as a script.
if __name__ == '__main__':
    main()
也可以用gpu进行训练,这里up也没讲,代码就不放了,直接去克隆大佬的仓库就行!
注意:
1.本模型没有手动加载权重,一般默认会自动初始化
2.必须至少训练出一轮epoch,才会出现.h5文件
3.本次实例没有采用LRN
4.save_weights只有训练集模型参数,验证集只是利用训练集的权重去测试验证
5.对网络感兴趣的,可以看知乎这位大佬的文章:手撕 CNN 经典网络之 AlexNet(理论篇) - 知乎 (zhihu.com)
———————————————————————————————————————————2023.4.12
我们知道,在图中AlexNet网络中,最后一层输出有1000个,同样,代码中的 x = layers.Dense(num_classes)(x)中的num_classes的数量也设置为1000,但是在这个案例中,我们进行的花朵分类只有5个class,也就是说最后输出应该是5,但现实是即使是1000,依旧能成功运行,且所有class的概率加起来也是1,非常奇怪。
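我在视频下方看到也有人提出疑问,但up回答的是默认值,不理解~(补充说明:1000只是AlexNet_v1函数参数num_classes的默认值;train.py和predict.py中实际调用时都传入了num_classes=5,传入的5会覆盖默认值,所以本案例中网络最后一层的输出个数其实是5。另外,Softmax会对任意个数的输出做归一化,因此所有类别的概率之和总是1。)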