基于tensorflow实现VGG16 cifar-10分类识别
数据准备
代码需要cifar-10二进制文件和vgg16.npy预训练模型,其中cifar-10是必须要下载的训练对象,而vgg16.npy是官方给出的模型,可以选用,不是必须下载的,小白还是不要用了,体验一下训练的快感
cifar-10
cifar-10二进制文件是训练对象,一共有6万个数据样本,存储在6个二进制文件中,可以将其中5个作为训练集;
将另一个二进制文件作为测试集;
对于每一个样本,以字节(bytes)的形式存储:1 byte(label)+ 32×32×3 bytes(image)。了解存储结构有利于对二进制文件的解析。每个二进制文件都包含10000个这样的样本。
cifar-10 下载链接:http://www.cs.toronto.edu/~kriz/cifar.html
vgg16.npy
vgg16.npy是在其他训练对象做的训练形成的预训练模型,个人感觉对于cifar-10的训练集存在一定的偏差。
vgg16.npy 按照类似于字典的形式存储,字典的键是各个网络层的名称,字典的值是一个列表,该列表下有两个元素,第一个元素是weight,第二个元素是bias, 可以使用以下代码进行解析(具体使用可以参考我的代码):
import numpy as np

# Load the pickled dictionary: each key is a layer name, each value is a
# two-element list [weight, bias].
data_dict = np.load("./VGG16.npy", encoding="latin1", allow_pickle=True).item()
# Iterating a dict yields its keys, i.e. the layer names.
for layer_name in data_dict:
    print(layer_name)
# Layer names printed:
# conv5_1,fc6,conv5_3,conv5_2,fc8,fc7,conv4_1,conv4_2,conv4_3,conv3_3,conv3_2,conv3_1,conv1_1,conv1_2,conv2_2,conv2_1
vgg16.npy下载链接:https://pan.baidu.com/s/1HMWgpoBDggFxKMcTUDBXog 提取码:d85c(链接失效:VX15001620395)
本代码需要的文件目录结构
从代码中可以看到,以下几个目录需要提前准备好(也可以根据自己的需求更改目录和代码结构):
目录 | 用途 |
---|---|
./event | 事件文件夹 |
./event_old | 旧事件文件夹 |
./ckpt | 模型保存文件夹 |
./cifar-10-batches-bin | 训练对象存放文件夹(包含train和test文件夹) |
程序流图
代码(tensorflow)
!!!简单说一下,代码给了注释,运行应该问题不大,有问题希望大家能及时和我说一下,相互学习,谢谢!
import tensorflow as tf
import numpy as np
import os, shutil
from tqdm import tqdm
# Optional pre-trained VGG16 parameters: {layer_name: [weight, bias]}.
# The file is only consulted by layers built with finturn=True, so a missing
# file must not stop a from-scratch training run; fall back to an empty dict.
# NOTE(review): allow_pickle=True unpickles arbitrary objects -- only load a
# VGG16.npy that comes from a trusted source.
_pretrained_path = "./VGG16.npy"
if os.path.exists(_pretrained_path):
    data_dict = np.load(_pretrained_path, encoding="latin1", allow_pickle=True).item()
else:
    data_dict = {}
log_save_path = "./event"  # directory for TensorBoard event files
model_save_path = './ckpt/'  # directory for saved model checkpoints
channels = 3  # number of image channels
class_num = 10  # number of target classes
epochs = 40  # number of training epochs
max_step = 200  # number of batches trained per epoch
batch_size = 250  # number of samples in each batch
train_flag = True  # training flag read by batch_norm; rebound to a placeholder in __main__
weight_decay = 0.003  # coefficient of the L2 penalty added to the loss
image_size = 32  # image height and width in pixels
dropout_rate = 0.5  # value fed to the keep_prob placeholder during training
# Standardization of a single channel
def get_stddev(single_channel):
    """Standardize one image channel to zero mean and unit variance.

    Despite its name, this returns the standardized channel, not the standard
    deviation.

    :param single_channel: tensor holding the data of one image channel;
        mean and variance are reduced over axes 0 and 1
    :return: the standardized channel, ``(x - mean) / max(stddev, 1/sqrt(N))``
        where N is the number of values in the channel
    """
    mean, variance = tf.nn.moments(single_channel, axes=[0, 1])
    stddev = tf.sqrt(variance)
    # A constant channel has zero variance; dividing by it would yield NaN.
    # Floor the divisor at 1/sqrt(N), the same rule used by
    # tf.image.per_image_standardization.
    num_values = tf.cast(tf.size(single_channel), single_channel.dtype)
    min_stddev = 1.0 / tf.sqrt(num_values)
    single_norm = (single_channel - mean) / tf.maximum(stddev, min_stddev)
    return single_norm
# Standardization of a whole image
def image_standard(image_data):
    """Standardize an RGB image channel by channel.

    The image is split along axis 2 into exactly three channels, each channel
    is passed through ``get_stddev``, and the results are stacked back along
    axis 2.

    :param image_data: original RGB image data
    :return: the standardized image data
    """
    # Unpacking into three names keeps the "exactly three channels" requirement.
    red, green, blue = tf.unstack(image_data, axis=2)
    normalized_channels = [get_stddev(channel) for channel in (red, green, blue)]
    return tf.stack(normalized_channels, axis=2)
# CIFAR binary reader; a class makes it easy to build one pipeline for the
# training set and another for the validation set
class cifar_reader(object):
    """Queue-based reader that turns CIFAR-10 binary files into shuffled batches."""

    def __init__(self):
        # Each CIFAR-10 record is stored as 1 byte (label) + 32*32*3 bytes (image).
        self.height = 32
        self.width = 32
        self.channels = 3
        self.label_bytes = 1
        self.image_bytes = self.width * self.height * self.channels
        # Total size of one record in bytes.
        self.bytes = self.image_bytes + self.label_bytes

    def read_from_bin(self, filelist, batch_size=250, capacity=512, min_after_dequeue=200):
        """Read binary files and build shuffled data batches.

        :param filelist: list of binary file paths
        :param batch_size: number of samples per batch (default 250, the value
            that used to be hard-coded)
        :param capacity: maximum number of samples held in the shuffle queue
        :param min_after_dequeue: minimum number of samples kept in the queue
            after a dequeue
        :return: ``(image_batch, label_batch)``; images are standardized
            float32 tensors, labels are one-hot vectors of depth 10
        :raises ValueError: if ``filelist`` is empty
        """
        if not filelist:
            raise ValueError("filelist is empty: no CIFAR-10 binary files to read")
        # 1 - build the file-name queue
        file_queue = tf.train.string_input_producer(filelist)
        # 2 - build the reader; every read returns one fixed-length record
        reader = tf.FixedLengthRecordReader(self.bytes)
        _, value = reader.read(file_queue)
        # 3 - decode the record into a flat uint8 vector
        label_image = tf.decode_raw(value, tf.uint8)
        # 4 - split the record into label and image
        label = tf.cast(tf.slice(label_image, begin=[0], size=[self.label_bytes]), tf.int32)
        label = tf.one_hot(label[0], on_value=1.0, depth=10, off_value=0.0)
        image = tf.slice(label_image, begin=[self.label_bytes], size=[self.image_bytes])
        # The bytes are reshaped as [channels, height, width] and then
        # transposed to [height, width, channels]; no further reshape is needed.
        image_data = tf.reshape(image, [self.channels, self.height, self.width])
        image_rgb = tf.cast(tf.transpose(image_data, perm=[1, 2, 0]), tf.float32)
        # 5 - standardize the image data
        image_norm = image_standard(image_rgb)
        # 6 - assemble shuffled batches
        image_batch, label_batch = tf.train.shuffle_batch(
            [image_norm, label],
            batch_size=batch_size,
            num_threads=1,
            capacity=capacity,
            min_after_dequeue=min_after_dequeue,
        )
        return image_batch, label_batch
# Clean up old event files
def clear_event(event_dir="./event/", old_event_dir="./event_old"):
    """Move every entry of the event directory into the old-event directory.

    Both directories are created when missing, so a first run on a fresh
    checkout no longer fails with FileNotFoundError.

    :param event_dir: directory whose entries are moved away
    :param old_event_dir: directory that receives the moved entries
    :return: None
    """
    os.makedirs(event_dir, exist_ok=True)
    os.makedirs(old_event_dir, exist_ok=True)
    filelist = os.listdir(event_dir)
    if not filelist:
        return
    print("存在旧事件,开始移动", ">"*100)
    for file in tqdm(filelist):
        old_path = os.path.join(event_dir, file)
        new_path = os.path.join(old_event_dir, file)
        shutil.move(old_path, new_path)
    print("旧事件移动完毕,开始训练", ">"*100)
# Print information about a tensor / op
def print_info_layer(tf_op):
    """Print the name of ``tf_op`` followed by its shape as a Python list.

    :param tf_op: object exposing ``name`` and ``get_shape().as_list()``
    :return: None
    """
    op_name = tf_op.name
    op_shape = tf_op.get_shape().as_list()
    print(op_name, op_shape)
# Batch normalization: standardizes activations to speed up network training
def batch_norm(input):
    """Apply ``tf.contrib.layers.batch_norm`` to ``input``.

    ``is_training`` is taken from the module-level ``train_flag`` at the moment
    this function is called.

    :param input: tensor to normalize
    :return: the normalized tensor
    """
    bn_options = {
        "decay": 0.9,
        "center": True,
        "scale": True,
        "epsilon": 1e-3,
        "is_training": train_flag,
        "updates_collections": None,
    }
    return tf.contrib.layers.batch_norm(input, **bn_options)
# Convolution layer
def conv(name, input, filter_in, filter_out, finturn=False):
    """Build a 3x3 convolution followed by bias add, batch norm and ReLU.

    :param name: variable scope name; also the key looked up in ``data_dict``
        and the name given to the output op
    :param input: input tensor of the convolution
    :param filter_in: number of input channels
    :param filter_out: number of output channels
    :param finturn: when True, weight and bias are constants taken from the
        pre-trained ``data_dict`` (vgg16.npy); otherwise they are fresh
        trainable variables
    :return: the activated output tensor
    """
    with tf.variable_scope(name):
        if not finturn:
            # Trainable parameters: truncated-normal weights, constant 0.1 bias.
            kernel_shape = [3, 3, filter_in, filter_out]
            weight = tf.Variable(tf.truncated_normal(shape=kernel_shape, mean=0.0, stddev=0.1))
            bias = tf.Variable(tf.constant(value=0.1, shape=[filter_out]), name="bias")
            print(f"{name}层是需要自训练参数的初始层")
        else:
            # Pre-trained parameters are embedded as constants.
            pretrained_weight, pretrained_bias = data_dict[name][0], data_dict[name][1]
            weight = tf.constant(pretrained_weight, name="weights")
            bias = tf.constant(pretrained_bias, name="bias")
            print(f"{name}层使用了预训练模型参数")
        feature_map = tf.nn.conv2d(input=input,
                                   filter=weight,
                                   strides=[1, 1, 1, 1],
                                   padding="SAME")
        biased = tf.nn.bias_add(feature_map, bias)
        activated = tf.nn.relu(batch_norm(biased), name=name)
    print_info_layer(activated)
    return activated
# Pooling layer
def max_poolling(name, input):
    """Apply 2x2 max pooling with stride 2 and SAME padding, then ReLU.

    :param name: name given to the output op
    :param input: tensor to pool
    :return: the pooled tensor
    """
    window = [1, 2, 2, 1]
    stride = [1, 2, 2, 1]
    pooled = tf.nn.max_pool(input, window, stride, padding="SAME")
    pool = tf.nn.relu(pooled, name=name)
    print_info_layer(pool)
    return pool
# Fully connected layer
def full_connect(name, input, fc_in, fc_out, finturn=False, activation=True):
    """Build a fully connected layer: matmul, bias add, batch norm, optional ReLU.

    :param name: variable scope name; also the key looked up in ``data_dict``
        and the name given to the output op
    :param input: 2-D input tensor
    :param fc_in: number of input units
    :param fc_out: number of output units
    :param finturn: when True, weight and bias are constants taken from the
        pre-trained ``data_dict`` (vgg16.npy); otherwise they are fresh
        trainable variables
    :param activation: when True (default, the previous behavior) the output
        goes through ReLU; pass False for a layer that must emit raw logits
    :return: the output tensor of the layer
    """
    with tf.variable_scope(name):
        if finturn:
            weight = tf.constant(data_dict[name][0], name="weight")
            bias = tf.constant(data_dict[name][1], name="bias")
            print(f"{name}层使用了预训练模型参数")
        else:
            weight = tf.Variable(tf.truncated_normal(shape=[fc_in, fc_out], mean=0.0, stddev=0.1))
            bias = tf.Variable(tf.constant(value=0.1, shape=[fc_out]), name="bias")
            print(f"{name}层是需要自训练参数的初始层")
        normalized = batch_norm(tf.nn.bias_add(tf.matmul(input, weight), bias))
        if activation:
            full_layer = tf.nn.relu(normalized, name=name)
        else:
            # tf.identity keeps the output op named like the activated variant.
            full_layer = tf.identity(normalized, name=name)
    print_info_layer(full_layer)
    return full_layer


# VGG16 network. CIFAR-10 images are only 32x32, so the fifth pooling layer is
# left out.
def VGG16(input, channels, dropout, num_class):
    """Build the VGG16 classifier and return its logits.

    :param input: batch of images; height, width and channel dimensions must
        be statically known
    :param channels: number of channels of ``input``
    :param dropout: value forwarded as the second argument of
        ``tf.nn.dropout``; the training script feeds a keep probability
        (1.0 during evaluation)
    :param num_class: number of output classes
    :return: logits tensor with ``num_class`` columns
    :raises ValueError: if the spatial shape of the last convolution output is
        not statically known
    """
    conv1_1 = conv(name="conv1_1", input=input, filter_in=channels, filter_out=64, finturn=False)
    conv1_2 = conv(name="conv1_2", input=conv1_1, filter_in=64, filter_out=64, finturn=False)
    pool1 = max_poolling(name="pool1", input=conv1_2)
    conv2_1 = conv(name="conv2_1", input=pool1, filter_in=64, filter_out=128, finturn=False)
    conv2_2 = conv(name="conv2_2", input=conv2_1, filter_in=128, filter_out=128, finturn=False)
    pool2 = max_poolling(name="pool2", input=conv2_2)
    conv3_1 = conv(name="conv3_1", input=pool2, filter_in=128, filter_out=256, finturn=False)
    conv3_2 = conv(name="conv3_2", input=conv3_1, filter_in=256, filter_out=256, finturn=False)
    conv3_3 = conv(name="conv3_3", input=conv3_2, filter_in=256, filter_out=256, finturn=False)
    pool3 = max_poolling(name="pool3", input=conv3_3)
    conv4_1 = conv(name="conv4_1", input=pool3, filter_in=256, filter_out=512, finturn=False)
    conv4_2 = conv(name="conv4_2", input=conv4_1, filter_in=512, filter_out=512, finturn=False)
    conv4_3 = conv(name="conv4_3", input=conv4_2, filter_in=512, filter_out=512, finturn=False)
    pool4 = max_poolling(name="pool4", input=conv4_3)
    conv5_1 = conv(name="conv5_1", input=pool4, filter_in=512, filter_out=512, finturn=False)
    conv5_2 = conv(name="conv5_2", input=conv5_1, filter_in=512, filter_out=512, finturn=False)
    conv5_3 = conv(name="conv5_3", input=conv5_2, filter_in=512, filter_out=512, finturn=False)
    # Derive the flattened size from the static shape instead of hard-coding
    # 2 * 2 * 512: with a different input size a hard-coded value would make
    # the reshape silently fold pixels into the batch dimension.
    feature_shape = conv5_3.get_shape().as_list()[1:]
    if None in feature_shape:
        raise ValueError(f"conv5_3 must have a static spatial shape, got {feature_shape}")
    flat_dim = feature_shape[0] * feature_shape[1] * feature_shape[2]
    flatten = tf.reshape(conv5_3, [-1, flat_dim])
    full_layer6 = full_connect(name="fc6", input=flatten, fc_in=flat_dim, fc_out=4096)
    dropout_6 = tf.nn.dropout(full_layer6, dropout)
    full_layer7 = full_connect(name="fc7", input=dropout_6, fc_in=4096, fc_out=4096)
    dropout_7 = tf.nn.dropout(full_layer7, dropout)
    # The last layer feeds softmax cross-entropy, which expects unscaled
    # logits; a ReLU here would clamp every logit to be non-negative.
    full_layer8 = full_connect(name="fc8", input=dropout_7, fc_in=4096, fc_out=num_class,
                               activation=False)
    return full_layer8
if __name__ == '__main__':
    # Training script: build the input pipelines and the VGG16 graph, train
    # for `epochs` epochs, evaluate after every epoch, write TensorBoard
    # summaries and save a checkpoint.
    clear_event()
    # Directories holding the training and test binary files
    train_bin_dir = "./cifar-10-batches-bin/train/"
    test_bin_dir = "./cifar-10-batches-bin/test/"
    train_filelist = [os.path.join(train_bin_dir, bin_file) for bin_file in os.listdir(train_bin_dir) if
                      bin_file.endswith("bin")]
    test_filelist = [os.path.join(test_bin_dir, bin_file) for bin_file in os.listdir(test_bin_dir) if
                     bin_file.endswith("bin")]
    # Instantiate the binary reader
    cf_read = cifar_reader()
    # Build the data batches
    image_batch, label_batch = cf_read.read_from_bin(train_filelist)
    test_image_batch, test_label_batch = cf_read.read_from_bin(test_filelist)
    # Placeholders: x images, y_ true labels, keep_prob value passed to dropout,
    # train_flag training switch, learning_rate optimizer step size
    x = tf.placeholder(tf.float32, [None, image_size, image_size, channels])
    y_ = tf.placeholder(tf.float32, [None, class_num])
    keep_prob = tf.placeholder(tf.float32)
    # This rebinds the module-level train_flag, which batch_norm reads while
    # the graph is being built below; it must stay a module-level assignment.
    train_flag = tf.placeholder(tf.bool)
    learning_rate = tf.placeholder(tf.float32)
    y_predict = VGG16(x, channels=channels, dropout=keep_prob, num_class=class_num)  # predictions
    # Accuracy
    with tf.name_scope("accuracy"):
        equal_list = tf.equal(tf.argmax(y_, axis=1), tf.argmax(y_predict, axis=1))
        accuracy = tf.reduce_mean(tf.cast(equal_list, tf.float32))
    # Cross-entropy loss plus L2 regularization
    with tf.name_scope("loss"):
        cross_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=y_, logits=y_predict))
        l2 = tf.add_n([tf.nn.l2_loss(var) for var in tf.trainable_variables()])
    # Parameter update
    with tf.name_scope("train_op"):
        train_op = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=0.9, beta2=0.999,
                                          epsilon=1e-8).minimize(cross_loss + l2 * weight_decay)
    saver = tf.train.Saver()
    init_op = tf.global_variables_initializer()
    # Number of test batches evaluated after every epoch
    test_steps = 10
    # Save inside the checkpoint directory rather than to a "./ckpt" prefix
    # next to it.
    os.makedirs(model_save_path, exist_ok=True)
    checkpoint_prefix = os.path.join(model_save_path, "model.ckpt")
    with tf.Session() as sess:
        # Start the threads that fill the input queues
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord, sess=sess)
        # Start event logging
        file_writer = tf.summary.FileWriter(logdir=log_save_path, graph=sess.graph)
        try:
            sess.run(init_op)
            rate = 0.001
            for epoch in range(epochs):
                print("="*50,f"training:{epoch}", "="*50)
                epoch_acc = 0
                epoch_loss = 0
                # Training
                for step in range(max_step):
                    batch_x, batch_y = sess.run([image_batch, label_batch])
                    _, train_loss = sess.run([train_op, cross_loss], feed_dict={x: batch_x, y_: batch_y,
                                                                                keep_prob: dropout_rate,
                                                                                train_flag: True,
                                                                                learning_rate: rate})
                    # Accuracy on the same batch, measured without dropout
                    train_acc = sess.run(accuracy, feed_dict={x: batch_x, y_: batch_y,
                                                              keep_prob: 1.0,
                                                              train_flag: True})
                    epoch_loss += (train_loss / max_step)  # running mean loss of this epoch
                    epoch_acc += (train_acc / max_step)  # running mean accuracy of this epoch
                    # Print the training loss and accuracy every 20 steps
                    if step % 20 == 0:
                        print(f"epoch:{epoch}, step:{step}, train_loss:{train_loss:.4f}, train_acc:{train_acc:.2%}")
                epoch_test_acc = 0
                epoch_test_loss = 0
                # Validation
                for step in range(test_steps):
                    batch_x, batch_y = sess.run([test_image_batch, test_label_batch])
                    test_loss, test_acc = sess.run([cross_loss, accuracy], feed_dict={x: batch_x, y_: batch_y,
                                                                                      keep_prob: 1.0,
                                                                                      train_flag: False})
                    epoch_test_acc += (test_acc / test_steps)  # running mean accuracy of this epoch
                    # Accumulate; plain assignment would keep only the last batch.
                    epoch_test_loss += (test_loss / test_steps)  # running mean loss of this epoch
                # Lower the learning rate as the mean training accuracy rises
                if epoch_acc >= 0.993:
                    rate = 0.000001
                elif epoch_acc >= 0.99:
                    rate = 0.00001
                elif epoch_acc > 0.96:
                    rate = 0.0001
                print(f"test_loss:{epoch_test_loss:.4f}, test_acc:{epoch_test_acc:.2%}", ">"*50)
                summary = tf.Summary(value=[tf.Summary.Value(tag="train_loss", simple_value=epoch_loss),
                                            tf.Summary.Value(tag="train_acc", simple_value=epoch_acc),
                                            tf.Summary.Value(tag="test_loss", simple_value=epoch_test_loss),
                                            tf.Summary.Value(tag="test_acc", simple_value=epoch_test_acc)])
                file_writer.add_summary(summary=summary, global_step=epoch)
                # Save the model; the same checkpoint is overwritten every epoch
                saver.save(sess, checkpoint_prefix)
        finally:
            # Flush the event file and stop the reader threads even when
            # training is interrupted by an exception.
            file_writer.close()
            coord.request_stop()
            coord.join(threads=threads)
实验结果:
实验主要从测试集得到测试结果,由于本次学习的目的主要是体验网络的训练,并不分析和过多改进网络,此处直接以tensorboard的训练结果图展现 :
在40轮的训练中得到测试集的平均识别率test_acc、损失test_loss,和在训练集上得到的平均识别率train_acc、损失train_loss如下图所示:
可以看到,经过40轮的训练后,该网络在测试集上能够达到86%的识别率。而在四张实验结果图中,我们发现在第25~30轮之间有一个较为剧烈的变化,主要是由于在拐点对应的轮次平均训练识别率train_acc达到了96%,学习率发生了变化(变得更小)。
特别感谢
本文的完成参考了一些博主的文章在此附上他们文章的链接表示感谢!!!
https://www.jianshu.com/p/a52991ab86e0
下面这篇参考意义更大,我的一些bug也是从这里看出来的
https://blog.csdn.net/xun__Meng/article/details/89194148?utm_medium=distribute.pc_relevant.none-task-blog-BlogCommendFromMachineLearnPai2-19.baidujs&dist_request_id=&depth_1-utm_source=distribute.pc_relevant.none-task-blog-BlogCommendFromMachineLearnPai2-19.baidujs