Paper link: https://kopernio.com/viewer?doi=arXiv:1409.1556&route=6
The cifar-10-python dataset can be downloaded from the link below:
Link: https://pan.baidu.com/s/12boWfE-vGCppKLLR05NTVA
Extraction code: tt1t
1. Introduction:
Full paper title: "Very Deep Convolutional Networks for Large-Scale Image Recognition"
In 2014, researchers from the Visual Geometry Group at the University of Oxford, together with Google DeepMind, developed a new deep convolutional neural network, VGGNet. It took second place in the ILSVRC 2014 classification task (first place went to GoogLeNet, proposed the same year) and first place in the localization task.
VGGNet explored the relationship between the depth of a convolutional network and its performance. By successfully building networks 16 to 19 layers deep, it showed that increasing depth can substantially reduce the error rate while keeping the architecture highly extensible; its features also generalize very well when transferred to other image datasets, and VGG is still used for feature extraction today.
VGGNet can be seen as a deeper version of AlexNet: both consist of two main parts, convolutional layers followed by fully connected layers.
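The key design choice behind this depth is replacing large kernels with stacks of 3x3 convolutions: two stacked 3x3 layers cover the same 5x5 receptive field as a single 5x5 layer, but with fewer weights (18C^2 versus 25C^2 for C channels) and an extra non-linearity in between. Below is a minimal sketch of one such block in the same TF 1.x style as the code that follows; the helper name vgg_block is my own, not from the paper:

import tensorflow as tf

# A VGG-style block: two 3x3 convolutions followed by 2x2 max pooling.
def vgg_block(inputs, in_channels, out_channels, scope):
    with tf.variable_scope(scope):
        w1 = tf.get_variable('w1', [3, 3, in_channels, out_channels])
        h = tf.nn.relu(tf.nn.conv2d(inputs, w1, strides=[1, 1, 1, 1], padding='SAME'))
        w2 = tf.get_variable('w2', [3, 3, out_channels, out_channels])
        h = tf.nn.relu(tf.nn.conv2d(h, w2, strides=[1, 1, 1, 1], padding='SAME'))
        # 2x2 max pooling halves the spatial resolution
        return tf.nn.max_pool(h, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')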
2. Code implementation:
Personally tested and working; the code contains many important comments and runs directly in PyCharm.
Final result:
Test accuracy: 93.59%
utilsPycharm.py
Covers checking for the dataset, loading it, and augmentation operations such as image cropping and flipping.
import numpy as np
import os
import random
import pickle
# Parameter settings
image_size = 32
img_channels = 3
# Check for the dataset (it is not actually fetched here; download it manually from the link above)
def download_data():
    dirname = 'cifar10-dataset'
    origin = 'http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz'
    fname = './cifar-10-python.tar.gz'
    fpath = './' + dirname
    download = False
    if os.path.exists(fpath) or os.path.isfile(fname):  # os.path.isfile checks whether the given path is a file
        download = False
        print("DataSet already exist!")
    else:
        download = True
    if download:
        # Note: only the source URL is printed; the archive must be downloaded and unpacked by hand.
        print('Downloading data from', origin)
# Open a pickled batch file
def unpickFile(file):
    with open(file, 'rb') as fo:  # 'rb': open in binary mode for reading only, pointer at the start of the file
        dict = pickle.load(fo, encoding='iso-8859-1')  # iso-8859-1 is a single-byte encoding covering 0-255, e.g. 'a' = 0x61 = 97
    return dict
# Extract the image data (data) and label information (labels) from one batch file
def load_data_one(file):
    batch = unpickFile(file)
    data = batch['data']
    labels = batch['labels']
    print("Loading %s : %d." % (file, len(data)))
    return data, labels
# Convert the raw batch contents into arrays that can be fed to the network
def load_data(files, data_dir, label_count):  # files = ['data_batch_1', 'data_batch_2', 'data_batch_3', 'data_batch_4', 'data_batch_5']
    global image_size, img_channels  # refer to the module-level variables defined above
    data, labels = load_data_one(data_dir + '/' + files[0])  # data_dir = './cifar-10-python'
    for f in files[1:]:
        data_n, labels_n = load_data_one(data_dir + '/' + f)  # data_n shape = (10000, 3072)
        data = np.append(data, data_n, axis=0)
        labels = np.append(labels, labels_n, axis=0)
    labels = np.array([[float(i == label) for i in range(label_count)] for label in labels])  # one-hot labels, shape = (50000, 10)
    data = data.reshape([-1, img_channels, image_size, image_size])  # data shape = (50000, 3, 32, 32)
    data = data.transpose([0, 2, 3, 1])  # data shape = (50000, 32, 32, 3)
    return data, labels
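# Aside (my addition): the one-hot list comprehension above is equivalent to the
# NumPy indexing idiom below, which is usually faster for large label arrays:
#     labels = np.eye(label_count)[np.asarray(labels, dtype=int)]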
# Data preparation
def prepare_data():
    print("======Loading data======")
    download_data()
    data_dir = './cifar-10-python'
    image_dim = image_size * image_size * img_channels  # image_dim = 3072
    # meta = {'num_cases_per_batch': 10000,
    #         'label_names': ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck'],
    #         'num_vis': 3072}
    meta = unpickFile(data_dir + '/batches.meta')
    print(meta)
    label_names = meta['label_names']  # the ten CIFAR-10 class names
    label_count = len(label_names)  # label_count = 10
    # Read data_batch_1 through data_batch_5 in order
    train_files = ['data_batch_%d' % d for d in range(1, 6)]
    train_data, train_labels = load_data(train_files, data_dir, label_count)
    test_data, test_labels = load_data(['test_batch'], data_dir, label_count)
    # train_data shape = (50000, 32, 32, 3), train_labels shape = (50000, 10)
    # test_data shape = (10000, 32, 32, 3), test_labels shape = (10000, 10)
    print("Train data:", np.shape(train_data), np.shape(train_labels))
    print("Test data :", np.shape(test_data), np.shape(test_labels))
    print("======Load finished======")
    print("======Shuffling data======")
    # Shuffle the training set
    indices = np.random.permutation(len(train_data))  # a random permutation of 0 .. len(train_data)-1
    train_data = train_data[indices]
    train_labels = train_labels[indices]
    print("======Prepare Finished======")
    return train_data, train_labels, test_data, test_labels
# Randomly crop each image in a batch
def _random_crop(batch, crop_shape, padding=None):  # crop_shape = [32, 32]
    oshape = np.shape(batch[0])  # oshape = (32, 32, 3)
    if padding:  # padding = 4
        oshape = (oshape[0] + 2 * padding, oshape[1] + 2 * padding)  # oshape = (40, 40)
    new_batch = []
    npad = ((padding, padding), (padding, padding), (0, 0))  # (before, after) pad widths for rows, columns, channels
    for i in range(len(batch)):  # len(batch) = 250, batch shape = (250, 32, 32, 3)
        new_batch.append(batch[i])
        if padding:
            # batch[i] is the array to pad; pad_width gives (before, after) amounts per dimension;
            # constant_values=0 fills with zeros, a valid pixel value in the 0-255 range.
            new_batch[i] = np.lib.pad(batch[i], pad_width=npad, mode='constant', constant_values=0)
        # random.randint(a, b) returns a random integer N with a <= N <= b
        nh = random.randint(0, oshape[0] - crop_shape[0])
        nw = random.randint(0, oshape[1] - crop_shape[1])
        new_batch[i] = new_batch[i][nh:nh + crop_shape[0], nw:nw + crop_shape[1]]
    return new_batch
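# Worked example (my addition): with padding=4 a 32x32 image is zero-padded to
# 40x40, nh and nw are drawn from [0, 8], so the 32x32 crop can shift the image
# content by up to 4 pixels in each direction.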
# Randomly flip images left-right
def _random_flip_leftright(batch):
    for i in range(len(batch)):
        # getrandbits(k) returns a random integer with k random bits, so getrandbits(1) acts as a fair coin flip
        if bool(random.getrandbits(1)):
            batch[i] = np.fliplr(batch[i])  # fliplr() reverses the column order of each row, mirroring the image horizontally
    return batch
# Per-channel standardization: zero mean and unit variance for each RGB channel
def data_normalization(x_train, x_test):
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    x_train[:, :, :, 0] = (x_train[:, :, :, 0] - np.mean(x_train[:, :, :, 0])) / np.std(x_train[:, :, :, 0])
    x_train[:, :, :, 1] = (x_train[:, :, :, 1] - np.mean(x_train[:, :, :, 1])) / np.std(x_train[:, :, :, 1])
    x_train[:, :, :, 2] = (x_train[:, :, :, 2] - np.mean(x_train[:, :, :, 2])) / np.std(x_train[:, :, :, 2])
    x_test[:, :, :, 0] = (x_test[:, :, :, 0] - np.mean(x_test[:, :, :, 0])) / np.std(x_test[:, :, :, 0])
    x_test[:, :, :, 1] = (x_test[:, :, :, 1] - np.mean(x_test[:, :, :, 1])) / np.std(x_test[:, :, :, 1])
    x_test[:, :, :, 2] = (x_test[:, :, :, 2] - np.mean(x_test[:, :, :, 2])) / np.std(x_test[:, :, :, 2])
    return x_train, x_test
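# Aside (my addition): the six statements above collapse into two vectorized lines:
#     x_train = (x_train - x_train.mean(axis=(0, 1, 2))) / x_train.std(axis=(0, 1, 2))
#     x_test = (x_test - x_test.mean(axis=(0, 1, 2))) / x_test.std(axis=(0, 1, 2))
# Note that, matching the original code, the test set is standardized with its own
# statistics; reusing the training statistics for both is the more common choice.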
# Data augmentation: random flip followed by random crop
def data_augmentation(batch):
    batch = _random_flip_leftright(batch)  # random left-right flip
    batch = _random_crop(batch, [32, 32], 4)  # random crop with 4-pixel zero padding
    return batch
# Learning rate schedule: step decay, dropping by 10x at epochs 81 and 121
def learning_rate_set(epoch_num):
    if epoch_num < 81:
        return 0.1
    elif epoch_num < 121:
        return 0.01
    else:
        return 0.001
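As a quick sanity check of the pipeline, the short sketch below (my addition, not part of the original files; it assumes utilsPycharm.py and the dataset folder sit in the working directory) exercises loading, normalization, and augmentation on a small slice of the training set:

import numpy as np
from utilsPycharm import prepare_data, data_normalization, data_augmentation

train_x, train_y, test_x, test_y = prepare_data()
train_x, test_x = data_normalization(train_x, test_x)
batch = data_augmentation(train_x[:8])  # flip, then pad to 40x40 and crop back to 32x32
print(np.shape(batch))                  # expected: (8, 32, 32, 3)
print(train_y[:8].sum(axis=1))          # every one-hot row sums to 1.0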
appPycharm.py
Builds the VGG network and runs training and evaluation.
import tensorflow as tf
import time
from utilsPycharm import (prepare_data, data_normalization, data_augmentation, learning_rate_set)

# Parameter settings (batch_size * iterations = 50,000, the number of training images)
class_num = 10
image_size = 32
img_channels = 3
iterations = 200
batch_size = 250
total_epoch = 160
weight_decay = 0.0003
dropout_rate = 0.5  # fed to keep_prob below, so this is the probability of keeping a unit
momentum_rate = 0.9
log_save_path = './vgg_16_logs'
model_save_path = './model/'
# Bias variables, initialized to a small constant
def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape, dtype=tf.float32)
    return tf.Variable(initial)

# Convolution layer: stride 1 with 'SAME' padding, so the spatial size is preserved
def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

# Pooling layer
def max_pool(input, k_size=1, stride=1, name=None):
    return tf.nn.max_pool(input, ksize=[1, k_size, k_size, 1], strides=[1, stride, stride, 1],
                          padding='SAME', name=name)

# Batch normalization (TF 1.x contrib API); train_flag switches between batch statistics
# and moving averages, and updates_collections=None applies the moving-average updates in place
def batch_norm(input):
    return tf.contrib.layers.batch_norm(input, decay=0.9, center=True, scale=True, epsilon=1e-3,
                                        is_training=train_flag, updates_collections=None)
# Evaluate on the 10,000 test images in 10 chunks of 1,000, averaging loss and accuracy
def run_testing(sess, ep):
    acc = 0.0
    loss = 0.0
    pre_index = 0
    add = 1000
    for it in range(10):
        batch_x = test_x[pre_index:pre_index + add]  # batch_x shape = (1000, 32, 32, 3)
        batch_y = test_y[pre_index:pre_index + add]  # batch_y shape = (1000, 10)
        pre_index = pre_index + add
        loss_, acc_ = sess.run([cross_entropy, accuracy],
                               feed_dict={x: batch_x, y_: batch_y, keep_prob: 1.0, train_flag: False})
        loss += loss_ / 10.0
        acc += acc_ / 10.0
    summary = tf.Summary(value=[tf.Summary.Value(tag="test_loss", simple_value=loss),
                                tf.Summary.Value(tag="test_accuracy", simple_value=acc)])
    return acc, loss, summary
if __name__ == '__main__':
    train_x, train_y, test_x, test_y = prepare_data()
    train_x, test_x = data_normalization(train_x, test_x)
    # define placeholders x, y_, keep_prob, learning_rate
    x = tf.placeholder(tf.float32, [None, image_size, image_size, 3])  # shape = (?, 32, 32, 3)
    y_ = tf.placeholder(tf.float32, [None, class_num])  # shape = (?, 10)
    keep_prob = tf.placeholder(tf.float32)  # dropout keep probability
    learning_rate = tf.placeholder(tf.float32)
    train_flag = tf.placeholder(tf.bool)  # True during training, False during evaluation
    # build the network
    # the first convolution block
    W_conv1_1 = tf.get_variable('conv1_1', shape=[3, 3, 3, 64], initializer=tf.contrib.keras.initializers.he_normal())
    b_conv1_1 = bias_variable([64])
    output = tf.nn.relu(batch_norm(conv2d(x, W_conv1_1) + b_conv1_1))  # shape = (?, 32, 32, 64)
    W_conv1_2 = tf.get_variable('conv1_2', shape=[3, 3, 64, 64], initializer=tf.contrib.keras.initializers.he_normal())
    b_conv1_2 = bias_variable([64])
    output = tf.nn.relu(batch_norm(conv2d(output, W_conv1_2) + b_conv1_2))  # shape = (?, 32, 32, 64)
    output = max_pool(output, 2, 2, "pool1")  # shape = (?, 16, 16, 64)

    # the second convolution block
    W_conv2_1 = tf.get_variable('conv2_1', shape=[3, 3, 64, 128], initializer=tf.contrib.keras.initializers.he_normal())
    b_conv2_1 = bias_variable([128])
    output = tf.nn.relu(batch_norm(conv2d(output, W_conv2_1) + b_conv2_1))  # shape = (?, 16, 16, 128)
    W_conv2_2 = tf.get_variable('conv2_2', shape=[3, 3, 128, 128], initializer=tf.contrib.keras.initializers.he_normal())
    b_conv2_2 = bias_variable([128])
    output = tf.nn.relu(batch_norm(conv2d(output, W_conv2_2) + b_conv2_2))  # shape = (?, 16, 16, 128)
    output = max_pool(output, 2, 2, "pool2")  # shape = (?, 8, 8, 128)

    # the third convolution block
    W_conv3_1 = tf.get_variable('conv3_1', shape=[3, 3, 128, 256], initializer=tf.contrib.keras.initializers.he_normal())
    b_conv3_1 = bias_variable([256])
    output = tf.nn.relu(batch_norm(conv2d(output, W_conv3_1) + b_conv3_1))  # shape = (?, 8, 8, 256)
    W_conv3_2 = tf.get_variable('conv3_2', shape=[3, 3, 256, 256], initializer=tf.contrib.keras.initializers.he_normal())
    b_conv3_2 = bias_variable([256])
    output = tf.nn.relu(batch_norm(conv2d(output, W_conv3_2) + b_conv3_2))  # shape = (?, 8, 8, 256)
    W_conv3_3 = tf.get_variable('conv3_3', shape=[3, 3, 256, 256], initializer=tf.contrib.keras.initializers.he_normal())
    b_conv3_3 = bias_variable([256])
    output = tf.nn.relu(batch_norm(conv2d(output, W_conv3_3) + b_conv3_3))  # shape = (?, 8, 8, 256)
    output = max_pool(output, 2, 2, "pool3")  # shape = (?, 4, 4, 256)

    # the fourth convolution block
    W_conv4_1 = tf.get_variable('conv4_1', shape=[3, 3, 256, 512], initializer=tf.contrib.keras.initializers.he_normal())
    b_conv4_1 = bias_variable([512])
    output = tf.nn.relu(batch_norm(conv2d(output, W_conv4_1) + b_conv4_1))  # shape = (?, 4, 4, 512)
    W_conv4_2 = tf.get_variable('conv4_2', shape=[3, 3, 512, 512], initializer=tf.contrib.keras.initializers.he_normal())
    b_conv4_2 = bias_variable([512])
    output = tf.nn.relu(batch_norm(conv2d(output, W_conv4_2) + b_conv4_2))  # shape = (?, 4, 4, 512)
    W_conv4_3 = tf.get_variable('conv4_3', shape=[3, 3, 512, 512], initializer=tf.contrib.keras.initializers.he_normal())
    b_conv4_3 = bias_variable([512])
    output = tf.nn.relu(batch_norm(conv2d(output, W_conv4_3) + b_conv4_3))  # shape = (?, 4, 4, 512)
    output = max_pool(output, 2, 2)  # shape = (?, 2, 2, 512)

    # the fifth convolution block
    W_conv5_1 = tf.get_variable('conv5_1', shape=[3, 3, 512, 512], initializer=tf.contrib.keras.initializers.he_normal())
    b_conv5_1 = bias_variable([512])
    output = tf.nn.relu(batch_norm(conv2d(output, W_conv5_1) + b_conv5_1))  # shape = (?, 2, 2, 512)
    W_conv5_2 = tf.get_variable('conv5_2', shape=[3, 3, 512, 512], initializer=tf.contrib.keras.initializers.he_normal())
    b_conv5_2 = bias_variable([512])
    output = tf.nn.relu(batch_norm(conv2d(output, W_conv5_2) + b_conv5_2))  # shape = (?, 2, 2, 512)
    W_conv5_3 = tf.get_variable('conv5_3', shape=[3, 3, 512, 512], initializer=tf.contrib.keras.initializers.he_normal())
    b_conv5_3 = bias_variable([512])
    output = tf.nn.relu(batch_norm(conv2d(output, W_conv5_3) + b_conv5_3))  # shape = (?, 2, 2, 512)
    # output = max_pool(output, 2, 2)  # the fifth pool is skipped: the feature map is already only 2x2
    output = tf.reshape(output, [-1, 2 * 2 * 512])  # flatten to shape = (?, 2048)
    # the first fully connected layer
    W_fc1 = tf.get_variable('fc1', shape=[2048, 4096], initializer=tf.contrib.keras.initializers.he_normal())
    b_fc1 = bias_variable([4096])
    output = tf.nn.relu(batch_norm(tf.matmul(output, W_fc1) + b_fc1))  # shape = (?, 4096)
    output = tf.nn.dropout(output, keep_prob)

    # the second fully connected layer
    W_fc2 = tf.get_variable('fc7', shape=[4096, 4096], initializer=tf.contrib.keras.initializers.he_normal())
    b_fc2 = bias_variable([4096])
    output = tf.nn.relu(batch_norm(tf.matmul(output, W_fc2) + b_fc2))  # shape = (?, 4096)
    output = tf.nn.dropout(output, keep_prob)

    # the third fully connected layer (class scores)
    W_fc3 = tf.get_variable('fc3', shape=[4096, 10], initializer=tf.contrib.keras.initializers.he_normal())
    b_fc3 = bias_variable([10])
    output = tf.nn.relu(batch_norm(tf.matmul(output, W_fc3) + b_fc3))  # shape = (?, 10)
    # output = tf.reshape(output, [-1, 10])
    # loss function: cross entropy
    # labels: the ground-truth one-hot labels; logits: the output of the last layer.
    # tf.nn.softmax_cross_entropy_with_logits returns one value per example, so reduce_mean averages over the batch.
    cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=output))
    # tf.add_n([t1, t2, ...]) sums a list of tensors element-wise; here it accumulates the L2 norm of every trainable variable
    l2 = tf.add_n([tf.nn.l2_loss(var) for var in tf.trainable_variables()])
    # use_nesterov=True enables Nesterov momentum; the total loss is cross_entropy + weight_decay * l2
    train_step = tf.train.MomentumOptimizer(learning_rate, momentum_rate, use_nesterov=True).minimize(cross_entropy + l2 * weight_decay)
    correct_prediction = tf.equal(tf.argmax(output, 1), tf.argmax(y_, 1))  # shape = (?,), dtype = bool
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    # initialize a saver to save the model
    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        summary_writer = tf.summary.FileWriter(log_save_path, sess.graph)  # write the graph and summaries to disk
        # total_epoch = 160; make sure batch_size * iterations equals the number of training samples
        for ep in range(1, total_epoch + 1):
            lr = learning_rate_set(ep)
            pre_index = 0
            train_acc = 0.0
            train_loss = 0.0
            start_time = time.time()
            print("\n epoch %d/%d:" % (ep, total_epoch))
            # iterations = 200
            for it in range(1, iterations + 1):
                batch_x = train_x[pre_index:pre_index + batch_size]  # shape = (250, 32, 32, 3)
                batch_y = train_y[pre_index:pre_index + batch_size]  # shape = (250, 10)
                batch_x = data_augmentation(batch_x)
                _, batch_loss = sess.run([train_step, cross_entropy],
                                         feed_dict={x: batch_x, y_: batch_y, keep_prob: dropout_rate,
                                                    learning_rate: lr, train_flag: True})
                batch_acc = accuracy.eval(feed_dict={x: batch_x, y_: batch_y, keep_prob: 1.0, train_flag: True})
                train_loss += batch_loss
                train_acc += batch_acc
                pre_index += batch_size
                if it == iterations:
                    # last iteration of the epoch: average the metrics, run the test set, and log summaries
                    train_loss /= iterations
                    train_acc /= iterations
                    train_summary = tf.Summary(value=[tf.Summary.Value(tag="train_loss", simple_value=train_loss),
                                                      tf.Summary.Value(tag="train_accuracy", simple_value=train_acc)])
                    val_acc, val_loss, test_summary = run_testing(sess, ep)
                    summary_writer.add_summary(train_summary, ep)
                    summary_writer.add_summary(test_summary, ep)
                    summary_writer.flush()
                    print("iteration: %d/%d, cost_time: %ds, train_loss: %.4f, "
                          "train_acc: %.4f, test_loss: %.4f, test_acc: %.4f"
                          % (it, iterations, int(time.time() - start_time), train_loss, train_acc, val_loss, val_acc))
                else:
                    print("iteration: %d/%d, train_loss: %.4f, train_acc: %.4f" % (it, iterations, train_loss / it, train_acc / it))
        save_path = saver.save(sess, model_save_path)
        print("Model saved in file: %s" % save_path)