Paper link: https://kopernio.com/viewer?doi=arXiv:1409.1556&route=6
The cifar-10-python dataset can be downloaded from the link below:
Link: https://pan.baidu.com/s/12boWfE-vGCppKLLR05NTVA
Extraction code: tt1t
1. Introduction:
Full paper title: "Very Deep Convolutional Networks for Large-Scale Image Recognition"
In 2014, researchers from the Visual Geometry Group at the University of Oxford, together with Google DeepMind, developed a new deep convolutional neural network, VGGNet. It took second place in the ILSVRC 2014 classification task (first place went to GoogLeNet, proposed the same year) and first place in the localization task.
VGGNet explored the relationship between the depth of a convolutional network and its performance. By successfully building networks 16 to 19 layers deep, it showed that increasing depth can substantially reduce the error rate while keeping the architecture highly extensible; its features also generalize very well when transferred to other image datasets, and VGG is still used for feature extraction today.
VGGNet can be seen as a deeper version of AlexNet: both consist of two main parts, convolutional layers followed by fully connected layers.
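The key design choice behind this depth is replacing large kernels with stacks of 3x3 convolutions: two stacked 3x3 layers cover the same 5x5 receptive field as a single 5x5 layer, but with fewer weights (18C^2 versus 25C^2 for C channels) and an extra non-linearity in between. Below is a minimal sketch of one such block in the same TF 1.x style as the code that follows; the helper name vgg_block is my own, not from the paper:

import tensorflow as tf

# A VGG-style block: two 3x3 convolutions followed by 2x2 max pooling.
def vgg_block(inputs, in_channels, out_channels, scope):
    with tf.variable_scope(scope):
        w1 = tf.get_variable('w1', [3, 3, in_channels, out_channels])
        h = tf.nn.relu(tf.nn.conv2d(inputs, w1, strides=[1, 1, 1, 1], padding='SAME'))
        w2 = tf.get_variable('w2', [3, 3, out_channels, out_channels])
        h = tf.nn.relu(tf.nn.conv2d(h, w2, strides=[1, 1, 1, 1], padding='SAME'))
        # 2x2 max pooling halves the spatial resolution
        return tf.nn.max_pool(h, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')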
2. Code implementation:
Personally tested and working; the code contains many important comments and runs directly in PyCharm.
Final result:
Test accuracy: 93.59%
utilsPycharm.py
Covers checking for the dataset, loading it, and augmentation operations such as image cropping and flipping.
import numpy as np
import os
import random
import pickle
# Parameter settings
image_size = 32
img_channels = 3
# Check for the dataset (it is not actually fetched here; download it manually from the link above)
def download_data():
    dirname = 'cifar10-dataset'
    origin = 'http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz'
    fname = './cifar-10-python.tar.gz'
    fpath = './' + dirname
    download = False
    if os.path.exists(fpath) or os.path.isfile(fname):  # os.path.isfile checks whether the given path is a file
        download = False
        print("DataSet already exist!")
    else:
        download = True
    if download:
        # Note: only the source URL is printed; the archive must be downloaded and unpacked by hand.
        print('Downloading data from', origin)
# Open a pickled batch file
def unpickFile(file):
    with open(file, 'rb') as fo:  # 'rb': open in binary mode for reading only, pointer at the start of the file
        dict = pickle.load(fo, encoding='iso-8859-1')  # iso-8859-1 is a single-byte encoding covering 0-255, e.g. 'a' = 0x61 = 97
    return dict
# Extract the image data (data) and label information (labels) from one batch file
def load_data_one(file):
    batch = unpickFile(file)
    data = batch['data']
    labels = batch['labels']
    print("Loading %s : %d." % (file, len(data)))
    return data, labels
# Convert the raw batch contents into arrays that can be fed to the network
def load_data(files, data_dir, label_count):  # files = ['data_batch_1', 'data_batch_2', 'data_batch_3', 'data_batch_4', 'data_batch_5']
    global image_size, img_channels  # refer to the module-level variables defined above
    data, labels = load_data_one(data_dir + '/' + files[0])  # data_dir = './cifar-10-python'
    for f in files[1:]:
        data_n, labels_n = load_data_one(data_dir + '/' + f)  # data_n shape = (10000, 3072)
        data = np.append(data, data_n, axis=0)
        labels = np.append(labels, labels_n, axis=0)
    labels = np.array([[float(i == label) for i in range(label_count)] for label in labels])  # one-hot labels, shape = (50000, 10)
    data = data.reshape([-1, img_channels, image_size, image_size])  # data shape = (50000, 3, 32, 32)
    data = data.transpose([0, 2, 3, 1])  # data shape = (50000, 32, 32, 3)
    return data, labels
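# Aside (my addition): the one-hot list comprehension above is equivalent to the
# NumPy indexing idiom below, which is usually faster for large label arrays:
#     labels = np.eye(label_count)[np.asarray(labels, dtype=int)]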
# Data preparation
def prepare_data():
    print("======Loading data======")
    download_data()
    data_dir = './cifar-10-python'
    image_dim = image_size * image_size * img_channels  # image_dim = 3072
    # meta = {'num_cases_per_batch': 10000,
    #         'label_names': ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck'],
    #         'num_vis': 3072}
    meta = unpickFile(data_dir + '/batches.meta')
    print(meta)
    label_names = meta['label_names']  # the ten CIFAR-10 class names
    label_count = len(label_names)  # label_count = 10
    # Read data_batch_1 through data_batch_5 in order
    train_files = ['data_batch_%d' % d for d in range(1, 6)]
    train_data, train_labels = load_data(train_files, data_dir, label_count)
    test_data, test_labels = load_data(['test_batch'], data_dir, label_count)
    # train_data shape = (50000, 32, 32, 3), train_labels shape = (50000, 10)
    # test_data shape = (10000, 32, 32, 3), test_labels shape = (10000, 10)
    print("Train data:", np.shape(train_data), np.shape(train_labels))
    print("Test data :", np.shape(test_data), np.shape(test_labels))
    print("======Load finished======")
    print("======Shuffling data======")
    # Shuffle the training set
    indices = np.random.permutation(len(train_data))  # a random permutation of 0 .. len(train_data)-1
    train_data = train_data[indices]
    train_labels = train_labels[indices]
    print("======Prepare Finished======")
    return train_data, train_labels, test_data, test_labels
# Randomly crop each image in a batch
def _random_crop(batch, crop_shape, padding=None):  # crop_shape = [32, 32]
    oshape = np.shape(batch[0])  # oshape = (32, 32, 3)
    if padding:  # padding = 4
        oshape = (oshape[0] + 2 * padding, oshape[1] + 2 * padding)  # oshape = (40, 40)
    new_batch = []
    npad = ((padding, padding), (padding, padding), (0, 0))  # (before, after) pad widths for rows, columns, channels
    for i in range(len(batch)):  # len(batch) = 250, batch shape = (250, 32, 32, 3)
        new_batch.append(batch[i])
        if padding:
            # batch[i] is the array to pad; pad_width gives (before, after) amounts per dimension;
            # constant_values=0 fills with zeros, a valid pixel value in the 0-255 range.
            new_batch[i] = np.lib.pad(batch[i], pad_width=npad, mode='constant', constant_values=0)
        # random.randint(a, b) returns a random integer N with a <= N <= b
        nh = random.randint(0, oshape[0] - crop_shape[0])
        nw = random.randint(0, oshape[1] - crop_shape[1])
        new_batch[i] = new_batch[i][nh:nh + crop_shape[0], nw:nw + crop_shape[1]]
    return new_batch
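# Worked example (my addition): with padding=4 a 32x32 image is zero-padded to
# 40x40, nh and nw are drawn from [0, 8], so the 32x32 crop can shift the image
# content by up to 4 pixels in each direction.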
# Randomly flip images left-right
def _random_flip_leftright(batch):
    for i in range(len(batch)):
        # getrandbits(k) returns a random integer with k random bits, so getrandbits(1) acts as a fair coin flip
        if bool(random.getrandbits(1)):
            batch[i] = np.fliplr(batch[i])  # fliplr() reverses the column order of each row, mirroring the image horizontally
    return batch
# Per-channel standardization: zero mean and unit variance for each RGB channel
def data_normalization(x_train, x_test):
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    x_train[:, :, :, 0] = (x_train[:, :, :, 0] - np.mean(x_train[:, :, :, 0])) / np.std(x_train[:, :, :, 0])
    x_train[:, :, :, 1] = (x_train[:, :, :, 1] - np.mean(x_train[:, :, :, 1])) / np.std(x_train[:, :, :, 1])
    x_train[:, :, :, 2] = (x_train[:, :, :, 2] - np.mean(x_train[:, :, :, 2])) / np.std(x_train[:, :, :, 2])
    x_test[:, :, :, 0] = (x_test[:, :, :, 0] - np.mean(x_test[:, :, :, 0])) / np.std(x_test[:, :, :, 0])
    x_test[:, :, :, 1] = (x_test[:, :, :, 1] - np.mean(x_test[:, :, :, 1])) / np.std(x_test[:, :, :, 1])
    x_test[:, :, :, 2] = (x_test[:, :, :, 2] - np.mean(x_test[:, :, :, 2])) / np.std(x_test[:, :, :, 2])
    return x_train, x_test
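# Aside (my addition): the six statements above collapse into two vectorized lines:
#     x_train = (x_train - x_train.mean(axis=(0, 1, 2))) / x_train.std(axis=(0, 1, 2))
#     x_test = (x_test - x_test.mean(axis=(0, 1, 2))) / x_test.std(axis=(0, 1, 2))
# Note that, matching the original code, the test set is standardized with its own
# statistics; reusing the training statistics for both is the more common choice.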
# Data augmentation: random flip followed by random crop
def data_augmentation(batch):
    batch = _random_flip_leftright(batch)  # random left-right flip
    batch = _random_crop(batch, [32, 32], 4)  # random crop with 4-pixel zero padding
    return batch
# Learning rate schedule: step decay, dropping by 10x at epochs 81 and 121
def learning_rate_set(epoch_num):
    if epoch_num < 81:
        return 0.1
    elif epoch_num < 121:
        return 0.01
    else:
        return 0.001
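As a quick sanity check of the pipeline, the short sketch below (my addition, not part of the original files; it assumes utilsPycharm.py and the dataset folder sit in the working directory) exercises loading, normalization, and augmentation on a small slice of the training set:

import numpy as np
from utilsPycharm import prepare_data, data_normalization, data_augmentation

train_x, train_y, test_x, test_y = prepare_data()
train_x, test_x = data_normalization(train_x, test_x)
batch = data_augmentation(train_x[:8])  # flip, then pad to 40x40 and crop back to 32x32
print(np.shape(batch))                  # expected: (8, 32, 32, 3)
print(train_y[:8].sum(axis=1))          # every one-hot row sums to 1.0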
appPycharm.py
Builds the VGG network and runs training and evaluation.
import tensorflow as tf
import time
from utilsPycharm import (prepare_data, data_normalization, data_augmentation, learning_rate_set)

# Parameter settings (batch_size * iterations = 50,000, the number of training images)
class_num = 10
image_size = 32
img_channels = 3
iterations = 200
batch_size = 250
total_epoch = 160
weight_decay = 0.0003
dropout_rate = 0.5  # fed to keep_prob below, so this is the probability of keeping a unit
momentum_rate = 0.9
log_save_path = './vgg_16_logs'
model_save_path = './model/'
# Bias variables, initialized to a small constant
def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape, dtype=tf.float32)
    return tf.Variable(initial)

# Convolution layer: stride 1 with 'SAME' padding, so the spatial size is preserved
def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

# Pooling layer
def max_pool(input, k_size=1, stride=1, name=None):
    return tf.nn.max_pool(input, ksize=[1, k_size, k_size, 1], strides=[1, stride, stride, 1],
                          padding='SAME', name=name)

# Batch normalization (TF 1.x contrib API); train_flag switches between batch statistics
# and moving averages, and updates_collections=None applies the moving-average updates in place
def batch_norm(input):
    return tf.contrib.layers.batch_norm(input, decay=0.9, center=True, scale=True, epsilon=1e-3,
                                        is_training=train_flag, updates_collections=None)
# Evaluate on the 10,000 test images in 10 chunks of 1,000, averaging loss and accuracy
def run_testing(sess, ep):
    acc = 0.0
    loss = 0.0
    pre_index = 0
    add = 1000
    for it in range(10):
        batch_x = test_x[pre_index:pre_index + add]  # batch_x shape = (1000, 32, 32, 3)
        batch_y = test_y[pre_index:pre_index + add]  # batch_y shape = (1000, 10)
        pre_index = pre_index + add
        loss_, acc_ = sess.run([cross_entropy, accuracy],
                               feed_dict={x: batch_x, y_: batch_y, keep_prob: 1.0, train_flag: False})
        loss += loss_ / 10.0
        acc += acc_ / 10.0
    summary = tf.Summary(value=[tf.Summary.Value(tag="test_loss", simple_value=loss),
                                tf.Summary.Value(tag="test_accuracy", simple_value=acc)])
    return acc, loss, summary
if __name__ == '__main__':
    train_x, train_y, test_x, test_y = prepare_data()
    train_x, test_x = data_normalization(train_x, test_x)
    # define placeholders x, y_, keep_prob, learning_rate
    x = tf.placeholder(tf.float32, [None, image_size, image_size, 3])  # shape = (?, 32, 32, 3)
    y_ = tf.placeholder(tf.float32, [None, class_num])  # shape = (?, 10)
    keep_prob = tf.placeholder(tf.float32)  # dropout keep probability
    learning_rate = tf.placeholder(tf.float32)
    train_flag = tf.placeholder(tf.bool)  # True during training, False during evaluation
    # build the network
    # the first convolution block
    W_conv1_1 = tf.get_variable('conv1_1', shape=[3, 3, 3, 64], initializer=tf.contrib.keras.initializers.he_normal())
    b_conv1_1 = bias_variable([64])
    output = tf.nn.relu(batch_norm(conv2d(x, W_conv1_1) + b_conv1_1))  # shape = (?, 32, 32, 64)
    W_conv1_2 = tf.get_variable('conv1_2', shape=[3, 3, 64, 64], initializer=tf.contrib.keras.initializers.he_normal())
    b_conv1_2 = bias_variable([64])
    output = tf.nn.relu(batch_norm(conv2d(output, W_conv1_2) + b_conv1_2))  # shape = (?, 32, 32, 64)
    output = max_pool(output, 2, 2, "pool1")  # shape = (?, 16, 16, 64)

    # the second convolution block
    W_conv2_1 = tf.get_variable('conv2_1', shape=[3, 3, 64, 128], initializer=tf.contrib.keras.initializers.he_normal())
    b_conv2_1 = bias_variable([128])
    output = tf.nn.relu(batch_norm(conv2d(output, W_conv2_1) + b_conv2_1))  # shape = (?, 16, 16, 128)
    W_conv2_2 = tf.get_variable('conv2_2', shape=[3, 3, 128, 128], initializer=tf.contrib.keras.initializers.he_normal())
    b_conv2_2 = bias_variable([128])
    output = tf.nn.relu(batch_norm(conv2d(output, W_conv2_2) + b_conv2_2))  # shape = (?, 16, 16, 128)
    output = max_pool(output, 2, 2, "pool2")  # shape = (?, 8, 8, 128)

    # the third convolution block
    W_conv3_1 = tf.get_variable('conv3_1', shape=[3, 3, 128, 256], initializer=tf.contrib.keras.initializers.he_normal())
    b_conv3_1 = bias_variable([256])
    output = tf.nn.relu(batch_norm(conv2d(output, W_conv3_1) + b_conv3_1))  # shape = (?, 8, 8, 256)
    W_conv3_2 = tf.get_variable('conv3_2', shape=[3, 3, 256, 256], initializer=tf.contrib.keras.initializers.he_normal())
    b_conv3_2 = bias_variable([256])
    output = tf.nn.relu(batch_norm(conv2d(output, W_conv3_2) + b_conv3_2))  # shape = (?, 8, 8, 256)
    W_conv3_3 = tf.get_variable('conv3_3', shape=[3, 3, 256, 256], initializer=tf.contrib.keras.initializers.he_normal())
    b_conv3_3 = bias_variable([256])
    output = tf.nn.relu(batch_norm(conv2d(output, W_conv3_3) + b_conv3_3))  # shape = (?, 8, 8, 256)
    output = max_pool(output, 2, 2, "pool3")  # shape = (?, 4, 4, 256)

    # the fourth convolution block
    W_conv4_1 = tf.get_variable('conv4_1', shape=[3, 3, 256, 512], initializer=tf.contrib.keras.initializers.he_normal())
    b_conv4_1 = bias_variable([512])
    output = tf.nn.relu(batch_norm(conv2d(output, W_conv4_1) + b_conv4_1))  # shape = (?, 4, 4, 512)
    W_conv4_2 = tf.get_variable('conv4_2', shape=[3, 3, 512, 512], initializer=tf.contrib.keras.initializers.he_normal())
    b_conv4_2 = bias_variable([512])
    output = tf.nn.relu(batch_norm(conv2d(output, W_conv4_2) + b_conv4_2))  # shape = (?, 4, 4, 512)
    W_conv4_3 = tf.get_variable('conv4_3', shape=[3, 3, 512, 512], initializer=tf.contrib.keras.initializers.he_normal())
    b_conv4_3 = bias_variable([512])
    output = tf.nn.relu(batch_norm(conv2d(output, W_conv4_3) + b_conv4_3))  # shape = (?, 4, 4, 512)
    output = max_pool(output, 2, 2)  # shape = (?, 2, 2, 512)

    # the fifth convolution block
    W_conv5_1 = tf.get_variable('conv5_1', shape=[3, 3, 512, 512], initializer=tf.contrib.keras.initializers.he_normal())
    b_conv5_1 = bias_variable([512])
    output = tf.nn.relu(batch_norm(conv2d(output, W_conv5_1) + b_conv5_1))  # shape = (?, 2, 2, 512)
    W_conv5_2 = tf.get_variable('conv5_2', shape=[3, 3, 512, 512], initializer=tf.contrib.keras.initializers.he_normal())
    b_conv5_2 = bias_variable([512])
    output = tf.nn.relu(batch_norm(conv2d(output, W_conv5_2) + b_conv5_2))  # shape = (?, 2, 2, 512)
    W_conv5_3 = tf.get_variable('conv5_3', shape=[3, 3, 512, 512], initializer=tf.contrib.keras.initializers.he_normal())
    b_conv5_3 = bias_variable([512])
    output = tf.nn.relu(batch_norm(conv2d(output, W_conv5_3) + b_conv5_3))  # shape = (?, 2, 2, 512)
    # output = max_pool(output, 2, 2)  # the fifth pool is skipped: the feature map is already only 2x2
    output = tf.reshape(output, [-1, 2 * 2 * 512])  # flatten to shape = (?, 2048)
    # the first fully connected layer
    W_fc1 = tf.get_variable('fc1', shape=[2048, 4096], initializer=tf.contrib.keras.initializers.he_normal())
    b_fc1 = bias_variable([4096])
    output = tf.nn.relu(batch_norm(tf.matmul(output, W_fc1) + b_fc1))  # shape = (?, 4096)
    output = tf.nn.dropout(output, keep_prob)

    # the second fully connected layer
    W_fc2 = tf.get_variable('fc7', shape=[4096, 4096], initializer=tf.contrib.keras.initializers.he_normal())
    b_fc2 = bias_variable([4096])
    output = tf.nn.relu(batch_norm(tf.matmul(output, W_fc2) + b_fc2))  # shape = (?, 4096)
    output = tf.nn.dropout(output, keep_prob)

    # the third fully connected layer (class scores)
    W_fc3 = tf.get_variable('fc3', shape=[4096, 10], initializer=tf.contrib.keras.initializers.he_normal())
    b_fc3 = bias_variable([10])
    output = tf.nn.relu(batch_norm(tf.matmul(output, W_fc3) + b_fc3))  # shape = (?, 10)
    # output = tf.reshape(output, [-1, 10])
    # loss function: cross entropy
    # labels: the ground-truth one-hot labels; logits: the output of the last layer.
    # tf.nn.softmax_cross_entropy_with_logits returns one value per example, so reduce_mean averages over the batch.
    cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=output))
    # tf.add_n([t1, t2, ...]) sums a list of tensors element-wise; here it accumulates the L2 norm of every trainable variable
    l2 = tf.add_n([tf.nn.l2_loss(var) for var in tf.trainable_variables()])
    # use_nesterov=True enables Nesterov momentum; the total loss is cross_entropy + weight_decay * l2
    train_step = tf.train.MomentumOptimizer(learning_rate, momentum_rate, use_nesterov=True).minimize(cross_entropy + l2 * weight_decay)
    correct_prediction = tf.equal(tf.argmax(output, 1), tf.argmax(y_, 1))  # shape = (?,), dtype = bool
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    # initialize a saver to save the model
    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        summary_writer = tf.summary.FileWriter(log_save_path, sess.graph)  # write the graph and summaries to disk
        # total_epoch = 160; make sure batch_size * iterations equals the number of training samples
        for ep in range(1, total_epoch + 1):
            lr = learning_rate_set(ep)
            pre_index = 0
            train_acc = 0.0
            train_loss = 0.0
            start_time = time.time()
            print("\n epoch %d/%d:" % (ep, total_epoch))
            # iterations = 200
            for it in range(1, iterations + 1):
                batch_x = train_x[pre_index:pre_index + batch_size]  # shape = (250, 32, 32, 3)
                batch_y = train_y[pre_index:pre_index + batch_size]  # shape = (250, 10)
                batch_x = data_augmentation(batch_x)
                _, batch_loss = sess.run([train_step, cross_entropy],
                                         feed_dict={x: batch_x, y_: batch_y, keep_prob: dropout_rate,
                                                    learning_rate: lr, train_flag: True})
                batch_acc = accuracy.eval(feed_dict={x: batch_x, y_: batch_y, keep_prob: 1.0, train_flag: True})
                train_loss += batch_loss
                train_acc += batch_acc
                pre_index += batch_size
                if it == iterations:
                    # last iteration of the epoch: average the metrics, run the test set, and log summaries
                    train_loss /= iterations
                    train_acc /= iterations
                    train_summary = tf.Summary(value=[tf.Summary.Value(tag="train_loss", simple_value=train_loss),
                                                      tf.Summary.Value(tag="train_accuracy", simple_value=train_acc)])
                    val_acc, val_loss, test_summary = run_testing(sess, ep)
                    summary_writer.add_summary(train_summary, ep)
                    summary_writer.add_summary(test_summary, ep)
                    summary_writer.flush()
                    print("iteration: %d/%d, cost_time: %ds, train_loss: %.4f, "
                          "train_acc: %.4f, test_loss: %.4f, test_acc: %.4f"
                          % (it, iterations, int(time.time() - start_time), train_loss, train_acc, val_loss, val_acc))
                else:
                    print("iteration: %d/%d, train_loss: %.4f, train_acc: %.4f" % (it, iterations, train_loss / it, train_acc / it))
        save_path = saver.save(sess, model_save_path)
        print("Model saved in file: %s" % save_path)