基于Tensorflow的AlexNet经典卷积网络模型的实现

最新推荐文章于 2024-09-04 13:39:32 发布

xiaopengyaonixi

最新推荐文章于 2024-09-04 13:39:32 发布

阅读量402

点赞数

分类专栏： python

本文链接：https://blog.csdn.net/W3Chhhhhh/article/details/106620627

版权

python 专栏收录该内容

20 篇文章 1 订阅

订阅专栏

1.背景

Hinton的学生Alex Krizhevsky于2012年借助深度学习的相关理论提出了深度卷积神经网路模型AlexNet。同年在ILSVRC大赛上，该模型取得了top-5错误率仅为15.3%的好成绩，相比较于第二名的top-5错误率为16.2%以明显的优势胜出。从此Alex成为了CNN领域比较有标志性的一个网络模型。

AlexNet模型中自带了很多的开创新特点：

AlexNet使用ReLu作为CNN的激活函数取得了成功，原因在于ReLu激活函数能够在较深的网络中有效克服sigmoid存在梯度弥散的问题；
AlexNet在卷积层和池化层之间添加了一层LRN层，LRN层的主要思想来源于模仿神物神经系统“侧抑制”，该机制能够为局部神经元开辟活动的竞争环境。换言之，这样做会使得影响力较大的值被放大，同时会抑制其他神经元，进一步来提高模型的泛化能力。在tensorflow中有nn.lrn()的实现。后来这种机制被发现对于模型的提升影响不是很大，LRN层仅对ReLu这种没有上确定边界的激活函数效果显著；
AlexNet全部使用最大值池化，一改之前CNN普遍使用连续但是不重叠的平均池化。

模型的整体架构如下：

2.基于tensorflow的重现

以下实现的方式是将整个AlexNet方法一块GPU上而不是拆分成两个模型放在两块GPU上运行，所以我们将其看成整个网络。

"""
alexnet定义
"""
import datetime
import math
import time

import tensorflow as tf


def inference_o(images):
    """
    前向传播定义
    :return:
    """
    parameters = []

    with tf.name_scope("conv1"):
        kernel = tf.Variable(tf.truncated_normal([11, 11, 3, 96], dtype=tf.float32, stddev=1e-1), name="weights")
        conv = tf.nn.conv2d(images, kernel, [1, 4, 4, 1], padding="SAME")  # 96@56x56
        biases = tf.Variable(tf.constant(0.0, shape=[96], dtype=tf.float32), trainable=True, name="biases")
        conv1 = tf.nn.relu(tf.nn.bias_add(conv, biases))

        # 打印第一个卷积层的网络结构
        print(conv1.op.name, ' ', conv1.get_shape().as_list())

        parameters += [kernel, biases]

    # 添加一个LRN层和对打池化层
    lrn1 = tf.nn.lrn(conv1, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name="lrn1")
    pool1 = tf.nn.max_pool(lrn1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding="VALID", name="pool1")  # 96@27x27
    # 打印池化层网络结构
    print(pool1.op.name, ' ', pool1.get_shape().as_list())

    with tf.name_scope("conv2"):
        kernel = tf.Variable(tf.truncated_normal([5, 5, 96, 256], dtype=tf.float32, stddev=1e-1), name="weights")
        conv = tf.nn.conv2d(pool1, kernel, [1, 1, 1, 1], padding="SAME")  # 256@27x27
        biases = tf.Variable(tf.constant(0.0, shape=[256], dtype=tf.float32), trainable=True, name="biases")
        conv2 = tf.nn.relu(tf.nn.bias_add(conv, biases))

        # 打印第二个卷积层的网络结构
        print(conv2.op.name, '', conv2.get_shape().as_list())

        parameters += [kernel, biases]

    # 添加一个LRN层和对打池化层
    lrn2 = tf.nn.lrn(conv2, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name="lrn2")
    pool2 = tf.nn.max_pool(lrn2, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding="VALID", name="pool2")  # 256@13x13
    # 打印池化层网络结构
    print(pool2.op.name, ' ', pool2.get_shape().as_list())

    with tf.name_scope("conv3"):
        kernel = tf.Variable(tf.truncated_normal([3, 3, 256, 384], dtype=tf.float32, stddev=1e-1), name="weights")
        conv = tf.nn.conv2d(pool2, kernel, [1, 1, 1, 1], padding="SAME")  # 384@13x13
        biases = tf.Variable(tf.constant(0.0, shape=[384], dtype=tf.float32), trainable=True, name="biases")
        conv3 = tf.nn.relu(tf.nn.bias_add(conv, biases))

        # 打印第三个卷积层的网络结构
        print(conv3.op.name, ' ', conv3.get_shape().as_list())

        parameters += [kernel, biases]

    with tf.name_scope("conv4"):
        kernel = tf.Variable(tf.truncated_normal([3, 3, 384, 384], dtype=tf.float32, stddev=1e-1),
                             name="weights")  # 384@13x13
        conv = tf.nn.conv2d(conv3, kernel, [1, 1, 1, 1], padding="SAME")
        biases = tf.Variable(tf.constant(0.0, shape=[384], dtype=tf.float32), trainable=True, name="biases")
        conv4 = tf.nn.relu(tf.nn.bias_add(conv, biases))

        # 打印第三个卷积层的网络结构
        print(conv4.op.name, ' ', conv4.get_shape().as_list())

        parameters += [kernel, biases]

    with tf.name_scope("conv5"):
        kernel = tf.Variable(tf.truncated_normal([3, 3, 384, 256], dtype=tf.float32, stddev=1e-1),
                             name="weights")  # 256@13x13
        conv = tf.nn.conv2d(conv4, kernel, [1, 1, 1, 1], padding="SAME")
        biases = tf.Variable(tf.constant(0.0, shape=[256], dtype=tf.float32), trainable=True, name="biases")
        conv5 = tf.nn.relu(tf.nn.bias_add(conv, biases))

        # 打印第三个卷积层的网络结构
        print(conv5.op.name, ' ', conv5.get_shape().as_list())

        parameters += [kernel, biases]

    # 添加一个最大池化层
    pool5 = tf.nn.max_pool(conv5, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding="VALID", name="pool5")  # 256@6x6
    print(pool5.op.name, ' ', pool5.get_shape().as_list())

    # 把数据拉成一条
    pool_shape = pool5.get_shape().as_list()
    nodes = pool_shape[1] * pool_shape[2] * pool_shape[3]  # 6 x 6 x 256 = 9216
    reshaped = tf.reshape(pool5, [pool_shape[0], nodes])
    print("the reshaped length is:  %d" % nodes)

    # 创建第一个全连接层
    with tf.name_scope("fc_1"):
        fc1_weights = tf.Variable(tf.truncated_normal([nodes, 4096], dtype=tf.float32, stddev=1e-1), name="weights")
        fc1_bias = tf.Variable(tf.constant(0.0, shape=[4096], dtype=tf.float32), trainable=True, name="biases")
        fc1 = tf.nn.relu(tf.matmul(reshaped, fc1_weights) + fc1_bias)
        parameters += [fc1_weights, fc1_bias]

        # 打印第一个全连接层的结构信息
        print(fc1.op.name, ' ', fc1.get_shape().as_list())  # 4096

    # 创建第二个全连接层
    with tf.name_scope("fc_2"):
        fc2_weights = tf.Variable(tf.truncated_normal([4096, 4096], dtype=tf.float32, stddev=1e-1), name="weights")
        fc2_bias = tf.Variable(tf.constant(0.0, shape=[4096], dtype=tf.float32), trainable=True, name="biases")
        fc2 = tf.nn.relu(tf.matmul(reshaped, fc1_weights) + fc1_bias)
        parameters += [fc2_weights, fc2_bias]

        # 打印第一个全连接层的结构信息
        print(fc1.op.name, ' ', fc2.get_shape().as_list())  # 4096

    return fc2, parameters


if __name__ == '__main__':
    with tf.Graph().as_default():
        # 创建模拟图片数据
        image_size = 224
        batch_size = 32
        num_batches = 100

        images = tf.Variable(tf.random_normal([batch_size, image_size, image_size, 3], dtype=tf.float32, stddev=1e-1))

        fc_2, parameters = inference_o(images)

        init_op = tf.global_variables_initializer()

        # 配置会话
        config = tf.ConfigProto()
        config.gpu_options.allocator_type = "BFC"

        with tf.Session(config=config) as sess:
            sess.run(init_op)

            num_steps_burn_in = 10
            total_dura = 0.0
            total_dura_squared = 0.0

            back_total_dura = 0.0
            back_total_dura_squared = 0.0

            for i in range(num_batches + num_steps_burn_in):
                start_time = time.time()

                _ = sess.run(fc_2)
                duration = time.time() - start_time

                if i >= num_steps_burn_in:
                    if i % 10 == 0:
                        print(
                            "%s: step %d, duration=%.3f " % (datetime.datetime.now(), i - num_steps_burn_in, duration))

                    total_dura += duration
                    total_dura_squared += duration * duration

            average_time = total_dura / num_batches

            # 打印前向传播的运算时间的信息
            print("%s: Forward accross %d steps, %.3f +/- %.3f sec / batch" % (
                datetime.datetime.now(),
                num_batches,
                average_time,
                math.sqrt(total_dura_squared / num_batches - average_time * average_time)
            ))

            # =================== 测试反向传播过程 ====================
            grad = tf.gradients(tf.nn.l2_loss(fc_2), parameters)
            for i in range(num_batches + num_steps_burn_in):
                start_time = time.time()
                _ = sess.run(grad)
                duration = time.time() - start_time

                if i >= num_steps_burn_in:
                    if i % 10 == 0:
                        print("%s: step %d, duration=%.3f" % (datetime.datetime.now(), i - num_steps_burn_in, duration))
                    back_total_dura += duration
                    back_total_dura_squared += duration * duration

            back_avg_t = back_total_dura / num_batches

            # 打印反向传播的运算时间信息
            print("%s: Forward-backward accorss %d steps, %.3f +/- %.3f sec / batch" % (
                datetime.datetime.now(),
                num_batches,
                back_avg_t,
                math.sqrt(back_total_dura_squared / (num_batches - back_avg_t * back_avg_t))
            ))