TensorFlow Implementation of VGGNet


[Figure: VGGNet network configurations A–E]

VGGNet was the runner-up in the 2014 ImageNet (ILSVRC) competition. It explored the relationship between the depth of a convolutional network and its performance: by repeatedly stacking 3x3 convolutional layers and 2x2 max-pooling layers, it successfully built networks 16 to 19 layers deep. The VGGNet structure is simple and highly extensible, and it transfers easily to other image datasets.
VGGNet uses only "small" convolution kernels throughout and improves performance by steadily deepening the network. The figure above shows the VGGNet configurations.

Although the network gradually deepens from configuration A to E, the number of parameters does not grow much, because most of the parameters sit in the last three fully connected layers. Configurations D and E are the well-known VGGNet-16 and VGGNet-19.

Compared with B, configuration C adds 1x1 convolutional layers. Their main purpose is a "linear transformation" of the channels (followed by the nonlinearity): the number of input and output channels stays the same, so no dimensionality reduction takes place.

VGGNet has five convolutional stages, each containing 2-3 convolutional layers followed by a max-pooling layer. Within a stage all convolutional layers use the same number of kernels, and later stages use more kernels.

Tricks:

Multiple identical 3x3 convolutional layers are often stacked in series.
Two stacked 3x3 convolutional layers have the same receptive field as a single 5x5 layer (and three stacked 3x3 layers match a 7x7 layer), yet use fewer parameters and provide more nonlinear transformations, since each layer is followed by a ReLU activation.
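
To make the parameter savings concrete, here is a quick back-of-the-envelope check (assuming C input channels and C output channels, ignoring biases):

```python
# Weight counts for C input and C output channels (biases ignored)
C = 256
two_3x3   = 2 * (3 * 3 * C * C)  # two stacked 3x3 layers:   18*C^2
one_5x5   = 5 * 5 * C * C        # one 5x5 layer:            25*C^2
three_3x3 = 3 * (3 * 3 * C * C)  # three stacked 3x3 layers: 27*C^2
one_7x7   = 7 * 7 * C * C        # one 7x7 layer:            49*C^2
print(two_3x3, one_5x5)      # 1179648 1638400
print(three_3x3, one_7x7)    # 1769472 3211264
```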

The authors summarize several observations in the paper:

(1) LRN (local response normalization) does not help much.

(2) Deeper networks perform better.

(3) 1x1 convolutions are also effective, but less so than 3x3 ones; larger convolution kernels can learn larger spatial features.

# Import libraries
import tensorflow as tf  # written against TensorFlow 1.x (1.3)
import math
import time
from datetime import datetime

VGGNet has many layers, so we wrap each convolutional layer (parameter initialization, convolution, ReLU) in a single function, which keeps the network-building code short.

def conv_op(input_op, name, kh, kw, n_out, dh, dw, p):
    """
    input_op: input tensor
    name: layer name, used for the name_scope
    kh, kw: kernel height and width
    n_out: number of kernels (output channels)
    dh, dw: stride height and width
    p: list collecting the layer parameters (optional)
    """
    n_in = input_op.get_shape()[-1].value  # input channel count, e.g. 3 for a 224x224x3 image
    # kernel shape: [kh, kw, n_in, n_out]
    with tf.name_scope(name) as scope:
        kernel = tf.get_variable(name=scope+'w', shape=[kh, kw, n_in, n_out], dtype=tf.float32,
                                 initializer=tf.contrib.layers.xavier_initializer_conv2d())  # Xavier initialization
        conv = tf.nn.conv2d(input_op, kernel, [1, dh, dw, 1], padding="SAME")
        biases = tf.Variable(tf.constant(0.0, shape=[n_out], dtype=tf.float32), trainable=True, name='b')
        bias = tf.nn.bias_add(conv, biases)
        activation = tf.nn.relu(bias, name=scope)
        p += [kernel, biases]  # collected only so the parameters can be inspected later; not strictly needed
        return activation
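
As a quick smoke test of conv_op (a hypothetical snippet, not from the original post): a 3x3, stride-1 convolution with 64 kernels on a [batch, 224, 224, 3] input should keep the spatial size (SAME padding) and produce 64 output channels.

```python
# Hypothetical conv_op smoke test; 'conv_test' is a made-up layer name
params = []
x = tf.placeholder(tf.float32, [None, 224, 224, 3])
y = conv_op(x, name='conv_test', kh=3, kw=3, n_out=64, dh=1, dw=1, p=params)
print(y.get_shape().as_list())  # [None, 224, 224, 64]
print(len(params))              # 2: kernel and biases
```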

Similarly, we define a fully connected layer function to reduce the amount of code when building the network.

def fc_op(input_op, name, n_out, p):
    n_in = input_op.get_shape()[-1].value  # input dimension
    with tf.name_scope(name) as scope:
        kernel = tf.get_variable(name=scope+'w', shape=[n_in, n_out], dtype=tf.float32,
                                 initializer=tf.contrib.layers.xavier_initializer())
        biases = tf.Variable(tf.constant(0.1, shape=[n_out], dtype=tf.float32), name='b')
        a = tf.matmul(input_op, kernel) + biases
        activation = tf.nn.relu(a, name=scope)
        # the two lines above can be fused into one:
        # activation = tf.nn.relu_layer(input_op, kernel, biases, name=scope)
        p += [kernel, biases]
        return activation
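
An analogous hypothetical check for fc_op (the input size 25088 matches the flattened pool5 output derived later):

```python
# Hypothetical fc_op smoke test; 'fc_test' is a made-up layer name
fc_params = []
v = tf.placeholder(tf.float32, [None, 25088])
h = fc_op(v, name='fc_test', n_out=4096, p=fc_params)
print(h.get_shape().as_list())  # [None, 4096]
```
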
# Define the max-pooling layer helper
def max_pool_op(input_op, name, kh, kw, dh, dw):
    return tf.nn.max_pool(input_op, ksize=[1, kh, kw, 1], strides=[1, dh, dw, 1], padding="SAME", name=name)

# Helper that prints each layer's output shape
def print_activation(t):
    print(t.op.name, ": ", t.get_shape().as_list())
# Build VGGNet-16
def inference(input_op, keep_prob):
    p = []  # parameter list
    # Stage 1: two conv layers with 64 kernels each
    conv1_1 = conv_op(input_op, name='conv1_1', kh=3, kw=3, n_out=64, dh=1, dw=1, p=p)
    conv1_2 = conv_op(conv1_1, name='conv1_2', kh=3, kw=3, n_out=64, dh=1, dw=1, p=p)
    pool1 = max_pool_op(conv1_2, name='pool1', kh=2, kw=2, dh=2, dw=2)
    print_activation(pool1)

    # Stage 2: two conv layers with 128 kernels each
    conv2_1 = conv_op(pool1, name='conv2_1', kh=3, kw=3, n_out=128, dh=1, dw=1, p=p)
    conv2_2 = conv_op(conv2_1, name='conv2_2', kh=3, kw=3, n_out=128, dh=1, dw=1, p=p)
    pool2 = max_pool_op(conv2_2, name='pool2', kh=2, kw=2, dh=2, dw=2)
    print_activation(pool2)

    # Stage 3: three conv layers with 256 kernels each
    conv3_1 = conv_op(pool2, name='conv3_1', kh=3, kw=3, n_out=256, dh=1, dw=1, p=p)
    conv3_2 = conv_op(conv3_1, name='conv3_2', kh=3, kw=3, n_out=256, dh=1, dw=1, p=p)
    conv3_3 = conv_op(conv3_2, name='conv3_3', kh=3, kw=3, n_out=256, dh=1, dw=1, p=p)
    pool3 = max_pool_op(conv3_3, name='pool3', kh=2, kw=2, dh=2, dw=2)

    # Stage 4: three conv layers with 512 kernels each
    conv4_1 = conv_op(pool3, name='conv4_1', kh=3, kw=3, n_out=512, dh=1, dw=1, p=p)
    conv4_2 = conv_op(conv4_1, name='conv4_2', kh=3, kw=3, n_out=512, dh=1, dw=1, p=p)
    conv4_3 = conv_op(conv4_2, name='conv4_3', kh=3, kw=3, n_out=512, dh=1, dw=1, p=p)
    pool4 = max_pool_op(conv4_3, name='pool4', kh=2, kw=2, dh=2, dw=2)

    # Stage 5: three conv layers with 512 kernels each
    conv5_1 = conv_op(pool4, name='conv5_1', kh=3, kw=3, n_out=512, dh=1, dw=1, p=p)
    conv5_2 = conv_op(conv5_1, name='conv5_2', kh=3, kw=3, n_out=512, dh=1, dw=1, p=p)
    conv5_3 = conv_op(conv5_2, name='conv5_3', kh=3, kw=3, n_out=512, dh=1, dw=1, p=p)
    pool5 = max_pool_op(conv5_3, name='pool5', kh=2, kw=2, dh=2, dw=2)
    print_activation(pool5)

    # Flatten pool5 before the fully connected layers
    shp = pool5.get_shape().as_list()
    flattened_shape = shp[1] * shp[2] * shp[3]
    resh1 = tf.reshape(pool5, [-1, flattened_shape], name='resh1')

    # FC1: 4096 units, with dropout
    fc6 = fc_op(resh1, name='fc6', n_out=4096, p=p)
    fc6_drop = tf.nn.dropout(fc6, keep_prob, name='fc6_drop')

    # FC2: 4096 units, with dropout
    fc7 = fc_op(fc6_drop, name='fc7', n_out=4096, p=p)
    fc7_drop = tf.nn.dropout(fc7, keep_prob, name='fc7_drop')

    # FC3: 1000 units (note: fc_op applies a ReLU; a strictly linear
    # output layer would be more conventional before the softmax)
    fc8 = fc_op(fc7_drop, name='fc8', n_out=1000, p=p)

    # softmax and the most probable class
    softmax = tf.nn.softmax(fc8)
    prediction = tf.argmax(softmax, 1)
    return prediction, softmax, fc8, p
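
For a 224x224 input, each of the five 2x2, stride-2 max pools halves the spatial size, which determines the size of the flattened fc6 input:

```python
# 224 -> 112 -> 56 -> 28 -> 14 -> 7 after the five pooling stages,
# so pool5 is [batch, 7, 7, 512] and fc6 receives 7*7*512 inputs
size = 224
for _ in range(5):
    size //= 2
print(size, size * size * 512)  # 7 25088
```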

Once the network is built, the remaining steps are the loss computation, the optimizer, and the training loop, after which you create a session and start training. A minimal sketch of that wiring is shown below; afterwards we run a forward pass on random data to verify that the graph is assembled correctly.
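
The sketch is an illustration under assumptions, not the author's training code: the placeholder shapes, the optimizer, and the learning rate are made up, and it must live in its own graph, since conv_op creates variables under fixed names and inference therefore cannot be called twice in the same graph.

```python
# Hypothetical training wiring (assumed shapes and hyperparameters)
images = tf.placeholder(tf.float32, [None, 224, 224, 3])
labels = tf.placeholder(tf.int64, [None])
keep_prob = tf.placeholder(tf.float32)

_, _, fc8, _ = inference(images, keep_prob)  # fc8 serves as the logits
loss = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=fc8))
train_op = tf.train.MomentumOptimizer(0.01, momentum=0.9).minimize(loss)
# one step: sess.run(train_op, feed_dict={images: x, labels: y, keep_prob: 0.5})
```

The forward-pass check: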

# Build the graph first: random images stand in for real input data
image_size = 224
images = tf.Variable(tf.random_normal([32, image_size, image_size, 3],
                                      dtype=tf.float32, stddev=0.1))
keep_prob = tf.placeholder(tf.float32)  # scalar placeholder for the dropout keep probability
prediction, softmax, fc8, p = inference(images, keep_prob)

# Initialize variables only after the full graph (and its variables) exists
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    print(sess.run(prediction, feed_dict={keep_prob: 0.5}))
For comparison, here is a TF-Slim implementation of the VGGNet-16 structure:

```python
import tensorflow as tf
import tensorflow.contrib.slim as slim

# VGGNet-16 built with TF-Slim
def VGG16(inputs, num_classes=1000, is_training=True,
          dropout_keep_prob=0.5, scope='vgg_16'):
    with tf.variable_scope(scope, 'vgg_16', [inputs]) as sc:
        # Collect every layer output in a collection so all end points
        # can be returned as a dict at the end
        end_points_collection = sc.name + '_end_points'
        with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d],
                            outputs_collections=end_points_collection):
            net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
            net = slim.max_pool2d(net, [2, 2], scope='pool1')
            net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
            net = slim.max_pool2d(net, [2, 2], scope='pool2')
            net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
            net = slim.max_pool2d(net, [2, 2], scope='pool3')
            net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
            net = slim.max_pool2d(net, [2, 2], scope='pool4')
            net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
            net = slim.max_pool2d(net, [2, 2], scope='pool5')
            # Flatten the conv output so the fully connected layers can
            # make predictions over all features
            net = slim.flatten(net)
            net = slim.fully_connected(net, 4096, scope='fc6')
            net = slim.dropout(net, dropout_keep_prob, is_training=is_training, scope='dropout6')
            net = slim.fully_connected(net, 4096, scope='fc7')
            net = slim.dropout(net, dropout_keep_prob, is_training=is_training, scope='dropout7')
            logits = slim.fully_connected(net, num_classes, activation_fn=None, scope='fc8')
            end_points = slim.utils.convert_collection_to_dict(end_points_collection)
            end_points[sc.name + '/fc8'] = logits
            return logits, end_points
```
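
A hypothetical call of this slim version (assuming the standard 224x224 input):

```python
inputs = tf.placeholder(tf.float32, [None, 224, 224, 3])
logits, end_points = VGG16(inputs, num_classes=1000, is_training=False)
print(logits.get_shape().as_list())  # [None, 1000]
```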