Now that I've started reading papers, the first one is AlexNet, so this is a good opportunity to organize my notes and write a code implementation.
1. I won't go over the history of AlexNet here; let's get straight to the network:
AlexNet is a classification network made up of five convolutional layers followed by three fully connected layers.
2. Details:
The input is a 224×224×3 image. The first convolutional layer uses 11×11 kernels with a stride of 4×4, VALID padding, and 96 kernels. After the convolution, following (224-11)/4+1, the feature map is 55×55×96 (strictly, the arithmetic only gives 55 with a 227×227 input, which is the commonly used correction to the paper's 224). It then goes through a 3×3 max-pooling filter with stride 2, producing 27×27×96, followed by an LRN layer.
The second layer is similar to the first: 5×5 kernels, stride 1×1, 256 kernels, SAME padding, output 27×27×256. Then comes a pooling layer with the same parameters as above, followed by an LRN layer, giving 13×13×256.
The third and fourth layers are identical: 3×3 kernels, 384 of them, stride 1, with no pooling or LRN; the output is 13×13×384.
The fifth layer uses the same kernel size and stride as the third and fourth; the only difference is that it has 256 kernels. The convolution is followed by max pooling. That ends the convolutional part, with a final output of 6×6×256.
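To double-check the sizes above, here is a tiny sanity check in plain Python using the usual VALID output-size rule, out = (in - kernel)//stride + 1 (the 227 input side follows the correction mentioned above; the SAME, stride-1 convolutions keep the spatial size unchanged):

def out_size(in_size, kernel, stride):
    # VALID convolution / pooling: floor((in - kernel) / stride) + 1
    return (in_size - kernel) // stride + 1

s = 227                   # input side (227 makes the arithmetic give exactly 55)
s = out_size(s, 11, 4)    # conv1: 11x11, stride 4  -> 55
s = out_size(s, 3, 2)     # pool1: 3x3, stride 2    -> 27
# conv2 uses SAME padding and stride 1, so the size stays 27
s = out_size(s, 3, 2)     # pool2: 3x3, stride 2    -> 13
# conv3/conv4/conv5 use SAME padding and stride 1, so the size stays 13
s = out_size(s, 3, 2)     # pool5: 3x3, stride 2    -> 6
print(s)                  # 6, i.e. the final 6x6x256 feature map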
Then come three fully connected layers: the first two have 4096 neurons each, and the last has 1000 neurons.
The first two fully connected layers also apply dropout after the activation. Note: at test time the drop probability should be 0 (i.e. no dropout), because at test time we want every neuron to contribute.
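A minimal illustration of that note (the placeholder name here is just for illustration): TensorFlow's tf.nn.dropout takes the keep probability, so a drop probability of 0 at test time corresponds to keep_prob = 1.0.

import tensorflow as tf

keep_prob = tf.placeholder(tf.float32, name='keep_prob')   # illustrative placeholder
fc = tf.ones([4, 8])                                        # stand-in for an FC activation
dropped = tf.nn.dropout(fc, keep_prob=keep_prob)            # kept units are scaled by 1/keep_prob

with tf.Session() as sess:
    train_out = sess.run(dropped, feed_dict={keep_prob: 0.5})  # training: entries are 0 or 2
    test_out = sess.run(dropped, feed_dict={keep_prob: 1.0})   # testing: all ones, nothing dropped
    print(train_out, test_out, sep='\n')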
3. What put AlexNet ahead of its time:
(1) It used dropout, which effectively mitigated overfitting.
(2) Overlapping pooling, with a 3×3 pooling window and a stride of 2, which helps capture features better (a small sketch follows).
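A toy sketch of what "overlapping" means here (the feature-map size is made up): because the 3×3 window is larger than the stride of 2, neighbouring pooling windows share a row/column of inputs, unlike a 2×2 window with stride 2.

import tensorflow as tf

x = tf.random_normal([1, 13, 13, 1])  # toy feature map
# overlapping: window (3) > stride (2), so adjacent windows share one row/column of inputs
overlap = tf.nn.max_pool(x, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='VALID')
# non-overlapping: window (2) == stride (2)
no_overlap = tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')
print(overlap.shape, no_overlap.shape)  # both (1, 6, 6, 1); only the receptive fields differ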
The code implementation follows:
Note: because of my GPU's limitations, some of the parameters in the network have been scaled down.
import tensorflow as tf
def inference(images, batch_size, n_classes, keep_prob):
    """
    Build the model
    Args:
        images: image batch, 4D tensor, tf.float32, [batch_size, width, height, channels]
        batch_size: number of images in a batch (needed for the flatten/reshape below)
        n_classes: number of output classes
        keep_prob: dropout keep probability for the first two fully connected layers
    Returns:
        output tensor with the computed logits, float, [batch_size, n_classes]
    """
    # conv1, shape = [kernel size, kernel size, channels, kernel numbers]
    with tf.variable_scope('conv1') as scope:
        weights = tf.get_variable('weights',
                                  shape=[11, 11, 3, 96],
                                  dtype=tf.float32,
                                  initializer=tf.truncated_normal_initializer(stddev=0.1, dtype=tf.float32))
        biases = tf.get_variable('biases',
                                 shape=[96],
                                 dtype=tf.float32,
                                 initializer=tf.constant_initializer(0.1))
        conv = tf.nn.conv2d(images, weights, strides=[1, 4, 4, 1], padding='VALID')
        pre_activation = tf.nn.bias_add(conv, biases)
        conv1 = tf.nn.relu(pre_activation, name=scope.name)

    # pool1 and norm1
    with tf.variable_scope('pooling1_lrn') as scope:
        pool1 = tf.nn.max_pool(conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1],
                               padding='VALID', name='pooling1')
        norm1 = tf.nn.lrn(pool1, depth_radius=4, bias=1.0, alpha=0.001 / 9.0,
                          beta=0.75, name='norm1')
    # conv2
    with tf.variable_scope('conv2') as scope:
        weights = tf.get_variable('weights',
                                  shape=[5, 5, 96, 256],
                                  dtype=tf.float32,
                                  initializer=tf.truncated_normal_initializer(stddev=0.1, dtype=tf.float32))
        biases = tf.get_variable('biases',
                                 shape=[256],
                                 dtype=tf.float32,
                                 initializer=tf.constant_initializer(0.1))
        conv = tf.nn.conv2d(norm1, weights, strides=[1, 1, 1, 1], padding='SAME')
        pre_activation = tf.nn.bias_add(conv, biases)
        conv2 = tf.nn.relu(pre_activation, name='conv2')
    # pool2 and norm2 (pooling parameters match pool1: 3x3 window, stride 2, as described above)
    with tf.variable_scope('pooling2_lrn') as scope:
        norm2 = tf.nn.lrn(conv2, depth_radius=4, bias=1.0, alpha=0.001 / 9.0,
                          beta=0.75, name='norm2')
        pool2 = tf.nn.max_pool(norm2, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1],
                               padding='VALID', name='pooling2')
    # conv3
    with tf.variable_scope('conv3') as scope:
        weights = tf.get_variable('weights',
                                  shape=[3, 3, 256, 384],
                                  dtype=tf.float32,
                                  initializer=tf.truncated_normal_initializer(stddev=0.1, dtype=tf.float32))
        biases = tf.get_variable('biases',
                                 shape=[384],
                                 dtype=tf.float32,
                                 initializer=tf.constant_initializer(0.1))
        conv = tf.nn.conv2d(pool2, weights, strides=[1, 1, 1, 1], padding='SAME')
        pre_activation = tf.nn.bias_add(conv, biases)
        conv3 = tf.nn.relu(pre_activation, name='conv3')
    # conv4
    with tf.variable_scope('conv4') as scope:
        weights = tf.get_variable('weights',
                                  shape=[3, 3, 384, 384],
                                  dtype=tf.float32,
                                  initializer=tf.truncated_normal_initializer(stddev=0.1, dtype=tf.float32))
        biases = tf.get_variable('biases',
                                 shape=[384],
                                 dtype=tf.float32,
                                 initializer=tf.constant_initializer(0.1))
        conv = tf.nn.conv2d(conv3, weights, strides=[1, 1, 1, 1], padding='SAME')
        pre_activation = tf.nn.bias_add(conv, biases)
        conv4 = tf.nn.relu(pre_activation, name='conv4')
    # conv5
    with tf.variable_scope('conv5') as scope:
        weights = tf.get_variable('weights',
                                  shape=[3, 3, 384, 256],
                                  dtype=tf.float32,
                                  initializer=tf.truncated_normal_initializer(stddev=0.1, dtype=tf.float32))
        biases = tf.get_variable('biases',
                                 shape=[256],
                                 dtype=tf.float32,
                                 initializer=tf.constant_initializer(0.1))
        conv = tf.nn.conv2d(conv4, weights, strides=[1, 1, 1, 1], padding='SAME')
        pre_activation = tf.nn.bias_add(conv, biases)
        conv5 = tf.nn.relu(pre_activation, name='conv5')

    # pool5
    with tf.variable_scope('pooling5') as scope:
        pool5 = tf.nn.max_pool(conv5, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1],
                               padding='SAME', name='pool5')
    # local1 (fully connected; 512 units instead of the paper's 4096, per the note above)
    with tf.variable_scope('local1') as scope:
        reshape = tf.reshape(pool5, shape=[batch_size, -1])
        dim = reshape.get_shape()[1].value
        weights = tf.get_variable('weights',
                                  shape=[dim, 512],
                                  dtype=tf.float32,
                                  initializer=tf.truncated_normal_initializer(stddev=0.005, dtype=tf.float32))
        biases = tf.get_variable('biases',
                                 shape=[512],
                                 dtype=tf.float32,
                                 initializer=tf.constant_initializer(0.1))
        local1 = tf.nn.relu(tf.matmul(reshape, weights) + biases, name='local1')
        local1 = tf.nn.dropout(local1, keep_prob=keep_prob)
    # local2
    with tf.variable_scope('local2') as scope:
        weights = tf.get_variable('weights',
                                  shape=[512, 512],
                                  dtype=tf.float32,
                                  initializer=tf.truncated_normal_initializer(stddev=0.005, dtype=tf.float32))
        biases = tf.get_variable('biases',
                                 shape=[512],
                                 dtype=tf.float32,
                                 initializer=tf.constant_initializer(0.1))
        local2 = tf.nn.relu(tf.matmul(local1, weights) + biases, name='local2')
        local2 = tf.nn.dropout(local2, keep_prob=keep_prob)
    # local3
    with tf.variable_scope('local3') as scope:
        weights = tf.get_variable('weights',
                                  shape=[512, 100],
                                  dtype=tf.float32,
                                  initializer=tf.truncated_normal_initializer(stddev=0.005, dtype=tf.float32))
        biases = tf.get_variable('biases',
                                 shape=[100],
                                 dtype=tf.float32,
                                 initializer=tf.constant_initializer(0.1))
        local3 = tf.nn.relu(tf.matmul(local2, weights) + biases, name='local3')
    # softmax_linear (raw logits; the softmax itself is applied inside the loss)
    with tf.variable_scope('softmax_linear') as scope:
        weights = tf.get_variable('weights',
                                  shape=[100, n_classes],
                                  dtype=tf.float32,
                                  initializer=tf.truncated_normal_initializer(stddev=0.1, dtype=tf.float32))
        biases = tf.get_variable('biases',
                                 shape=[n_classes],
                                 dtype=tf.float32,
                                 initializer=tf.constant_initializer(0.1))
        softmax_linear = tf.matmul(local3, weights) + biases
    return softmax_linear
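A hedged usage sketch for inference (the batch size, class count, input size, and placeholder names below are my own illustrative choices, not from the original post): batch_size must match the leading dimension of images because of the reshape in local1, and keep_prob can be fed either as a Python float or, as here, through a placeholder.

import tensorflow as tf

BATCH_SIZE, N_CLASSES, IMG_SIZE = 32, 10, 224   # illustrative values only

images_ph = tf.placeholder(tf.float32, [BATCH_SIZE, IMG_SIZE, IMG_SIZE, 3], name='images')
keep_prob_ph = tf.placeholder(tf.float32, name='keep_prob')

logits = inference(images_ph, BATCH_SIZE, N_CLASSES, keep_prob_ph)
print(logits.shape)   # (32, 10)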
def losses(logits, labels):
    """
    Compute loss from logits and labels
    Args:
        logits: logits tensor, float, [batch_size, n_classes]
        labels: label tensor, tf.int32, [batch_size]
    Returns:
        loss tensor of float type
    """
    with tf.variable_scope('loss') as scope:
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=labels, name='xentropy_per_example')
        loss = tf.reduce_mean(cross_entropy, name='loss')
        tf.summary.scalar(scope.name + '/loss', loss)
    return loss
def trainning(loss, learning_rate):
    """
    Training ops, the Op returned by this function is what must be passed to
    'sess.run()' call to cause the model to train.
    Args:
        loss: loss tensor, from losses()
        learning_rate: learning rate for the Adam optimizer
    Returns:
        train_op: The op for training
    """
    with tf.name_scope('optimizer'):
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        global_step = tf.Variable(0, name='global_step', trainable=False)
        train_op = optimizer.minimize(loss, global_step=global_step)
    return train_op
def evaluation(logits, labels):
    """
    Evaluate the quality of the logits at predicting the label.
    Args:
        logits: Logits tensor, float - [batch_size, NUM_CLASSES].
        labels: Labels tensor, int32 - [batch_size], with values in the
            range [0, NUM_CLASSES).
    Returns:
        A scalar float tensor with the fraction of examples (out of batch_size)
        that were predicted correctly.
    """
    with tf.variable_scope('accuracy') as scope:
        correct = tf.nn.in_top_k(logits, labels, 1)
        correct = tf.cast(correct, tf.float16)
        accuracy = tf.reduce_mean(correct)
        tf.summary.scalar(scope.name + '/accuracy', accuracy)
    return accuracy
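Finally, a minimal end-to-end sketch of how the four functions fit together (the sizes and the random data are stand-ins of my own, not from the original post); note keep_prob is 0.5 for the training step and 1.0 for evaluation, matching the dropout note earlier.

import numpy as np
import tensorflow as tf

BATCH_SIZE, N_CLASSES, IMG_SIZE = 32, 10, 224   # illustrative values only

images_ph = tf.placeholder(tf.float32, [BATCH_SIZE, IMG_SIZE, IMG_SIZE, 3])
labels_ph = tf.placeholder(tf.int32, [BATCH_SIZE])
keep_prob_ph = tf.placeholder(tf.float32)

logits = inference(images_ph, BATCH_SIZE, N_CLASSES, keep_prob_ph)
loss = losses(logits, labels_ph)
train_op = trainning(loss, learning_rate=1e-4)
accuracy = evaluation(logits, labels_ph)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # one step on random data, just to show the feed_dict shapes (use a real input pipeline in practice)
    imgs = np.random.rand(BATCH_SIZE, IMG_SIZE, IMG_SIZE, 3).astype(np.float32)
    lbls = np.random.randint(0, N_CLASSES, size=BATCH_SIZE).astype(np.int32)
    _, train_loss = sess.run([train_op, loss],
                             feed_dict={images_ph: imgs, labels_ph: lbls, keep_prob_ph: 0.5})
    # evaluation pass: keep_prob = 1.0 so no neurons are dropped
    acc = sess.run(accuracy,
                   feed_dict={images_ph: imgs, labels_ph: lbls, keep_prob_ph: 1.0})
    print(train_loss, acc)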