1 AlexNet网络结构
AlexNet网络的新技术点:
1)成功使用ReLU作为CNN的激活函数,成功解决了Sigmoid函数在网络较深时的梯度弥散问题。
2)训练时使用Dropout随机忽略一部分神经元,以避免过拟合。
3)在CNN中使用重叠的最大池化,避免平均池化的模糊效果,并提出步长比池化核的尺寸小,这样池化层的输出之间会有重叠和覆盖,提升了特征的丰富性。
4)提出了LRN层,对局部神经元的活动创建竞争机制,使得其中响应较大的值变得相对更大,并抑制其他反馈较小的神经元,增强模型的泛化能力。
5)使用CUDA加速深度卷积网络的训练。
6)数据增强,随机地从256×256的原始图像中截取224×224大小的区域(以及水平翻转的镜像),减轻过拟合。
实验中没有使用ImageNet的数据,而是用随机生成的数据,只用来测量前向和反向计算的耗时。PS:本机没有GPU,全程是用CPU跑的。
2 代码实现
__author__ = 'Administrator'
#AlexNet网络
import tensorflow as tf
import time
import datetime
import math
# Benchmark configuration read by the functions below.
batch_size = 32    # leading dimension of the random input image tensor
num_batches = 100  # number of timed iterations per benchmark run
def print_activations(t):
    """Print the op name and static shape of a tensor.

    :param t: tensor-like object exposing ``op.name`` and
        ``get_shape().as_list()``.
    """
    print(t.op.name, ' ', t.get_shape().as_list())
def _conv_relu_layer(inputs, scope_name, kernel_shape, strides, parameters):
    """Build one convolution + bias + ReLU layer inside its own name scope.

    :param inputs: input tensor fed to the convolution.
    :param scope_name: name scope for the layer; the ReLU op takes the
        scope's name.
    :param kernel_shape: 4-element filter shape; its last entry is also
        used as the length of the bias vector.
    :param strides: 4-element stride list passed to ``tf.nn.conv2d``.
    :param parameters: list that the layer's kernel and bias variables are
        appended to (mutated in place).
    :return: the ReLU activation tensor.
    """
    with tf.name_scope(scope_name) as scope:
        kernel = tf.Variable(
            tf.truncated_normal(kernel_shape, stddev=0.1, dtype=tf.float32),
            name='weights')
        biases = tf.Variable(
            tf.constant(0.0, shape=[kernel_shape[-1]], dtype=tf.float32),
            trainable=True, name='biases')
        conv = tf.nn.conv2d(inputs, kernel, strides, padding='SAME')
        activation = tf.nn.relu(tf.nn.bias_add(conv, biases), name=scope)
    print_activations(activation)
    parameters.extend([kernel, biases])
    return activation


def inference(images):
    """Build the convolutional part of AlexNet (five conv layers, no FC layers).

    Layer order: conv1 -> lrn1 -> pool1 -> conv2 -> lrn2 -> pool2 ->
    conv3 -> conv4 -> conv5 -> pool3. The name and shape of every conv and
    pooling output is printed while the graph is being built.

    :param images: input image tensor; the first kernel expects 3 input
        channels in the last dimension.
    :return: tuple ``(pool3, parameters)`` where ``pool3`` is the output of
        the last max-pooling layer and ``parameters`` is the list of all
        kernel and bias variables in creation order.
    """
    parameters = []

    # conv1: 11x11 kernels, stride 4, followed by LRN and overlapping pooling
    # (3x3 window, stride 2).
    conv1 = _conv_relu_layer(
        images, 'conv1', [11, 11, 3, 96], [1, 4, 4, 1], parameters)
    lrn1 = tf.nn.lrn(conv1, depth_radius=4, bias=1, alpha=0.001 / 9, beta=0.75)
    pool1 = tf.nn.max_pool(lrn1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1],
                           padding='VALID', name='pool1')
    print_activations(pool1)

    # conv2: 5x5 kernels, stride 1, followed by LRN and overlapping pooling.
    conv2 = _conv_relu_layer(
        pool1, 'conv2', [5, 5, 96, 256], [1, 1, 1, 1], parameters)
    lrn2 = tf.nn.lrn(conv2, depth_radius=4, bias=1, alpha=0.001 / 9, beta=0.75)
    pool2 = tf.nn.max_pool(lrn2, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1],
                           padding='VALID', name='pool2')
    print_activations(pool2)

    # conv3-conv5: 3x3 kernels, stride 1, with no LRN or pooling in between.
    conv3 = _conv_relu_layer(
        pool2, 'conv3', [3, 3, 256, 384], [1, 1, 1, 1], parameters)
    conv4 = _conv_relu_layer(
        conv3, 'conv4', [3, 3, 384, 384], [1, 1, 1, 1], parameters)
    conv5 = _conv_relu_layer(
        conv4, 'conv5', [3, 3, 384, 256], [1, 1, 1, 1], parameters)

    # pool3: final overlapping max pooling.
    pool3 = tf.nn.max_pool(conv5, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1],
                           padding='VALID', name='pool3')
    print_activations(pool3)
    return pool3, parameters
#Alex全连接层
#全连接层1
#pool3_flat=tf.reshape(pool3,[batch_size,None])
# w_fc1=tf.Variable(tf.truncated_normal([None,4096],stddev=0.1,dtype=tf.float32))
# b_fc1=tf.Variable(tf.constant(0.0,[4096],dtype=tf.float32))
# f_fc1=tf.nn.relu(tf.matmul(pool3_flat,w_fc1)+b_fc1)
# f_fc1=tf.nn.dropout(f_fc1,keep_pre)
#
# #全连接层2
# w_fc2=tf.Variable(tf.truncated_normal([4096,4096],stddev=0.1,dtype=tf.float32))
# b_fc2=tf.Variable(tf.constant(0.0,[4096],dtype=tf.float32))
# f_fc2=tf.nn.relu(tf.matmul(f_fc1,w_fc2)+b_fc2)
#
# #全连接层3
# w_fc3=tf.Variable(tf.truncated_normal([4096,1000],stddev=0.1,dtype=tf.float32))
# b_fc3=tf.Variable(tf.constant(0.0,[1000],dtype=tf.float32))
# f_fc3=tf.matmul(f_fc2,w_fc3)+b_fc3
def time_tensorflow_run(session, target, info_string, steps=None,
                        num_steps_burn_in=10):
    """Time repeated executions of ``target`` and print mean/std per batch.

    The first ``num_steps_burn_in`` iterations are warm-up runs: they are
    executed but excluded from the statistics, so the reported mean and
    standard deviation cover exactly the measured steps.

    :param session: object with a ``run(target)`` method (a TensorFlow
        session).
    :param target: the op or tensor(s) passed to ``session.run``.
    :param info_string: label included in the summary line.
    :param steps: number of measured iterations; defaults to the
        module-level ``num_batches`` when ``None``.
    :param num_steps_burn_in: number of untimed warm-up iterations.
    :return: None
    :raises ValueError: if the number of measured steps is not positive.
    """
    measured_steps = num_batches if steps is None else steps
    if measured_steps <= 0:
        raise ValueError('number of measured steps must be positive')

    total_duration = 0.0
    total_duration_square = 0.0
    for i in range(measured_steps + num_steps_burn_in):
        start_time = time.time()
        _ = session.run(target)
        duration = time.time() - start_time
        if i < num_steps_burn_in:
            # Warm-up iteration: run it, but keep it out of the statistics.
            continue
        if not i % 10:
            print('%s:step %d, duration=%.3f'
                  % (datetime.datetime.now(), i - num_steps_burn_in, duration))
        total_duration += duration
        total_duration_square += duration * duration

    mn = total_duration / measured_steps
    # E[x^2] - E[x]^2 can dip slightly below zero from float rounding; clamp it.
    vr = max(total_duration_square / measured_steps - mn * mn, 0.0)
    sd = math.sqrt(vr)
    print('%s: %s across %d steps, %.3f +/-%.3f sec /batch'
          % (datetime.datetime.now(), info_string, measured_steps, mn, sd))
def run_benchmark():
    """Benchmark the AlexNet conv stack on randomly generated input.

    Builds a fresh graph, feeds it a random-normal image variable of shape
    ``[batch_size, 224, 224, 3]``, then times the forward pass and the
    forward-backward pass (gradients of an L2 loss on the final pooling
    output with respect to all layer parameters).
    """
    with tf.Graph().as_default():
        image_size = 224
        images = tf.Variable(
            tf.random_normal([batch_size, image_size, image_size, 3],
                             dtype=tf.float32, stddev=1e-1))
        pool, parameters = inference(images)
        init = tf.global_variables_initializer()

        # The context manager closes the session (and frees its resources)
        # even if one of the timing runs raises.
        with tf.Session() as sess:
            sess.run(init)
            time_tensorflow_run(sess, pool, 'Forward')

            objective = tf.nn.l2_loss(pool)
            grad = tf.gradients(objective, parameters)
            time_tensorflow_run(sess, grad, 'Forward-backward')
# Run the benchmark only when executed as a script, not when imported.
if __name__ == '__main__':
    run_benchmark()
3 实验结果
conv1 [32, 56, 56, 96]
pool1 [32, 27, 27, 96]
conv2 [32, 27, 27, 256]
pool2 [32, 13, 13, 256]
conv3 [32, 13, 13, 384]
conv4 [32, 13, 13, 384]
conv5 [32, 13, 13, 256]
pool3 [32, 6, 6, 256]
2017-08-24 22:15:05.174433:step 0, duration=0.750
2017-08-24 22:15:12.671433:step 10, duration=0.748
2017-08-24 22:15:20.167433:step 20, duration=0.751
2017-08-24 22:15:27.665433:step 30, duration=0.750
2017-08-24 22:15:35.158433:step 40, duration=0.749
2017-08-24 22:15:42.651433:step 50, duration=0.749
2017-08-24 22:15:50.142433:step 60, duration=0.750
2017-08-24 22:15:57.643433:step 70, duration=0.750
2017-08-24 22:16:05.145433:step 80, duration=0.750
2017-08-24 22:16:12.646433:step 90, duration=0.750
2017-08-24 22:16:19.400433: Forward across 100 steps, 0.825 +/-0.249 sec /batch
2017-08-24 22:16:52.836633:step 0, duration=3.023
2017-08-24 22:17:23.461633:step 10, duration=3.083
2017-08-24 22:17:53.880833:step 20, duration=3.025
2017-08-24 22:18:24.242233:step 30, duration=3.018
2017-08-24 22:18:54.597233:step 40, duration=3.031
2017-08-24 22:19:24.979833:step 50, duration=3.055
2017-08-24 22:19:55.435033:step 60, duration=3.053
2017-08-24 22:20:25.889633:step 70, duration=3.038
2017-08-24 22:20:56.347233:step 80, duration=3.028
2017-08-24 22:21:26.645433:step 90, duration=3.043
2017-08-24 22:21:53.933033: Forward-backward across 100 steps, 3.345 +/-1.008 sec /batch
pool1 [32, 27, 27, 96]
conv2 [32, 27, 27, 256]
pool2 [32, 13, 13, 256]
conv3 [32, 13, 13, 384]
conv4 [32, 13, 13, 384]
conv5 [32, 13, 13, 256]
pool3 [32, 6, 6, 256]
2017-08-24 22:15:05.174433:step 0, duration=0.750
2017-08-24 22:15:12.671433:step 10, duration=0.748
2017-08-24 22:15:20.167433:step 20, duration=0.751
2017-08-24 22:15:27.665433:step 30, duration=0.750
2017-08-24 22:15:35.158433:step 40, duration=0.749
2017-08-24 22:15:42.651433:step 50, duration=0.749
2017-08-24 22:15:50.142433:step 60, duration=0.750
2017-08-24 22:15:57.643433:step 70, duration=0.750
2017-08-24 22:16:05.145433:step 80, duration=0.750
2017-08-24 22:16:12.646433:step 90, duration=0.750
2017-08-24 22:16:19.400433: Forward across 100 steps, 0.825 +/-0.249 sec /batch
2017-08-24 22:16:52.836633:step 0, duration=3.023
2017-08-24 22:17:23.461633:step 10, duration=3.083
2017-08-24 22:17:53.880833:step 20, duration=3.025
2017-08-24 22:18:24.242233:step 30, duration=3.018
2017-08-24 22:18:54.597233:step 40, duration=3.031
2017-08-24 22:19:24.979833:step 50, duration=3.055
2017-08-24 22:19:55.435033:step 60, duration=3.053
2017-08-24 22:20:25.889633:step 70, duration=3.038
2017-08-24 22:20:56.347233:step 80, duration=3.028
2017-08-24 22:21:26.645433:step 90, duration=3.043
2017-08-24 22:21:53.933033: Forward-backward across 100 steps, 3.345 +/-1.008 sec /batch