import tensorflow as tf
import math
import time
from datetime import datetime
# Number of images in each batch fed through the network.
batch_size = 32
# Number of timed runs per benchmark phase (burn-in runs are executed in addition).
num_batches = 100
def myprint(conv):
    """Print the op name and static shape of a tensor on a single line.

    Args:
        conv: object exposing ``op.name`` and ``get_shape().as_list()``
            (e.g. a TensorFlow tensor).
    """
    op_name = conv.op.name
    static_shape = conv.get_shape().as_list()
    print(op_name, " ", static_shape)
def _conv_relu(inputs, scope_name, kernel_shape, strides):
    """Build one conv2d + bias + ReLU layer inside name scope ``scope_name``.

    Args:
        inputs: input tensor passed to ``tf.nn.conv2d``.
        scope_name: name scope under which the layer's ops are created.
        kernel_shape: ``[height, width, in_channels, out_channels]``.
        strides: strides list passed to ``tf.nn.conv2d``.

    Returns:
        Tuple ``(activation, [kernel, biases])``.
    """
    with tf.name_scope(scope_name):
        kernel = tf.Variable(
            tf.truncated_normal(kernel_shape, dtype=tf.float32, stddev=1e-1),
            name="weights")
        conv = tf.nn.conv2d(inputs, kernel, strides, padding="SAME")
        # One bias per output channel.
        biases = tf.Variable(
            tf.constant(0.0, shape=[kernel_shape[-1]], dtype=tf.float32),
            trainable=True, name="biases")
        activation = tf.nn.relu(tf.nn.bias_add(conv, biases))
        # Print this layer's op name and output shape.
        myprint(activation)
    return activation, [kernel, biases]


def _fully_connected_relu(inputs, scope_name, num_inputs, num_outputs):
    """Build one matmul + bias + ReLU layer inside name scope ``scope_name``.

    Args:
        inputs: 2-D input tensor whose second dimension is ``num_inputs``.
        scope_name: name scope under which the layer's ops are created.
        num_inputs: number of input features.
        num_outputs: number of output units.

    Returns:
        Tuple ``(activation, [weights, biases])``.
    """
    with tf.name_scope(scope_name):
        weights = tf.Variable(
            tf.truncated_normal([num_inputs, num_outputs],
                                dtype=tf.float32, stddev=1e-1),
            name="weights")
        biases = tf.Variable(
            tf.constant(0.0, shape=[num_outputs], dtype=tf.float32),
            trainable=True, name="biases")
        activation = tf.nn.relu(tf.matmul(inputs, weights) + biases)
        myprint(activation)
    return activation, [weights, biases]


def inference_op(images):
    """Build the forward graph: five conv layers followed by two FC layers.

    The layer sizes follow the AlexNet layout (11x11/96, 5x5/256, 3x3/384,
    3x3/384, 3x3/256, then two 4096-unit fully connected layers). The op name
    and shape of every layer output are printed via ``myprint`` while the
    graph is being built.

    Args:
        images: 4-D float32 tensor with 3 channels in the last dimension.
            The spatial dimensions must be statically known because the
            flattened feature size is computed from the static shape of the
            last pooling layer; the batch dimension may be unknown.

    Returns:
        Tuple ``(fc_2, parameters)`` where ``fc_2`` is the output of the
        second fully connected layer and ``parameters`` is the list of all
        created variables, ordered ``[weights, biases]`` layer by layer.
    """
    parameters = []

    # conv1, followed by local response normalization and max pooling.
    conv1, layer_vars = _conv_relu(
        images, "conv1", [11, 11, 3, 96], [1, 4, 4, 1])
    parameters += layer_vars
    lrn1 = tf.nn.lrn(conv1, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75,
                     name="lrn1")
    pool1 = tf.nn.max_pool(lrn1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1],
                           padding="VALID", name="pool1")
    myprint(pool1)

    # conv2, followed by local response normalization and max pooling.
    conv2, layer_vars = _conv_relu(
        pool1, "conv2", [5, 5, 96, 256], [1, 1, 1, 1])
    parameters += layer_vars
    lrn2 = tf.nn.lrn(conv2, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75,
                     name="lrn2")
    pool2 = tf.nn.max_pool(lrn2, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1],
                           padding="VALID", name="pool2")
    myprint(pool2)

    # conv3-conv5 are stacked directly, without LRN or pooling in between.
    conv3, layer_vars = _conv_relu(
        pool2, "conv3", [3, 3, 256, 384], [1, 1, 1, 1])
    parameters += layer_vars
    conv4, layer_vars = _conv_relu(
        conv3, "conv4", [3, 3, 384, 384], [1, 1, 1, 1])
    parameters += layer_vars
    conv5, layer_vars = _conv_relu(
        conv4, "conv5", [3, 3, 384, 256], [1, 1, 1, 1])
    parameters += layer_vars
    pool5 = tf.nn.max_pool(conv5, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1],
                           padding="VALID", name="pool5")
    myprint(pool5)

    # Flatten the pooled feature maps for the fully connected layers.
    # -1 lets TensorFlow infer the batch dimension, so the graph also works
    # when the batch size is not statically known.
    pool_shape = pool5.get_shape().as_list()
    nodes = pool_shape[1] * pool_shape[2] * pool_shape[3]
    reshaped = tf.reshape(pool5, [-1, nodes])

    # Fully connected layers.
    fc_1, layer_vars = _fully_connected_relu(reshaped, "fc_1", nodes, 4096)
    parameters += layer_vars
    fc_2, layer_vars = _fully_connected_relu(fc_1, "fc_2", 4096, 4096)
    parameters += layer_vars

    return fc_2, parameters
def _time_run(sess, target, label, num_steps, num_steps_burn_in=10):
    """Time repeated ``sess.run(target)`` calls and print per-batch statistics.

    The first ``num_steps_burn_in`` runs are executed but excluded from the
    statistics; the following ``num_steps`` runs are measured. Every tenth
    measured step prints its duration, and a final line prints the mean and
    standard deviation in seconds per batch.

    Args:
        sess: session used to execute ``target``.
        target: tensor(s)/op(s) passed to ``sess.run``.
        label: text inserted into the summary line (e.g. ``"Forward"``).
        num_steps: number of measured runs.
        num_steps_burn_in: number of untimed warm-up runs.
    """
    total_dura = 0.0
    total_dura_squared = 0.0
    for i in range(num_steps + num_steps_burn_in):
        start_time = time.time()
        sess.run(target)
        duration = time.time() - start_time
        if i < num_steps_burn_in:
            continue
        if i % 10 == 0:
            print('%s: step %d, duration = %.3f' %
                  (datetime.now(), i - num_steps_burn_in, duration))
        total_dura += duration
        total_dura_squared += duration * duration

    average_time = total_dura / num_steps
    # E[x^2] - E[x]^2 can come out slightly negative from float rounding when
    # all durations are nearly equal; clamp so math.sqrt does not raise.
    variance = max(
        total_dura_squared / num_steps - average_time * average_time, 0.0)
    print('%s: %s across %d steps, %.3f +/- %.3f sec / batch' %
          (datetime.now(), label, num_steps, average_time,
           math.sqrt(variance)))


with tf.Graph().as_default():
    image_size = 224
    # Random input images stand in for real data in this benchmark.
    images = tf.Variable(
        tf.random_normal([batch_size, image_size, image_size, 3],
                         dtype=tf.float32, stddev=1e-1))
    fc_2, parameters = inference_op(images=images)
    init_op = tf.global_variables_initializer()

    # Configure the GPU memory allocation strategy.
    config = tf.ConfigProto()
    config.gpu_options.allocator_type = "BFC"

    with tf.Session(config=config) as sess:
        sess.run(init_op)

        # Forward pass timing.
        _time_run(sess, fc_2, "Forward", num_batches)

        # grad holds the gradient of the L2 loss of fc_2 with respect to each
        # entry of parameters, so each gradient has the same shape as its
        # variable.
        grad = tf.gradients(tf.nn.l2_loss(fc_2), parameters)

        # Forward + backward pass timing.
        _time_run(sess, grad, "Forward-backward", num_batches)

        # Evaluate the gradients once more and print their values.
        print(sess.run(grad))