In TensorBoard there are two concepts: the tensor and the op. Arithmetic operations such as addition, subtraction, multiplication, and division are all ops.
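A minimal sketch of the distinction (the names here are just for illustration):

import tensorflow as tf

a = tf.constant(3.0, name='a')
b = tf.constant(4.0, name='b')
c = tf.add(a, b, name='c')   # tf.add creates an 'Add' op in the graph
print(c.op.name)             # 'c'   -> the op that produced the value
print(c.name)                # 'c:0' -> the tensor: '<op name>:<output index>'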
Below are some common ways to use TensorBoard.
1 The role of tf.name_scope in TensorBoard:
I will use the MNIST code I wrote earlier as an example (code link). I added writer = tf.summary.FileWriter('./graph/mnist', sess.graph) to the original code, which at first had no name_scope at all, and then opened TensorBoard. The visualized graph looks like this:
As you can see, it is very messy and full of crossing lines. To fix this, we add tf.name_scope and look at the effect. The full code is as follows:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

mnist = input_data.read_data_sets('./data/mnist', one_hot=True)
print(mnist.validation.num_examples)
print(mnist.train.num_examples)
print(mnist.test.num_examples)

with tf.name_scope('input_data') as scope:
    x = tf.placeholder(tf.float32, shape=[None, 784])
    x_input = tf.reshape(x, [-1, 28, 28, 1])
with tf.name_scope('input_label') as scope:
    labels = tf.placeholder(tf.float32, [None, 10])
with tf.name_scope('conv1') as scope:
    w1 = tf.Variable(tf.truncated_normal(shape=[3, 3, 1, 32], stddev=0.05), name='w1')
    b1 = tf.Variable(tf.zeros(32), name='b1')
    conv1 = tf.nn.conv2d(x_input, w1, strides=[1, 1, 1, 1], padding='SAME', name='conv1')
with tf.name_scope('relu1') as scope:
    relu1 = tf.nn.relu(tf.add(conv1, b1), name='relu1')
with tf.name_scope('conv2') as scope:
    w2 = tf.Variable(tf.truncated_normal(shape=[3, 3, 32, 64], stddev=0.05), name='w2')
    b2 = tf.Variable(tf.zeros(64), name='b2')
    conv2 = tf.nn.conv2d(relu1, w2, strides=[1, 2, 2, 1], padding='SAME', name='conv2')
with tf.name_scope('relu2') as scope:
    relu2 = tf.nn.relu(conv2 + b2, name='relu2')
with tf.name_scope('conv3') as scope:
    w3 = tf.Variable(tf.truncated_normal(shape=[3, 3, 64, 128], mean=0, stddev=0.05), name='w3')
    b3 = tf.Variable(tf.zeros(128), name='b3')
    conv3 = tf.nn.conv2d(relu2, w3, strides=[1, 2, 2, 1], padding='SAME')
with tf.name_scope('relu3') as scope:
    relu3 = tf.nn.relu(conv3 + b3, name='relu3')
with tf.name_scope('fc1') as scope:
    # 28x28 input after two stride-2 convs -> 7x7 spatial size, 128 channels
    x_flat = tf.reshape(relu3, shape=[-1, 7 * 7 * 128])
    w_fc1 = tf.Variable(tf.truncated_normal(shape=[7 * 7 * 128, 1024], stddev=0.05, mean=0), name='w_fc1')
    b_fc1 = tf.Variable(tf.zeros(1024), name='b_fc1')
    fc1 = tf.add(tf.matmul(x_flat, w_fc1), b_fc1)
with tf.name_scope('relu_fc1') as scope:
    relu_fc1 = tf.nn.relu(fc1, name='relu_fc1')
with tf.name_scope('keep_prob') as scope:
    keep_prob = tf.placeholder(tf.float32)
with tf.name_scope('drop_1') as scope:
    drop_1 = tf.nn.dropout(relu_fc1, keep_prob=keep_prob, name='drop_1')
with tf.name_scope('bn_fc1') as scope:
    bn_fc1 = tf.layers.batch_normalization(drop_1, name='bn_fc1')
with tf.name_scope('fc2') as scope:
    w_fc2 = tf.Variable(tf.truncated_normal(shape=[1024, 512], stddev=0.05, mean=0), name='w_fc2')
    b_fc2 = tf.Variable(tf.zeros(512), name='b_fc2')
    fc2 = tf.add(tf.matmul(bn_fc1, w_fc2), b_fc2)
with tf.name_scope('relu_fc2') as scope:
    relu_fc2 = tf.nn.relu(fc2, name='relu_fc2')
with tf.name_scope('drop_2') as scope:
    drop_2 = tf.nn.dropout(relu_fc2, keep_prob=keep_prob, name='drop_2')
with tf.name_scope('bn_fc2') as scope:
    bn_fc2 = tf.layers.batch_normalization(drop_2, name='bn_fc2')
with tf.name_scope('fc3') as scope:
    w_fc3 = tf.Variable(tf.truncated_normal(shape=[512, 10], stddev=0.05, mean=0), name='w_fc3')
    b_fc3 = tf.Variable(tf.zeros(10), name='b_fc3')
    fc3 = tf.add(tf.matmul(bn_fc2, w_fc3), b_fc3)
with tf.name_scope('metrics') as scope:
    model_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=fc3, labels=labels))
    correct_prediction = tf.equal(tf.argmax(fc3, 1), tf.argmax(labels, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

num_batches = 800
batch_size = 64
learning_rate = 0.002
keep_pro = 0.75

#with tf.name_scope('train') as scope:
train_opt = tf.train.AdamOptimizer(learning_rate).minimize(model_loss)

with tf.Session(config=tf.ConfigProto(device_count={'GPU': 0})) as sess:  # 'GPU': 0 disables the GPU
    writer = tf.summary.FileWriter('./graph/mnist', sess.graph)
    sess.run(tf.global_variables_initializer())
    for batch_i in range(num_batches):
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        sess.run(train_opt, {x: batch_xs, labels: batch_ys, keep_prob: keep_pro})
        if batch_i % 100 == 0:
            loss, acc = sess.run([model_loss, accuracy],
                                 {x: mnist.validation.images, labels: mnist.validation.labels,
                                  keep_prob: 1.0})  # disable dropout at evaluation time
            print('Batch: {:>2}: Training loss: {:>3.5f}, Training accuracy: {:>3.5f}'.format(batch_i, loss, acc))
    print('Final validation accuracy: {:>3.5f}'.format(acc))
    acc = sess.run(accuracy, {x: mnist.test.images, labels: mnist.test.labels, keep_prob: 1.0})
    print('Final test accuracy: {:>3.5f}'.format(acc))
    correct = 0
    for i in range(100):
        correct += sess.run(accuracy, feed_dict={x: [mnist.test.images[i]], labels: [mnist.test.labels[i]],
                                                 keep_prob: 1.0})
    print("Accuracy on 100 samples:", correct / 100)
    writer.close()
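With the event file written, TensorBoard itself is launched from the shell by pointing --logdir at the directory passed to tf.summary.FileWriter, and then opened in the browser (it serves on port 6006 by default):

tensorboard --logdir=./graph/mnist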
The resulting model graph is shown below:
Now the whole graph looks much better and is far less cluttered. This is what tf.name_scope does for TensorBoard. Replacing it with tf.variable_scope gives the same effect here: both tf.name_scope and tf.variable_scope appear as a box that contains all of the ops created inside it, and the box can be expanded.
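One caveat, shown in the sketch below (the scope and variable names are just for illustration): the two scopes differ for variables created with tf.get_variable, which ignores tf.name_scope but respects tf.variable_scope.

import tensorflow as tf

with tf.name_scope('ns'):
    v1 = tf.Variable(0.0, name='v1')        # becomes ns/v1
    g1 = tf.get_variable('g1', shape=[])    # stays g1: name_scope is ignored

with tf.variable_scope('vs'):
    v2 = tf.Variable(0.0, name='v2')        # becomes vs/v2
    g2 = tf.get_variable('g2', shape=[])    # becomes vs/g2

print(v1.name, g1.name, v2.name, g2.name)   # ns/v1:0 g1:0 vs/v2:0 vs/g2:0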
The auxiliary ops shown beside the main graph can be added back into it; just right-click a node to add it. The graph simply looks worse with them included, so it feels better to leave them out.
2 Viewing each op's memory usage and compute time in TensorBoard
In fact, you only need to add the following few lines:
run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
run_metadata = tf.RunMetadata()
writer.add_run_metadata(run_metadata,'step%d'%step)
You also need to pass options=run_options, run_metadata=run_metadata to the sess.run call.
The full code is below; the model-building part is the same as before, so it is not repeated here.
with tf.Session(config=tf.ConfigProto(device_count={'GPU': 0})) as sess:
    step = 0
    writer = tf.summary.FileWriter('./graph/mnist', sess.graph)
    sess.run(tf.global_variables_initializer())
    for batch_i in range(num_batches):
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        run_metadata = tf.RunMetadata()
        sess.run([train_opt], {x: batch_xs, labels: batch_ys, keep_prob: keep_pro},
                 options=run_options, run_metadata=run_metadata)
        if batch_i % 100 == 0:
            loss, acc = sess.run([model_loss, accuracy],
                                 {x: mnist.validation.images, labels: mnist.validation.labels,
                                  keep_prob: 1.0})
            print('Batch: {:>2}: Training loss: {:>3.5f}, Training accuracy: {:>3.5f}'.format(batch_i, loss, acc))
            # Record the profiling data every 100 batches; each tag shows up
            # as a selectable run in TensorBoard's graph view.
            writer.add_run_metadata(run_metadata, 'step%d' % step)
            #writer.add_summary(summary_str, step)
            step += 1
    print('Final validation accuracy: {:>3.5f}'.format(acc))
    acc = sess.run(accuracy, {x: mnist.test.images, labels: mnist.test.labels, keep_prob: 1.0})
    print('Final test accuracy: {:>3.5f}'.format(acc))
    correct = 0
    for i in range(100):
        correct += sess.run(accuracy, feed_dict={x: [mnist.test.images[i]], labels: [mnist.test.labels[i]],
                                                 keep_prob: 1.0})
    print("Accuracy on 100 samples:", correct / 100)
    writer.close()
Then open TensorBoard and you will see the following interface:
In this chart you choose which recorded iteration to inspect.
This is the view after expanding one name_scope: you can see the memory usage details, and the same information is visible globally; the darker a node's color, the more memory or compute time it takes.
This helps you find where the model's computational bottleneck is.
However, because these elements are collected while the graph runs, execution may be somewhat slower.
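As a side note, the same run_metadata can also be exported as a Chrome trace and inspected outside TensorBoard in chrome://tracing; a minimal sketch (the output filename is arbitrary):

from tensorflow.python.client import timeline

# run_metadata is the tf.RunMetadata() filled in by the profiled sess.run call above
tl = timeline.Timeline(run_metadata.step_stats)
with open('timeline.json', 'w') as f:
    f.write(tl.generate_chrome_trace_format())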
3 Viewing the training status
This includes watching the loss, the accuracy, the outputs of a convolution, and many other quantities. The main methods are the following:
tf.summary.tensor_summary writes out an entire tensor
tf.summary.scalar writes out a single scalar value
tf.summary.histogram writes out a tensor as a histogram, for viewing its statistics
tf.summary.image writes out images
tf.summary.audio writes out audio
tf.summary.merge groups selected summaries together, for example to keep training and test results separate (see the sketch after this list)
tf.summary.merge_all puts all summaries into a single group
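For instance, here is a minimal sketch of grouping with tf.summary.merge, reusing model_loss and accuracy from the code above; which summaries go into which group is up to you:

loss_summary = tf.summary.scalar('loss', model_loss)
acc_summary = tf.summary.scalar('accuracy', accuracy)

# Run only the training-relevant summaries during training steps...
train_summaries = tf.summary.merge([loss_summary])
# ...and a larger group during evaluation, instead of tf.summary.merge_all().
eval_summaries = tf.summary.merge([loss_summary, acc_summary])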
Next let's watch how the loss and the accuracy change, and also write out the input images, using tf.summary.scalar and tf.summary.image. The specific code is as follows:
Only these few lines need to be added:
loss_summary = tf.summary.scalar('loss_summary',model_loss)
acc_summary = tf.summary.scalar('accuracy',accuracy)
tf.summary.image('input_image',x_input)
merged_summary = tf.summary.merge_all()
At the same time, fetch merged_summary in the sess.run call:
_,summary_str = sess.run([train_opt,merged_summary],{x:batch_xs,labels:batch_ys,keep_prob:keep_pro},options=run_options,run_metadata=run_metadata)
Then add the result to the event file:
writer.add_summary(summary_str,step)
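A related trick, sketched below assuming merged_summary is defined as above (train_feed and valid_feed are placeholder names for the two feed dicts): give training and validation their own FileWriter subdirectories, and TensorBoard will overlay their curves in the same chart.

train_writer = tf.summary.FileWriter('./graph/mnist/train', sess.graph)
valid_writer = tf.summary.FileWriter('./graph/mnist/valid')

# During a training step:
_, summary_str = sess.run([train_opt, merged_summary], feed_dict=train_feed)
train_writer.add_summary(summary_str, step)

# During a validation step:
summary_str = sess.run(merged_summary, feed_dict=valid_feed)
valid_writer.add_summary(summary_str, step)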
Then let's look at the effect:
This chart shows how our loss and accuracy change.
The picture above shows the input images at each step; there are many more below it that you can browse yourself; my screenshot does not capture them all.
If we want to watch some statistics of a variable, such as its mean, standard deviation, maximum, and minimum, and also draw its histogram, the approach is the same: compute each statistic and pass it to tf.summary.scalar, and pass the variable itself to tf.summary.histogram, as sketched below.
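A minimal sketch, close to the helper used in the official TensorFlow tutorials (the function and scope names here are my own):

def variable_summaries(var, name):
    # Attach scalar statistics and a histogram to a tensor.
    with tf.name_scope(name):
        mean = tf.reduce_mean(var)
        tf.summary.scalar('mean', mean)
        stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
        tf.summary.scalar('stddev', stddev)
        tf.summary.scalar('max', tf.reduce_max(var))
        tf.summary.scalar('min', tf.reduce_min(var))
        tf.summary.histogram('histogram', var)

variable_summaries(w1, 'w1_summaries')  # e.g. the first conv kernel from the code above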
Next we view the feature maps of a convolution; the code is as follows:
def show_feature_map(conv, dim, name):
    # Split the feature map along the channel axis into `dim` single-channel images.
    conv_channels = tf.split(conv, num_or_size_splits=dim, axis=3)
    with tf.name_scope(name):
        num_channels = len(conv_channels)
        for i in range(num_channels):
            tf.summary.image('channel-%d' % i, conv_channels[i], max_outputs=1)

show_feature_map(conv1, 32, "conv1_feature_map")
The results are shown below:
These are the output images of the first convolutional layer; you can see they are still fairly sharp. The later layers get blurrier; I only captured screenshots of these two here.