输入import tensorflow as tf应该出现_tensorflow使用timeline进行模型的性能分析

用tensorflow训练完网络后,有时想看一下graph中每个op是如何计算的以及各自的耗时,这个时候就可以用到timeline工具,从而有针对性地进行优化。

直接上代码,分别针对pb和tf-serving模型,只需要在sess.run的基础上加上options和run_metadata参数:

# -*- coding: UTF-8 -*-
import cv2
import tensorflow as tf
import os,time
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
from tensorflow.python.client import timeline

# Options handed to sess.run() in both inference functions; they request the
# FULL_TRACE trace level for that run.
run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
# Passed to sess.run() alongside run_options; its step_stats field is what the
# timeline is built from afterwards.
run_metadata = tf.RunMetadata()

# Export directory given to tf.saved_model.loader.load() in inference_tf_serving().
tf_serving_model = '/xxx/tf_serving_model/1/'

def inference_tf_serving():
    """Profile one inference pass of the SavedModel and dump a Chrome trace.

    Reads the raw bytes of ``1.jpg``, loads the SavedModel stored under the
    module-level ``tf_serving_model`` directory into a fresh graph, fetches
    the three output tensors once with the module-level ``run_options`` /
    ``run_metadata`` attached, prints the wall-clock time of that call and
    writes the collected timeline to ``xxx.json``.

    Returns:
        None. The results are the printed timing and the ``xxx.json`` file.
    """
    im_file = '1.jpg'
    # Only the bytes are needed, so the image file is closed before the
    # session starts instead of being held open for the whole run.
    with open(im_file, 'rb') as image_file:
        img_str = image_file.read()

    # To also log device placement, build the session with
    # config=tf.ConfigProto(log_device_placement=True).
    with tf.Session(graph=tf.Graph()) as sess:
        # The loader restores the model's variables into the session. Do NOT
        # run tf.global_variables_initializer() afterwards: it would reset
        # any restored variable to its initial value and the profile would no
        # longer reflect the trained model.
        tf.saved_model.loader.load(sess, [tf.saved_model.tag_constants.SERVING], tf_serving_model)

        # To list the ops in the graph (handy for finding tensor names):
        # for op in sess.graph.get_operations():
        #     print(op.name, op.values())

        # NOTE(review): the tensor names are placeholders; scores and classes
        # currently resolve to the same name and must be replaced with the
        # model's real output tensors.
        input_images = sess.graph.get_tensor_by_name('images:0')
        boxes_tf = sess.graph.get_tensor_by_name('import/xxx:0')
        scores_tf = sess.graph.get_tensor_by_name('import_1/xxx:0')
        classes_tf = sess.graph.get_tensor_by_name('import_1/xxx:0')

        t1 = time.time()
        boxes, scores, classes = sess.run(
            [boxes_tf, scores_tf, classes_tf],
            feed_dict={input_images: img_str},
            options=run_options,
            run_metadata=run_metadata,
        )
        t2 = time.time()
        print('time:', t2 - t1)

        # Convert the step stats gathered during the traced run into the
        # Chrome trace-event JSON format.
        tl = timeline.Timeline(run_metadata.step_stats)
        ctf = tl.generate_chrome_trace_format()
        with open('xxx.json', 'w') as trace_file:
            trace_file.write(ctf)
        print('done ... ')


def load_graph(frozen_graph_filename):
    """Read a serialized GraphDef from disk and import it into a new graph.

    Args:
        frozen_graph_filename: Path of the binary protobuf file to load.

    Returns:
        The newly created ``tf.Graph`` holding the imported nodes.
    """
    # Deserialize the GraphDef protobuf stored in the file.
    parsed_def = tf.GraphDef()
    with tf.gfile.GFile(frozen_graph_filename, "rb") as pb_file:
        parsed_def.ParseFromString(pb_file.read())

    # Import into a dedicated graph rather than the process-wide default one.
    imported = tf.Graph()
    with imported.as_default():
        # No explicit name is passed here; inference_pb() looks the tensors
        # up under the "import/" prefix.
        tf.import_graph_def(parsed_def)
    return imported

def inference_pb():
    """Profile one inference pass of the frozen ``xxx.pb`` graph.

    Loads the graph through ``load_graph``, reads the raw bytes of ``1.jpg``,
    fetches the three output tensors once with the module-level
    ``run_options`` / ``run_metadata`` attached, prints the wall-clock time
    of that call and writes the collected timeline to ``xxx.json``.

    Returns:
        None. The results are the printed timing and the ``xxx.json`` file.
    """
    model_path = 'xxx.pb'
    graph = load_graph(model_path)
    im_file = '1.jpg'
    # Only the bytes are needed, so the image file is closed before the
    # session starts instead of being held open for the whole run.
    with open(im_file, 'rb') as image_file:
        img_str = image_file.read()

    # To also log device placement, build the session with
    # config=tf.ConfigProto(log_device_placement=True).
    with tf.Session(graph=graph) as sess:
        # To list the ops in the graph (handy for finding tensor names):
        # for op in sess.graph.get_operations():
        #     print(op.name, op.values())

        # NOTE(review): the output tensor names are placeholders; all three
        # currently resolve to the same name and must be replaced with the
        # model's real output tensors.
        input_images = sess.graph.get_tensor_by_name('import/tf_image_string:0')
        boxes_tf = sess.graph.get_tensor_by_name('import/xxx:0')
        scores_tf = sess.graph.get_tensor_by_name('import/xxx:0')
        classes_tf = sess.graph.get_tensor_by_name('import/xxx:0')

        t1 = time.time()
        boxes, scores, classes = sess.run(
            [boxes_tf, scores_tf, classes_tf],
            feed_dict={input_images: img_str},
            options=run_options,
            run_metadata=run_metadata,
        )
        t2 = time.time()
        print('time:', t2 - t1)

        # Convert the step stats gathered during the traced run into the
        # Chrome trace-event JSON format.
        tl = timeline.Timeline(run_metadata.step_stats)
        ctf = tl.generate_chrome_trace_format()
        with open('xxx.json', 'w') as trace_file:
            trace_file.write(ctf)
        print('done ... ')

if __name__ == '__main__':
    # Profiles the frozen .pb graph. To profile the SavedModel instead,
    # uncomment the call below and comment out inference_pb(); both
    # functions write their trace to the same 'xxx.json' file.
    # inference_tf_serving()
    inference_pb()

打开chrome, 进到这个页面: chrome://tracing, 然后load刚刚生成的JSON文件就可以了。

e04b8efbefc4ec7485e414df7884bf35.png

可以看到,NHWCToNCHW这个op就占了大约一半的耗时,点击相应的op就能看到该op的执行过程和耗时等信息。使用W/S键可以放大/缩小,A/D键可以左右平移,从左到右就是整个graph的执行顺序。

参考:

tensorflow性能调优实践

https://blog.csdn.net/mtj66/article/details/79967241

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值