使用 Python API 从头创建网络(重点)
1、从头创建engine的9个基本步骤
step1:创建logger
step2:创建builder
step3:创建network
step4:向network中添加网络层
step5:设置并标记输出
step6:创建config并设置最大工作空间(最大batchsize在builder上设置)
step7:创建engine
step8:序列化保存engine
step9:释放资源
2、示例代码
import tensorrt as trt
# Build a small conv/pool/fc network layer by layer with the TensorRT Python
# API and serialize the resulting engine to disk.
#
# NOTE(review): `weights`, `INPUT_NAME`, `INPUT_SHAPE`, `OUTPUT_NAME` and
# `OUTPUT_SIZE` are not defined in this snippet; `weights` is presumably a
# mapping of tensors exposing `.numpy()` (e.g. a PyTorch state_dict) -- confirm.
# NOTE(review): this uses the implicit-batch builder API (`max_batch_size`,
# `max_workspace_size`, `build_engine`); newer TensorRT releases deprecate or
# remove these -- confirm against the installed version.

# step 1: create the logger
logger = trt.Logger(trt.Logger.WARNING)

# step 2 + step 3: create the builder and an empty network; the context
# managers release both objects when the block exits (step 9).
with trt.Builder(logger) as builder, builder.create_network() as network:
    # step 4: add the layers to the network
    input_tensor = network.add_input(
        name=INPUT_NAME, dtype=trt.float32, shape=INPUT_SHAPE
    )

    conv1_w = weights['conv1.weight'].numpy()
    conv1_b = weights['conv1.bias'].numpy()
    conv1 = network.add_convolution(
        input=input_tensor,
        num_output_maps=20,
        kernel_shape=(5, 5),
        kernel=conv1_w,
        bias=conv1_b,
    )
    conv1.stride = (1, 1)

    pool1 = network.add_pooling(
        input=conv1.get_output(0),
        type=trt.PoolingType.MAX,
        window_size=(2, 2),
    )
    pool1.stride = (2, 2)

    conv2_w = weights['conv2.weight'].numpy()
    conv2_b = weights['conv2.bias'].numpy()
    conv2 = network.add_convolution(
        pool1.get_output(0), 50, (5, 5), conv2_w, conv2_b
    )
    conv2.stride = (1, 1)

    pool2 = network.add_pooling(
        conv2.get_output(0), trt.PoolingType.MAX, (2, 2)
    )
    pool2.stride = (2, 2)

    fc1_w = weights['fc1.weight'].numpy()
    fc1_b = weights['fc1.bias'].numpy()
    fc1 = network.add_fully_connected(
        input=pool2.get_output(0),
        num_outputs=500,
        kernel=fc1_w,
        bias=fc1_b,
    )

    relu1 = network.add_activation(
        fc1.get_output(0), trt.ActivationType.RELU
    )

    fc2_w = weights['fc2.weight'].numpy()
    fc2_b = weights['fc2.bias'].numpy()
    fc2 = network.add_fully_connected(
        relu1.get_output(0), OUTPUT_SIZE, fc2_w, fc2_b
    )

    # step 5: name the final tensor and mark it as the network output
    fc2.get_output(0).name = OUTPUT_NAME
    network.mark_output(fc2.get_output(0))

    # step 6: create the builder config and set the build limits
    with builder.create_builder_config() as config:
        # The maximum batch size is a property of the builder, not of the
        # builder config.
        builder.max_batch_size = 32
        config.max_workspace_size = 10 << 20  # 10 MiB

        # step 7: build the engine; `build_engine` is the variant that
        # accepts a builder config.
        engine = builder.build_engine(network, config)
        if engine is None:
            # Fail with a clear message instead of an AttributeError on
            # `None.serialize()` below.
            raise RuntimeError("TensorRT failed to build the engine")

        # step 8: serialize the engine and save it to disk
        with open("sample.engine", "wb") as f:
            f.write(engine.serialize())