import tensorrt as trt

# Location of the engine file on disk.
model_path = "./model.engine"
verbose = True

# Names of the network's input and output nodes.
IN_NAME = 'input'
OUT_NAME = 'output'

# Input height / width and batch size.
IN_H = 512
IN_W = 512
BATCH_SIZE = 1

# Bit mask with the bit for the EXPLICIT_BATCH network-creation flag set.
EXPLICIT_BATCH = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
import tensorrt as trt #必须安装
import pycuda.driver as cuda #必须安装
import pycuda.autoinit
import numpy as np
import time
import cv2
import os
from tensorflow import keras
import threading
# 加载TRT引擎
class OxfordPets1(keras.utils.Sequence):
    """Keras ``Sequence`` that serves batches of images stored in ``.npz`` files.

    Every file is opened with ``np.load`` and its ``'arr_0'`` entry is copied
    into one slot of a float32 batch of shape ``(batch_size, *img_size, 3)``.
    """

    def __init__(self, batch_size, img_size, input_img_paths):
        """Store the batching configuration.

        Args:
            batch_size: Number of images per batch.
            img_size: Tuple with the leading dimensions of one image,
                e.g. ``(512, 512)``; it is concatenated with ``(3,)``.
            input_img_paths: Sliceable sequence of paths to the ``.npz`` files.
        """
        # Newer Keras releases expect the base-class constructor to be called;
        # on older releases this is a harmless no-op.
        super().__init__()
        self.batch_size = batch_size
        self.img_size = img_size
        self.input_img_paths = input_img_paths

    def __len__(self):
        """Return the number of complete batches (a trailing partial batch is dropped)."""
        return len(self.input_img_paths) // self.batch_size

    def __getitem__(self, idx):
        """Return batch number ``idx`` as a float32 array.

        Args:
            idx: Zero-based batch index.

        Returns:
            Array of shape ``(batch_size, *img_size, 3)``. Slots without a
            corresponding path (slice shorter than ``batch_size``) stay zero.
        """
        start = idx * self.batch_size
        batch_paths = self.input_img_paths[start:start + self.batch_size]
        x = np.zeros((self.batch_size,) + self.img_size + (3,), dtype="float32")
        for j, path in enumerate(batch_paths):
            # Use the archive as a context manager so its file handle is
            # closed right after the array has been read.
            with np.load(path) as archive:
                x[j] = archive['arr_0']
        return x

    def on_epoch_end(self):
        """Rebuild ``self.indexes`` as ``0 .. len(input_img_paths) - 1``."""
        self.indexes = np.arange(len(self.input_img_paths))
# ---------------------------------------------------------------------------
# Load the serialized TensorRT engine and create an execution context.
# ---------------------------------------------------------------------------
with open(model_path, 'rb') as f:
    engine_data = f.read()

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
trt.init_libnvinfer_plugins(TRT_LOGGER, '')
runtime = trt.Runtime(TRT_LOGGER)
engine = runtime.deserialize_cuda_engine(engine_data)
if engine is None:
    # Fail here with a clear message instead of an AttributeError on the
    # next line when the engine could not be deserialized.
    raise RuntimeError(f"Failed to deserialize TensorRT engine from {model_path!r}")

context = engine.create_execution_context()

# Shapes used to size the host/device buffers (float32 elements).
# NOTE(review): the loader yields channels-last batches (N, 512, 512, 3) and the
# data is only reshaped (not transposed) to `input_shape`, so the bytes sent to
# the GPU keep their channels-last order. Confirm the layout the engine expects.
input_shape = (1, 3, 512, 512)
output_shape = (1, 3, 512, 512)

# ---------------------------------------------------------------------------
# Collect and load the input frames.
# ---------------------------------------------------------------------------
input_dir = "./Data/Inference_data/SZJ1-19_20231024/11101"
input_img_paths = sorted(
    os.path.join(input_dir, fname)
    for fname in os.listdir(input_dir)
    if fname.endswith(".npz")
)
val_input_img_paths = input_img_paths[:]

data = OxfordPets1(1, (512, 512), val_input_img_paths)
data = np.array(data, dtype='float32')
print(data.shape)

# Derive the frame count from what was actually loaded instead of assuming
# that the directory holds exactly 1200 files.
num_frames = data.shape[0]
data = data.reshape((num_frames,) + input_shape[1:])

out_position = './result3/11101'
os.makedirs(out_position, exist_ok=True)

T1 = time.time()

# Allocate the host output array, device buffers and CUDA stream once; they
# are the same size for every frame, so they are reused across iterations.
output_data = np.empty(output_shape, dtype=np.float32)
input_nbytes = int(np.prod(input_shape)) * np.dtype(np.float32).itemsize
d_input = cuda.mem_alloc(input_nbytes)
d_output = cuda.mem_alloc(output_data.nbytes)
stream = cuda.Stream()
bindings = [int(d_input), int(d_output)]
stream_handle = stream.handle

for index in range(num_frames):
    input_data = np.ascontiguousarray(
        data[index].reshape(input_shape), dtype=np.float32
    )
    # Host -> device copy, inference, device -> host copy, all on one stream.
    cuda.memcpy_htod_async(d_input, input_data, stream)
    context.execute_async_v2(bindings=bindings, stream_handle=stream_handle)
    cuda.memcpy_dtoh_async(output_data, d_output, stream)
    # Wait until the output has landed in host memory before using it.
    stream.synchronize()

    # Only index 1 of the last axis is saved ('_ine'); the script previously
    # carried commented-out writes for index 0 ('_bac') and index 2 ('_cen').
    # NOTE(review): with output_shape == (1, 3, 512, 512) this slice has shape
    # (3, 512). If the engine output is channels-last, output_shape should be
    # (1, 512, 512, 3) -- confirm against the engine's binding shapes.
    cv2.imwrite(f"{out_position}/11101_{index + 1}_ine.tiff", output_data[0, :, :, 1])

T2 = time.time()
print('程序运行时间:%s秒' % ((T2 - T1)))
# Print the shape of the output buffer.
print((output_data.shape))
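# --- Residue from the web page this script was copied from (not code) ---
# 07-03
# 10-19
# 7853
# 09-23
# 2122
# 10-09
# “相关推荐”对你有帮助么?
# -
# 非常没帮助
# -
# 没帮助
# -
# 一般
# -
# 有帮助
# -
# 非常有帮助
# 提交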