import tensorrt as trt
import pycuda.driver as cuda
import pycuda.autoinit
import numpy as np
import cv2
def get_engine(engine_path):
    """Load a serialized TensorRT engine from disk and deserialize it.

    Args:
        engine_path: Path to a serialized (".engine"/".trt") engine file.

    Returns:
        A deserialized ``trt.ICudaEngine`` ready for execution-context creation.
    """
    # If a serialized engine exists, use it instead of building an engine.
    print("Reading engine from file {}".format(engine_path))
    with open(engine_path, "rb") as engine_file, trt.Runtime(TRT_LOGGER) as runtime:
        serialized_engine = engine_file.read()
        return runtime.deserialize_cuda_engine(serialized_engine)
# Shared TensorRT logger used by get_engine(); defined at module level so the
# engine load below can use it. (Default severity: WARNING.)
TRT_LOGGER = trt.Logger()
def compute_sim(emb1, emb2):
    """Return the cosine similarity between two embedding vectors.

    Both inputs are flattened first, so any array shape with the same total
    number of elements is accepted. Result is in [-1, 1] for non-zero inputs.
    """
    v1 = np.ravel(emb1)
    v2 = np.ravel(emb2)
    # cosine similarity: <v1, v2> / (||v1|| * ||v2||)
    denominator = np.sqrt(np.dot(v1, v1)) * np.sqrt(np.dot(v2, v2))
    return np.dot(v1, v2) / denominator
# Load the serialized MobileFaceNet recognition engine once at import time and
# create a single execution context that every get_embedding() call reuses.
# NOTE(review): this runs on import; importing this module requires the engine
# file to exist at this relative path and a usable CUDA device.
engine = get_engine("mobilefacenet-res2-6-10-2-dim512/onnx/face_reg_mnet.engine")
context = engine.create_execution_context()
def get_embedding(img):
    """Run the face-recognition engine on one BGR image and return its embedding.

    Args:
        img: BGR image as a numpy uint8 array (OpenCV convention), any size;
             it is resized to the network's 112x112 input.

    Returns:
        1-D float32 numpy array with the raw network output (the flattened
        embedding, sized by the engine's output binding).
    """
    # Preprocess: resize to network input size, BGR -> RGB, HWC -> NCHW float32.
    resized = cv2.resize(img, (112, 112), interpolation=cv2.INTER_LINEAR)
    img_in = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)
    img_in = np.transpose(img_in, (2, 0, 1)).astype(np.float32)
    img_in = np.expand_dims(img_in, axis=0)
    # NOTE(review): no /255 normalization is applied here (the line was
    # commented out in the original) — confirm this matches how the model
    # was exported before changing it.
    img_in = np.ascontiguousarray(img_in)
    print("Shape of the network input: ", img_in.shape)
    # Allocate page-locked host buffers sized from the engine's I/O bindings
    # (binding 0 = input, binding 1 = output).
    h_input = cuda.pagelocked_empty(trt.volume(context.get_binding_shape(0)), dtype=np.float32)
    h_output = cuda.pagelocked_empty(trt.volume(context.get_binding_shape(1)), dtype=np.float32)
    # Allocate device memory for inputs and outputs.
    d_input = cuda.mem_alloc(h_input.nbytes)
    d_output = cuda.mem_alloc(h_output.nbytes)
    # Create a stream in which to copy inputs/outputs and run inference.
    stream = cuda.Stream()
    # BUG FIX: the original did `h_input = img_in`, which rebound the name and
    # threw away the pinned buffer — the async H2D copy then ran from ordinary
    # pageable memory. Copy the data INTO the pinned buffer instead.
    np.copyto(h_input, img_in.ravel())
    # Transfer input data to the GPU.
    cuda.memcpy_htod_async(d_input, h_input, stream)
    # Run inference.
    context.execute_async_v2(bindings=[int(d_input), int(d_output)], stream_handle=stream.handle)
    # Transfer predictions back from the GPU.
    cuda.memcpy_dtoh_async(h_output, d_output, stream)
    # Synchronize the stream before reading the host output.
    stream.synchronize()
    # Return the host output.
    return h_output
# Demo driver: embed two face crops and print their cosine similarity.
# cv2.imread returns None (no exception) when a file is missing or unreadable,
# which would otherwise surface as an opaque cv2.resize failure deep inside
# get_embedding — fail fast with a clear error instead.
img1 = cv2.imread("./s_117.jpg")
if img1 is None:
    raise FileNotFoundError("Could not read image: ./s_117.jpg")
emb1 = get_embedding(img1)
img2 = cv2.imread("./s_115.jpg")
if img2 is None:
    raise FileNotFoundError("Could not read image: ./s_115.jpg")
emb2 = get_embedding(img2)
print(compute_sim(emb1, emb2))