Python实现RT-DETR的Tensorrt模型推理

魔障阿Q
于 2024-06-29 21:37:26 发布
阅读量150
点赞数 5
文章标签：python 计算机视觉 深度学习 人工智能
本文链接：https://blog.csdn.net/qq_44908396/article/details/140070991
版权
'''
@Author  ：***
@Date    ：2023/10/29 13:29 
'''
import tensorrt as trt
import pycuda.driver as cuda
import pycuda.autoinit
import random
import os
import numpy as np
import cv2
import time


# Seed the global generator so the palette below is the same on every run.
random.seed(3)
# 80 colour triples; the three channel values of a triple are drawn one after
# another before moving on to the next triple.
CLASS_COLORS = [
    [random.randint(0, 255), random.randint(0, 255), random.randint(0, 255)]
    for _ in range(80)
]


class HostDeviceMem(object):
    """Pair a host-side buffer with its device-side counterpart."""

    def __init__(self, host_mem, device_mem):
        # Both handles are stored as given; nothing is copied or validated.
        self.device = device_mem
        self.host = host_mem

    def __repr__(self):
        # The debug representation is whatever __str__ produces.
        return self.__str__()

    def __str__(self):
        # Layout: "Host:", the host buffer, "Device:", the device buffer,
        # each on its own line.
        parts = ("Host:", str(self.host), "Device:", str(self.device))
        return "\n".join(parts)

def alloc_buf_N(engine,data):
    """
    Allocate a host/device buffer pair for every binding of ``engine``.

    Each input binding gets a host buffer whose element count is the product
    of the first four axes of ``data``, typed after that binding's dtype.
    Each output binding gets ``engine.get_binding_shape(4)[0] * 6`` elements,
    typed after the first input binding encountered.

    NOTE(review): binding index 4 and the factor 6 are hard-coded, and outputs
    reuse an input dtype -- presumably this matches one specific exported
    engine; confirm before using another model.

    :param engine: deserialized engine; iterated to enumerate its bindings
    :param data: array whose ``shape[0]`` .. ``shape[3]`` size the input buffers
    :return: ``(inputs, outputs, bindings, stream)`` -- two lists of
        ``HostDeviceMem``, the device addresses as ints in iteration order,
        and a newly created ``cuda.Stream``
    """
    stream = cuda.Stream()
    inputs, outputs, bindings = [], [], []
    input_dtypes = []
    for binding in engine:
        if not engine.binding_is_input(binding):
            # Output binding: sized from binding 4's first axis times 6.
            element_count = engine.get_binding_shape(4)[0] * 6
            print(engine.get_binding_shape(4))
            host_buf = cuda.pagelocked_empty(element_count, input_dtypes[0])
            device_buf = cuda.mem_alloc(host_buf.nbytes)
            bindings.append(int(device_buf))
            outputs.append(HostDeviceMem(host_buf, device_buf))
            continue

        # Input binding: sized from the first four axes of the data array.
        dims = data.shape
        element_count = dims[0] * dims[1] * dims[2] * dims[3]
        np_dtype = trt.nptype(engine.get_binding_dtype(binding))
        input_dtypes.append(np_dtype)
        # Host side first, then a device allocation of the same byte size.
        host_buf = cuda.pagelocked_empty(element_count, np_dtype)
        device_buf = cuda.mem_alloc(host_buf.nbytes)
        # The bindings list carries the device address as a plain int.
        bindings.append(int(device_buf))
        inputs.append(HostDeviceMem(host_buf, device_buf))
    return inputs, outputs, bindings, stream

def do_inference_v2(context, inputs, bindings, outputs, stream, data,batch_size):
    """
    Run one inference pass: upload inputs, execute, download outputs.

    :param context: execution context created from the engine
    :param inputs: ``HostDeviceMem`` items whose ``host`` is copied to ``device``
    :param bindings: device addresses handed to ``execute_async``
    :param outputs: ``HostDeviceMem`` items whose ``device`` is copied to ``host``
    :param stream: stream on which the copies and the execution are queued
    :param data: input array; only its ``shape`` is used, to set binding 0
    :param batch_size: forwarded to ``execute_async``
    :return: list with the ``host`` buffer of every item in ``outputs``
    """
    # Queue the host-to-device copy of every input.
    for in_buf in inputs:
        cuda.memcpy_htod_async(in_buf.device, in_buf.host, stream)

    # Declare the shape of binding 0 before launching.
    context.set_binding_shape(0, data.shape)
    context.execute_async(
        batch_size=batch_size,
        bindings=bindings,
        stream_handle=stream.handle,
    )

    # Queue the device-to-host copy of every output.
    for out_buf in outputs:
        cuda.memcpy_dtoh_async(out_buf.host, out_buf.device, stream)

    # Wait for the work queued on the stream so the host buffers can be read.
    stream.synchronize()

    host_results = []
    for out_buf in outputs:
        host_results.append(out_buf.host)
    return host_results

# Module-level TensorRT logger at INFO severity. Nothing in the visible code
# reads it; load_engine builds its own ERROR-level logger instead.
trt_logger = trt.Logger(trt.Logger.INFO)

def load_engine(engine_path):
    """
    Deserialize an engine from a file on disk.

    :param engine_path: path of the serialized engine file, opened in binary mode
    :return: the result of ``runtime.deserialize_cuda_engine`` for the file's bytes
    """
    # A dedicated logger that only reports errors.
    error_only_logger = trt.Logger(trt.Logger.ERROR)
    with open(engine_path, 'rb') as engine_file:
        with trt.Runtime(error_only_logger) as runtime:
            serialized = engine_file.read()
            return runtime.deserialize_cuda_engine(serialized)

def bbox_cxcywh_to_xyxy(x):
    """
    Convert boxes from (cx, cy, w, h) to (x1, y1, x2, y2) along the last axis.

    Not called anywhere in the visible code.

    :param x: array whose last axis holds centre-x, centre-y, width, height
    :return: new array with the shape and dtype of ``x`` holding the corners
    """
    centers = x[..., :2]
    half_extent = 0.5 * x[..., 2:]
    # zeros_like keeps the dtype of x, so the results are cast on assignment.
    corners = np.zeros_like(x)
    corners[..., :2] = centers - half_extent
    corners[..., 2:] = centers + half_extent
    return corners

def getFileList(dir, Filelist, ext=None):
    """
    Collect the files under ``dir`` (recursively) into ``Filelist``.

    The extension filter is compared against the end of the file name,
    ignoring case and an optional leading dot, so ``"png"``, ``".png"`` and
    ``"jpeg"`` all work. Directory entries are visited in sorted order so the
    resulting list is the same on every run.

    :param dir: a file path or a directory to walk
    :param Filelist: list that matching paths are appended to (mutated in place)
    :param ext: extension to keep; ``None`` or an empty string keeps every file
    :return: the same ``Filelist`` object that was passed in
    """
    if os.path.isfile(dir):
        if not ext:
            Filelist.append(dir)
        else:
            wanted_suffix = "." + ext.lstrip(".").lower()
            if os.path.basename(dir).lower().endswith(wanted_suffix):
                Filelist.append(dir)
    elif os.path.isdir(dir):
        # os.listdir gives no ordering guarantee; sort for reproducibility.
        for entry in sorted(os.listdir(dir)):
            getFileList(os.path.join(dir, entry), Filelist, ext)

    return Filelist

def result_devide_for_batch(output,batch_size):
    """
    Split a flat result sequence into per-image groups of 6-value rows.

    The sequence is first cut into ``batch_size`` consecutive chunks of
    ``len(output) / batch_size`` values, then every chunk is cut into
    consecutive slices of 6; leftover values at the end of a chunk are
    dropped. For example a 7200-value result with batch size 4 becomes
    4 groups of 300 rows.

    :param output: flat sequence returned by inference (sliceable)
    :param batch_size: number of chunks to produce
    :return: list of ``batch_size`` lists, each holding 6-value slices
    """
    per_image = len(output) / batch_size
    grouped = []
    for image_index in range(batch_size):
        lo = int(image_index * per_image)
        hi = int((image_index + 1) * per_image)
        image_values = output[lo:hi]
        row_count = int(len(image_values) / 6)
        grouped.append(
            [image_values[6 * row:6 * (row + 1)] for row in range(row_count)]
        )
    return grouped

def deal_rect_for_img(list_2,labels_num,conf_thr):
    """
    Keep the first sufficiently confident row for each distinct label id.

    Rows are scanned in order; index 0 of a row is its label id and index 1
    its confidence. A row is kept when its label id has not been kept yet and
    its confidence is at least ``conf_thr``. Scanning stops as soon as at
    least one row has been kept and the kept count has reached ``labels_num``.

    :param list_2: sequence of rows for one image
    :param labels_num: upper bound on the number of kept rows (the very first
        kept row is accepted regardless of this bound)
    :param conf_thr: minimum confidence for a row to be kept
    :return: list of the kept rows, in scan order
    """
    kept_rows = []
    seen_labels = []
    for row in list_2:
        if kept_rows and len(kept_rows) >= labels_num:
            break
        # Confidence is only inspected for labels that were not kept before.
        if row[0] not in seen_labels and row[1] >= conf_thr:
            kept_rows.append(row)
            seen_labels.append(row[0])
    return kept_rows

def img_loader(img_path, input_h, input_w, batch_size, img_format):
    """
    Group the images under ``img_path`` into fixed-size batches of metadata.

    Every image is opened once to learn its height and width; no pixel data
    is kept or returned. Images that ``cv2.imread`` cannot read are reported
    and skipped. When the number of readable images is not a multiple of
    ``batch_size`` the last batch is padded by repeating its final image.

    :param img_path: file or directory searched with ``getFileList``
    :param input_h: model input height
    :param input_w: model input width
    :param batch_size: number of images per batch; must be positive
    :param img_format: file extension handed to ``getFileList`` (e.g. png, jpg)
    :return: ``(paths, im_shapes, scale_factors)`` where ``paths`` is a list of
        per-batch path lists, ``im_shapes`` is a float32 array of
        ``[input_h, input_w]`` per image, and ``scale_factors`` is a float32
        array of ``[input_h / image_h, input_w / image_w]`` per image; both
        arrays are grouped by batch in the same order as ``paths``
    :raises ValueError: if ``batch_size`` is not positive
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be positive, got {}".format(batch_size))

    img_path_list = getFileList(img_path, [], img_format)
    # The same network input size is reported for every image.
    im_shape = np.array([float(input_h), float(input_w)]).astype('float32')

    records = []
    for src_path in img_path_list:
        img = cv2.imread(src_path)
        if img is None:
            # imread signals failure by returning None rather than raising.
            print("无法读取图像，已跳过：", src_path)
            continue
        image_h, image_w = img.shape[:2]
        scale_factor = np.array([input_h / image_h, input_w / image_w]).astype('float32')
        records.append((src_path, scale_factor))

    res_img_path = []
    res_im_shape = []
    res_scale_factor = []
    for first in range(0, len(records), batch_size):
        batch = records[first:first + batch_size]
        # Pad a short final batch with copies of its last entry.
        batch = batch + [batch[-1]] * (batch_size - len(batch))
        res_img_path.append([path for path, _ in batch])
        res_im_shape.append(np.array([im_shape] * batch_size).astype("float32"))
        res_scale_factor.append(np.array([scale for _, scale in batch]).astype("float32"))

    return res_img_path,np.array(res_im_shape).astype("float32"),np.array(res_scale_factor).astype("float32")

def load_img_based_img_path(res_img_path_batch,input_h,input_w):
    """
    Load one batch of images and prepare them for the model.

    Each image is resized to ``input_w`` x ``input_h``, its channel order is
    reversed, it is scaled by 1/255 and moved to channel-first layout. The
    untouched originals are returned as float32 as well. Prints the time
    spent loading.

    :param res_img_path_batch: sequence of image paths forming one batch
    :param input_h: model input height
    :param input_w: model input width
    :return: ``(model_batch, originals)``; ``model_batch`` is a float32 array
        stacking the preprocessed images. ``originals`` is a stacked float32
        array when all originals share one shape, otherwise a 1-D object array
        holding one float32 image per element (stacking is impossible then).
    :raises ValueError: if an image cannot be read
    """
    res_img_loader = []
    res_img = []
    start = time.time()
    for src_path in res_img_path_batch:
        img = cv2.imread(src_path)
        if img is None:
            # imread returns None on failure; fail here with the offending path.
            raise ValueError("cv2.imread could not read image: {}".format(src_path))
        res_img.append(img.astype("float32"))
        image_h, image_w = img.shape[:2]
        ratio_h = input_h / image_h
        ratio_w = input_w / image_w
        # interpolation=2 is cv2.INTER_CUBIC.
        resized = cv2.resize(img, (0, 0), fx=ratio_w, fy=ratio_h, interpolation=2)
        normalized = resized[:, :, ::-1] / 255.0
        res_img_loader.append(normalized.transpose(2, 0, 1))
    end = time.time()
    print("加载图像：",end-start)

    model_batch = np.array(res_img_loader).astype("float32")
    if len(set(image.shape for image in res_img)) <= 1:
        originals = np.array(res_img).astype("float32")
    else:
        # Differently sized originals cannot form one rectangular array.
        originals = np.empty(len(res_img), dtype=object)
        for index, image in enumerate(res_img):
            originals[index] = image
    return model_batch, originals


def infer(engine_path,img_path,out_path,num_rect,range_list,conf_thr,CLASS_NAMES,img_format="png"):
    """
    Run the engine over a folder of images and save annotated copies.

    For every batch the images are loaded, pushed through the engine and the
    second output is split into rows of
    ``[label_id, confidence, x1, y1, x2, y2]``. Up to ``num_rect`` rows per
    image with confidence of at least ``conf_thr`` are drawn. The annotated
    image is then written once per kept label into
    ``out_path/<class name>/<confidence bucket>/<name>_res.png``.

    The three engine inputs are filled, in order, with the im_shape array,
    the flattened image batch and the scale_factor array.
    NOTE(review): this input order and the use of binding 1 as the image
    input are taken from the indices used below; confirm against the engine.

    :param engine_path: path of the serialized engine
    :param img_path: folder (or file) with the images to process
    :param out_path: root folder for results; created when missing
    :param num_rect: maximum number of rows examined and drawn per image
    :param range_list: folder names of the confidence buckets, indexed by
        ``int(10 * confidence)`` and clamped to the last entry
    :param conf_thr: rows below this confidence are not drawn or saved
    :param CLASS_NAMES: class names indexed by label id
    :param img_format: image file extension to look for, e.g. png or jpg
    :return: None
    """
    engine = load_engine(engine_path)
    context = engine.create_execution_context()
    image_binding_shape = engine.get_binding_shape(1)
    batch_size = image_binding_shape[0]
    input_h = image_binding_shape[2]
    input_w = image_binding_shape[3]
    print(batch_size,input_h,input_w)
    print("数据加载中，请稍后")
    res_img_path,res_im_shape, res_scale_factor = img_loader(img_path,input_h,input_w,batch_size,img_format)
    print("数据加载完成，开始推理")
    for i in range(0,len(res_img_path)):
        res_img_loader ,res_img = load_img_based_img_path(res_img_path[i],input_h,input_w)
        res_img_loader = np.ascontiguousarray(res_img_loader, dtype=np.float32)
        inputs_alloc_buf, outputs_alloc_buf, bindings_alloc_buf, stream_alloc_buf = alloc_buf_N(engine, res_img_loader)
        # Replace the freshly allocated host buffers with the real batch data.
        inputs_alloc_buf[0].host = res_im_shape[i]
        inputs_alloc_buf[1].host = res_img_loader.reshape(-1)
        inputs_alloc_buf[2].host = res_scale_factor[i]
        start = time.time()
        net_output = do_inference_v2(context, inputs_alloc_buf, bindings_alloc_buf, outputs_alloc_buf, stream_alloc_buf,res_img_loader,batch_size)
        end = time.time()
        print("推理耗时：",end-start)
        print("----------------------------------------------------------------- ----------------------------------------")
        res = result_devide_for_batch(net_output[1],batch_size)
        for e in range(0,len(res)):
            detections = res[e]
            canvas = res_img[e]
            # basename copes with both separators; splitting on a backslash
            # alone leaves the whole path in place on non-Windows systems.
            file_name = os.path.basename(res_img_path[i][e])
            res_cut = deal_rect_for_img(detections,len(CLASS_NAMES),conf_thr)

            # Never index past the rows that are actually available.
            for r in range(0, min(num_rect, len(detections))):
                row = detections[r]
                if row[1]<conf_thr:
                    print(row[1], res_img_path[i][e])
                    continue
                x1, y1, x2, y2, label_id = int(row[2]), int(row[3]), int(row[4]), int(row[5]), int(row[0])
                label = CLASS_NAMES[int(label_id)]
                red,green,blue = random.randint(64,192),random.randint(64,192),random.randint(64,192)
                cv2.rectangle(canvas, (x1, y1), (x2, y2), (red, green, blue), 2)
                # Draw each offset once so the value tested is the value used.
                x_offset = random.randint(1,10)
                y_offset = random.randint(1,10)
                x_change = x1 - x_offset if x1 - x_offset > 0 else x1
                y_change = y1 - y_offset if y1 - y_offset > 0 else y1
                cv2.putText(canvas, str(label) + ": " + str(row[1])[0:4], (x_change, y_change),cv2.FONT_HERSHEY_SIMPLEX, 0.5, (red, green, blue), 1)

            if len(res_cut)==0:
                # Only the folder is created; nothing is written into it here.
                os.makedirs(os.path.join(out_path,"no_labels"), exist_ok=True)
            for z in range(0,len(res_cut)):
                # A confidence of exactly 1.0 would index one past the end.
                bucket = min(max(int(10*res_cut[z][1]), 0), len(range_list) - 1)
                new_dir = os.path.join(out_path, CLASS_NAMES[int(res_cut[z][0])], range_list[bucket])
                os.makedirs(new_dir, exist_ok=True)
                cv2.imwrite(os.path.join(new_dir, file_name.split(".pn")[0] + "_res.png"), canvas)

def auto_label():
    """
    Run ``infer`` with the hard-coded settings defined in this function.

    Prints a message and returns early when the engine file does not exist
    or the image folder is not a directory. Returns None in every case.
    """
    labels = ["class1","class2","class3"]
    buckets = ["0.0-0.1","0.1-0.2","0.2-0.3","0.3-0.4","0.4-0.5","0.5-0.6","0.6-0.7","0.7-0.8","0.8-0.9","0.9-1.0"]
    # Image folder to process and folder receiving the results.
    source_dir = r"C:\Users\***\Desktop\test_pic_2"
    target_dir = r"C:\Users\***\Desktop\test_pic_out"
    # Serialized engine to load.
    model_file = r"./best_model.engine"

    # Checked in order; the first failing check ends the run.
    preflight = (
        (os.path.exists, model_file, "模型路径不存在，请检查后重试"),
        (os.path.isdir, source_dir, "图像路径不存在，请检查后重试"),
    )
    for probe, target, complaint in preflight:
        if not probe(target):
            print(complaint)
            return

    # Positional order: engine, images, output, max boxes per image,
    # bucket names, confidence threshold, class names, image extension.
    infer(model_file, source_dir, target_dir, 30, buckets, 0.01, labels, "jpg")


# Script entry point: run the pre-labelling job only when this file is
# executed directly, not when it is imported.
if __name__ == '__main__':
    auto_label()
代码存在重复读取数据的问题（img_loader 与 load_img_based_img_path 会对同一张图像各读取一次），推理速度因此有所降低，但整体不影响代码功能。
代码还存在很多待优化的地方，欢迎大家提出自己的建议。