1. Project Structure
2. Converting the ONNX Model to OpenVINO
mo --framework=onnx --data_type=FP32 --input_shape=[1,3,480,640] -m .\pruned_model\baseline_sim.onnx --output_dir .\yolox_vino_model
The Model Optimizer (mo) converts the ONNX model into the IR format supported by OpenVINO, producing an .xml topology file and a .bin weights file in ./yolox_vino_model.
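Before quantizing, it can be worth confirming that the converted IR loads and exposes the expected input shape. A minimal sketch, assuming the IR was written to ./yolox_vino_model/baseline_sim.xml as in the command above:

# Sanity check: load the converted IR and print its input/output shapes.
from openvino.runtime import Core

ie = Core()
model = ie.read_model(model="./yolox_vino_model/baseline_sim.xml")
for inp in model.inputs:
    print("input: ", inp.get_any_name(), inp.get_partial_shape())   # expect [1,3,480,640]
for out in model.outputs:
    print("output:", out.get_any_name(), out.get_partial_shape())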
3. Quantization with OpenVINO
3.1 Importing Dependencies
# %% Import dependencies
import copy
import os
import time
import cv2
import torch
import numpy as np
from openvino.tools.pot.api import DataLoader, Metric
from openvino.tools.pot.engines.ie_engine import IEEngine
from openvino.tools.pot.graph import load_model, save_model
from openvino.tools.pot.graph.model_utils import compress_model_weights
from openvino.tools.pot.pipeline.initializer import create_pipeline
from PIL import Image
from openvino.runtime import Core
import addict
from pathlib import Path
from torchmetrics.detection.mean_ap import MeanAveragePrecision
3.2 Defining Utility Functions
# %% Define pre- and post-processing helpers
def sigmoid(x):
    return 1 / (1 + np.exp(-x))


def desigmoid(y):
    # inverse of the sigmoid function (logit)
    return -np.log(1 / y - 1)


def preprocess_input(image):
    # ImageNet normalization: scale to [0, 1], subtract mean, divide by std
    image /= 255.0
    image -= np.array([0.485, 0.456, 0.406])
    image /= np.array([0.229, 0.224, 0.225])
    return image


# detection threshold kept in logit space (see the note below)
conf_threshold = desigmoid(0.8)
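Note that conf_threshold is stored in logit space: because sigmoid is monotonic, comparing a raw objectness output against desigmoid(0.8) gives the same decision as checking sigmoid(objectness) > 0.8, while skipping the per-cell sigmoid call. A quick illustrative check (the logit value below is arbitrary and not part of the pipeline):

x = 1.7  # arbitrary objectness logit, for illustration only
print(sigmoid(x) > 0.8)        # decision in probability space
print(x > desigmoid(0.8))      # same decision in logit space, no sigmoid needed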
3.3 Defining the Calibration Data Loader for Quantization
# %% Define the data loader class
class YOLOXDataLoader(DataLoader):
    """
    Inherit from the POT DataLoader class and implement it for YOLOX.
    """
    def __init__(self, annotation_lines, input_shape, num_classes):
        # super(YOLOXDataLoader, self).__init__()
        self.annotation_lines = annotation_lines
        self.length = len(self.annotation_lines)
        self.input_shape = input_shape
        self.num_classes = num_classes
        self.step_now = -1

    def __len__(self):
        return self.length

    def __getitem__(self, index):
        index = index % self.length
        self.step_now += 1
        print(self.step_now)
        # ---------------------------------------------------#
        #   Random augmentation is applied during training;
        #   no augmentation is applied during validation.
        # ---------------------------------------------------#
        image, box = self.get_random_data(self.annotation_lines[index], self.input_shape)
        image = np.transpose(preprocess_input(np.array(image, dtype=np.float32)), (2, 0, 1))
        image = np.expand_dims(image, axis=0).astype(np.float32)
        box = np.array(box, dtype=np.float32)
        if len(box) != 0:
            # convert [x1, y1, x2, y2] to [cx, cy, w, h]
            box[:, 2:4] = box[:, 2:4] - box[:, 0:2]
            box[:, 0:2] = box[:, 0:2] + box[:, 2:4] / 2
        return image, box

    def rand(self, a=0, b=1):
        return np.random.rand() * (b - a) + a

    def get_random_data(self, annotation_line, input_shape):
        line = annotation_line.strip().split(' ')
        # ------------------------------#
        #   Read the image (RGB)
        # ------------------------------#
        image = Image.open(line[0])
        # image = cvtColor(image)
        # ------------------------------#
        #   Image size and target input size
        # ------------------------------#
        iw, ih = image.size
        h, w = input_shape
        # ------------------------------#
        #   Parse the ground-truth boxes
        # ------------------------------#
        # boxes = line[1].split(' ')
        boxes = line[1:]
        if len(boxes) == 0:
            pass  # no ground-truth boxes for this image
        box_info = []
        for box in boxes:
            try:
                if box != '':
                    info = list(map(float, box.split(',')))
                    box_info.append(info)
            except ValueError:
                continue  # skip malformed box entries
        box = np.array(box_info)
        scale = min(w / iw, h / ih)
        nw = int(iw * scale)
        nh = int(ih * scale)
        dx = (w - nw) // 2
        dy = (h - nh) // 2
        # ---------------------------------#
        #   Letterbox: pad the unused area with gray bars
        # ---------------------------------#
        image = image.resize((nw, nh), Image.BICUBIC)
        new_image = Image.new('RGB', (w, h), (128, 128, 128))
        new_image.paste(image, (dx, dy))
        image_data = np.array(new_image, np.float32)
        # ---------------------------------#
        #   Adjust the ground-truth boxes accordingly
        # ---------------------------------#
        if len(box) > 0:
            np.random.shuffle(box)
            box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
            box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
            box[:, 0:2][box[:, 0:2] < 0] = 0
            box[:, 2][box[:, 2] > w] = w
            box[:, 3][box[:, 3] > h] = h
            box_w = box[:, 2] - box[:, 0]
            box_h = box[:, 3] - box[:, 1]
            box = box[np.logical_and(box_w > 1, box_h > 1)]  # discard invalid boxes
        return image_data, box
3.4 Defining the Metric Class for Evaluating the Quantized Model
# %% Define the Metric class
class MAPMetric(Metric):
    def __init__(self, map_value="map"):
        """
        Mean Average Precision metric. Wraps the torchmetrics implementation, see
        https://torchmetrics.readthedocs.io/en/latest/references/modules.html#map

        :param map_value: specific metric to return. Default: "map"
            Change to one of the values in the list below to return a different value:
            ['mar_1', 'mar_10', 'mar_100', 'mar_small', 'mar_medium', 'mar_large',
             'map', 'map_50', 'map_75', 'map_small', 'map_medium', 'map_large']
            See the torchmetrics documentation for more details.
        """
        # assert (
        #     map_value
        #     in torchmetrics.detection.map.MARMetricResults.__slots__
        #     + torchmetrics.detection.map.MAPMetricResults.__slots__
        # )
        self._name = map_value
        self.metric = MeanAveragePrecision()
        super().__init__()

    @property
    def value(self):
        """
        Returns metric value for the last model output.
        Possible format: {metric_name: [metric_values_per_image]}
        """
        return {self._name: [0]}

    @property
    def avg_value(self):
        """
        Returns average metric value for all model outputs.
        Possible format: {metric_name: metric_value}
        """
        return {self._name: self.metric.compute()[self._name].item()}

    def update(self, output, target):
        """
        Convert network output and labels to the format that torchmetrics' MAP
        implementation expects, and call `metric.update()`.

        :param output: model output
        :param target: annotations for model output
        """
        targetboxes = []
        targetlabels = []
        predboxes = []
        predlabels = []
        scores = []
        # ground-truth boxes arrive as [cx, cy, w, h, class]; convert to [x1, y1, x2, y2]
        for i in range(target[0].shape[0]):
            # [tx, ty, tw, th, category] = single_target
            [tx, ty, tw, th, category] = [target[0][i, 0], target[0][i, 1], target[0][i, 2], target[0][i, 3], target[0][i, 4]]
            targetbox = [round(tx - tw / 2), round(ty - th / 2), round(tx + tw / 2), round(ty + th / 2)]
            targetboxes.append(targetbox)
            targetlabels.append(category)
        # decode the three YOLOX heads (strides 8/16/32) on the 480x640 input grid
        strides = [8, 16, 32]
        for k, single_output in enumerate(output):
            stride = strides[k]
            for i in range(int(60 / 2**k)):
                for j in range(int(80 / 2**k)):
                    isObj = single_output[0, 4, i, j]
                    if isObj > conf_threshold:
                        conf = np.max(single_output[0, 5:, i, j]) * sigmoid(isObj)
                        if conf > 0.5:
                            x = single_output[0, 0, i, j]
                            x += j
                            x *= stride
                            y = single_output[0, 1, i, j]
                            y += i
                            y *= stride
                            w = single_output[0, 2, i, j]
                            w = np.exp(w)
                            w *= stride
                            h = single_output[0, 3, i, j]
                            h = np.exp(h)
                            h *= stride
                            label = np.argmax(single_output[0, 5:, i, j])
                            predbox = [round(x - w / 2), round(y - h / 2), round(x + w / 2), round(y + h / 2)]
                            predboxes.append(predbox)
                            predlabels.append(label)
                            scores.append(conf)
        boxes_id = cv2.dnn.NMSBoxes(predboxes, scores, 0.8, 0.25)
        preds = [
            dict(
                boxes=torch.Tensor(np.array(predboxes)[boxes_id, ...]).float(),
                labels=torch.Tensor(np.array(predlabels)[boxes_id, ...]).short(),
                scores=torch.Tensor(np.array(scores)[boxes_id, ...]),
            )
        ]
        targets = [
            dict(
                boxes=torch.Tensor(targetboxes).float(),
                labels=torch.Tensor(targetlabels).short(),
            )
        ]
        self.metric.update(preds, targets)
        # return self.metric.

    def reset(self):
        """
        Resets the metric.
        """
        self.metric.reset()

    def get_attributes(self):
        """
        Returns a dictionary of metric attributes {metric_name: {attribute_name: value}}.
        Required attributes: 'direction': 'higher-better' or 'higher-worse'
                             'type': metric type
        """
        return {self._name: {"direction": "higher-better", "type": "mAP"}}
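For reference, MAPMetric only converts the decoded detections into the dict format that torchmetrics expects and delegates the actual mAP computation. A minimal, self-contained check of that format, using dummy boxes that are not part of this project:

# Illustrative only: verify the preds/targets dict format MeanAveragePrecision expects.
import torch
from torchmetrics.detection.mean_ap import MeanAveragePrecision

m = MeanAveragePrecision()
preds = [dict(boxes=torch.tensor([[10.0, 10.0, 50.0, 50.0]]),
              scores=torch.tensor([0.9]),
              labels=torch.tensor([0]))]
targets = [dict(boxes=torch.tensor([[12.0, 12.0, 48.0, 48.0]]),
                labels=torch.tensor([0]))]
m.update(preds, targets)
print(m.compute()["map"])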
3.5 Configuring the Quantization Parameters
# %% Set the POT quantization parameters
ir_path = Path("./yolox_vino_model/baseline_sim.xml")
model_config = addict.Dict(
    {
        "model_name": ir_path.stem,
        "model": ir_path,
        "weights": ir_path.with_suffix(".bin"),
    }
)
# Engine config
engine_config = addict.Dict({"device": "CPU"})
# Standard DefaultQuantization configuration. For this tutorial, stat_subset_size is effectively
# ignored because there are fewer than 300 images. For production use, 300 is recommended.
default_algorithms = [
    {
        "name": "DefaultQuantization",
        # "name": "AccuracyAwareQuantization",
        "stat_subset_size": 300,
        "params": {
            "target_device": "ANY",
            "preset": "mixed",  # Choose between "mixed" and "performance".
        },
    }
]
print(f"model_config: {model_config}")
3.6 Defining and Running the Quantization Pipeline
# %% Define and run the quantization pipeline
# Step 1: Create the data loader
annotation_path = "./2007_val.txt"
# ----------------------------------------------------------------------#
#   The validation split is created in train.py.
#   It is normal for 2007_test.txt and 2007_val.txt to be empty; they are not used for training.
#   With the current split, the validation-to-training ratio is 1:9.
# ----------------------------------------------------------------------#
val_split = 0.1
with open(annotation_path, encoding='utf-8') as f:
    lines = f.readlines()
np.random.seed(10101)
np.random.shuffle(lines)
np.random.seed(None)
data_loader = YOLOXDataLoader(annotation_lines=lines[0:2000], input_shape=[480, 640], num_classes=5)
# Step 2: Load the model.
ir_model = load_model(model_config=model_config)
original_model = copy.deepcopy(ir_model)
# Step 3: Initialize the metric. For DefaultQuantization,
# specifying a metric is optional: metric can be set to None.
metric = MAPMetric(map_value="map")
# Step 4: Initialize the engine for metric calculation and statistics collection.
engine = IEEngine(config=engine_config, data_loader=data_loader, metric=None)
# Step 5: Create the pipeline of compression algorithms.
# The default_algorithms parameter is defined in the config cell above.
pipeline = create_pipeline(default_algorithms, engine)
# Step 6: Run the pipeline to quantize the model.
algorithm_name = pipeline.algo_seq[0].name
compressed_model = pipeline.run(ir_model)
# Step 7 (optional): Compress the model weights to quantized precision
# to reduce the size of the final .bin file.
compress_model_weights(compressed_model)
# Step 8: Save the compressed model to the desired path.
# Set save_path to the directory where the compressed model should be stored.
preset = pipeline._algo_seq[0].config["preset"]
algorithm_name = pipeline.algo_seq[0].name
compressed_model_paths = save_model(
    model=compressed_model,
    save_path="./optimized_model",
    model_name=f"{ir_model.name}_{preset}_{algorithm_name}",
)
# Step 9 (optional): Evaluate the original and quantized models and print the results.
original_metric_results = pipeline.evaluate(original_model)
if original_metric_results:
    print(f"Accuracy of the original model: {next(iter(original_metric_results.values())):.5f}")
quantized_metric_results = pipeline.evaluate(compressed_model)
if quantized_metric_results:
    print(f"Accuracy of the quantized model: {next(iter(quantized_metric_results.values())):.5f}")
compressed_model_path = compressed_model_paths[0]["model"]
print("The quantized model is stored at", compressed_model_path)
3.7 Quantization Results
Based on the quantization configuration, three model-related files are generated. The weight file (.bin) shrinks to roughly a quarter of the original model's size, while the topology file (.xml) grows somewhat.
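A quick way to verify the size reduction on disk is to compare the weight files of the FP32 and quantized IRs. A minimal sketch; the quantized file name below follows the model_name pattern used in the save step of 3.6:

from pathlib import Path

fp32_bin = Path("./yolox_vino_model/baseline_sim.bin")
int8_bin = Path("./optimized_model/baseline_sim_mixed_DefaultQuantization.bin")
print(f"FP32 weights: {fp32_bin.stat().st_size / 1024**2:.2f} MiB")
print(f"INT8 weights: {int8_bin.stat().st_size / 1024**2:.2f} MiB")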
4. Model Testing
4.1 Test Code
# %% Import dependencies
import os
import time
import cv2
import torch
import numpy as np
from openvino.tools.pot.api import DataLoader, Metric
from openvino.tools.pot.engines.ie_engine import IEEngine
from openvino.tools.pot.graph import load_model, save_model
from openvino.tools.pot.graph.model_utils import compress_model_weights
from openvino.tools.pot.pipeline.initializer import create_pipeline
from PIL import Image
from openvino.runtime import Core
import torchmetrics
import addict
from pathlib import Path
from torchmetrics.detection.mean_ap import MeanAveragePrecision
# %% Load the network in Inference Engine
ie = Core()
# model_ir = ie.read_model(model="./yolox_vino_model/baseline.xml")
# model_ir = ie.read_model(model="./yolox_vino_model/prune_iter2.xml")
model_ir = ie.read_model(model="./optimized_model/baseline_sim_mixed_DefaultQuantization.xml")
compiled_model_ir = ie.compile_model(model=model_ir, device_name="CPU")
inputs_names = compiled_model_ir.inputs
outputs_names = compiled_model_ir.outputs
print(inputs_names)
print(outputs_names)
class MAPMetric(Metric):
    def __init__(self, map_value="map"):
        """
        Mean Average Precision metric. Wraps the torchmetrics implementation, see
        https://torchmetrics.readthedocs.io/en/latest/references/modules.html#map

        :param map_value: specific metric to return. Default: "map"
            Change to one of the values in the list below to return a different value:
            ['mar_1', 'mar_10', 'mar_100', 'mar_small', 'mar_medium', 'mar_large',
             'map', 'map_50', 'map_75', 'map_small', 'map_medium', 'map_large']
            See the torchmetrics documentation for more details.
        """
        # assert (
        #     map_value
        #     in torchmetrics.detection.map.MARMetricResults.__slots__
        #     + torchmetrics.detection.map.MAPMetricResults.__slots__
        # )
        self._name = map_value
        self.metric = MeanAveragePrecision()
        super().__init__()

    @property
    def value(self):
        """
        Returns metric value for the last model output.
        Possible format: {metric_name: [metric_values_per_image]}
        """
        return {self._name: [0]}

    @property
    def avg_value(self):
        """
        Returns average metric value for all model outputs.
        Possible format: {metric_name: metric_value}
        """
        return {self._name: self.metric.compute()[self._name].item()}

    def update(self, output, target):
        """
        Convert network output and labels to the format that torchmetrics' MAP
        implementation expects, and call `metric.update()`.

        :param output: model output
        :param target: annotations for model output
        """
        targetboxes = []
        targetlabels = []
        predboxes = []
        predlabels = []
        scores = []
        # image_width = target[0][3]
        # image_height = target[0][2]
        # ground-truth boxes arrive as [cx, cy, w, h, class]; convert to [x1, y1, x2, y2]
        for i in range(target.shape[0]):
            [tx, ty, tw, th, category] = [target[i, 0], target[i, 1], target[i, 2], target[i, 3], target[i, 4]]
            targetbox = [round(tx - tw / 2), round(ty - th / 2), round(tx + tw / 2), round(ty + th / 2)]
            targetboxes.append(targetbox)
            targetlabels.append(category)
        # decode the three YOLOX heads (strides 8/16/32) on the 480x640 input grid
        strides = [8, 16, 32]
        for k, single_output in enumerate(output):
            stride = strides[k]
            for i in range(int(60 / 2**k)):
                for j in range(int(80 / 2**k)):
                    isObj = single_output[0, 4, i, j]
                    if isObj > conf_threshold:
                        conf = np.max(single_output[0, 5:, i, j]) * sigmoid(isObj)
                        if conf > 0.5:
                            x = single_output[0, 0, i, j]
                            x += j
                            x *= stride
                            y = single_output[0, 1, i, j]
                            y += i
                            y *= stride
                            w = single_output[0, 2, i, j]
                            w = np.exp(w)
                            w *= stride
                            h = single_output[0, 3, i, j]
                            h = np.exp(h)
                            h *= stride
                            label = np.argmax(single_output[0, 5:, i, j])
                            predbox = [round(x - w / 2), round(y - h / 2), round(x + w / 2), round(y + h / 2)]
                            predboxes.append(predbox)
                            predlabels.append(label)
                            scores.append(conf)
        boxes_id = cv2.dnn.NMSBoxes(predboxes, scores, 0.5, 0.25)
        # print(boxes_id)
        # for pred in single_output[0, 0, ::]:
        #     image_id, label, conf, xmin, ymin, xmax, ymax = pred
        #     xmin *= image_width
        #     xmax *= image_width
        #     ymin *= image_height
        #     ymax *= image_height
        preds = [
            dict(
                boxes=torch.Tensor(np.array(predboxes)[boxes_id, ...]).float(),
                labels=torch.Tensor(np.array(predlabels)[boxes_id, ...]).short(),
                scores=torch.Tensor(np.array(scores)[boxes_id, ...]),
            )
        ]
        targets = [
            dict(
                boxes=torch.Tensor(targetboxes).float(),
                labels=torch.Tensor(targetlabels).short(),
            )
        ]
        self.metric.update(preds, targets)

    def reset(self):
        """
        Resets the metric.
        """
        self.metric.reset()

    def get_attributes(self):
        """
        Returns a dictionary of metric attributes {metric_name: {attribute_name: value}}.
        Required attributes: 'direction': 'higher-better' or 'higher-worse'
                             'type': metric type
        """
        return {self._name: {"direction": "higher-better", "type": "mAP"}}
class YOLOXDataLoader(DataLoader):
    """
    Inherit from the POT DataLoader class and implement it for YOLOX.
    """
    def __init__(self, annotation_lines, input_shape, num_classes):
        # super(YOLOXDataLoader, self).__init__()
        self.annotation_lines = annotation_lines
        self.length = len(self.annotation_lines)
        self.input_shape = input_shape
        self.num_classes = num_classes
        self.step_now = -1

    def __len__(self):
        return self.length

    def __getitem__(self, index):
        index = index % self.length
        self.step_now += 1
        # print(self.step_now)
        # ---------------------------------------------------#
        #   Random augmentation is applied during training;
        #   no augmentation is applied during validation.
        # ---------------------------------------------------#
        image, box = self.get_random_data(self.annotation_lines[index], self.input_shape)
        image = np.transpose(preprocess_input(np.array(image, dtype=np.float32)), (2, 0, 1))
        image = np.expand_dims(image, axis=0).astype(np.float32)
        box = np.array(box, dtype=np.float32)
        if len(box) != 0:
            # convert [x1, y1, x2, y2] to [cx, cy, w, h]
            box[:, 2:4] = box[:, 2:4] - box[:, 0:2]
            box[:, 0:2] = box[:, 0:2] + box[:, 2:4] / 2
        return image, box

    def rand(self, a=0, b=1):
        return np.random.rand() * (b - a) + a

    def get_random_data(self, annotation_line, input_shape):
        line = annotation_line.strip().split(' ')
        # ------------------------------#
        #   Read the image (RGB)
        # ------------------------------#
        image = Image.open(line[0])
        # image = cvtColor(image)
        # ------------------------------#
        #   Image size and target input size
        # ------------------------------#
        iw, ih = image.size
        h, w = input_shape
        # ------------------------------#
        #   Parse the ground-truth boxes
        # ------------------------------#
        # boxes = line[1].split(' ')
        boxes = line[1:]
        if len(boxes) == 0:
            pass  # no ground-truth boxes for this image
        box_info = []
        for box in boxes:
            try:
                if box != '':
                    info = list(map(float, box.split(',')))
                    box_info.append(info)
            except ValueError:
                continue  # skip malformed box entries
        # box = np.array([np.array(list(map(float, box.split(',')))) for box in line[1:]])  # drop the leading image path
        box = np.array(box_info)
        scale = min(w / iw, h / ih)
        nw = int(iw * scale)
        nh = int(ih * scale)
        dx = (w - nw) // 2
        dy = (h - nh) // 2
        # ---------------------------------#
        #   Letterbox: pad the unused area with gray bars
        # ---------------------------------#
        image = image.resize((nw, nh), Image.BICUBIC)
        new_image = Image.new('RGB', (w, h), (128, 128, 128))
        new_image.paste(image, (dx, dy))
        image_data = np.array(new_image, np.float32)
        # ---------------------------------#
        #   Adjust the ground-truth boxes accordingly
        # ---------------------------------#
        if len(box) > 0:
            np.random.shuffle(box)
            box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
            box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
            box[:, 0:2][box[:, 0:2] < 0] = 0
            box[:, 2][box[:, 2] > w] = w
            box[:, 3][box[:, 3] > h] = h
            box_w = box[:, 2] - box[:, 0]
            box_h = box[:, 3] - box[:, 1]
            box = box[np.logical_and(box_w > 1, box_h > 1)]  # discard invalid boxes
        return image_data, box


def preprocess_input(image):
    # ImageNet normalization: scale to [0, 1], subtract mean, divide by std
    image /= 255.0
    image -= np.array([0.485, 0.456, 0.406])
    image /= np.array([0.229, 0.224, 0.225])
    return image


def de_reprocess_input(image):
    # invert preprocess_input so the image can be displayed again
    img = image * np.array([0.229, 0.224, 0.225])
    img += np.array([[0.485, 0.456, 0.406]])
    img *= 255.0
    return img


def sigmoid(x):
    if x >= 0:  # numerically stable sigmoid: avoids overflow in exp for large |x|
        return 1.0 / (1 + np.exp(-x))
    else:
        return np.exp(x) / (1 + np.exp(x))


def desigmoid(y):
    return -np.log(1 / y - 1)
# %% Run inference
path = r"D:\WorkSpace\graduate-student\study\graduation_study\codes\3-yolox-quantization\openvino-int8-quantization\dataset\images"
files = os.listdir(path)
np.random.seed(1000)
np.random.shuffle(files)
np.random.seed(None)
conf_threshold = desigmoid(0.5)
print(conf_threshold)
request = compiled_model_ir.create_infer_request()
# Step 1: Create the data loader
annotation_path = "./2007_train.txt"
# ----------------------------------------------------------------------#
#   The validation split is created in train.py.
#   It is normal for 2007_test.txt and 2007_val.txt to be empty; they are not used for training.
#   With the current split, the validation-to-training ratio is 1:9.
# ----------------------------------------------------------------------#
val_split = 0.1
with open(annotation_path, encoding='utf-8') as f:
    lines = f.readlines()
np.random.seed(10101)
np.random.shuffle(lines)
np.random.seed(None)
data_loader = YOLOXDataLoader(annotation_lines=lines[0:2000], input_shape=[480, 640], num_classes=5)
metric = MAPMetric()
total_t = 0
total_infer_time = 0
total_decode_time = 0
for i in range(len(data_loader)):
    img, box = data_loader[i]
    # undo the normalization so the image can be drawn on and displayed
    empty_img = np.array(de_reprocess_input(np.transpose(np.squeeze(img, axis=0), (1, 2, 0))), dtype=np.uint8)
    input_img = img.astype(np.float32)
    time_start = time.time()
    request.infer(inputs={inputs_names[0]: input_img})
    time_infer = time.time()
    rects = []
    labels = []
    confs = []
    strides = [8, 16, 32]
    for k in range(3):
        result = request.get_output_tensor(k).data
        stride = strides[k]
        # note: the grid loops reuse i/j, shadowing the dataset index i (harmless here)
        for i in range(result.shape[2]):
            for j in range(result.shape[3]):
                isObj = result[0, 4, i, j]
                if isObj > 0:
                    max1 = np.max(result[0, 5:, i, j])
                    # print(max1)
                    conf = sigmoid(isObj) * sigmoid(max1)
                    if conf > 0.1:
                        # print(result[0, 5:, i, j])
                        label = np.argmax(result[0, 5:, i, j])
                        x = result[0, 0, i, j]
                        x += j
                        x *= stride
                        y = result[0, 1, i, j]
                        y += i
                        y *= stride
                        w = result[0, 2, i, j]
                        w = np.exp(w)
                        w *= stride
                        h = result[0, 3, i, j]
                        h = np.exp(h)
                        h *= stride
                        x1 = int(max(x - w / 2, 0))
                        y1 = int(max(y - h / 2, 0))
                        x2 = int(max(x + w / 2, 0))
                        y2 = int(max(y + h / 2, 0))
                        rects.append([x1, y1, x2, y2])
                        labels.append(label)
                        confs.append(conf)
    boxes_ids = cv2.dnn.NMSBoxes(rects, confs, 0.2, 0.1)
    time_decode = time.time()
    pred = [request.get_output_tensor(0).data, request.get_output_tensor(1).data, request.get_output_tensor(2).data]
    total_t += time_decode - time_start
    total_infer_time += time_infer - time_start
    total_decode_time += time_decode - time_infer
    metric.update(pred, box)
    # print(1/t)
    cv2.putText(empty_img, "fps:{0:.3f}".format(1 / (time_decode - time_start)), (20, 20), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
    for boxes_id in boxes_ids:
        x1 = rects[boxes_id][0]
        y1 = rects[boxes_id][1]
        x2 = rects[boxes_id][2]
        y2 = rects[boxes_id][3]
        label = labels[boxes_id]
        conf = confs[boxes_id]
        cv2.rectangle(empty_img, (x1, y1), (x2, y2), (0, 255, 0))
        cv2.putText(empty_img, "{0}:{1:.3f}".format(label, conf), (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
    cv2.imshow("img", empty_img)
    # print(result.shape)
    cv2.waitKey(1)
print("mean fps:{}".format(1 / total_t * len(data_loader)))
print("mean infer time:{}".format(total_infer_time / len(data_loader) * 1000))
print("mean decode time:{}".format(total_decode_time / len(data_loader) * 1000))
print("data:{}".format(metric.metric.compute()))
cv2.waitKey(0)
cv2.destroyAllWindows()