ONNX Runtime 量化模型

本文介绍了如何使用ONNXRuntime进行模型的静态量化,包括创建CalibrationDataReader,预处理数据,以及量化模型的过程。同时,展示了动态量化的使用方法。最后,通过benchmark函数测试了量化前后模型的速度差异。
摘要由CSDN通过智能技术生成

1、静态量化

import os
import numpy as np
import time
from PIL import Image
import onnxruntime
from onnxruntime.quantization import quantize_static, CalibrationDataReader, QuantFormat, QuantType


class DataReader(CalibrationDataReader):
    """Calibration data reader used by ONNX Runtime static quantization.

    On the first call to ``get_next`` it opens the model to discover the
    expected input shape, preprocesses every image in the calibration
    folder, and then yields one ``{input_name: array}`` feed per image.
    Returns ``None`` once all samples are consumed.
    """

    def __init__(self, calibration_image_folder, augmented_model_path=None):
        self.image_folder = calibration_image_folder
        self.augmented_model_path = augmented_model_path
        self.preprocess_flag = True    # flipped to False after lazy init
        self.enum_data_dicts = []      # replaced by an iterator on first use
        self.datasize = 0

    def get_next(self):
        """Return the next feed dict for calibration, or None when exhausted."""
        if self.preprocess_flag:
            # One-shot lazy initialization: read the model's input shape,
            # preprocess the whole calibration folder, and build the feed iterator.
            self.preprocess_flag = False
            session = onnxruntime.InferenceSession(self.augmented_model_path, None)
            _, _, height, width = session.get_inputs()[0].shape
            data_list = proprocess_func(self.image_folder, height, width, size_limit=0)
            input_name = session.get_inputs()[0].name
            self.datasize = len(data_list)
            self.enum_data_dicts = iter([{input_name: sample} for sample in data_list])
        return next(self.enum_data_dicts, None)


def proprocess_func(images_folder, height, width, size_limit=0):
    """Load and preprocess calibration images from a folder.

    Args:
        images_folder: directory containing the calibration images.
        height, width: target spatial size expected by the model input.
        size_limit: if > 0, use at most this many images; 0 means use all.

    Returns:
        float32 ndarray of shape (N, 1, 3, height, width), values in [0, 1],
        so that iterating over it yields one NCHW sample per image.
    """
    image_names = os.listdir(images_folder)
    if size_limit > 0 and len(image_names) >= size_limit:
        batch_filenames = image_names[:size_limit]
    else:
        batch_filenames = image_names

    unconcatenated_batch_data = []
    for image_name in batch_filenames:
        image_filepath = os.path.join(images_folder, image_name)
        # Force RGB (handles grayscale/RGBA sources) and resize to model size.
        # NOTE: this must match the preprocessing used during training.
        pillow_img = Image.open(image_filepath).convert("RGB").resize((width, height))
        input_data = np.float32(pillow_img) / 255.0    # HWC, scaled to [0, 1]
        input_data = input_data[np.newaxis, :, :]      # -> 1HWC
        input_data = input_data.transpose(0, 3, 1, 2)  # -> 1CHW (NCHW)
        unconcatenated_batch_data.append(input_data)

    # Guard against an empty calibration folder instead of crashing in stack().
    if not unconcatenated_batch_data:
        return np.zeros((0, 1, 3, height, width), dtype=np.float32)

    # Stack into (N, 1, 3, H, W): each element is a ready-to-feed NCHW sample.
    return np.stack(unconcatenated_batch_data)

def benchmark(model_path, runs=10, input_shape=(1, 3, 640, 640)):
    """Measure per-inference latency of an ONNX model using a dummy input.

    Args:
        model_path: path to the .onnx model to benchmark.
        runs: number of timed inference runs (an extra untimed warm-up run
            is executed first).
        input_shape: shape of the zero-filled float32 dummy input tensor;
            must match the model's expected input.

    Prints each run's latency in milliseconds followed by the average.
    """
    session = onnxruntime.InferenceSession(model_path)
    input_name = session.get_inputs()[0].name

    total = 0.0
    input_data = np.zeros(input_shape, np.float32)  # dummy input is enough for timing
    # Warm-up: the first run pays one-time session/graph setup cost, so it is
    # excluded from the timing loop.
    _ = session.run([], {input_name: input_data})
    for i in range(runs):
        start = time.perf_counter()
        _ = session.run([], {input_name: input_data})
        end = (time.perf_counter() - start) * 1000
        total += end
        print(f"{end:.2f}ms")
    total /= runs
    print(f"Avg: {total:.2f}ms")


def main():
    """Statically quantize a YOLOv5 ONNX model and benchmark FP32 vs INT8."""
    input_model_path = 'C:/ken/c++/models1/yolov5s-sim.onnx'  # source FP32 ONNX model
    output_model_path = 'C:/ken/c++/models1/yolov5s_int8q_jingtai_1.onnx'  # quantized output
    calibration_dataset_path = 'C:/ken/img/JPEGImages'  # calibration image directory

    # The calibration reader must preprocess images exactly the same way as
    # PyTorch training did, otherwise the collected activation ranges are wrong.
    data_reader = DataReader(calibration_dataset_path, input_model_path)

    # Static quantization in QDQ format with uint8 activations and weights.
    quantize_static(
        input_model_path,
        output_model_path,
        data_reader,
        quant_format=QuantFormat.QDQ,
        activation_type=QuantType.QUInt8,
        extra_options={'AddQDQPairToWeight': True, 'DedicatedQDQPair': True},
        weight_type=QuantType.QUInt8,
    )
    print("量化完成")
    print("float32测试")
    benchmark(input_model_path)
    print("int8测试")
    benchmark(output_model_path)

说明:quantize_static 的 extra_options 参数是可选的,可以去掉。

2、动态量化

from onnxruntime.quantization import QuantType, quantize_dynamic

# Model paths: FP32 source and dynamically quantized INT8 output.
model_fp32 = 'C:/ken/c++/models1/yolov5s-sim.onnx'
model_quant_dynamic = 'C:/ken/c++/models1/yolov5s_int8q_dongtai.onnx'

# Dynamic quantization: weights are quantized offline while activations are
# quantized at runtime, so no calibration data set is required.
quantize_dynamic(
    weight_type=QuantType.QUInt8,     # weight type: Int8 / UInt8
    model_input=model_fp32,           # input model
    model_output=model_quant_dynamic, # output model
)

 

  • 6
    点赞
  • 12
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值