1、静态量化
import os
import numpy as np
import time
from PIL import Image
import onnxruntime
from onnxruntime.quantization import quantize_static, CalibrationDataReader, QuantFormat, QuantType
class DataReader(CalibrationDataReader):
    """Calibration data reader that loads its images on the first ``get_next`` call.

    The model at ``augmented_model_path`` is opened only to read the name and
    shape of its first input; the images in ``calibration_image_folder`` are
    then preprocessed with ``proprocess_func`` to that height and width.
    """

    def __init__(self, calibration_image_folder, augmented_model_path=None):
        """Store the image folder and model path; no data is loaded yet."""
        self.image_folder = calibration_image_folder
        self.augmented_model_path = augmented_model_path
        # True until the first get_next() call has started loading the data.
        self.preprocess_flag = True
        # Replaced by an iterator of {input_name: array} feeds once loaded.
        self.enum_data_dicts = []
        # Number of calibration samples, filled in when the data is loaded.
        self.datasize = 0

    def _load_calibration_feeds(self):
        """Preprocess every calibration image and build the feed iterator."""
        session = onnxruntime.InferenceSession(self.augmented_model_path, None)
        model_input = session.get_inputs()[0]
        # The first input's shape is unpacked as four dimensions; only the
        # last two (height, width) are used.
        _, _, height, width = model_input.shape
        samples = proprocess_func(self.image_folder, height, width, size_limit=0)
        feed_key = model_input.name
        self.datasize = len(samples)
        self.enum_data_dicts = iter([{feed_key: sample} for sample in samples])

    def get_next(self):
        """Return the next ``{input_name: array}`` feed, or ``None`` when exhausted."""
        if self.preprocess_flag:
            # Cleared before loading, so loading is attempted only once.
            self.preprocess_flag = False
            self._load_calibration_feeds()
        return next(self.enum_data_dicts, None)
def proprocess_func(images_folder, height, width, size_limit=0):
    """Load the images in a folder into a float32 calibration batch.

    Each image is resized to ``(width, height)``, pasted onto an RGB canvas
    (which converts non-RGB images to RGB), scaled from 0-255 to 0-1 and
    rearranged from height/width/channel order to a ``(1, 3, height, width)``
    array.

    Args:
        images_folder: Directory containing the calibration images.
        height: Target image height in pixels.
        width: Target image width in pixels.
        size_limit: If greater than 0, use at most this many images;
            otherwise use every image found.

    Returns:
        A float32 array of shape ``(N, 1, 3, height, width)``. Iterating over
        the first axis yields one ``(1, 3, height, width)`` sample per image.
        ``N`` is 0 when the folder contains no image files.
    """
    # Only regular files with a common image suffix are loaded, so that
    # sub-directories and stray files (e.g. Thumbs.db) do not abort the run.
    image_suffixes = (
        ".bmp", ".gif", ".jpeg", ".jpg", ".png", ".tif", ".tiff", ".webp",
    )
    # os.listdir returns names in arbitrary order; sorting makes the selected
    # calibration subset reproducible.
    image_names = sorted(
        name
        for name in os.listdir(images_folder)
        if name.lower().endswith(image_suffixes)
        and os.path.isfile(os.path.join(images_folder, name))
    )
    if size_limit > 0:
        image_names = image_names[:size_limit]

    samples = []
    for image_name in image_names:
        canvas = Image.new("RGB", (width, height))
        # The context manager closes the underlying file handle promptly.
        with Image.open(os.path.join(images_folder, image_name)) as source_img:
            canvas.paste(source_img.resize((width, height)))
        # (height, width, 3) -> (3, height, width), then add a leading axis.
        chw = (np.asarray(canvas, dtype=np.float32) / 255.0).transpose(2, 0, 1)
        samples.append(chw[np.newaxis])

    if not samples:
        # np.stack rejects an empty list; return an empty batch instead.
        return np.empty((0, 1, 3, height, width), dtype=np.float32)
    return np.stack(samples, axis=0)
def benchmark(model_path, runs=10, input_shape=(1, 3, 640, 640)):
    """Measure inference latency of an ONNX model on a dummy input.

    The model is fed a zero-filled float32 array. One untimed warm-up run is
    performed first; each timed run's latency and the final average are
    printed in milliseconds.

    Args:
        model_path: Path of the ONNX model to load.
        runs: Number of timed inference runs; must be at least 1.
        input_shape: Shape of the zero-filled dummy input.

    Returns:
        The average latency of the timed runs, in milliseconds.

    Raises:
        ValueError: If ``runs`` is smaller than 1.
    """
    if runs < 1:
        raise ValueError("runs must be at least 1")

    session = onnxruntime.InferenceSession(model_path)
    input_name = session.get_inputs()[0].name
    # The content of the input is irrelevant for timing, so zeros are used.
    feed = {input_name: np.zeros(input_shape, np.float32)}

    # Warm-up run, excluded from the measurements.
    session.run([], feed)

    total_ms = 0.0
    for _ in range(runs):
        start = time.perf_counter()
        session.run([], feed)
        elapsed_ms = (time.perf_counter() - start) * 1000
        total_ms += elapsed_ms
        print(f"{elapsed_ms:.2f}ms")

    average_ms = total_ms / runs
    print(f"Avg: {average_ms:.2f}ms")
    return average_ms
def main():
    """Statically quantize the hard-coded ONNX model, then benchmark both models."""
    input_model_path = 'C:/ken/c++/models1/yolov5s-sim.onnx'  # source ONNX model
    output_model_path = 'C:/ken/c++/models1/yolov5s_int8q_jingtai_1.onnx'  # quantized model to write
    calibration_dataset_path = 'C:/ken/img/JPEGImages'  # folder of calibration images

    # Calibration data loader. Its image preprocessing should match the
    # preprocessing used when loading data for PyTorch training.
    dr = DataReader(calibration_dataset_path, input_model_path)

    # Run static quantization.
    qdq_options = {'AddQDQPairToWeight': True, 'DedicatedQDQPair': True}
    quantize_static(
        input_model_path,
        output_model_path,
        dr,
        quant_format=QuantFormat.QDQ,
        activation_type=QuantType.QUInt8,
        weight_type=QuantType.QUInt8,
        extra_options=qdq_options,
    )
    print("量化完成")

    # Time the original model first, then the quantized one.
    for banner, model_path in (
        ("float32测试", input_model_path),
        ("int8测试", output_model_path),
    ):
        print(banner)
        benchmark(model_path)
说明:上面 `quantize_static` 调用中的 `extra_options` 参数是可选的,可以去掉。
2、动态量化
from onnxruntime.quantization import QuantType, quantize_dynamic
# Path of the float32 model to quantize.
model_fp32 = 'C:/ken/c++/models1/yolov5s-sim.onnx'
# Path the dynamically quantized model is written to.
model_quant_dynamic = 'C:/ken/c++/models1/yolov5s_int8q_dongtai.onnx'

# Dynamic quantization; weight_type selects the weight data type
# (Int8 / UInt8).
quantize_dynamic(
    model_input=model_fp32,
    model_output=model_quant_dynamic,
    weight_type=QuantType.QUInt8,
)