1. Project Structure
2. Converting the ONNX Model to OpenVINO
mo --framework=onnx --data_type=FP32 --input_shape=[1,3,480,640] -m .\pruned_model\baseline_sim.onnx --output_dir .\yolox_vino_model
The Model Optimizer (mo) converts the ONNX model into the IR format supported by OpenVINO, producing an .xml topology file and a .bin weights file in ./yolox_vino_model.
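Before quantizing, it can be worth confirming that the converted IR loads and exposes the expected input shape. A minimal sketch, assuming the IR was written to ./yolox_vino_model/baseline_sim.xml as in the command above:

# Sanity check: load the converted IR and print its input/output shapes.
from openvino.runtime import Core

ie = Core()
model = ie.read_model(model="./yolox_vino_model/baseline_sim.xml")
for inp in model.inputs:
    print("input: ", inp.get_any_name(), inp.get_partial_shape())   # expect [1,3,480,640]
for out in model.outputs:
    print("output:", out.get_any_name(), out.get_partial_shape())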
3. Quantization with OpenVINO
3.1 Importing Dependencies
# %% Import dependencies
import copy
import os
import time
import cv2
import torch
import numpy as np
from openvino.tools.pot.api import DataLoader, Metric
from openvino.tools.pot.engines.ie_engine import IEEngine
from openvino.tools.pot.graph import load_model, save_model
from openvino.tools.pot.graph.model_utils import compress_model_weights
from openvino.tools.pot.pipeline.initializer import create_pipeline
from PIL import Image
from openvino.runtime import Core
import addict
from pathlib import Path
from torchmetrics.detection.mean_ap import MeanAveragePrecision
3.2 Defining Utility Functions
# %% Define pre- and post-processing helpers
def sigmoid(x):
    return 1 / (1 + np.exp(-x))


def desigmoid(y):
    # inverse of the sigmoid function (logit)
    return -np.log(1 / y - 1)


def preprocess_input(image):
    # ImageNet normalization: scale to [0, 1], subtract mean, divide by std
    image /= 255.0
    image -= np.array([0.485, 0.456, 0.406])
    image /= np.array([0.229, 0.224, 0.225])
    return image


# detection threshold kept in logit space (see the note below)
conf_threshold = desigmoid(0.8)
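Note that conf_threshold is stored in logit space: because sigmoid is monotonic, comparing a raw objectness output against desigmoid(0.8) gives the same decision as checking sigmoid(objectness) > 0.8, while skipping the per-cell sigmoid call. A quick illustrative check (the logit value below is arbitrary and not part of the pipeline):

x = 1.7  # arbitrary objectness logit, for illustration only
print(sigmoid(x) > 0.8)        # decision in probability space
print(x > desigmoid(0.8))      # same decision in logit space, no sigmoid needed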
3.3 Defining the Calibration Data Loader for Quantization
# %% Define the data loader class
class YOLOXDataLoader(DataLoader):
    """
    Inherit from the POT DataLoader class and implement it for YOLOX.
    """
    def __init__(self, annotation_lines, input_shape, num_classes):
        # super(YOLOXDataLoader, self).__init__()
        self.annotation_lines = annotation_lines
        self.length = len(self.annotation_lines)
        self.input_shape = input_shape
        self.num_classes = num_classes
        self.step_now = -1

    def __len__(self):
        return self.length

    def __getitem__(self, index):
        index = index % self.length
        self.step_now += 1
        print(self.step_now)
        # ---------------------------------------------------#
        #   Random augmentation is applied during training;
        #   no augmentation is applied during validation.
        # ---------------------------------------------------#
        image, box = self.get_random_data(self.annotation_lines[index], self.input_shape)
        image = np.transpose(preprocess_input(np.array(image, dtype=np.float32)), (2, 0, 1))
        image = np.expand_dims(image, axis=0).astype(np.float32)
        box = np.array(box, dtype=np.float32)
        if len(box) != 0:
            # convert [x1, y1, x2, y2] to [cx, cy, w, h]
            box[:, 2:4] = box[:, 2:4] - box[:, 0:2]
            box[:, 0:2] = box[:, 0:2] + box[:, 2:4] / 2
        return image, box

    def rand(self, a=0, b=1):
        return np.random.rand() * (b - a) + a

    def get_random_data(self, annotation_line, input_shape):
        line = annotation_line.strip().split(' ')
        # ------------------------------#
        #   Read the image (RGB)
        # ------------------------------#
        image = Image.open(line[0])
        # image = cvtColor(image)
        # ------------------------------#
        #   Image size and target input size
        # ------------------------------#
        iw, ih = image.size
        h, w = input_shape
        # ------------------------------#
        #   Parse the ground-truth boxes
        # ------------------------------#
        # boxes = line[1].split(' ')
        boxes = line[1:]
        if len(boxes) == 0:
            pass  # no ground-truth boxes for this image
        box_info = []
        for box in boxes:
            try:
                if box != '':
                    info = list(map(float, box.split(',')))
                    box_info.append(info)
            except ValueError:
                continue  # skip malformed box entries
        box = np.array(box_info)
        scale = min(w / iw, h / ih)
        nw = int(iw * scale)
        nh = int(ih * scale)
        dx = (w - nw) // 2
        dy = (h - nh) // 2
        # ---------------------------------#
        #   Letterbox: pad the unused area with gray bars
        # ---------------------------------#
        image = image.resize((nw, nh), Image.BICUBIC)
        new_image = Image.new('RGB', (w, h), (128, 128, 128))
        new_image.paste(image, (dx, dy))
        image_data = np.array(new_image, np.float32)
        # ---------------------------------#
        #   Adjust the ground-truth boxes accordingly
        # ---------------------------------#
        if len(box) > 0:
            np.random.shuffle(box)
            box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
            box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
            box[:, 0:2][box[:, 0:2] < 0] = 0
            box[:, 2][box[:, 2] > w] = w
            box[:, 3][box[:, 3] > h] = h
            box_w = box[:, 2] - box[:, 0]
            box_h = box[:, 3] - box[:, 1]
            box = box[np.logical_and(box_w > 1, box_h > 1)]  # discard invalid boxes
        return image_data, box
3.4 Defining the Metric Class for Evaluating the Quantized Model
# %% Define the Metric class
class MAPMetric(Metric):
    def __init__(self, map_value="map"):
        """
        Mean Average Precision metric. Wraps the torchmetrics implementation, see
        https://torchmetrics.readthedocs.io/en/latest/references/modules.html#map

        :param map_value: specific metric to return. Default: "map"
            Change to one of the values in the list below to return a different value:
            ['mar_1', 'mar_10', 'mar_100', 'mar_small', 'mar_medium', 'mar_large',
             'map', 'map_50', 'map_75', 'map_small', 'map_medium', 'map_large']
            See the torchmetrics documentation for more details.
        """
        # assert (
        #     map_value
        #     in torchmetrics.detection.map.MARMetricResults.__slots__
        #     + torchmetrics.detection.map.MAPMetricResults.__slots__
        # )
        self._name = map_value
        self.metric = MeanAveragePrecision()
        super().__init__()

    @property
    def value(self):
        """
        Returns metric value for the last model output.
        Possible format: {metric_name: [metric_values_per_image]}
        """
        return {self._name: [0]}

    @property
    def avg_value(self):
        """
        Returns average metric value for all model outputs.
        Possible format: {metric_name: metric_value}
        """
        return {self._name: self.metric.compute()[self._name].item()}

    def update(self, output, target):
        """
        Convert network output and labels to the format that torchmetrics' MAP
        implementation expects, and call `metric.update()`.

        :param output: model output
        :param target: annotations for model output
        """
        targetboxes = []
        targetlabels = []
        predboxes = []
        predlabels = []
        scores = []
        # ground-truth boxes arrive as [cx, cy, w, h, class]; convert to [x1, y1, x2, y2]
        for i in range(target[0].shape[0]):
            # [tx, ty, tw, th, category] = single_target
            [tx, ty, tw, th, category] = [target[0][i, 0], target[0][i, 1], target[0][i, 2], target[0][i, 3], target[0][i, 4]]
            targetbox = [round(tx - tw / 2), round(ty - th / 2), round(tx + tw / 2), round(ty + th / 2)]
            targetboxes.append(targetbox)
            targetlabels.append(category)
        # decode the three YOLOX heads (strides 8/16/32) on the 480x640 input grid
        strides = [8, 16, 32]
        for k, single_output in enumerate(output):
            stride = strides[k]
            for i in range(int(60 / 2**k)):
                for j in range(int(80 / 2**k)):
                    isObj = single_output[0, 4, i, j]
                    if isObj > conf_threshold:
                        conf = np.max(single_output[0, 5:, i, j]) * sigmoid(isObj)
                        if conf > 0.5:
                            x = single_output[0, 0, i, j]
                            x += j
                            x *= stride
                            y = single_output[0, 1, i, j]
                            y += i
                            y *= stride
                            w = single_output[0, 2, i, j]
                            w = np.exp(w)
                            w *= stride
                            h = single_output[0, 3, i, j]
                            h = np.exp(h)
                            h *= stride
                            label = np.argmax(single_output[0, 5:, i, j])
                            predbox = [round(x - w / 2), round(y - h / 2), round(x + w / 2), round(y + h / 2)]
                            predboxes.append(predbox)
                            predlabels.append(label)
                            scores.append(conf)
        boxes_id = cv2.dnn.NMSBoxes(predboxes, scores, 0.8, 0.25)
        preds = [
            dict(
                boxes=torch.Tensor(np.array(predboxes)[boxes_id, ...]).float(),
                labels=torch.Tensor(np.array(predlabels)[boxes_id, ...]).short(),
                scores=torch.Tensor(np.array(scores)[boxes_id, ...]),
            )
        ]
        targets = [
            dict(
                boxes=torch.Tensor(targetboxes).float(),
                labels=torch.Tensor(targetlabels).short(),
            )
        ]
        self.metric.update(preds, targets)
        # return self.metric.

    def reset(self):
        """
        Resets the metric.
        """
        self.metric.reset()

    def get_attributes(self):
        """
        Returns a dictionary of metric attributes {metric_name: {attribute_name: value}}.
        Required attributes: 'direction': 'higher-better' or 'higher-worse'
                             'type': metric type
        """
        return {self._name: {"direction": "higher-better", "type": "mAP"}}
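For reference, MAPMetric only converts the decoded detections into the dict format that torchmetrics expects and delegates the actual mAP computation. A minimal, self-contained check of that format, using dummy boxes that are not part of this project:

# Illustrative only: verify the preds/targets dict format MeanAveragePrecision expects.
import torch
from torchmetrics.detection.mean_ap import MeanAveragePrecision

m = MeanAveragePrecision()
preds = [dict(boxes=torch.tensor([[10.0, 10.0, 50.0, 50.0]]),
              scores=torch.tensor([0.9]),
              labels=torch.tensor([0]))]
targets = [dict(boxes=torch.tensor([[12.0, 12.0, 48.0, 48.0]]),
                labels=torch.tensor([0]))]
m.update(preds, targets)
print(m.compute()["map"])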
3.5 Configuring the Quantization Parameters
# %% Set the POT quantization parameters
ir_path = Path("./yolox_vino_model/baseline_sim.xml")
model_config = addict.Dict(
    {
        "model_name": ir_path.stem,
        "model": ir_path,
        "weights": ir_path.with_suffix(".bin"),
    }
)
# Engine config
engine_config = addict.Dict({"device": "CPU"})
# Standard DefaultQuantization configuration. For this tutorial, stat_subset_size is effectively
# ignored because there are fewer than 300 images. For production use, 300 is recommended.
default_algorithms = [
    {
        "name": "DefaultQuantization",
        # "name": "AccuracyAwareQuantization",
        "stat_subset_size": 300,
        "params": {
            "target_device": "ANY",
            "preset": "mixed",  # Choose between "mixed" and "performance".
        },
    }
]
print(f"model_config: {model_config}")
3.6 Defining and Running the Quantization Pipeline
# %% Define and run the quantization pipeline
# Step 1: Create the data loader
annotation_path = "./2007_val.txt"
# ----------------------------------------------------------------------#
#   The validation split is created in train.py.
#   It is normal for 2007_test.txt and 2007_val.txt to be empty; they are not used for training.
#   With the current split, the validation-to-training ratio is 1:9.
# ----------------------------------------------------------------------#
val_split = 0.1
with open(annotation_path, encoding='utf-8') as f:
    lines = f.readlines()
np.random.seed(10101)
np.random.shuffle(lines)
np.random.seed(None)
data_loader = YOLOXDataLoader(annotation_lines=lines[0:2000], input_shape=[480, 640], num_classes=5)
# Step 2: Load the model.
ir_model = load_model(model_config=model_config)
original_model = copy.deepcopy(ir_model)
# Step 3: Initialize the metric. For DefaultQuantization,
# specifying a metric is optional: metric can be set to None.
metric = MAPMetric(map_value="map")
# Step 4: Initialize the engine for metric calculation and statistics collection.
engine = IEEngine(config=engine_config, data_loader=data_loader, metric=None)
# Step 5: Create the pipeline of compression algorithms.
# The default_algorithms parameter is defined in the config cell above.
pipeline = create_pipeline(default_algorithms, engine)
# Step 6: Run the pipeline to quantize the model.
algorithm_name = pipeline.algo_seq[0].name
compressed_model = pipeline.run(ir_model)
# Step 7 (optional): Compress the model weights to quantized precision
# to reduce the size of the final .bin file.
compress_model_weights(compressed_model)
# Step 8: Save the compressed model to the desired path.
# Set save_path to the directory where the compressed model should be stored.
preset = pipeline._algo_seq[0].config["preset"]
algorithm_name = pipeline.algo_seq[0].name
compressed_model_paths = save_model(
    model=compressed_model,
    save_path="./optimized_model",
    model_name=f"{ir_model.name}_{preset}_{algorithm_name}",
)
# Step 9 (optional): Evaluate the original and quantized models and print the results.
original_metric_results = pipeline.evaluate(original_model)
if original_metric_results:
    print(f"Accuracy of the original model: {next(iter(original_metric_results.values())):.5f}")
quantized_metric_results = pipeline.evaluate(compressed_model)
if quantized_metric_results:
    print(f"Accuracy of the quantized model: {next(iter(quantized_metric_results.values())):.5f}")
compressed_model_path = compressed_model_paths[0]["model"]
print("The quantized model is stored at", compressed_model_path)
3.7 Quantization Results
Based on the quantization configuration, three model-related files are generated. The weight file (.bin) shrinks to roughly a quarter of the original model's size, while the topology file (.xml) grows somewhat.
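A quick way to verify the size reduction on disk is to compare the weight files of the FP32 and quantized IRs. A minimal sketch; the quantized file name below follows the model_name pattern used in the save step of 3.6:

from pathlib import Path

fp32_bin = Path("./yolox_vino_model/baseline_sim.bin")
int8_bin = Path("./optimized_model/baseline_sim_mixed_DefaultQuantization.bin")
print(f"FP32 weights: {fp32_bin.stat().st_size / 1024**2:.2f} MiB")
print(f"INT8 weights: {int8_bin.stat().st_size / 1024**2:.2f} MiB")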
4. Model Testing
4.1 Test Code
# %% Import dependencies
import os
import time
import cv2
import torch
import numpy as np
from openvino.tools.pot.api import DataLoader, Metric
from openvino.tools.pot.engines.ie_engine import IEEngine
from openvino.tools.pot.graph import load_model, save_model
from openvino.tools.pot.graph.model_utils import compress_model_weights
from openvino.tools.pot.pipeline.initializer import create_pipeline
from PIL import Image
from openvino.runtime import Core
import torchmetrics
import addict
from pathlib import Path
from torchmetrics.detection.mean_ap import MeanAveragePrecision
# %% Load the network in Inference Engine
ie = Core()
# model_ir = ie.read_model(model="./yolox_vino_model/baseline.xml")
# model_ir = ie.read_model(model="./yolox_vino_model/prune_iter2.xml")
model_ir = ie.read_model(model="./optimized_model/baseline_sim_mixed_DefaultQuantization.xml")
compiled_model_ir = ie.compile_model(model=model_ir, device_name="CPU")
inputs_names = compiled_model_ir.inputs
outputs_names = compiled_model_ir.outputs
print(inputs_names)
print(outputs_names)
class MAPMetric(Metric):
    def __init__(self, map_value="map"):
        """
        Mean Average Precision metric. Wraps the torchmetrics implementation, see
        https://torchmetrics.readthedocs.io/en/latest/references/modules.html#map

        :param map_value: specific metric to return. Default: "map"
            Change to one of the values in the list below to return a different value:
            ['mar_1', 'mar_10', 'mar_100', 'mar_small', 'mar_medium', 'mar_large',
             'map', 'map_50', 'map_75', 'map_small', 'map_medium', 'map_large']
            See the torchmetrics documentation for more details.
        """
        # assert (
        #     map_value
        #     in torchmetrics.detection.map.MARMetricResults.__slots__
        #     + torchmetrics.detection.map.MAPMetricResults.__slots__
        # )
        self._name = map_value
        self.metric = MeanAveragePrecision()
        super().__init__()

    @property
    def value(self):
        """
        Returns metric value for the last model output.
        Possible format: {metric_name: [metric_values_per_image]}
        """
        return {self._name: [0]}

    @property
    def avg_value(self):
        """
        Returns average metric value for all model outputs.
        Possible format: {metric_name: metric_value}
        """
        return {self._name: self.metric.compute()[self._name].item()}

    def update(self, output, target):
        """
        Convert network output and labels to the format that torchmetrics' MAP
        implementation expects, and call `metric.update()`.

        :param output: model output
        :param target: annotations for model output
        """
        targetboxes = []
        targetlabels = []
        predboxes = []
        predlabels = []
        scores = []
        # image_width = target[0][3]
        # image_height = target[0][2]
        # ground-truth boxes arrive as [cx, cy, w, h, class]; convert to [x1, y1, x2, y2]
        for i in range(target.shape[0]):
            [tx, ty, tw, th, category] = [target[i, 0], target[i, 1], target[i, 2], target[i, 3], target[i, 4]]
            targetbox = [round(tx - tw / 2), round(ty - th / 2), round(tx + tw / 2), round(ty + th / 2)]
            targetboxes.append(targetbox)
            targetlabels.append(category)
        # decode the three YOLOX heads (strides 8/16/32) on the 480x640 input grid
        strides = [8, 16, 32]
        for k, single_output in enumerate(output):
            stride = strides[k]
            for i in range(int(60 / 2**k)):
                for j in range(int(80 / 2**k)):
                    isObj = single_output[0, 4, i, j]
                    if isObj > conf_threshold:
                        conf = np.max(single_output[0, 5:, i, j]) * sigmoid(isObj)
                        if conf > 0.5:
                            x = single_output[0, 0, i, j]
                            x += j
                            x *= stride
                            y = single_output[0, 1, i, j]
                            y += i
                            y *= stride
                            w = single_output[0, 2, i, j]
                            w = np.exp(w)
                            w *= stride
                            h = single_output[0, 3, i, j]
                            h = np.exp(h)
                            h *= stride
                            label = np.argmax(single_output[0, 5:, i, j])
                            predbox = [round(x - w / 2), round(y - h / 2), round(x + w / 2), round(y + h / 2)]
                            predboxes.append(predbox)
                            predlabels.append(label)
                            scores.append(conf)
        boxes_id = cv2.dnn.NMSBoxes(predboxes, scores, 0.5, 0.25)
        # print(boxes_id)
        # for pred in single_output[0, 0, ::]:
        #     image_id, label, conf, xmin, ymin, xmax, ymax = pred
        #     xmin *= image_width
        #     xmax *= image_width
        #     ymin *= image_height
        #     ymax *= image_height
        preds = [
            dict(
                boxes=torch.Tensor(np.array(predboxes)[boxes_id, ...]).float(),
                labels=torch.Tensor(np.array(predlabels)[boxes_id, ...]).short(),
                scores=torch.Tensor(np.array(scores)[boxes_id, ...]),
            )
        ]
        targets = [
            dict(
                boxes=torch.Tensor(targetboxes).float(),
                labels=torch.Tensor(targetlabels).short(),
            )
        ]
        self.metric.update(preds, targets)

    def reset(self):
        """
        Resets the metric.
        """
        self.metric.reset()

    def get_attributes(self):
        """
        Returns a dictionary of metric attributes {metric_name: {attribute_name: value}}.
        Required attributes: 'direction': 'higher-better' or 'higher-worse'
                             'type': metric type
        """
        return {self._name: {"direction": "higher-better", "type": "mAP"}}
class YOLOXDataLoader(DataLoader):
    """
    Inherit from the POT DataLoader class and implement it for YOLOX.
    """
    def __init__(self, annotation_lines, input_shape, num_classes):
        # super(YOLOXDataLoader, self).__init__()
        self.annotation_lines = annotation_lines
        self.length = len(self.annotation_lines)
        self.input_shape = input_shape
        self.num_classes = num_classes
        self.step_now = -1

    def __len__(self):
        return self.length

    def __getitem__(self, index):
        index = index % self.length
        self.step_now += 1
        # print(self.step_now)
        # ---------------------------------------------------#
        #   Random augmentation is applied during training;
        #   no augmentation is applied during validation.
        # ---------------------------------------------------#
        image, box = self.get_random_data(self.annotation_lines[index], self.input_shape)
        image = np.transpose(preprocess_input(np.array(image, dtype=np.float32)), (2, 0, 1))
        image = np.expand_dims(image, axis=0).astype(np.float32)
        box = np.array(box, dtype=np.float32)
        if len(box) != 0:
            # convert [x1, y1, x2, y2] to [cx, cy, w, h]
            box[:, 2:4] = box[:, 2:4] - box[:, 0:2]
            box[:, 0:2] = box[:, 0:2] + box[:, 2:4] / 2
        return image, box

    def rand(self, a=0, b=1):
        return np.random.rand() * (b - a) + a

    def get_random_data(self, annotation_line, input_shape):
        line = annotation_line.strip().split(' ')
        # ------------------------------#
        #   Read the image (RGB)
        # ------------------------------#
        image = Image.open(line[0])
        # image = cvtColor(image)
        # ------------------------------#
        #   Image size and target input size
        # ------------------------------#
        iw, ih = image.size
        h, w = input_shape
        # ------------------------------#
        #   Parse the ground-truth boxes
        # ------------------------------#
        # boxes = line[1].split(' ')
        boxes = line[1:]
        if len(boxes) == 0:
            pass  # no ground-truth boxes for this image
        box_info = []
        for box in boxes:
            try:
                if box != '':
                    info = list(map(float, box.split(',')))
                    box_info.append(info)
            except ValueError:
                continue  # skip malformed box entries
        # box = np.array([np.array(list(map(float, box.split(',')))) for box in line[1:]])  # drop the leading image path
        box = np.array(box_info)
        scale = min(w / iw, h / ih)
        nw = int(iw * scale)
        nh = int(ih * scale)
        dx = (w - nw) // 2
        dy = (h - nh) // 2
        # ---------------------------------#
        #   Letterbox: pad the unused area with gray bars
        # ---------------------------------#
        image = image.resize((nw, nh), Image.BICUBIC)
        new_image = Image.new('RGB', (w, h), (128, 128, 128))
        new_image.paste(image, (dx, dy))
        image_data = np.array(new_image, np.float32)
        # ---------------------------------#
        #   Adjust the ground-truth boxes accordingly
        # ---------------------------------#
        if len(box) > 0:
            np.random.shuffle(box)
            box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
            box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
            box[:, 0:2][box[:, 0:2] < 0] = 0
            box[:, 2][box[:, 2] > w] = w
            box[:, 3][box[:, 3] > h] = h
            box_w = box[:, 2] - box[:, 0]
            box_h = box[:, 3] - box[:, 1]
            box = box[np.logical_and(box_w > 1, box_h > 1)]  # discard invalid boxes
        return image_data, box


def preprocess_input(image):
    # ImageNet normalization: scale to [0, 1], subtract mean, divide by std
    image /= 255.0
    image -= np.array([0.485, 0.456, 0.406])
    image /= np.array([0.229, 0.224, 0.225])
    return image


def de_reprocess_input(image):
    # invert preprocess_input so the image can be displayed again
    img = image * np.array([0.229, 0.224, 0.225])
    img += np.array([[0.485, 0.456, 0.406]])
    img *= 255.0
    return img


def sigmoid(x):
    if x >= 0:  # numerically stable sigmoid: avoids overflow in exp for large |x|
        return 1.0 / (1 + np.exp(-x))
    else:
        return np.exp(x) / (1 + np.exp(x))


def desigmoid(y):
    return -np.log(1 / y - 1)
# %% Run inference
path = r"D:\WorkSpace\graduate-student\study\graduation_study\codes\3-yolox-quantization\openvino-int8-quantization\dataset\images"
files = os.listdir(path)
np.random.seed(1000)
np.random.shuffle(files)
np.random.seed(None)
conf_threshold = desigmoid(0.5)
print(conf_threshold)
request = compiled_model_ir.create_infer_request()
# Step 1: Create the data loader
annotation_path = "./2007_train.txt"
# ----------------------------------------------------------------------#
#   The validation split is created in train.py.
#   It is normal for 2007_test.txt and 2007_val.txt to be empty; they are not used for training.
#   With the current split, the validation-to-training ratio is 1:9.
# ----------------------------------------------------------------------#
val_split = 0.1
with open(annotation_path, encoding='utf-8') as f:
    lines = f.readlines()
np.random.seed(10101)
np.random.shuffle(lines)
np.random.seed(None)
data_loader = YOLOXDataLoader(annotation_lines=lines[0:2000], input_shape=[480, 640], num_classes=5)
metric = MAPMetric()
total_t = 0
total_infer_time = 0
total_decode_time = 0
for i in range(len(data_loader)):
    img, box = data_loader[i]
    # undo the normalization so the image can be drawn on and displayed
    empty_img = np.array(de_reprocess_input(np.transpose(np.squeeze(img, axis=0), (1, 2, 0))), dtype=np.uint8)
    input_img = img.astype(np.float32)
    time_start = time.time()
    request.infer(inputs={inputs_names[0]: input_img})
    time_infer = time.time()
    rects = []
    labels = []
    confs = []
    strides = [8, 16, 32]
    for k in range(3):
        result = request.get_output_tensor(k).data
        stride = strides[k]
        # note: the grid loops reuse i/j, shadowing the dataset index i (harmless here)
        for i in range(result.shape[2]):
            for j in range(result.shape[3]):
                isObj = result[0, 4, i, j]
                if isObj > 0:
                    max1 = np.max(result[0, 5:, i, j])
                    # print(max1)
                    conf = sigmoid(isObj) * sigmoid(max1)
                    if conf > 0.1:
                        # print(result[0, 5:, i, j])
                        label = np.argmax(result[0, 5:, i, j])
                        x = result[0, 0, i, j]
                        x += j
                        x *= stride
                        y = result[0, 1, i, j]
                        y += i
                        y *= stride
                        w = result[0, 2, i, j]
                        w = np.exp(w)
                        w *= stride
                        h = result[0, 3, i, j]
                        h = np.exp(h)
                        h *= stride
                        x1 = int(max(x - w / 2, 0))
                        y1 = int(max(y - h / 2, 0))
                        x2 = int(max(x + w / 2, 0))
                        y2 = int(max(y + h / 2, 0))
                        rects.append([x1, y1, x2, y2])
                        labels.append(label)
                        confs.append(conf)
    boxes_ids = cv2.dnn.NMSBoxes(rects, confs, 0.2, 0.1)
    time_decode = time.time()
    pred = [request.get_output_tensor(0).data, request.get_output_tensor(1).data, request.get_output_tensor(2).data]
    total_t += time_decode - time_start
    total_infer_time += time_infer - time_start
    total_decode_time += time_decode - time_infer
    metric.update(pred, box)
    # print(1/t)
    cv2.putText(empty_img, "fps:{0:.3f}".format(1 / (time_decode - time_start)), (20, 20), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
    for boxes_id in boxes_ids:
        x1 = rects[boxes_id][0]
        y1 = rects[boxes_id][1]
        x2 = rects[boxes_id][2]
        y2 = rects[boxes_id][3]
        label = labels[boxes_id]
        conf = confs[boxes_id]
        cv2.rectangle(empty_img, (x1, y1), (x2, y2), (0, 255, 0))
        cv2.putText(empty_img, "{0}:{1:.3f}".format(label, conf), (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
    cv2.imshow("img", empty_img)
    # print(result.shape)
    cv2.waitKey(1)
print("mean fps:{}".format(1 / total_t * len(data_loader)))
print("mean infer time:{}".format(total_infer_time / len(data_loader) * 1000))
print("mean decode time:{}".format(total_decode_time / len(data_loader) * 1000))
print("data:{}".format(metric.metric.compute()))
cv2.waitKey(0)
cv2.destroyAllWindows()