introduction
这个例子中将展示如何量化一个 onnx 模型,执行误差分析,并与 onnxruntime 对齐结果
code
import os
from typing import Iterable, Tuple

import torch
from ppq import (BaseGraph, QuantizationSettingFactory, TargetPlatform,
                 convert_any_to_numpy, torch_snr_error)
from ppq.api import (dispatch_graph, export_ppq_graph, load_onnx_graph,
                     quantize_onnx_model)
from ppq.core.data import convert_any_to_torch_tensor
from ppq.executor.torch import TorchExecutor
from ppq.quantization.analyse.graphwise import graphwise_error_analyse
# Leading dimension of the input shape declared below.
BATCHSIZE = 1
# Maps each graph input name to the shape of the random tensor generated for
# it; every graph input must have an entry here.
INPUT_SHAPES = {'input.1': [BATCHSIZE, 3, 224, 224]}
# Device that samples are moved to and that quantization/analysis run on.
DEVICE = 'cuda'
# Target platform handed to the graph dispatcher and to the quantizer.
QUANT_PLATFORM = TargetPlatform.TRT_INT8
# Path of the ONNX model that is loaded and quantized.
ONNX_PATH = 'model.onnx'
# Path the onnxruntime session loads the exported model from.
ONNX_OUTPUT_PATH = 'out/model.onnx'
# ------------------------------------------------------------
# This example shows how to quantize an onnx model, run error analysis
# and align the results with onnxruntime.
# In particular it shows how to quantize a model with multiple inputs:
# in that case the calibration dataset should be a list of dictionaries.
# ------------------------------------------------------------
def generate_calibration_dataset(graph: BaseGraph, num_of_batches: int = 32) -> Tuple[Iterable[dict], dict]:
    """Build a calibration dataset made of random tensors.

    Each sample is a dict that maps every name in ``graph.inputs`` to a
    ``torch.rand`` tensor whose shape is looked up in ``INPUT_SHAPES``.

    Args:
        graph: Graph whose ``inputs`` provide the input names.
        num_of_batches: Number of samples to generate; must be at least 1.

    Returns:
        A ``(dataset, sample)`` pair: the list of generated samples and the
        last sample of that list.

    Raises:
        ValueError: If ``num_of_batches`` is smaller than 1, because there
            would be no last sample to return.
        KeyError: If a graph input name has no entry in ``INPUT_SHAPES``.
    """
    if num_of_batches < 1:
        raise ValueError(
            f'num_of_batches must be at least 1, got {num_of_batches}.')
    dataset = [
        {name: torch.rand(INPUT_SHAPES[name]) for name in graph.inputs}
        for _ in range(num_of_batches)
    ]
    return dataset, dataset[-1]  # last sample
def collate_fn(batch: dict) -> dict:
    """Move every value of a calibration sample onto ``DEVICE``.

    Args:
        batch: One sample, a dict whose values support ``.to(device)``.

    Returns:
        A new dict with the same keys whose values were moved to ``DEVICE``.
    """
    return {k: v.to(DEVICE) for k, v in batch.items()}
# ------------------------------------------------------------
# As before, a QuantizationSetting object is created to manage the
# quantization process. The dispatching method is switched to
# 'conservative' and PPQ is asked to run quantization fine-tuning (LSQ).
# ------------------------------------------------------------
QSetting = QuantizationSettingFactory.default_setting()
QSetting.dispatcher = 'conservative'
QSetting.lsq_optimization = True
# ------------------------------------------------------------
# With the QuantizationSetting ready, load the model and let PPQ dispatch
# the graph according to the configured rules.
# ------------------------------------------------------------
graph = dispatch_graph(
    graph=load_onnx_graph(onnx_import_file=ONNX_PATH),
    platform=QUANT_PLATFORM,
    setting=QSetting)

# Every graph input needs a declared shape, otherwise no calibration tensor
# can be generated for it.
for name in graph.inputs:
    if name in INPUT_SHAPES:
        continue
    raise KeyError(f'Graph Input {name} needs a valid shape.')
# ------------------------------------------------------------
# Generate the dataset required for calibration, then start the network
# quantization task.
# ------------------------------------------------------------
calibration_dataset, sample = generate_calibration_dataset(graph)
quantized = quantize_onnx_model(
    onnx_import_file=ONNX_PATH,
    calib_dataloader=calibration_dataset,
    # Derive the step count from the dataset instead of repeating its size,
    # so the two cannot drift apart when the dataset size is changed.
    calib_steps=len(calibration_dataset),
    # No static input shape is given; one collated sample is passed instead.
    input_shape=None,
    inputs=collate_fn(sample),
    setting=QSetting,
    collate_fn=collate_fn,
    platform=QUANT_PLATFORM,
    device=DEVICE,
    verbose=0)
# ------------------------------------------------------------
# Once PPQ has quantized the network, keep the outputs produced by the PPQ
# executor. At the end of this example they are compared against the
# outputs of onnxruntime.
# ------------------------------------------------------------
# Bind the executor to DEVICE explicitly so it runs on the same device that
# collate_fn moves the inputs to.
executor = TorchExecutor(graph=quantized, device=DEVICE)
reference_outputs = [
    executor.forward(collate_fn(sample)) for sample in calibration_dataset
]
# ------------------------------------------------------------
# Run the graph-wise error analysis, then export the computing graph.
# ------------------------------------------------------------
graphwise_error_analyse(
    graph=quantized, running_device=DEVICE,
    collate_fn=collate_fn, dataloader=calibration_dataset)

# Make sure the destination folder exists in case the exporter does not
# create it; a bare file name has no folder part, so nothing is created then.
output_dir = os.path.dirname(ONNX_OUTPUT_PATH)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

# Export to ONNX_OUTPUT_PATH rather than a repeated literal, so the exported
# file is always the one the onnxruntime session loads.
export_ppq_graph(graph=quantized, platform=TargetPlatform.ONNXRUNTIME,
                 graph_save_to=ONNX_OUTPUT_PATH)
# -----------------------------------------
# Finally, start onnxruntime and compare its results with those of PPQ.
# -----------------------------------------
try:
    import onnxruntime
except ImportError as e:
    # Chain the original error so the real import failure stays visible.
    raise ImportError('Onnxruntime is not installed.') from e

sess = onnxruntime.InferenceSession(ONNX_OUTPUT_PATH, providers=['CUDAExecutionProvider'])

# Resolve the output names once: the same order is used to request outputs
# from the session and to index both result lists below.
name_of_output = list(graph.outputs)
onnxruntime_outputs = [
    sess.run(
        output_names=name_of_output,
        input_feed={k: convert_any_to_numpy(v) for k, v in sample.items()})
    for sample in calibration_dataset
]

for oidx, output in enumerate(name_of_output):
    # Stack the per-sample results of one output along a new leading axis so
    # the error is computed over the whole calibration set at once.
    y_pred = torch.stack([
        convert_any_to_torch_tensor(reference_output[oidx], device='cpu')
        for reference_output in reference_outputs])
    y_real = torch.stack([
        convert_any_to_torch_tensor(onnxruntime_output[oidx], device='cpu')
        for onnxruntime_output in onnxruntime_outputs])
    snr_error = torch_snr_error(y_pred=y_pred, y_real=y_real).item()
    print(f'Simulating Error For {output}: {snr_error:.4f}')
ONNX_PATH = 'model.onnx'
说明:这里的 model.onnx 就是 1 中的 quantized(onnx).onnx,只是修改了文件名。
result
# python quantize.py
____ ____ __ ____ __ __
/ __ \/ __ \/ / / __ \__ ______ _____ / /_____ ____ / /
/ /_/ / /_/ / / / / / / / / / __ `/ __ \/ __/ __ \/ __ \/ /
/ ____/ ____/ /__/ /_/ / /_/ / /_/ / / / / /_/ /_/ / /_/ / /
/_/ /_/ /_____\___\_\__,_/\__,_/_/ /_/\__/\____/\____/_/
[07:52:10] PPQ Quantization Config Refine Pass Running ... Finished.
[07:52:10] PPQ Quantization Fusion Pass Running ... Finished.
[07:52:10] PPQ Quantize Simplify Pass Running ... Finished.
[07:52:10] PPQ Parameter Quantization Pass Running ... Finished.
Calibration Progress(Phase 1): 100%|████████████████████████████████████████████████████| 32/32 [00:02<00:00, 12.33it/s]
[07:52:10] PPQ Runtime Calibration Pass Running ... Finished.
[07:52:13] PPQ Quantization Alignment Pass Running ... Finished.
[07:52:13] PPQ Passive Parameter Quantization Running ... Finished.
[07:52:13] PPQ LSQ Optimization Running ...
Check following parameters:
Is Scale Trainable: True
Interested Layers: []
Collecting Device: cuda
Num of blocks: 18
Learning Rate: 1e-05
Steps: 500
Gamma: 0.0
# Block [1 / 18]: [Conv_0 -> Conv_8]
/opt/conda/lib/python3.8/site-packages/torch/optim/adam.py:48: UserWarning: optimizer contains a parameter group with duplicate parameters; in future, this will cause an error; see github.com/pytorch/pytorch/issues/40967 for more information
super(Adam, self).__init__(params, defaults)
# Tuning Procedure : 100%|███████████████████████████████████████████████████████████| 500/500 [00:02<00:00, 184.66it/s]
# Tuning Finished : (0.0005 -> 0.0005) [Block Loss]
# Block [2 / 18]: [Conv_9 -> Conv_17]
# Tuning Procedure : 100%|███████████████████████████████████████████████████████████| 500/500 [00:02<00:00, 193.81it/s]
# Tuning Finished : (0.0014 -> 0.0010) [Block Loss]
# Block [3 / 18]: [Conv_18 -> Conv_26]
# Tuning Procedure : 100%|███████████████████████████████████████████████████████████| 500/500 [00:02<00:00, 198.41it/s]
# Tuning Finished : (0.0026 -> 0.0024) [Block Loss]
# Block [4 / 18]: [Conv_28 -> Conv_36]
# Tuning Procedure : 100%|███████████████████████████████████████████████████████████| 500/500 [00:02<00:00, 195.62it/s]
# Tuning Finished : (0.0014 -> 0.0014) [Block Loss]
# Block [5 / 18]: [Conv_37 -> Conv_45]
# Tuning Procedure : 100%|███████████████████████████████████████████████████████████| 500/500 [00:02<00:00, 196.87it/s]
# Tuning Finished : (0.0010 -> 0.0009) [Block Loss]
# Block [6 / 18]: [Conv_47 -> Conv_55]
# Tuning Procedure : 100%|███████████████████████████████████████████████████████████| 500/500 [00:02<00:00, 193.48it/s]
# Tuning Finished : (0.0011 -> 0.0010) [Block Loss]
# Block [7 / 18]: [Conv_57 -> Conv_65]
# Tuning Procedure : 100%|███████████████████████████████████████████████████████████| 500/500 [00:02<00:00, 197.79it/s]
# Tuning Finished : (0.0008 -> 0.0008) [Block Loss]
# Block [8 / 18]: [Conv_66 -> Conv_74]
# Tuning Procedure : 100%|███████████████████████████████████████████████████████████| 500/500 [00:02<00:00, 197.68it/s]
# Tuning Finished : (0.0003 -> 0.0003) [Block Loss]
# Block [9 / 18]: [Conv_76 -> Conv_84]
# Tuning Procedure : 100%|███████████████████████████████████████████████████████████| 500/500 [00:02<00:00, 194.42it/s]
# Tuning Finished : (0.0004 -> 0.0004) [Block Loss]
# Block [10 / 18]: [Conv_86 -> Conv_94]
# Tuning Procedure : 100%|███████████████████████████████████████████████████████████| 500/500 [00:02<00:00, 197.52it/s]
# Tuning Finished : (0.0030 -> 0.0008) [Block Loss]
# Block [11 / 18]: [Conv_96 -> Conv_104]
# Tuning Procedure : 100%|███████████████████████████████████████████████████████████| 500/500 [00:02<00:00, 198.10it/s]
# Tuning Finished : (0.0006 -> 0.0005) [Block Loss]
# Block [12 / 18]: [Conv_105 -> Conv_113]
# Tuning Procedure : 100%|███████████████████████████████████████████████████████████| 500/500 [00:02<00:00, 197.65it/s]
# Tuning Finished : (0.0020 -> 0.0007) [Block Loss]
# Block [13 / 18]: [Conv_115 -> Conv_123]
# Tuning Procedure : 100%|███████████████████████████████████████████████████████████| 500/500 [00:02<00:00, 197.24it/s]
# Tuning Finished : (0.0137 -> 0.0022) [Block Loss]
# Block [14 / 18]: [Conv_125 -> Conv_133]
# Tuning Procedure : 100%|███████████████████████████████████████████████████████████| 500/500 [00:02<00:00, 197.13it/s]
# Tuning Finished : (0.0026 -> 0.0017) [Block Loss]
# Block [15 / 18]: [Conv_134 -> Conv_142]
# Tuning Procedure : 100%|███████████████████████████████████████████████████████████| 500/500 [00:02<00:00, 198.01it/s]
# Tuning Finished : (0.0027 -> 0.0020) [Block Loss]
# Block [16 / 18]: [Conv_144 -> Conv_152]
# Tuning Procedure : 100%|███████████████████████████████████████████████████████████| 500/500 [00:02<00:00, 198.85it/s]
# Tuning Finished : (0.0140 -> 0.0086) [Block Loss]
# Block [17 / 18]: [Conv_154 -> Conv_162]
# Tuning Procedure : 100%|███████████████████████████████████████████████████████████| 500/500 [00:02<00:00, 198.94it/s]
# Tuning Finished : (0.0017 -> 0.0012) [Block Loss]
# Block [18 / 18]: [Conv_163 -> Gemm_169]
# Tuning Procedure : 100%|███████████████████████████████████████████████████████████| 500/500 [00:01<00:00, 419.33it/s]
# Tuning Finished : (0.0106 -> 0.0086) [Block Loss]
Finished.
[07:53:09] PPQ Passive Parameter Quantization Running ... Finished.
[07:53:09] PPQ Parameter Baking Pass Running ... Finished.
--------- Network Snapshot ---------
Num of Op: [100]
Num of Quantized Op: [52]
Num of Variable: [277]
Num of Quantized Var: [201]
------- Quantization Snapshot ------
Num of Quant Config: [208]
BAKED: [52]
ACTIVATED: [52]
FP32: [104]
Network Quantization Finished.
Analysing Graphwise Quantization Error(Phrase 1):: 100%|██████████████████████████████████| 8/8 [00:00<00:00, 9.49it/s]
Analysing Graphwise Quantization Error(Phrase 2):: 100%|██████████████████████████████████| 8/8 [00:00<00:00, 8.19it/s]
Layer | NOISE:SIGNAL POWER RATIO
Conv_144: | ████████████████████ | 0.026488
Conv_162: | ████████████████████ | 0.026420
Conv_119: | ████████████████████ | 0.026252
Conv_115: | ███████████████████ | 0.025719
Conv_154: | ██████████████████ | 0.023327
Conv_163: | █████████████████ | 0.022909
Conv_123: | █████████████████ | 0.022655
Conv_142: | █████████████████ | 0.022324
Conv_152: | ████████████████ | 0.021033
Conv_148: | ████████████████ | 0.020950
Conv_134: | ███████████████ | 0.020297
Conv_113: | ███████████████ | 0.019930
Conv_125: | ███████████████ | 0.019700
Conv_90: | ███████████████ | 0.019612
Conv_138: | ███████████████ | 0.019494
Conv_133: | ███████████████ | 0.019315
Conv_55: | ██████████████ | 0.019215
Conv_86: | ███████████ | 0.014120
Conv_129: | ██████████ | 0.013777
Conv_26: | ███████ | 0.009576
Conv_105: | ███████ | 0.009439
Conv_104: | ███████ | 0.009423
Conv_94: | ███████ | 0.009185
Conv_109: | ███████ | 0.008984
Conv_45: | ███████ | 0.008888
Conv_51: | █████ | 0.006976
Conv_41: | █████ | 0.006354
Conv_57: | ████ | 0.005857
Conv_96: | ████ | 0.005114
Conv_28: | ████ | 0.005098
Conv_84: | ████ | 0.005052
Conv_22: | ████ | 0.004921
Conv_36: | ████ | 0.004893
Conv_100: | ███ | 0.004590
Conv_70: | ███ | 0.004514
Conv_80: | ███ | 0.004059
Conv_17: | ███ | 0.003926
Conv_158: | ███ | 0.003892
Conv_65: | ███ | 0.003821
Conv_13: | ███ | 0.003791
Conv_47: | ███ | 0.003612
Conv_32: | ██ | 0.002906
Conv_66: | ██ | 0.002527
Conv_8: | ██ | 0.002344
Conv_74: | ██ | 0.002325
Conv_76: | █ | 0.002075
Conv_37: | █ | 0.001861
Conv_9: | █ | 0.001543
Conv_61: | █ | 0.001336
Conv_18: | █ | 0.001333
Conv_4: | | 0.000348
Conv_0: | | 0.000230
Simulating Error For 536: 0.0042
bug
AttributeError:'NoneType' object has no attribute 'clone'
一开始的安装方式是
python3 -m pip install ppq
会报上面的错误,解决方式:
pip uninstall ppq
git clone https://github.com/openppl-public/ppq.git
cd ppq
pip install -r requirements.txt
python setup.py install