模型量化-debug工具-eager模式

一、定义

  1. 目的
  2. 量化错误类型
  3. 静态量化 debug 工具
  4. 动态量化debug 工具
  5. 接口解读
  6. 量化常见错误

二、实现

  1. 目的
    QAT 模型相比浮点模型、或者定点模型相比浮点模型掉点较多的话,可以使用相似度对比工具比较模型中每一层输出的相似度,快速定位到是具体哪一层或者哪一个 op 导致掉点严重。
  2. 量化错误类型
    1. 数据不敏感误差-由固有模型量化误差引起,大部分输入数据误差较大
    2. 数据敏感误差-由异常输入数据引起,小部分输入数据误差较大
    3. 实现错误-量化内核与参考实现不匹配
  3. 静态量化 debug 工具
    1. 比较量化模型与浮点模型每一层的权重
    2. 比较相应位置的浮点模型和量化模型
    3. 模块的比较,相同的输入
      3.1 比较每一层权重
import numpy as np
import torch
import torch.nn as nn
import torchvision
from torchvision import models, datasets
import torchvision.transforms as transforms
import os
import torch.quantization
import torch.quantization._numeric_suite as ns
from torch.quantization import (
    default_eval_fn,
    default_qconfig,
    quantize,
)
# Load the pretrained float ResNet-18 (torchvision's quantizable variant,
# kept in float via quantize=False) and get it ready for eager-mode
# post-training static quantization.
float_model = torchvision.models.quantization.resnet18(
    weights=models.ResNet18_Weights.IMAGENET1K_V1,
    quantize=False,
)
float_model.to('cpu')
float_model.eval()
float_model.fuse_model()
float_model.qconfig = torch.quantization.default_qconfig

# Two batches of random (input, label) pairs handed to default_eval_fn.
# The labels are drawn from [0, 1), i.e. they are always 0.
img_data = [
    (
        torch.rand(2, 3, 10, 10, dtype=torch.float),
        torch.randint(0, 1, (2,), dtype=torch.long),
    )
    for _ in range(2)
]

# inplace=False returns a separate quantized model and keeps float_model.
qmodel = quantize(float_model, default_eval_fn, [img_data], inplace=False)

def compute_error(x, y):
    """Return the squared difference between the norms of ``x`` and ``y``.

    Each tensor is reduced to a scalar with ``torch.norm`` and the two
    scalars are compared with a mean-reduced MSE loss. Only the norms are
    compared, so tensors with equal norm give an error of zero.
    """
    norm_x = torch.norm(x)
    norm_y = torch.norm(y)
    criterion = torch.nn.MSELoss(reduction="mean")
    return criterion(norm_x, norm_y)


# Pair every float weight with its quantized counterpart.
wt_compare_dict = ns.compare_weights(float_model.state_dict(), qmodel.state_dict())
for key, weights in wt_compare_dict.items():
    # Bring the quantized weight back to float before measuring the error.
    dequantized = weights['quantized'].dequantize()
    print(key, compute_error(weights['float'], dequantized))

# Run inference with the quantized model on the first batch.
res = qmodel(img_data[0][0])
print(res)

3.2 比较相应位置的浮点模型和量化模型
对同一输入在相应位置的浮点模型和量化模型之间的权重和激活进行比较

import torch
import torch.nn as nn
import torchvision
from torchvision import models, datasets
import torchvision.transforms as transforms
import os
import torch.quantization
import torch.quantization._numeric_suite as ns
from torch.quantization import (
    default_eval_fn,
    default_qconfig,
    quantize,
)

# Pretrained float ResNet-18 (torchvision's quantizable variant); quantize=False
# keeps it in floating point.
float_model = torchvision.models.quantization.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1, quantize=False)
float_model.to('cpu')
float_model.eval()
# Fusion happens before quantization, on the model already in eval mode.
float_model.fuse_model()
float_model.qconfig = torch.quantization.default_qconfig
# Two batches of random (input, label) pairs; labels come from [0, 1), so they are always 0.
img_data = [(torch.rand(2, 3, 10, 10, dtype=torch.float), torch.randint(0, 1, (2,), dtype=torch.long)) for _ in range(2)]

# img_data is handed to default_eval_fn; inplace=False leaves float_model as the float reference.
qmodel = quantize(float_model, default_eval_fn, [img_data], inplace=False)


def compute_error(x, y):
    """Return the squared difference between the norms of ``x`` and ``y``.

    Each tensor is reduced to a scalar with ``torch.norm`` and the two
    scalars are compared with a mean-reduced MSE loss. Only the norms are
    compared, so tensors with equal norm give an error of zero.
    """
    norm_x = torch.norm(x)
    norm_y = torch.norm(y)
    criterion = torch.nn.MSELoss(reduction="mean")
    return criterion(norm_x, norm_y)
data = img_data[0][0]

# Collect the activations of matching layers in both models for one input.
act_compare_dict = ns.compare_model_outputs(float_model, qmodel, data)

for key, acts in act_compare_dict.items():
    float_act = acts['float'][0]
    quant_act = acts['quantized'][0].dequantize()
    print(key, compute_error(float_act, quant_act))

批量数据进行校对

# Compare activations over several batches: attach the loggers once, run
# every batch through both models, then read the matched activations back.
ns.prepare_model_outputs(float_model, qmodel)
for data in img_data:
    batch = data[0]
    float_model(batch)
    qmodel(batch)

act_compare_dict = ns.get_matching_activations(float_model, qmodel)
for key, acts in act_compare_dict.items():
    float_act = acts['float'][0]
    quant_act = acts['quantized'][0].dequantize()
    print(key, compute_error(float_act, quant_act))

添加日志

class MyOutputLogger(ns.Logger):
    """Template for a custom numeric-suite logger.

    ``forward`` returns its input unchanged; custom recording logic is
    meant to be added there.
    """

    def __init__(self):
        super().__init__()

    def forward(self, x):
        # Placeholder for custom logging; nothing is recorded yet.
        # NOTE(review): presumably forward must populate self.stats for the
        # comparison helpers to find anything -- confirm against ns.OutputLogger.
        return x

# Batched comparison again, this time attaching the custom logger class.
ns.prepare_model_outputs(float_model, qmodel, MyOutputLogger)
for data in img_data:
    batch = data[0]
    float_model(batch)
    qmodel(batch)

act_compare_dict = ns.get_matching_activations(float_model, qmodel)
for key, acts in act_compare_dict.items():
    float_act = acts['float'][0]
    quant_act = acts['quantized'][0].dequantize()
    print(key, compute_error(float_act, quant_act))
class MyOutputLogger(ns.Logger):
    """Template for a custom numeric-suite logger.

    ``forward`` returns its input unchanged; custom recording logic is
    meant to be added there.
    """

    def __init__(self):
        super().__init__()

    def forward(self, x):
        # Placeholder for custom logging; nothing is recorded yet.
        # NOTE(review): presumably forward must populate self.stats for the
        # comparison helpers to find anything -- confirm against ns.OutputLogger.
        return x

# Single-input comparison that passes the custom logger class by keyword.
data = img_data[0][0]
act_compare_dict = ns.compare_model_outputs(float_model, qmodel, data, logger_cls=MyOutputLogger)
for key, acts in act_compare_dict.items():
    float_act = acts['float'][0]
    quant_act = acts['quantized'][0].dequantize()
    print(key, compute_error(float_act, quant_act))
3.3 模块的比较,相同的输入
import torch
import torch.nn as nn
import torchvision
from torchvision import models, datasets
import torchvision.transforms as transforms
import os
import torch.quantization
import torch.quantization._numeric_suite as ns
from torch.quantization import (
    default_eval_fn,
    default_qconfig,
    quantize,
)

# Load the pretrained float ResNet-18 (torchvision's quantizable variant,
# kept in float via quantize=False) and get it ready for eager-mode
# post-training static quantization.
float_model = torchvision.models.quantization.resnet18(
    weights=models.ResNet18_Weights.IMAGENET1K_V1,
    quantize=False,
)
float_model.to('cpu')
float_model.eval()
float_model.fuse_model()
float_model.qconfig = torch.quantization.default_qconfig

# Two batches of random (input, label) pairs handed to default_eval_fn.
# The labels are drawn from [0, 1), i.e. they are always 0.
img_data = [
    (
        torch.rand(2, 3, 10, 10, dtype=torch.float),
        torch.randint(0, 1, (2,), dtype=torch.long),
    )
    for _ in range(2)
]

# inplace=False returns a separate quantized model and keeps float_model.
qmodel = quantize(float_model, default_eval_fn, [img_data], inplace=False)


def compute_error(x, y):
    """Return the squared difference between the norms of ``x`` and ``y``.

    Each tensor is reduced to a scalar with ``torch.norm`` and the two
    scalars are compared with a mean-reduced MSE loss. Only the norms are
    compared, so tensors with equal norm give an error of zero.
    """
    norm_x = torch.norm(x)
    norm_y = torch.norm(y)
    criterion = torch.nn.MSELoss(reduction="mean")
    return criterion(norm_x, norm_y)



data = img_data[0][0]
# Module types whose float and quantized versions are compared on the same input.
module_swap_list = [torchvision.models.quantization.resnet.QuantizableBasicBlock]

ob_dict = ns.compare_model_stub(float_model, qmodel, module_swap_list, data)

for key, outputs in ob_dict.items():
    float_out = outputs['float'][0]
    quant_out = outputs['quantized'][0].dequantize()
    print(key, compute_error(float_out, quant_out))

多个输入:

# Multi-batch variant: prepare the stubbed model once with ShadowLogger,
# feed every batch through the quantized model, then read the logged outputs.
data = img_data[0][0]
module_swap_list = [torchvision.models.quantization.resnet.QuantizableBasicBlock]

ns.prepare_model_with_stubs(float_model, qmodel, module_swap_list, ns.ShadowLogger)
for data in img_data:
    qmodel(data[0])
ob_dict = ns.get_logger_dict(qmodel)

for key, outputs in ob_dict.items():
    float_out = outputs['float'][0]
    quant_out = outputs['quantized'][0].dequantize()
    print(key, compute_error(float_out, quant_out))

  4. 动态量化 debug 工具
    只对 LSTM、Linear 进行动态量化
import torch.nn as nn
import torchvision
from torchvision import models, datasets
import torchvision.transforms as transforms
import os
import torch.quantization
import torch.quantization._numeric_suite as ns
from torch.quantization import (
    default_eval_fn,
    default_qconfig,
    quantize,
)

class LSTMModel(nn.Module):
    """Embedding encoder, stacked LSTM and linear decoder chained together."""

    def __init__(self, ntoken, ninp, nhid, nlayers, dropout=0.5):
        super().__init__()
        self.encoder = nn.Embedding(ntoken, ninp)
        self.rnn = nn.LSTM(ninp, nhid, nlayers, dropout=dropout)
        self.decoder = nn.Linear(nhid, ntoken)

        self.init_weights()

        self.nhid = nhid
        self.nlayers = nlayers

    def init_weights(self):
        """Re-initialise encoder/decoder weights uniformly in [-0.1, 0.1] and zero the decoder bias."""
        bound = 0.1
        nn.init.uniform_(self.encoder.weight, -bound, bound)
        nn.init.zeros_(self.decoder.bias)
        nn.init.uniform_(self.decoder.weight, -bound, bound)

    def forward(self, input, hidden):
        """Embed ``input``, run the LSTM from ``hidden``, and decode every step.

        Returns the decoder output together with the updated hidden state.
        """
        embedded = self.encoder(input)
        rnn_out, hidden = self.rnn(embedded, hidden)
        return self.decoder(rnn_out), hidden

    def init_hidden(self, bsz):
        """Return a zero ``(h_0, c_0)`` pair for batch size ``bsz``.

        The tensors are created from the first model parameter, so they share
        its dtype and device.
        """
        reference = next(self.parameters())
        state_shape = (self.nlayers, bsz, self.nhid)
        return reference.new_zeros(state_shape), reference.new_zeros(state_shape)

ntokens = 10

# Float reference model, switched to eval mode before quantization.
float_model = LSTMModel(ntoken=ntokens, ninp=512, nhid=256, nlayers=5)
float_model.eval()
print(float_model)

# Dynamically quantize only the LSTM and Linear modules, using qint8.
dynamic_targets = {nn.LSTM, nn.Linear}
qmodel = torch.quantization.quantize_dynamic(float_model, dynamic_targets, dtype=torch.qint8)
print(qmodel)
  4.1 权重对比
def compute_error(x, y):
    """Return the squared difference between the norms of ``x`` and ``y``.

    Each tensor is reduced to a scalar with ``torch.norm`` and the two
    scalars are compared with a mean-reduced MSE loss. Only the norms are
    compared, so tensors with equal norm give an error of zero.
    """
    norm_x = torch.norm(x)
    norm_y = torch.norm(y)
    criterion = torch.nn.MSELoss(reduction="mean")
    return criterion(norm_x, norm_y)


wt_compare_dict = ns.compare_weights(float_model.state_dict(), qmodel.state_dict())
for key, weights in wt_compare_dict.items():
    candidate = weights['quantized']
    # Some entries are plain float tensors; dequantize only the quantized ones.
    if candidate.is_quantized:
        candidate = candidate.dequantize()
    print(key, compute_error(weights['float'], candidate))
  4.2 特定位置对比
def compute_error(x, y):
    """Return the squared difference between the norms of ``x`` and ``y``.

    Each tensor is reduced to a scalar with ``torch.norm`` and the two
    scalars are compared with a mean-reduced MSE loss. Only the norms are
    compared, so tensors with equal norm give an error of zero.
    """
    norm_x = torch.norm(x)
    norm_y = torch.norm(y)
    criterion = torch.nn.MSELoss(reduction="mean")
    return criterion(norm_x, norm_y)


# One random token id plus a zero initial hidden state for batch size 1.
input_ = torch.randint(ntokens, (1, 1), dtype=torch.long)
hidden = float_model.init_hidden(1)

act_compare_dict = ns.compare_model_outputs(float_model, qmodel, input_, hidden)

for key, acts in act_compare_dict.items():
    float_act = acts['float'][0][0]
    quant_act = acts['quantized'][0][0]
    print(key, compute_error(float_act, quant_act))
  4.3 模块 debug
def compute_error(x, y):
    """Return the squared difference between the norms of ``x`` and ``y``.

    Each tensor is reduced to a scalar with ``torch.norm`` and the two
    scalars are compared with a mean-reduced MSE loss. Only the norms are
    compared, so tensors with equal norm give an error of zero.
    """
    norm_x = torch.norm(x)
    norm_y = torch.norm(y)
    criterion = torch.nn.MSELoss(reduction="mean")
    return criterion(norm_x, norm_y)


# One random token id plus a zero initial hidden state for batch size 1.
input_ = torch.randint(ntokens, (1, 1), dtype=torch.long)
hidden = float_model.init_hidden(1)

# Module types whose float and quantized versions are compared.
module_swap_list = [nn.Linear, nn.LSTM]
ob_dict = ns.compare_model_stub(float_model, qmodel, module_swap_list, input_, hidden)
for key, outputs in ob_dict.items():
    float_out = outputs['float'][0]
    quant_out = outputs['quantized'][0]
    print(key, compute_error(float_out, quant_out))
  5. 接口解读
    接口:https://pytorch.org/docs/stable/torch.ao.ns._numeric_suite.html#
    (原文此处为接口说明的图片,图片缺失)
  6. 量化常见错误
    6.1 RuntimeError: Could not run 'quantized::some_operator' with arguments from the 'CPU' backend…
    该报错表示把非量化(浮点)的张量传给了量化内核。
    一个常见的解决方法是使用 torch.ao.quantization.QuantStub 先对张量进行量化;相应地,torch.ao.quantization.DeQuantStub 用于对张量进行去量化。
class Net(torch.nn.Module):
    """Small eager-mode quantization example: quant stub, two 1x1 convs, dequant stub."""

    def __init__(self):
        super().__init__()
        self.quant = torch.ao.quantization.QuantStub()
        self.conv1 = torch.nn.Conv2d(1, 1, 1)
        # Both convolutions sit between the quant and dequant stubs, and no
        # per-module `qconfig = None` override is set inside this class.
        self.conv2 = torch.nn.Conv2d(1, 1, 1)
        self.dequant = torch.ao.quantization.DeQuantStub()

    def forward(self, x):
        # During the convert step the quant stub is replaced with a
        # `quantize_per_tensor` call.
        quantized = self.quant(x)
        features = self.conv2(self.conv1(quantized))
        # During the convert step the dequant stub is replaced with a
        # `dequantize` call.
        return self.dequant(features)

net = Net()

# `quant` is not defined anywhere in this file, so the quantization API is
# referenced by its full path, as the Net class already does.
net.qconfig = torch.ao.quantization.get_default_qconfig('fbgemm')
# Prepare the model in place for the calibration pass.
torch.ao.quantization.prepare(net, inplace=True)
def print_size_of_model(model):
    """Print the on-disk size, in MB, of ``model``'s serialized state dict.

    The state dict is saved to the scratch file ``temp.p`` in the current
    working directory, its size is printed, and the file is removed again.
    The file is also removed when saving or measuring raises, so a failed
    call does not leave it behind.
    """
    scratch_path = "temp.p"
    state = model.state_dict()
    try:
        torch.save(state, scratch_path)
        print('Size (MB):', os.path.getsize(scratch_path) / 1e6)
    finally:
        # torch.save may fail after the file has been created, and it may
        # also fail before; remove the file only if it is actually there.
        if os.path.exists(scratch_path):
            os.remove(scratch_path)
# Size of the prepared, not yet converted, model.
print_size_of_model(net)

# Calibration pass on random data.
net(torch.randn(4, 1, 4, 4))
# `quant` is not defined anywhere in this file, so the API is referenced by
# its full path. inplace=False returns the converted model as a new object
# and leaves `net` as the prepared model.
net1 = torch.ao.quantization.convert(net, inplace=False)
print_size_of_model(net)   # still the prepared model
print_size_of_model(net1)  # the converted (quantized) model

print(net1(torch.randn(4, 1, 4, 4)))
print(net1)

6.2 将量化张量传入非量化内核
RuntimeError: Could not run 'aten::thnn_conv2d_forward' with arguments from the 'QuantizedCPU' backend.
该报错表示把量化张量传给了非量化内核,需要使用 torch.ao.quantization.DeQuantStub() 手动对张量去量化。
参考:
https://developer.horizon.ai/api/v1/fileData/horizon_j5_open_explorer_v1_1_33_cn_doc/plugin/source/tutorials/quant_profiler.html#id5
https://pytorch.org/tutorials/prototype/numeric_suite_tutorial.html#compare-the-weights-of-float-and-quantized-models
https://pytorch.org/docs/stable/quantization-accuracy-debugging.html

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值