模型量化-debug工具-eager模式

云帆@

于 2024-08-07 10:06:42 发布

阅读量836

点赞数 11

分类专栏：量化、分析工具　文章标签：pytorch、深度学习、人工智能

本文链接：https://blog.csdn.net/weixin_40777649/article/details/140979974

版权

量化同时被 2 个专栏收录

10 篇文章 0 订阅

订阅专栏

分析工具

8 篇文章 0 订阅

订阅专栏

一、定义

目的
量化错误类型
静态量化 debug 工具
动态量化debug 工具
接口解读
量化常见错误

二、实现

目的
QAT 模型相比浮点模型、或者定点模型相比浮点模型掉点较多的话，可以使用相似度对比工具比较模型中每一层输出的相似度，快速定位到是具体哪一层或者哪一个 op 导致掉点严重。
量化错误类型
1. 数据不敏感误差-由固有模型量化误差引起，大部分输入数据误差较大
2. 数据敏感误差-由异常输入数据引起，小部分输入数据误差较大
3. 实现错误-量化内核与参考实现不匹配
静态量化 debug 工具
1. 比较量化模型与浮点模型每一层的权重
2. 比较相应位置的浮点模型和量化模型
3. 模块的比较，相同的输入
  3.1 比较每一层权重

import numpy as np
import torch
import torch.nn as nn
import torchvision
from torchvision import models, datasets
import torchvision.transforms as transforms
import os
import torch.quantization
import torch.quantization._numeric_suite as ns
from torch.quantization import (
    default_eval_fn,
    default_qconfig,
    quantize,
)
# Load the quantization-ready ResNet-18 as a float model on CPU, switch it to
# eval mode, fuse its modules and attach the default qconfig.
float_model = torchvision.models.quantization.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1, quantize=False)
float_model.to('cpu')
float_model.eval()
float_model.fuse_model()
float_model.qconfig = torch.quantization.default_qconfig
# Two random (input, label) batches used as calibration data.
img_data = [(torch.rand(2, 3, 10, 10, dtype=torch.float), torch.randint(0, 1, (2,), dtype=torch.long)) for _ in range(2)]

# Calibrate with default_eval_fn on img_data and build the quantized model
# (inplace=False is passed, so the result is bound to a separate name).
qmodel = quantize(float_model, default_eval_fn, [img_data], inplace=False)
def compute_error(x, y):
    """Return the element-wise mean squared error between ``x`` and ``y``.

    The tensors are compared value by value, so the result is zero only when
    they match everywhere. Comparing just their norms would also report zero
    for tensors that differ but happen to share a norm (e.g. ``x`` and ``-x``).

    Args:
        x: Reference tensor (e.g. a float weight or activation).
        y: Tensor to compare; expected to have the same shape as ``x``.

    Returns:
        A scalar tensor; lower means the two tensors are closer.
    """
    return torch.nn.MSELoss(reduction="mean")(x, y)


# Pair up float and quantized weights from the two state dicts, then print the
# error of every pair (the quantized side is dequantized first).
wt_compare_dict = ns.compare_weights(float_model.state_dict(), qmodel.state_dict())
for key, weights in wt_compare_dict.items():
    float_weight = weights['float']
    quantized_weight = weights['quantized'].dequantize()
    print(key, compute_error(float_weight, quantized_weight))

# Run inference with the quantized model on the first input batch.
res = qmodel(img_data[0][0])
print(res)

3.2. 比较相应位置的浮点模型和量化模型
对同一输入在相应位置的浮点模型和量化模型之间的权重和激活进行比较

import torch
import torch.nn as nn
import torchvision
from torchvision import models, datasets
import torchvision.transforms as transforms
import os
import torch.quantization
import torch.quantization._numeric_suite as ns
from torch.quantization import (
    default_eval_fn,
    default_qconfig,
    quantize,
)

# Float baseline: quantization-ready ResNet-18 with ImageNet weights.
float_model = torchvision.models.quantization.resnet18(
    weights=models.ResNet18_Weights.IMAGENET1K_V1,
    quantize=False,
)
float_model.to('cpu').eval()
float_model.fuse_model()
float_model.qconfig = torch.quantization.default_qconfig

# Two random (input, label) batches used as calibration data.
img_data = [
    (
        torch.rand(2, 3, 10, 10, dtype=torch.float),
        torch.randint(0, 1, (2,), dtype=torch.long),
    )
    for _ in range(2)
]

# Calibrate with default_eval_fn on img_data and build the quantized model.
qmodel = quantize(float_model, default_eval_fn, [img_data], inplace=False)


def compute_error(x, y):
    """Return the element-wise mean squared error between ``x`` and ``y``.

    The tensors are compared value by value, so the result is zero only when
    they match everywhere. Comparing just their norms would also report zero
    for tensors that differ but happen to share a norm (e.g. ``x`` and ``-x``).

    Args:
        x: Reference tensor (e.g. a float weight or activation).
        y: Tensor to compare; expected to have the same shape as ``x``.

    Returns:
        A scalar tensor; lower means the two tensors are closer.
    """
    return torch.nn.MSELoss(reduction="mean")(x, y)
# Use the input tensor of the first batch for the comparison.
data = img_data[0][0]

# Feed the same input to both models and collect the activations of matching
# layers; each entry holds a 'float' and a 'quantized' list.
act_compare_dict = ns.compare_model_outputs(float_model, qmodel, data)

for key, acts in act_compare_dict.items():
    float_act = acts['float'][0]
    quantized_act = acts['quantized'][0].dequantize()
    print(key, compute_error(float_act, quantized_act))

批量数据进行校对

# Compare over several batches: attach loggers to both models, run every batch
# through each of them, then collect the matching activations.
ns.prepare_model_outputs(float_model, qmodel)
for data in img_data:
    inputs = data[0]
    float_model(inputs)
    qmodel(inputs)

act_compare_dict = ns.get_matching_activations(float_model, qmodel)
# Only the first recorded activation ([0]) of each layer is compared here.
for key, acts in act_compare_dict.items():
    float_act = acts['float'][0]
    quantized_act = acts['quantized'][0].dequantize()
    print(key, compute_error(float_act, quantized_act))

添加日志

class MyOutputLogger(ns.Logger):
    """Template for a user-defined logger built on ``ns.Logger``.

    As written, ``forward`` only hands its input back; custom recording or
    inspection logic is meant to be added there.
    """

    def __init__(self):
        super().__init__()

    def forward(self, x):
        """Return ``x`` unchanged (placeholder for custom logic)."""
        # Custom functionalities
        # ...
        # NOTE(review): nothing is recorded here; presumably a real logger
        # should store what it sees so it can be read back later - confirm.
        return x

# Compare over several batches, this time attaching MyOutputLogger to both
# models instead of the default logger class.
ns.prepare_model_outputs(float_model, qmodel, MyOutputLogger)
for data in img_data:
    inputs = data[0]
    float_model(inputs)
    qmodel(inputs)

act_compare_dict = ns.get_matching_activations(float_model, qmodel)
# Only the first recorded activation ([0]) of each layer is compared here.
for key, acts in act_compare_dict.items():
    float_act = acts['float'][0]
    quantized_act = acts['quantized'][0].dequantize()
    print(key, compute_error(float_act, quantized_act))

class MyOutputLogger(ns.Logger):
    """Template for a user-defined logger built on ``ns.Logger``.

    As written, ``forward`` only hands its input back; custom recording or
    inspection logic is meant to be added there.
    """

    def __init__(self):
        super().__init__()

    def forward(self, x):
        """Return ``x`` unchanged (placeholder for custom logic)."""
        # Custom functionalities
        # ...
        # NOTE(review): nothing is recorded here; presumably a real logger
        # should store what it sees so it can be read back later - confirm.
        return x

# Single-input comparison using the custom logger class.
data = img_data[0][0]
act_compare_dict = ns.compare_model_outputs(float_model, qmodel, data, logger_cls=MyOutputLogger)
for key, acts in act_compare_dict.items():
    float_act = acts['float'][0]
    quantized_act = acts['quantized'][0].dequantize()
    print(key, compute_error(float_act, quantized_act))

模块的比较，相同的输入

import torch
import torch.nn as nn
import torchvision
from torchvision import models, datasets
import torchvision.transforms as transforms
import os
import torch.quantization
import torch.quantization._numeric_suite as ns
from torch.quantization import (
    default_eval_fn,
    default_qconfig,
    quantize,
)

# Load the quantization-ready ResNet-18 as a float model on CPU, switch it to
# eval mode, fuse its modules and attach the default qconfig.
float_model = torchvision.models.quantization.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1, quantize=False)
float_model.to('cpu')
float_model.eval()
float_model.fuse_model()
float_model.qconfig = torch.quantization.default_qconfig
# Two random (input, label) batches used as calibration data.
img_data = [(torch.rand(2, 3, 10, 10, dtype=torch.float), torch.randint(0, 1, (2,), dtype=torch.long)) for _ in range(2)]

# Calibrate with default_eval_fn on img_data and build the quantized model
# (inplace=False is passed, so the result is bound to a separate name).
qmodel = quantize(float_model, default_eval_fn, [img_data], inplace=False)

def compute_error(x, y):
    """Return the element-wise mean squared error between ``x`` and ``y``.

    The tensors are compared value by value, so the result is zero only when
    they match everywhere. Comparing just their norms would also report zero
    for tensors that differ but happen to share a norm (e.g. ``x`` and ``-x``).

    Args:
        x: Reference tensor (e.g. a float weight or activation).
        y: Tensor to compare; expected to have the same shape as ``x``.

    Returns:
        A scalar tensor; lower means the two tensors are closer.
    """
    return torch.nn.MSELoss(reduction="mean")(x, y)



# Use the input tensor of the first batch for the comparison.
data = img_data[0][0]
# Module types whose quantized and float versions are compared on the same input.
module_swap_list = [torchvision.models.quantization.resnet.QuantizableBasicBlock]

ob_dict = ns.compare_model_stub(float_model, qmodel, module_swap_list, data)

for key, outputs in ob_dict.items():
    float_out = outputs['float'][0]
    quantized_out = outputs['quantized'][0].dequantize()
    print(key, compute_error(float_out, quantized_out))

多个输入：

# Module-level comparison over several input batches.
# Module types whose quantized and float versions are compared on the same input.
module_swap_list = [torchvision.models.quantization.resnet.QuantizableBasicBlock]

# Attach the stubs with ShadowLogger, then run every batch through the
# quantized model before reading the loggers back.
ns.prepare_model_with_stubs(float_model, qmodel, module_swap_list, ns.ShadowLogger)
for data in img_data:
    qmodel(data[0])
ob_dict = ns.get_logger_dict(qmodel)

# Only the first recorded output ([0]) of each module is compared here.
for key in ob_dict:
    print(key, compute_error(ob_dict[key]['float'][0], ob_dict[key]['quantized'][0].dequantize()))

动态量化debug
只对 LSTM、Linear 进行动态量化

import torch.nn as nn
import torchvision
from torchvision import models, datasets
import torchvision.transforms as transforms
import os
import torch.quantization
import torch.quantization._numeric_suite as ns
from torch.quantization import (
    default_eval_fn,
    default_qconfig,
    quantize,
)

class LSTMModel(nn.Module):
    """Embedding encoder, stacked LSTM and linear decoder in one module."""

    def __init__(self, ntoken, ninp, nhid, nlayers, dropout=0.5):
        """Build the three sub-modules and initialise their weights.

        Args:
            ntoken: Number of embeddings and size of the decoder output.
            ninp: Embedding dimension, also the LSTM input size.
            nhid: LSTM hidden size, also the decoder input size.
            nlayers: Number of stacked LSTM layers.
            dropout: Dropout value passed to ``nn.LSTM``.
        """
        super().__init__()
        self.nhid = nhid
        self.nlayers = nlayers

        self.encoder = nn.Embedding(ntoken, ninp)
        self.rnn = nn.LSTM(ninp, nhid, nlayers, dropout=dropout)
        self.decoder = nn.Linear(nhid, ntoken)

        self.init_weights()

    def init_weights(self):
        """Fill encoder/decoder weights uniformly in [-0.1, 0.1]; zero the decoder bias."""
        bound = 0.1
        self.encoder.weight.data.uniform_(-bound, bound)
        self.decoder.bias.data.zero_()
        self.decoder.weight.data.uniform_(-bound, bound)

    def forward(self, input, hidden):
        """Embed ``input``, run the LSTM from ``hidden`` and decode its output.

        Returns:
            A ``(decoded, hidden)`` pair: the decoder output and the state
            returned by the LSTM.
        """
        embedded = self.encoder(input)
        rnn_out, next_hidden = self.rnn(embedded, hidden)
        return self.decoder(rnn_out), next_hidden

    def init_hidden(self, bsz):
        """Return two zero tensors of shape ``(nlayers, bsz, nhid)``.

        They are created with ``new_zeros`` from the module's first parameter.
        """
        reference = next(self.parameters())
        state_shape = (self.nlayers, bsz, self.nhid)
        return reference.new_zeros(state_shape), reference.new_zeros(state_shape)

ntokens = 10

# Float LSTM model used as the reference for the comparisons.
float_model = LSTMModel(ntoken=ntokens, ninp=512, nhid=256, nlayers=5)
float_model.eval()
print(float_model)

# Dynamically quantize only the nn.LSTM and nn.Linear modules, using qint8.
qmodel = torch.quantization.quantize_dynamic(
    float_model,
    {nn.LSTM, nn.Linear},
    dtype=torch.qint8,
)
print(qmodel)

权重对比

def compute_error(x, y):
    """Return the element-wise mean squared error between ``x`` and ``y``.

    The tensors are compared value by value, so the result is zero only when
    they match everywhere. Comparing just their norms would also report zero
    for tensors that differ but happen to share a norm (e.g. ``x`` and ``-x``).

    Args:
        x: Reference tensor (e.g. a float weight or activation).
        y: Tensor to compare; expected to have the same shape as ``x``.

    Returns:
        A scalar tensor; lower means the two tensors are closer.
    """
    return torch.nn.MSELoss(reduction="mean")(x, y)


# Pair up float and quantized weights from the two state dicts and print the
# error of every pair.
wt_compare_dict = ns.compare_weights(float_model.state_dict(), qmodel.state_dict())
for key, weights in wt_compare_dict.items():
    quantized_weight = weights['quantized']
    # Quantized tensors are dequantized first; anything else is compared as-is.
    if quantized_weight.is_quantized:
        quantized_weight = quantized_weight.dequantize()
    print(key, compute_error(weights['float'], quantized_weight))

特定位置对比

def compute_error(x, y):
    """Return the element-wise mean squared error between ``x`` and ``y``.

    The tensors are compared value by value, so the result is zero only when
    they match everywhere. Comparing just their norms would also report zero
    for tensors that differ but happen to share a norm (e.g. ``x`` and ``-x``).

    Args:
        x: Reference tensor (e.g. a float weight or activation).
        y: Tensor to compare; expected to have the same shape as ``x``.

    Returns:
        A scalar tensor; lower means the two tensors are closer.
    """
    return torch.nn.MSELoss(reduction="mean")(x, y)


# One random token id of shape (1, 1) plus a matching initial hidden state.
input_ = torch.randint(ntokens, (1, 1), dtype=torch.long)
hidden = float_model.init_hidden(1)

# Feed the same (input_, hidden) pair to both models and collect the
# activations of matching layers.
act_compare_dict = ns.compare_model_outputs(float_model, qmodel, input_, hidden)

for key, acts in act_compare_dict.items():
    float_act = acts['float'][0][0]
    quantized_act = acts['quantized'][0][0]
    print(key, compute_error(float_act, quantized_act))

模块debug

def compute_error(x, y):
    """Return the element-wise mean squared error between ``x`` and ``y``.

    The tensors are compared value by value, so the result is zero only when
    they match everywhere. Comparing just their norms would also report zero
    for tensors that differ but happen to share a norm (e.g. ``x`` and ``-x``).

    Args:
        x: Reference tensor (e.g. a float weight or activation).
        y: Tensor to compare; expected to have the same shape as ``x``.

    Returns:
        A scalar tensor; lower means the two tensors are closer.
    """
    return torch.nn.MSELoss(reduction="mean")(x, y)


# One random token id of shape (1, 1) plus a matching initial hidden state.
input_ = torch.randint(ntokens, (1, 1), dtype=torch.long)
hidden = float_model.init_hidden(1)

# Module types whose quantized and float versions are compared on the same input.
module_swap_list = [nn.Linear, nn.LSTM]
ob_dict = ns.compare_model_stub(float_model, qmodel, module_swap_list, input_, hidden)
for key, outputs in ob_dict.items():
    float_out = outputs['float'][0]
    quantized_out = outputs['quantized'][0]
    print(key, compute_error(float_out, quantized_out))

接口解读
接口：https://pytorch.org/docs/stable/torch.ao.ns._numeric_suite.html#
量化常见错误
6.1 RuntimeError: Could not run ‘quantized::some_operator’ with arguments from the ‘CPU’ backend…
该报错表示把未量化的张量传给了量化内核。
一个常见的解决方法是使用 torch.ao.quantization.QuantStub 先对张量进行量化（输出端再用 DeQuantStub 去量化）。

class Net(torch.nn.Module):
    """Two 1x1 convolutions placed between a QuantStub and a DeQuantStub."""

    def __init__(self):
        super().__init__()
        # Entry stub: marks where the input gets quantized.
        self.quant = torch.ao.quantization.QuantStub()
        self.conv1 = torch.nn.Conv2d(1, 1, 1)
        self.conv2 = torch.nn.Conv2d(1, 1, 1)
        # Exit stub: marks where the output gets dequantized.
        self.dequant = torch.ao.quantization.DeQuantStub()

    def forward(self, x):
        """Apply quant -> conv1 -> conv2 -> dequant to ``x`` and return the result."""
        # During the convert step `quant` is replaced with a
        # `quantize_per_tensor` call and `dequant` with a `dequantize` call.
        for stage in (self.quant, self.conv1, self.conv2, self.dequant):
            x = stage(x)
        return x

net = Net()
# Static quantization is documented to expect the model in eval mode.
net.eval()

# Attach the default 'fbgemm' qconfig and insert observers in place.
net.qconfig = torch.ao.quantization.get_default_qconfig('fbgemm')
torch.ao.quantization.prepare(net, inplace=True)


def print_size_of_model(model):
    """Print the size, in MB, of ``model``'s saved state_dict.

    The state_dict is written to the scratch file ``temp.p`` in the current
    directory, its size is printed, and the file is removed again.

    Args:
        model: Module whose ``state_dict()`` is saved and measured.
    """
    torch.save(model.state_dict(), "temp.p")
    try:
        print('Size (MB):', os.path.getsize("temp.p") / 1e6)
    finally:
        # Remove the scratch file even if measuring or printing fails.
        os.remove('temp.p')


print_size_of_model(net)  # prepared float model (before conversion)

# Calibration: run one random batch through the prepared model.
net(torch.randn(4, 1, 4, 4))
# inplace=False: the converted model is bound to net1; net is not reassigned.
net1 = torch.ao.quantization.convert(net, inplace=False)
print_size_of_model(net)  # still the prepared float model
print_size_of_model(net1)  # converted (quantized) model

print(net1(torch.randn(4, 1, 4, 4)))
print(net1)

6.2 将量化张量传给了非量化内核
RuntimeError: Could not run ‘aten::thnn_conv2d_forward’ with arguments from the ‘QuantizedCPU’ backend.
需要在进入非量化模块之前，使用 torch.ao.quantization.DeQuantStub() 手动对张量进行去量化。
参考：
https://developer.horizon.ai/api/v1/fileData/horizon_j5_open_explorer_v1_1_33_cn_doc/plugin/source/tutorials/quant_profiler.html#id5
https://pytorch.org/tutorials/prototype/numeric_suite_tutorial.html#compare-the-weights-of-float-and-quantized-models
https://pytorch.org/docs/stable/quantization-accuracy-debugging.html