使用python模拟全连接层的8位和16位定点量化计算

KangKangLoveCat

已于 2022-10-26 16:25:46 修改

阅读量667

点赞数

文章标签： python numpy 机器学习

于 2022-09-11 22:49:29 首次发布

本文链接：https://blog.csdn.net/qq_36105823/article/details/126810822

版权

import math
import numpy as np


# Cosine similarity
def cos_sim(x1, x2):
    """Return the cosine similarity between ``x1`` and ``x2``.

    The products are taken elementwise and summed over every element, so
    multi-dimensional inputs are compared as one flat vector rather than
    row by row.
    """
    inner = np.sum(x1 * x2)
    energy_1 = np.sum(x1 * x1)
    energy_2 = np.sum(x2 * x2)
    return inner / math.sqrt(energy_1 * energy_2)


# Symmetric fixed-point quantization
def fixed_quant(x, bw=8):
    """Quantize ``x`` symmetrically around zero to signed ``bw``-bit integers.

    The scale is chosen so that the largest absolute value in ``x`` maps to
    the largest positive code ``2**(bw-1) - 1``; the zero point is always 0.

    Args:
        x: array of real values to quantize.
        bw: bit width of the result, 8 or 16.

    Returns:
        Tuple ``(xq, scale, 0)`` where ``xq`` is an ``int8``/``int16`` array
        and ``xq * scale`` approximates ``x``.

    Raises:
        SystemExit: with code -1 (after printing an error) when ``bw`` is
            neither 8 nor 16.
    """
    dtypes = {8: np.int8, 16: np.int16}
    # Validate up front so no work is done for an unsupported width.
    if bw not in dtypes:
        print("ERROR: unsupported bw=%d" % (bw))
        # Same effect as exit(-1) without relying on the site-provided helper.
        raise SystemExit(-1)

    psize = (1 << (bw - 1)) - 1  # largest code magnitude
    scale = max(np.max(x), -np.min(x)) / psize
    if scale == 0:
        # All-zero input: any positive scale represents it exactly, and a
        # zero scale would turn x/scale into NaN.
        scale = 1.0
    xq = np.round(np.clip(x / scale, -psize, psize))
    return (xq.astype(dtypes[bw]), scale, 0)


# Asymmetric fixed-point quantization
def ufixed_quant(x, bw=8):
    """Quantize ``x`` asymmetrically to unsigned ``bw``-bit integers.

    The value range ``[min(x), max(x)]`` is spread over the codes
    ``0 .. 2**bw - 1`` using a scale and an integer zero-point offset.

    Args:
        x: array of real values to quantize.
        bw: bit width of the result, 8 or 16.

    Returns:
        Tuple ``(xq, scale, offset)`` where ``xq`` is a ``uint8``/``uint16``
        array and ``(xq - offset) * scale`` approximates ``x``.

    Raises:
        SystemExit: with code -1 (after printing an error) when ``bw`` is
            neither 8 nor 16.
    """
    dtypes = {8: np.uint8, 16: np.uint16}
    # Validate up front so no work is done for an unsupported width.
    if bw not in dtypes:
        print("ERROR: unsupported bw=%d" % (bw))
        # Same effect as exit(-1) without relying on the site-provided helper.
        raise SystemExit(-1)

    psize = (1 << bw) - 1  # largest code
    lo = np.min(x)
    hi = np.max(x)
    scale = (hi - lo) / psize
    if scale == 0:
        # Constant input has zero range; a zero scale would make the offset
        # computation divide by zero, so fall back to a unit step.
        scale = 1.0
    offset = -int(round(lo / scale))
    xq = np.round(np.clip(x / scale + offset, 0., psize))
    return (xq.astype(dtypes[bw]), scale, offset)


def quant(x, scale, offset, bw=8):
    """Quantize ``x`` to unsigned ``bw``-bit codes with a given scale/offset.

    Computes ``round(clip(x/scale + offset, 0, 2**bw - 1))`` and casts the
    result to ``uint8`` (bw=8) or ``uint16`` (bw=16). Any other ``bw``
    prints an error and exits the process with status -1.
    """
    max_code = (1 << bw) - 1  # largest code
    codes = np.round(np.clip(x / scale + offset, 0., max_code))

    dtype_by_bw = {8: np.uint8, 16: np.uint16}
    if bw not in dtype_by_bw:
        print("ERROR: unsupported bw=%d" % (bw))
        exit(-1)
    return codes.astype(dtype_by_bw[bw])


# Dequantization
def dequant(xq, scale, offset):
    """Map quantized codes back to real values as ``(xq - offset) * scale``."""
    # Widen to int32 before subtracting so the subtraction is not carried
    # out in the narrow storage dtype of the codes.
    widened = xq.astype(np.int32)
    centered = widened - offset
    return centered * scale


# Fully connected layer
class FullConnect:
    """Floating-point fully connected layer: ``x.dot(weight) + bias``."""

    def __init__(self, weight, bias):
        self.weight, self.bias = weight, bias

    def __call__(self, x):
        """Apply the layer to ``x`` and return the result."""
        projected = x.dot(self.weight)
        return projected + self.bias


# Quantized fully connected layer
class FullConnectQuant(object):
    """Fully connected layer evaluated on quantized activations.

    Weights are quantized symmetrically to 8 bits; inputs and outputs use
    asymmetric unsigned quantization with ``act_bw`` bits. ``optimize`` must
    be called once with sample inputs before the layer is applied, because
    it derives the integer weights, the requantization scale and the folded
    integer offset that ``__call__`` uses.
    """

    def __init__(self, weight, bias, act_bw=8):
        """Store the float parameters.

        Prints an error and raises ``SystemExit(-1)`` unless ``act_bw`` is
        8 or 16.
        """
        if act_bw not in (8, 16):
            print("ERROR: unsupported act_bw=%d" % (act_bw))
            # Same effect as exit(-1) without relying on the site helper.
            raise SystemExit(-1)
        self.weight = weight
        self.bias = bias
        self.act_bw = act_bw

    # Quantize the model using the sample inputs x
    def optimize(self, x):
        """Derive all quantization parameters from sample inputs ``x``.

        With x ~ (qx - ox)*sx, W ~ qw*sw and y ~ (qy - oy)*sy, the layer
        becomes qy = scale * (qx.dot(qw) + offset), where scale = sw*sx/sy
        and offset folds the input zero point, the bias and the output zero
        point into a single integer row vector.
        """
        y = x.dot(self.weight) + self.bias

        self.qw, sw, _ = fixed_quant(self.weight, bw=8)
        _, self.sx, self.ox = ufixed_quant(x, bw=self.act_bw)
        _, self.sy, self.oy = ufixed_quant(y, bw=self.act_bw)
        self.qw = self.qw.astype(np.int32)

        self.scale = sw*self.sx/self.sy
        # -ox * (column sums of qw): contribution of the input zero point.
        # Summed in int64 so wide layers cannot wrap a 32-bit accumulator.
        weight_sums = self.qw.sum(axis=0, keepdims=True, dtype=np.int64)
        folded = np.round(self.bias/self.sy/self.scale + self.oy/self.scale)
        self.offset = (0 - self.ox)*weight_sums + folded.astype(np.int64)

    def __call__(self, quant_x):
        """Apply the layer to quantized input and return quantized output.

        Returns a ``uint8`` array when ``act_bw`` is 8, otherwise ``uint16``.
        """
        psize = (1 << self.act_bw) - 1

        # Accumulate in int64: with 16-bit activations the products
        # 65535 * 127 overflow int32 after only a few hundred inputs.
        acc = np.asarray(quant_x, dtype=np.int64).dot(
            np.asarray(self.qw, dtype=np.int64))
        out = (acc + self.offset)*self.scale
        out = np.round(np.clip(out, 0, psize))
        return out.astype(np.uint8 if self.act_bw == 8 else np.uint16)

    def get_input_quant_param(self):
        """Return ``(scale, offset)`` of the input quantization."""
        return (self.sx, self.ox)

    def get_output_quant_param(self):
        """Return ``(scale, offset)`` of the output quantization."""
        return (self.sy, self.oy)


def main(input_num=128, output_num=256, act_bw=8, calib_num=100, test_num=10):
    """Compare a float fully connected layer with its quantized version.

    Random weights and bias are generated, the quantized layer is calibrated
    on ``calib_num`` random inputs, and both layers are then evaluated on
    ``test_num`` fresh random inputs. The cosine similarity between the float
    output and the dequantized output is printed.

    Args:
        input_num: number of input features.
        output_num: number of output features.
        act_bw: activation bit width, 8 or 16.
        calib_num: number of random inputs used for calibration.
        test_num: number of random inputs used for the comparison.
    """
    W = np.random.rand(input_num, output_num)*2 - 1
    bias = np.random.rand(1, output_num)

    fcq = FullConnectQuant(W, bias, act_bw=act_bw)

    # Calibrate the quantized model on calib_num inputs
    x = np.random.rand(calib_num, input_num)*3 - 1
    fcq.optimize(x)
    sx, ox = fcq.get_input_quant_param()
    sy, oy = fcq.get_output_quant_param()

    # Evaluate on test_num fresh inputs
    x = np.random.rand(test_num, input_num)*3 - 1

    fc = FullConnect(W, bias)
    y1 = fc(x)

    xq = quant(x, sx, ox, bw=act_bw)
    yq = fcq(xq)
    y2 = dequant(yq, sy, oy)

    print("cos sim:", cos_sim(y1, y2))


# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()