import math
import numpy as np
# Cosine similarity
def cos_sim(x1, x2):
    """Return the cosine similarity of two equally-shaped arrays.

    Both inputs are treated as flat vectors: every element takes part in the
    sums regardless of the array shape. They are promoted to float64 first so
    that narrow integer arrays (e.g. quantized uint8/int8 data) cannot wrap
    around when their elements are multiplied.

    Args:
        x1: Array-like of numbers.
        x2: Array-like of numbers with the same shape as ``x1``.

    Returns:
        The similarity as a Python float, or 0.0 when either input has zero
        norm (the similarity is undefined there, and the plain formula would
        divide by zero).
    """
    a = np.asarray(x1, dtype=np.float64)
    b = np.asarray(x2, dtype=np.float64)
    denom = math.sqrt(np.sum(a * a) * np.sum(b * b))
    if denom == 0.0:
        return 0.0
    return float(np.sum(a * b) / denom)
# Symmetric fixed-point quantization
def fixed_quant(x, bw=8):
    """Quantize ``x`` symmetrically around zero to signed ``bw``-bit integers.

    The scale is chosen so that the largest magnitude in ``x`` maps to the
    largest positive code ``2**(bw-1) - 1``; the zero point is always 0.

    Args:
        x: Array-like of real values (must be non-empty).
        bw: Bit width of the result, 8 or 16.

    Returns:
        A tuple ``(xq, scale, 0)`` where ``xq`` is an int8/int16 array and
        ``xq * scale`` approximates ``x``.

    Raises:
        ValueError: If ``bw`` is neither 8 nor 16.
    """
    # Validate before doing any work so a bad bit width fails fast.
    if bw == 8:
        out_dtype = np.int8
    elif bw == 16:
        out_dtype = np.int16
    else:
        raise ValueError("unsupported bw=%d" % (bw))
    x = np.asarray(x)
    psize = (1 << (bw - 1)) - 1  # largest representable magnitude
    max_abs = max(np.max(x), -np.min(x))
    # An all-zero input would give scale == 0 and a division by zero below;
    # any positive scale represents it exactly, so fall back to 1.0.
    scale = max_abs / psize if max_abs > 0 else 1.0
    xq = np.clip(x / scale, -psize, psize)
    xq = np.round(xq)
    return (xq.astype(out_dtype), scale, 0)
# Asymmetric fixed-point quantization
def ufixed_quant(x, bw=8):
    """Quantize ``x`` asymmetrically to unsigned ``bw``-bit integers.

    The value range ``[min(x), max(x)]`` is spread over the codes
    ``0 .. 2**bw - 1``. The returned offset is the integer zero point, so that
    ``(xq - offset) * scale`` approximates ``x``.

    Args:
        x: Array-like of real values (must be non-empty).
        bw: Bit width of the result, 8 or 16.

    Returns:
        A tuple ``(xq, scale, offset)`` where ``xq`` is a uint8/uint16 array,
        ``scale`` is the step size and ``offset`` is a Python int.

    Raises:
        ValueError: If ``bw`` is neither 8 nor 16.
    """
    # Validate before doing any work so a bad bit width fails fast.
    if bw == 8:
        out_dtype = np.uint8
    elif bw == 16:
        out_dtype = np.uint16
    else:
        raise ValueError("unsupported bw=%d" % (bw))
    x = np.asarray(x)
    psize = (1 << bw) - 1  # largest representable code
    lo = np.min(x)
    span = np.max(x) - lo
    if span > 0:
        scale = span / psize
    elif lo != 0:
        # Constant non-zero input: the range is empty, so use |value| as the
        # step. The value then sits exactly one step from zero and is
        # reproduced exactly by (xq - offset) * scale.
        scale = abs(lo)
    else:
        # All zeros: any positive scale represents the input exactly.
        scale = 1.0
    offset = -int(round(lo / scale))
    xq = np.clip(x / scale + offset, 0., psize)
    xq = np.round(xq)
    return (xq.astype(out_dtype), scale, offset)
def quant(x, scale, offset, bw=8):
    """Quantize ``x`` to unsigned ``bw``-bit codes with given parameters.

    Computes ``round(clip(x / scale + offset, 0, 2**bw - 1))``, i.e. values
    outside the representable range saturate instead of wrapping.

    Args:
        x: Array-like of real values.
        scale: Step size of the quantization grid.
        offset: Integer zero point added after scaling.
        bw: Bit width of the result, 8 or 16.

    Returns:
        A uint8 (``bw == 8``) or uint16 (``bw == 16``) array.

    Raises:
        ValueError: If ``bw`` is neither 8 nor 16.
    """
    # Validate before doing any work so a bad bit width fails fast.
    if bw == 8:
        out_dtype = np.uint8
    elif bw == 16:
        out_dtype = np.uint16
    else:
        raise ValueError("unsupported bw=%d" % (bw))
    psize = (1 << bw) - 1  # largest representable code
    xq = np.asarray(x) / scale + offset
    xq = np.clip(xq, 0., psize)
    xq = np.round(xq)
    return xq.astype(out_dtype)
# Dequantization
def dequant(xq, scale, offset):
    """Map quantized codes back to real values.

    The codes are widened to int32 before the zero point is subtracted, so
    the subtraction cannot wrap around in a narrow unsigned type; the result
    is ``(xq - offset) * scale``.

    Args:
        xq: NumPy array of quantized codes.
        scale: Step size used when quantizing.
        offset: Zero point used when quantizing.

    Returns:
        Array of reconstructed values.
    """
    widened = xq.astype(np.int32)
    centered = widened - offset
    return centered * scale
# Fully connected layer
class FullConnect(object):
    """Floating-point fully connected layer computing ``x.dot(weight) + bias``."""

    def __init__(self, weight, bias):
        """Store the layer parameters.

        Args:
            weight: Weight array, used as the right operand of ``x.dot``.
            bias: Bias added to the product.
        """
        self.bias = bias
        self.weight = weight

    def __call__(self, x):
        """Apply the layer to ``x`` and return the result."""
        projected = x.dot(self.weight)
        return projected + self.bias
# Quantized fully connected layer
class FullConnectQuant(object):
    """Fully connected layer evaluated with integer arithmetic.

    Weights are quantized symmetrically to 8 bits; the input and output
    activations are quantized asymmetrically to ``act_bw`` bits. Call
    ``optimize`` with representative float inputs once before calling the
    layer or reading its quantization parameters.
    """

    def __init__(self, weight, bias, act_bw=8):
        """Store the float parameters.

        Args:
            weight: Float weight array, used as the right operand of
                ``x.dot(weight)``.
            bias: Float bias added to the product.
            act_bw: Activation bit width, 8 or 16.

        Raises:
            ValueError: If ``act_bw`` is neither 8 nor 16.
        """
        if act_bw not in (8, 16):
            raise ValueError("unsupported act_bw=%d" % (act_bw))
        self.weight = weight
        self.bias = bias
        self.act_bw = act_bw
        # Quantization state; filled in by optimize().
        self.qw = None
        self.sx = None
        self.ox = None
        self.sy = None
        self.oy = None
        self.scale = None
        self.offset = None

    def _require_optimized(self):
        """Raise a clear error when optimize() has not been run yet."""
        if self.qw is None:
            raise RuntimeError("optimize() must be called before use")

    # Quantize the model using the calibration input x
    def optimize(self, x):
        """Derive all quantization parameters from float calibration data.

        With x ~ (qx - ox) * sx, W ~ qw * sw and qy = y / sy + oy, the
        quantized output is

            qy ~ scale * (qx . qw + offset)

        where scale = sw * sx / sy and offset folds together the input zero
        point (-ox * column sums of qw), the bias and the output zero point.

        Args:
            x: Float array of calibration inputs.
        """
        y = x.dot(self.weight) + self.bias
        qw, sw, _ = fixed_quant(self.weight, bw=8)
        _, self.sx, self.ox = ufixed_quant(x, bw=self.act_bw)
        _, self.sy, self.oy = ufixed_quant(y, bw=self.act_bw)
        self.qw = qw.astype(np.int32)
        self.scale = sw * self.sx / self.sy
        # Contribution of the input zero point: -ox * sum_i qw[i, j].
        # Kept in int64 so large layers cannot overflow.
        input_term = -self.ox * np.sum(
            self.qw, axis=0, keepdims=True, dtype=np.int64)
        bias_term = np.round(
            self.bias / self.sy / self.scale + self.oy / self.scale
        ).astype(np.int64)
        self.offset = input_term + bias_term

    def __call__(self, quant_x):
        """Run the layer on already-quantized input codes.

        Args:
            quant_x: Unsigned integer array of input codes.

        Returns:
            uint8 (``act_bw == 8``) or uint16 array of output codes,
            saturated to the representable range.

        Raises:
            RuntimeError: If ``optimize`` has not been called.
        """
        self._require_optimized()
        psize = (1 << self.act_bw) - 1
        # Accumulate in int64: with 16-bit activations a 32-bit accumulator
        # can overflow once the layer has a few hundred inputs.
        acc = np.asarray(quant_x).astype(np.int64).dot(self.qw) + self.offset
        out = np.clip(acc * self.scale, 0, psize)
        out = np.round(out)
        return out.astype(np.uint8 if self.act_bw == 8 else np.uint16)

    def get_input_quant_param(self):
        """Return ``(scale, offset)`` of the input quantization."""
        self._require_optimized()
        return (self.sx, self.ox)

    def get_output_quant_param(self):
        """Return ``(scale, offset)`` of the output quantization."""
        self._require_optimized()
        return (self.sy, self.oy)
def main():
    """Compare the float and quantized layers on random data.

    Builds a random 128x256 layer, calibrates the quantized version, runs
    both versions on a fresh random input and prints the cosine similarity
    of their outputs.
    """
    act_bw = 8  # 8 or 16
    n_in, n_out = 128, 256
    # Random layer parameters: weights in [-1, 1), bias in [0, 1).
    W = np.random.rand(n_in, n_out) * 2 - 1
    bias = np.random.rand(1, n_out)
    float_layer = FullConnect(W, bias)
    quant_layer = FullConnectQuant(W, bias, act_bw=act_bw)
    # Calibrate the quantized model with 100 random inputs in [-1, 2).
    calib = np.random.rand(100, n_in) * 3 - 1
    quant_layer.optimize(calib)
    in_scale, in_offset = quant_layer.get_input_quant_param()
    out_scale, out_offset = quant_layer.get_output_quant_param()
    # Evaluate on a single fresh random input.
    sample = np.random.rand(1, n_in) * 3 - 1
    reference = float_layer(sample)
    sample_q = quant(sample, in_scale, in_offset, bw=act_bw)
    result_q = quant_layer(sample_q)
    restored = dequant(result_q, out_scale, out_offset)
    print("cos sim:", cos_sim(reference, restored))


if __name__ == "__main__":
    main()
# Simulating 8-bit and 16-bit fixed-point quantized computation of a fully connected layer in Python
# First published 2022-09-11 22:49:29