在设计中加入定点运算,用于解决FPGA中小数的表示与运算问题。
学习文章链接(源代码):(https://thedatabus.io/introduction).
1.实现原理
通过设置整数位数和小数位数实现定点运算。
在有符号二进制中第一位为符号位
正数以原码保存
负数以补码保存
有几种记法可以准确描述定点数的各项参数。最常用的一种是 A(a,b) 格式,其中 a 是用于表示数字整数部分的位数,b 是用于表示数字小数部分的位数。这意味着存储一个 A(a,b) 定点数所需的总位数为 N = a + b + 1。
式中+1表示符号位。
2.Python验证
进入d2l-zh环境:conda activate d2l-zh
运行:jupyter notebook
def float_to_fp(num, integer_precision, fraction_precision):
    """Encode ``num`` as a signed two's-complement fixed-point bit string.

    The result follows the A(a, b) layout: one sign bit, then
    ``integer_precision`` integer bits, then ``fraction_precision``
    fractional bits. The returned string is always exactly
    ``1 + integer_precision + fraction_precision`` characters long.

    Fractional bits that do not fit are dropped, i.e. the magnitude is
    truncated toward zero. A negative input whose magnitude truncates to
    zero is therefore encoded as zero (all bits '0').

    Args:
        num: The value to encode.
        integer_precision: Number of integer bits (sign bit excluded).
        fraction_precision: Number of fractional bits.

    Returns:
        The bit string, most significant (sign) bit first.

    Raises:
        ValueError: If ``num`` does not fit in the requested format.
    """
    magnitude_bits = integer_precision + fraction_precision
    total_bits = magnitude_bits + 1

    # Work on the magnitude; the sign is applied at the very end.
    negative = num < 0
    remaining = -1 * num if negative else num

    # Integer part first, then shift in one fractional bit per iteration:
    # doubling the fraction moves its next binary digit into the integer
    # position, where int() can read it off.
    magnitude = int(remaining)
    fraction = remaining - magnitude
    for _ in range(fraction_precision):
        doubled = fraction * 2
        bit = int(doubled)
        fraction = doubled - bit
        magnitude = (magnitude << 1) | bit

    # Two's complement is asymmetric: the negative range reaches exactly
    # -2**integer_precision, the positive range stops one step short of it.
    limit = 1 << magnitude_bits
    if magnitude > limit or (magnitude == limit and not negative):
        raise ValueError(
            'value ' + repr(num) + ' does not fit in A('
            + str(integer_precision) + ',' + str(fraction_precision) + ')'
        )

    if negative:
        # Two's complement over the full word; the modulo maps a zero
        # magnitude back to zero instead of overflowing the word width.
        magnitude = ((1 << total_bits) - magnitude) % (1 << total_bits)

    return format(magnitude, '0' + str(total_bits) + 'b')
def twos_comp(val, integer_precision, fraction_precision):
    """Return the two's complement of the bit string ``val``.

    Every bit is inverted and one is added. The addition wraps around
    within the width of ``val``, so the complement of an all-zero string
    is all zeros again rather than a string that is one bit too long.

    Args:
        val: String of '0'/'1' characters, most significant bit first.
        integer_precision: Number of integer bits in the format.
        fraction_precision: Number of fractional bits in the format.

    Returns:
        The complemented bits, zero-padded to at least
        ``integer_precision + fraction_precision`` characters.

    Example:
        >>> twos_comp('0110', 2, 2)
        '1010'
    """
    # 1 - bit inverts each individual binary digit.
    flipped = ''.join(str(1 - int(bit)) for bit in val)
    # "Invert and add one"; the modulo discards the carry out of the top bit.
    complemented = (int(flipped, 2) + 1) % (1 << len(val))
    width_spec = '0' + str(integer_precision + fraction_precision) + 'b'
    return format(complemented, width_spec)
def fp_to_float(s, integer_precision, fraction_precision):
    """Decode a signed two's-complement fixed-point bit string to a float.

    ``s`` is expected in the A(a, b) layout: one sign bit, then
    ``integer_precision`` integer bits, then ``fraction_precision``
    fractional bits, most significant bit first.

    Args:
        s: Bit string of exactly
            ``1 + integer_precision + fraction_precision`` characters.
        integer_precision: Number of integer bits (sign bit excluded).
        fraction_precision: Number of fractional bits.

    Returns:
        The decoded value as a float.

    Raises:
        ValueError: If ``s`` has the wrong length or contains characters
            other than '0' and '1'.
    """
    total_bits = 1 + integer_precision + fraction_precision
    if len(s) != total_bits or set(s) - {'0', '1'}:
        raise ValueError(
            'expected a ' + str(total_bits) + '-bit binary string, got '
            + repr(s)
        )

    raw = int(s, 2)
    if s[0] == '1':
        # Sign bit set: the word represents raw - 2**total_bits. Handling it
        # arithmetically also covers the most negative code (sign bit
        # followed by zeros), which has no positive counterpart.
        raw -= 1 << total_bits

    # The binary point sits fraction_precision bits from the right.
    return raw / (1 << fraction_precision)
3.为定点运算设计硬件
这一部分先贴上代码,后续再继续分析……
//file: qadd.v
`timescale 1ns / 1ps
module qadd #(
    parameter Q = 15,   // number of fractional bits (not referenced by the adder logic)
    parameter N = 32    // total word width in bits
)
(
    input  wire [N-1:0] a,
    input  wire [N-1:0] b,
    output wire [N-1:0] c
);
    // Format example: (Q,N) = (12,16) means
    //   1 sign bit + 3 integer bits + 12 fractional bits = 16 bits in total
    //   |S|III|FFFFFFFFFFFF|
    // which is A(3,12) in A(I,F) notation.

    // Operands are expected in two's complement form, so a single binary
    // addition covers both addition and subtraction (subtracting a number is
    // the same as adding its two's complement). The sum is truncated to N
    // bits; no overflow flag is produced.
    wire [N-1:0] sum;

    assign sum = a + b;
    assign c   = sum;

    // If the software/testbench driving this hardware supplies negative
    // numbers in sign-magnitude form instead (magnitude unchanged, sign bit
    // set to '1'), this module is not suitable as-is; the original author
    // points to the fixed-point arithmetic modules on OpenCores for that case.
endmodule
//file: qmult.v
`timescale 1ns / 1ps
// (Q,N) = (12,16) => 1 sign-bit + 3 integer-bits + 12 fractional-bits = 16 total-bits
// |S|III|FFFFFFFFFFFF|
// The same thing in A(I,F) format would be A(3,12)
module qmult #(
    //Parameterized values
    parameter Q = 12,   // number of fractional bits
    parameter N = 16    // total word width in bits, sign bit included
)
(
    input  [N-1:0] a,
    input  [N-1:0] b,
    output [N-1:0] q_result,  // product quantized to the same width as the inputs
    output         overflow   // set when the product magnitude exceeds the N-1 magnitude bits
);
    // Both operands use the same two's complement (N,Q) format, so the binary
    // point of the 2N-bit product always sits at bit 2*Q and the result can be
    // handed back by slicing out bits [N-2+Q:Q].
    //
    // The multiplication is done on magnitudes and the sign is re-applied
    // afterwards, so bits below the output precision are truncated toward zero.

    wire [N-1:0]   a_2cmp, b_2cmp;
    wire [N-1:0]   multiplicand;
    wire [N-1:0]   multiplier;
    wire [2*N-1:0] f_result;               // N bits * N bits needs 2N bits
    wire [N-2:0]   quantized_result;
    wire [N-2:0]   quantized_result_2cmp;
    wire           result_negative;

    // Magnitudes are kept N bits wide (not N-1) so that the most negative
    // input, 1000...0, negates to the correct magnitude 2^(N-1) instead of
    // wrapping to zero.
    assign a_2cmp = ~a + 1'b1;
    assign b_2cmp = ~b + 1'b1;
    assign multiplicand = (a[N-1]) ? a_2cmp : a;
    assign multiplier   = (b[N-1]) ? b_2cmp : b;

    assign f_result = multiplicand * multiplier;

    // Quantize the magnitude back to N-1 bits.
    assign quantized_result = f_result[N-2+Q:Q];

    // Any product bit above the quantized slice means the magnitude did not
    // fit. This is conservative for a product of exactly -2^(N-1-Q), which is
    // representable but is still reported as overflow.
    assign overflow = (f_result[2*N-2:N-1+Q] > 0) ? 1'b1 : 1'b0;

    // The result is negative when the input signs differ, unless the quantized
    // magnitude is zero: sign bit 1 with zero magnitude would encode the most
    // negative value rather than zero.
    assign result_negative = (a[N-1] ^ b[N-1]) & (|quantized_result);

    // Negative results are returned in two's complement form.
    assign quantized_result_2cmp = ~quantized_result + 1'b1;
    assign q_result[N-1]   = result_negative;
    assign q_result[N-2:0] = result_negative ? quantized_result_2cmp : quantized_result;
endmodule