前言
曾经用过这些浮点运算模块,很好用,但原模块基于仿顺序结构,于是我对其进行了流水化处理。转化的过程中还是遇到了一些很值得记录的内容,包括编码指南中提到过的always块内阻塞赋值带来的问题。
没有反馈的算法流程非常适合流水化。相比于仿顺序结构,流水化节约了各步骤之间相互等待的时间;之所以能节约时间,是因为流水化把不同步骤间的运算结果都寄存了下来,这就是常说的拿资源换速度。这里的“速度”并不是指减少了算法流程中的步骤:只做一次加法运算时,流水化和无流水所用的时钟数是一样的;而在连续多次加法运算的情况下,流水压缩步骤等待时间的优势就凸显出来了。不过,流水存在latency(潜伏时间)的问题,不像仿顺序结构那样能够很方便地使用握手信号。
本篇我主要分享流水化过程遇到的问题
README
原代码片段
// Sequential ("pseudo-sequential") floating-point addition: while Start_Sig is
// high, the step counter i walks through one step per clock (0..7 compute,
// 8..9 pulse isDone and return to step 0).
// NOTE(review): the enclosing always block, its reset branch and all register
// declarations are outside this excerpt — widths quoted below are read off the
// part-selects used here, not off declarations.
else if( Start_Sig )
case( i )
0: // Unpack A and B into the working format and clear the status flags.
begin
// { sign, exponent[7:0], 2'b01 (overflow guard + hidden 1), fraction[22:0], 23 zero bits }
rA <= { A[31], A[30:23], 2'b01, A[22:0], 23'd0 };
rB <= { B[31], B[30:23], 2'b01, B[22:0], 23'd0 };
isOver <= 1'b0; isUnder <= 1'b0; isZero <= 1'b0;
i <= i + 1'b1;
end
1: // Exponent difference. rExp[8] == 1 is treated as "A.Exp is smaller than B.Exp",
// rExp[8] == 0 as "A.Exp is larger than or equal to B.Exp".
begin
// Blocking assignment: the if/else just below already sees the new rExp value.
// Assumes rExp is wider than 8 bits so that bit 8 carries the borrow — TODO confirm against the declaration.
rExp = A[30:23] - B[30:23];
// rExpDiff = magnitude of the difference (two's-complement negate when the borrow bit is set).
if( rExp[8] == 1 ) rExpDiff <= ~rExp[7:0] + 1'b1;
else rExpDiff <= rExp[7:0];
i <= i + 1'b1;
end
2: // Align: shift the mantissa of the operand with the smaller exponent right by
// rExpDiff and give it the other operand's exponent.
begin
if( rExp[8] == 1 ) begin rA[47:0] <= rA[47:0] >> rExpDiff; rA[55:48] <= rB[55:48]; end
else begin rB[47:0] <= rB[47:0] >> rExpDiff; rB[55:48] <= rA[55:48]; end
i <= i + 1'b1;
end
3: // Build signed addends: { sign, 48-bit mantissa }, mantissa negated
// (two's complement) when the sign bit is set.
begin
TempA <= rA[56] ? { rA[56], (~rA[47:0] + 1'b1) } : { rA[56], rA[47:0] };
TempB <= rB[56] ? { rB[56], (~rB[47:0] + 1'b1) } : { rB[56], rB[47:0] };
i <= i + 1'b1;
end
4: // Two's-complement addition of the aligned mantissas.
begin Temp <= TempA + TempB; i <= i + 1'b1; end
5: // Back to sign + magnitude; reload rExp with the exponent of the sum.
begin
isSign <= Temp[48];
if( Temp[48] == 1'b1) Temp <= ~Temp + 1'b1; // negative sum: take the magnitude
rExp <= {2'b00, rA[55:48]}; // rA and rB hold the same exponent after step 2, so either would do
i <= i + 1'b1;
end
6: // Normalise so that Temp[47:46] == 2'b01 (hidden 1 at bit 46), adjusting rExp
// by the same amount. The chain is a priority search for the highest set bit.
begin
// Carry out of the hidden-bit position: shift right once, exponent + 1.
if( Temp[47:46] == 2'b10 || Temp[47:46] == 2'b11) begin Temp <= Temp >> 1; rExp <= rExp + 1'b1; end
// Leading zeros after cancellation: shift left by 1..23, exponent minus the shift.
else if( Temp[47:46] == 2'b00 && Temp[45] ) begin Temp <= Temp << 1; rExp <= rExp - 5'd1; end
else if( Temp[47:46] == 2'b00 && Temp[44] ) begin Temp <= Temp << 2; rExp <= rExp - 5'd2; end
else if( Temp[47:46] == 2'b00 && Temp[43] ) begin Temp <= Temp << 3; rExp <= rExp - 5'd3; end
else if( Temp[47:46] == 2'b00 && Temp[42] ) begin Temp <= Temp << 4; rExp <= rExp - 5'd4; end
else if( Temp[47:46] == 2'b00 && Temp[41] ) begin Temp <= Temp << 5; rExp <= rExp - 5'd5; end
else if( Temp[47:46] == 2'b00 && Temp[40] ) begin Temp <= Temp << 6; rExp <= rExp - 5'd6; end
else if( Temp[47:46] == 2'b00 && Temp[39] ) begin Temp <= Temp << 7; rExp <= rExp - 5'd7; end
else if( Temp[47:46] == 2'b00 && Temp[38] ) begin Temp <= Temp << 8; rExp <= rExp - 5'd8; end
else if( Temp[47:46] == 2'b00 && Temp[37] ) begin Temp <= Temp << 9; rExp <= rExp - 5'd9; end
else if( Temp[47:46] == 2'b00 && Temp[36] ) begin Temp <= Temp << 10; rExp <= rExp - 5'd10; end
else if( Temp[47:46] == 2'b00 && Temp[35] ) begin Temp <= Temp << 11; rExp <= rExp - 5'd11; end
else if( Temp[47:46] == 2'b00 && Temp[34] ) begin Temp <= Temp << 12; rExp <= rExp - 5'd12; end
else if( Temp[47:46] == 2'b00 && Temp[33] ) begin Temp <= Temp << 13; rExp <= rExp - 5'd13; end
else if( Temp[47:46] == 2'b00 && Temp[32] ) begin Temp <= Temp << 14; rExp <= rExp - 5'd14; end
else if( Temp[47:46] == 2'b00 && Temp[31] ) begin Temp <= Temp << 15; rExp <= rExp - 5'd15; end
else if( Temp[47:46] == 2'b00 && Temp[30] ) begin Temp <= Temp << 16; rExp <= rExp - 5'd16; end
else if( Temp[47:46] == 2'b00 && Temp[29] ) begin Temp <= Temp << 17; rExp <= rExp - 5'd17; end
else if( Temp[47:46] == 2'b00 && Temp[28] ) begin Temp <= Temp << 18; rExp <= rExp - 5'd18; end
else if( Temp[47:46] == 2'b00 && Temp[27] ) begin Temp <= Temp << 19; rExp <= rExp - 5'd19; end
else if( Temp[47:46] == 2'b00 && Temp[26] ) begin Temp <= Temp << 20; rExp <= rExp - 5'd20; end
else if( Temp[47:46] == 2'b00 && Temp[25] ) begin Temp <= Temp << 21; rExp <= rExp - 5'd21; end
else if( Temp[47:46] == 2'b00 && Temp[24] ) begin Temp <= Temp << 22; rExp <= rExp - 5'd22; end
else if( Temp[47:46] == 2'b00 && Temp[23] ) begin Temp <= Temp << 23; rExp <= rExp - 5'd23; end
// No branch taken (2'b01, or no set bit in [45:23]): Temp and rExp keep their values.
// The search could be extended below bit 23.
i <= i + 1'b1;
end
7: // Range checks, then pack the result. All three error cases output the
// fixed pattern { 0, 8'd127, 23'd0 } and raise their flag.
begin
if( rExp[9:8] == 2'b01 ) begin isOver <= 1'b1; rResult <= {1'b0,8'd127, 23'd0}; end // exponent overflow
else if( rExp[9:8] == 2'b11 ) begin isUnder <= 1'b1; rResult <= {1'b0, 8'd127, 23'd0}; end // exponent underflow
else if( Temp[46:23] == 24'd0 ) begin isZero <= 1'b1; rResult <= {1'b0, 8'd127, 23'd0}; end // mantissa is zero
else if( Temp[22] == 1'b1 ) rResult <= { isSign, rExp[7:0], Temp[45:23] + 1'b1 }; // first dropped bit set: round up
else rResult <= { isSign, rExp[7:0], Temp[45:23] }; // no rounding needed
i <= i + 1'b1;
end
8: // Raise isDone for the handshake.
begin isDone <= 1'b1; i <= i + 1'b1; end
9: // Drop isDone again and return to step 0.
begin isDone <= 1'b0; i <= 4'd0; end
endcase
抛开握手的done信号,整个浮点加法算法用到8个步骤(步骤0~7),其实步骤0和1是可以合并的,算法细节看看就好。需要强调的是,浮点数遵守IEEE754单精度表示法,即“正值”(符号位+绝对值)表示,但尾数的运算是用补码实现的。这里分析一下步骤1:步骤1是为了计算阶码差rExp以及它的绝对值rExpDiff,可以看到rExp用的是阻塞赋值
2:
...
rExp = A[30:23] - B[30:23];
if( rExp[8] == 1 ) begin rA[47:0] <= rA[47:0] >> rExpDiff; rA[55:48] <= rB[55:48]; end
...
阻塞赋值在同一个always块内立即生效,其后的语句读到的就是新值,因此这条语句相当于组合逻辑,在理想情况下不耗费时间,更不用说占用时钟数,所以,上述代码等价于
...
if( rExp[8] == 1 ) begin rA[47:0] <= rA[47:0] >> rExpDiff; rA[55:48] <= rB[55:48]; end
...
assign rExp = A[30:23] - B[30:23];
在激励下的仿真图,可以看到一次加法运算模块占用10个clock,实际算法步骤占用8个clock,完成7次运算共消耗76个clock
流水化遇到的问题
我保留了原代码的内容,通过注释的方式进行修改,为的是更清楚地展示我的修改步骤,展现仿顺序结构和流水之间的差别,重点是:
1、谨记要寄存上一步骤的结果供下一步骤使用,
2、所有步骤都在时钟的节拍下同时运行,没有步骤顺序,只有数据顺序
具体代码如下
else if( Start_Sig )begin //修改为流水结构
//case( i )
// 第一步,规范化A、B,计算阶码差,并寄存结果
// 与下一步骤相关的信号有rA rB rExp rExpDiff
//0: // Initial A,B and other reg.
//begin
rA[0] <= { A[31], A[30:23], 2'b01, A[22:0], 23'd0 };
rB[0] <= { B[31], B[30:23], 2'b01, B[22:0], 23'd0 };
//isOver <= 1'b0; isUnder <= 1'b0; isZero <= 1'b0;
//i <= i + 1'b1;
//end
//1: // if rExp[9..8] is 1, mean A.Exp small than B.Exp
// while rExp[9..8] is 0, mean A.Exp large than B.Exp or same.
//begin
rExp[0] = {2'b0,A[30:23]} - {2'b0,B[30:23]}; //bug 位宽不一致
if( rExp[0][8] == 1 ) rExpDiff[0] <= ~rExp[0][7:0] + 1'b1;
else rExpDiff[0] <= rExp[0][7:0];
//i <= i + 1'b1;
//end
// 第二步,阶码对齐,并寄存结果
// 与下一步骤相关的信号有rA rB rExp
//2: // if A < B; A.M move and A.E = B.E, else opposite act;
//begin
if( rExp[0][8] == 1 ) begin
rA[1][47:0] <= rA[0][47:0] >> rExpDiff[0];
rA[1][55:48] <= rB[0][55:48];
rA[1][56] <= rA[0][56];
rB[1] <= rB[0];
end
else begin
rB[1][47:0] <= rB[0][47:0] >> rExpDiff[0];
rB[1][55:48] <= rA[0][55:48];
rB[1][56] <= rB[0][56];
rA[1] <= rA[0];
end
//i <= i + 1'b1;
//end
rExp[1] <= rExp[0];
// 第三步,变换尾数(包括隐藏位和尾码)为补码,并寄存结果
// 与下一步骤相关的信号有TempA TempB rExp rA
//3: // Modify TempA and TempB. with sign
//begin
TempA[0] <= rA[1][56] ? { rA[1][56], (~rA[1][47:0] + 1'b1) } : { rA[1][56], rA[1][47:0] };
TempB[0] <= rB[1][56] ? { rB[1][56], (~rB[1][47:0] + 1'b1) } : { rB[1][56], rB[1][47:0] };
//i <= i + 1'b1;
//end
rExp[2] <= rExp[1];
rA[2] <= rA[1];
// 第四步,尾数以补码形式进行运算,并寄存结果
//4: // Addition
//begin
Temp[0] <= TempA[0] + TempB[0];
//i <= i + 1'b1; end
// 与下一步骤相关的信号有Temp rExp rA
rExp[3] <= rExp[2];
rA[3] <= rA[2];
// 第五步,正值化尾数,并寄存结果
// 与下一步骤相关的信号有Temp rExp isSign
//5: // modify result
//begin
isSign[0] <= Temp[0][48];
if( Temp[0][48] == 1'b1) Temp[1] <= ~Temp[0] + 1'b1; // change M be postive
else Temp[1] <= Temp[0];
rExp[4] <= {2'b00, rA[3][55:48]}; // or rB[55:48] , change rExp withbe rA.Exp or rB.Exp
//i <= i + 1'b1;
//end
// 第六步,调整进位
// 寄存Temp rExp isSign
//6: // Check M'hidden bit and modify to 2'b01
//begin
if( Temp[1][47:46] == 2'b10 || Temp[1][47:46] == 2'b11) begin Temp[2] <= Temp[1] >> 1; rExp[5] <= rExp[4] + 1'b1; end
else if( Temp[1][47:46] == 2'b00 && Temp[1][45] ) begin Temp[2] <= Temp[1] << 1; rExp[5] <= rExp[4] - 5'd1; end
else if( Temp[1][47:46] == 2'b00 && Temp[1][44] ) begin Temp[2] <= Temp[1] << 2; rExp[5] <= rExp[4] - 5'd2; end
else if( Temp[1][47:46] == 2'b00 && Temp[1][43] ) begin Temp[2] <= Temp[1] << 3; rExp[5] <= rExp[4] - 5'd3; end
else if( Temp[1][47:46] == 2'b00 && Temp[1][42] ) begin Temp[2] <= Temp[1] << 4; rExp[5] <= rExp[4] - 5'd4; end
else if( Temp[1][47:46] == 2'b00 && Temp[1][41] ) begin Temp[2] <= Temp[1] << 5; rExp[5] <= rExp[4] - 5'd5; end
else if( Temp[1][47:46] == 2'b00 && Temp[1][40] ) begin Temp[2] <= Temp[1] << 6; rExp[5] <= rExp[4] - 5'd6; end
else if( Temp[1][47:46] == 2'b00 && Temp[1][39] ) begin Temp[2] <= Temp[1] << 7; rExp[5] <= rExp[4] - 5'd7; end
else if( Temp[1][47:46] == 2'b00 && Temp[1][38] ) begin Temp[2] <= Temp[1] << 8; rExp[5] <= rExp[4] - 5'd8; end
else if( Temp[1][47:46] == 2'b00 && Temp[1][37] ) begin Temp[2] <= Temp[1] << 9; rExp[5] <= rExp[4] - 5'd9; end
else if( Temp[1][47:46] == 2'b00 && Temp[1][36] ) begin Temp[2] <= Temp[1] << 10; rExp[5] <= rExp[4] - 5'd10; end
else if( Temp[1][47:46] == 2'b00 && Temp[1][35] ) begin Temp[2] <= Temp[1] << 11; rExp[5] <= rExp[4] - 5'd11; end
else if( Temp[1][47:46] == 2'b00 && Temp[1][34] ) begin Temp[2] <= Temp[1] << 12; rExp[5] <= rExp[4] - 5'd12; end
else if( Temp[1][47:46] == 2'b00 && Temp[1][33] ) begin Temp[2] <= Temp[1] << 13; rExp[5] <= rExp[4] - 5'd13; end
else if( Temp[1][47:46] == 2'b00 && Temp[1][32] ) begin Temp[2] <= Temp[1] << 14; rExp[5] <= rExp[4] - 5'd14; end
else if( Temp[1][47:46] == 2'b00 && Temp[1][31] ) begin Temp[2] <= Temp[1] << 15; rExp[5] <= rExp[4] - 5'd15; end
else if( Temp[1][47:46] == 2'b00 && Temp[1][30] ) begin Temp[2] <= Temp[1] << 16; rExp[5] <= rExp[4] - 5'd16; end
else if( Temp[1][47:46] == 2'b00 && Temp[1][29] ) begin Temp[2] <= Temp[1] << 17; rExp[5] <= rExp[4] - 5'd17; end
else if( Temp[1][47:46] == 2'b00 && Temp[1][28] ) begin Temp[2] <= Temp[1] << 18; rExp[5] <= rExp[4] - 5'd18; end
else if( Temp[1][47:46] == 2'b00 && Temp[1][27] ) begin Temp[2] <= Temp[1] << 19; rExp[5] <= rExp[4] - 5'd19; end
else if( Temp[1][47:46] == 2'b00 && Temp[1][26] ) begin Temp[2] <= Temp[1] << 20; rExp[5] <= rExp[4] - 5'd20; end
else if( Temp[1][47:46] == 2'b00 && Temp[1][25] ) begin Temp[2] <= Temp[1] << 21; rExp[5] <= rExp[4] - 5'd21; end
else if( Temp[1][47:46] == 2'b00 && Temp[1][24] ) begin Temp[2] <= Temp[1] << 22; rExp[5] <= rExp[4] - 5'd22; end
else if( Temp[1][47:46] == 2'b00 && Temp[1][23] ) begin Temp[2] <= Temp[1] << 23; rExp[5] <= rExp[4] - 5'd23; end
else begin Temp[2] <= Temp[1]; rExp[5] <= rExp[4];end
//else do nothing, can extend the hidden bit check area
//i <= i + 1'b1;
//end
isSign[1] <= isSign[0];
// 第七步,输出结果
//7: //error check and format result in float format
//begin
if( rExp[5][9:8] == 2'b01 ) begin isOver <= 1'b1; rResult <= {1'b0,8'd127, 23'd0}; end // E Overflow
else if( rExp[5][9:8] == 2'b11 ) begin isUnder <= 1'b1; rResult <= {1'b0, 8'd127, 23'd0}; end // E Underflow
else if( Temp[2][46:23] == 24'd0 ) begin isZero <= 1'b1; rResult <= {1'b0, 8'd127, 23'd0}; end // M Zero
else if( Temp[2][22] == 1'b1 ) rResult <= { isSign[1], rExp[5][7:0], Temp[2][45:23] + 1'b1 }; // okay with normalised
else rResult <= { isSign[1], rExp[5][7:0], Temp[2][45:23] }; // okay without normalise
i <= i + 1'b1;
//end
//8:
//begin isDone <= 1'b1; i <= i + 1'b1; end
//9:
//begin isDone <= 1'b0; i <= 4'd0; end
//endcase
end
很合理,我一度都要出去玩了,但是,却有一个隐含的坑,先看仿真结果,三个游标处的运算结果不对,而其他结果正确,很吊诡,只好进入submodule进行分析
发现问题出在rExp信号上:rExp[0][9:0]和本应晚一拍的rExp[1][9:0]居然在同一拍得到了同一个值,流水数据不对齐,导致了计算错误。为什么会出现这种情况呢?
问题就在那个不起眼的阻塞赋值所在的 第一步和第二步
bug所在代码片段如下。由于Line0是阻塞赋值,相当于忽略时钟的组合逻辑,所以Line1和Line2中的rExp[0]实际上都是Line0当拍赋值的结果,这本来没什么;但是,Line3中用于寄存步骤2 rExp的rExp[1],取到的也是Line0当拍的结果,于是失去了与rA和rB的数据对齐。那么,怎么修改呢?
// 第一步,规范化A、B,计算阶码差,并寄存结果
// 与下一步骤相关的信号有rA rB rExp rExpDiff
//0: // Initial A,B and other reg.
//begin
rA[0] <= { A[31], A[30:23], 2'b01, A[22:0], 23'd0 };
rB[0] <= { B[31], B[30:23], 2'b01, B[22:0], 23'd0 };
//isOver <= 1'b0; isUnder <= 1'b0; isZero <= 1'b0;
//i <= i + 1'b1;
//end
//1: // if rExp[9..8] is 1, mean A.Exp small than B.Exp
// while rExp[9..8] is 0, mean A.Exp large than B.Exp or same.
//begin
Line0 rExp[0] = {2'b0,A[30:23]} - {2'b0,B[30:23]}; //bug 位宽不一致
Line1 if( rExp[0][8] == 1 ) rExpDiff[0] <= ~rExp[0][7:0] + 1'b1;
else rExpDiff[0] <= rExp[0][7:0];
//i <= i + 1'b1;
//end
// 第二步,阶码对齐,并寄存结果
// 与下一步骤相关的信号有rA rB rExp
//2: // if A < B; A.M move and A.E = B.E, else opposite act;
//begin
Line2 if( rExp[0][8] == 1 ) begin
rA[1][47:0] <= rA[0][47:0] >> rExpDiff[0];
rA[1][55:48] <= rB[0][55:48];
rA[1][56] <= rA[0][56];
rB[1] <= rB[0];
end
else begin
rB[1][47:0] <= rB[0][47:0] >> rExpDiff[0];
rB[1][55:48] <= rA[0][55:48];
rB[1][56] <= rB[0][56];
rA[1] <= rA[0];
end
//i <= i + 1'b1;
//end
Line3 rExp[1] <= rExp[0];
流水化问题解决
有两种方法:一种是修改步骤2中rExp的寄存,再多寄存一拍,保证数据对齐;但我更倾向于将组合逻辑去除,毕竟组合逻辑是不利于时序分析的。修改的代码片段如下:
将原代码的步骤1拆分为两部分:现代码的步骤0包含rExp[0]的时序寄存;现代码的步骤1用上一步寄存的rExp[0]计算rExpDiff,并用rExp[1]寄存rExp[0]
else if( Start_Sig ) begin
    // Pipelined replacement for the former `case( i )` state machine.
    // Every stage below executes on every clock edge; a stage only reads
    // registers written by the previous stage, so all assignments are
    // non-blocking and each operand set moves one stage per clock.

    // ---- Stage 0 (former FSM steps 0 and 1): unpack A/B, register the raw exponent difference ----
    // { sign, exponent[7:0], 2'b01 (overflow guard + hidden 1), fraction[22:0], 23 zero bits }
    rA[0] <= { A[31], A[30:23], 2'b01, A[22:0], 23'd0 };
    rB[0] <= { B[31], B[30:23], 2'b01, B[22:0], 23'd0 };
    // Non-blocking on purpose: a blocking assignment here made rExp[0] visible one
    // stage too early and broke the alignment with rA/rB.
    rExp[0] <= {2'b0,A[30:23]} - {2'b0,B[30:23]};

    // ---- Stage 1: magnitude of the exponent difference; carry operands and rExp along ----
    if( rExp[0][8] == 1 ) rExpDiff[0] <= ~rExp[0][7:0] + 1'b1; // negative difference: negate
    else                  rExpDiff[0] <= rExp[0][7:0];
    rExp[1] <= rExp[0];
    rA[1]   <= rA[0];
    rB[1]   <= rB[0];

    // ---- Stage 2 (former step 2): align the operand with the smaller exponent ----
    if( rExp[1][8] == 1 ) begin
        // A.Exp < B.Exp: shift A's mantissa right, A takes B's exponent.
        rA[2][47:0]  <= rA[1][47:0] >> rExpDiff[0];
        rA[2][55:48] <= rB[1][55:48];
        // Sign must come from the same pipeline slot as the mantissa (rA[1]);
        // rA[0] already holds the NEXT operand.
        rA[2][56]    <= rA[1][56];
        rB[2]        <= rB[1];
    end
    else begin
        // A.Exp >= B.Exp: shift B's mantissa right, B takes A's exponent.
        rB[2][47:0]  <= rB[1][47:0] >> rExpDiff[0];
        rB[2][55:48] <= rA[1][55:48];
        rB[2][56]    <= rB[1][56];
        rA[2]        <= rA[1];
    end
    rExp[2] <= rExp[1];

    // ---- Stage 3 (former step 3): { sign, mantissa } in two's complement ----
    TempA[0] <= rA[2][56] ? { rA[2][56], (~rA[2][47:0] + 1'b1) } : { rA[2][56], rA[2][47:0] };
    TempB[0] <= rB[2][56] ? { rB[2][56], (~rB[2][47:0] + 1'b1) } : { rB[2][56], rB[2][47:0] };
    rExp[3]  <= rExp[2];
    rA[3]    <= rA[2];

    // ---- Stage 4 (former step 4): two's-complement addition ----
    Temp[0] <= TempA[0] + TempB[0];
    rExp[4] <= rExp[3];
    rA[4]   <= rA[3];

    // ---- Stage 5 (former step 5): back to sign + magnitude, reload the exponent ----
    isSign[0] <= Temp[0][48];
    if( Temp[0][48] == 1'b1) Temp[1] <= ~Temp[0] + 1'b1; // negative sum: take the magnitude
    else                     Temp[1] <= Temp[0];
    // After stage 2 rA carries the common exponent of both operands.
    rExp[5] <= {2'b00, rA[4][55:48]};

    // ---- Stage 6 (former step 6): normalise so that Temp[47:46] == 2'b01 ----
    // Priority search for the highest set bit; the exponent moves with the shift.
    if( Temp[1][47:46] == 2'b10 || Temp[1][47:46] == 2'b11) begin Temp[2] <= Temp[1] >> 1; rExp[6] <= rExp[5] + 1'b1; end
    else if( Temp[1][47:46] == 2'b00 && Temp[1][45] ) begin Temp[2] <= Temp[1] << 1; rExp[6] <= rExp[5] - 5'd1; end
    else if( Temp[1][47:46] == 2'b00 && Temp[1][44] ) begin Temp[2] <= Temp[1] << 2; rExp[6] <= rExp[5] - 5'd2; end
    else if( Temp[1][47:46] == 2'b00 && Temp[1][43] ) begin Temp[2] <= Temp[1] << 3; rExp[6] <= rExp[5] - 5'd3; end
    else if( Temp[1][47:46] == 2'b00 && Temp[1][42] ) begin Temp[2] <= Temp[1] << 4; rExp[6] <= rExp[5] - 5'd4; end
    else if( Temp[1][47:46] == 2'b00 && Temp[1][41] ) begin Temp[2] <= Temp[1] << 5; rExp[6] <= rExp[5] - 5'd5; end
    else if( Temp[1][47:46] == 2'b00 && Temp[1][40] ) begin Temp[2] <= Temp[1] << 6; rExp[6] <= rExp[5] - 5'd6; end
    else if( Temp[1][47:46] == 2'b00 && Temp[1][39] ) begin Temp[2] <= Temp[1] << 7; rExp[6] <= rExp[5] - 5'd7; end
    else if( Temp[1][47:46] == 2'b00 && Temp[1][38] ) begin Temp[2] <= Temp[1] << 8; rExp[6] <= rExp[5] - 5'd8; end
    else if( Temp[1][47:46] == 2'b00 && Temp[1][37] ) begin Temp[2] <= Temp[1] << 9; rExp[6] <= rExp[5] - 5'd9; end
    else if( Temp[1][47:46] == 2'b00 && Temp[1][36] ) begin Temp[2] <= Temp[1] << 10; rExp[6] <= rExp[5] - 5'd10; end
    else if( Temp[1][47:46] == 2'b00 && Temp[1][35] ) begin Temp[2] <= Temp[1] << 11; rExp[6] <= rExp[5] - 5'd11; end
    else if( Temp[1][47:46] == 2'b00 && Temp[1][34] ) begin Temp[2] <= Temp[1] << 12; rExp[6] <= rExp[5] - 5'd12; end
    else if( Temp[1][47:46] == 2'b00 && Temp[1][33] ) begin Temp[2] <= Temp[1] << 13; rExp[6] <= rExp[5] - 5'd13; end
    else if( Temp[1][47:46] == 2'b00 && Temp[1][32] ) begin Temp[2] <= Temp[1] << 14; rExp[6] <= rExp[5] - 5'd14; end
    else if( Temp[1][47:46] == 2'b00 && Temp[1][31] ) begin Temp[2] <= Temp[1] << 15; rExp[6] <= rExp[5] - 5'd15; end
    else if( Temp[1][47:46] == 2'b00 && Temp[1][30] ) begin Temp[2] <= Temp[1] << 16; rExp[6] <= rExp[5] - 5'd16; end
    else if( Temp[1][47:46] == 2'b00 && Temp[1][29] ) begin Temp[2] <= Temp[1] << 17; rExp[6] <= rExp[5] - 5'd17; end
    else if( Temp[1][47:46] == 2'b00 && Temp[1][28] ) begin Temp[2] <= Temp[1] << 18; rExp[6] <= rExp[5] - 5'd18; end
    else if( Temp[1][47:46] == 2'b00 && Temp[1][27] ) begin Temp[2] <= Temp[1] << 19; rExp[6] <= rExp[5] - 5'd19; end
    else if( Temp[1][47:46] == 2'b00 && Temp[1][26] ) begin Temp[2] <= Temp[1] << 20; rExp[6] <= rExp[5] - 5'd20; end
    else if( Temp[1][47:46] == 2'b00 && Temp[1][25] ) begin Temp[2] <= Temp[1] << 21; rExp[6] <= rExp[5] - 5'd21; end
    else if( Temp[1][47:46] == 2'b00 && Temp[1][24] ) begin Temp[2] <= Temp[1] << 22; rExp[6] <= rExp[5] - 5'd22; end
    else if( Temp[1][47:46] == 2'b00 && Temp[1][23] ) begin Temp[2] <= Temp[1] << 23; rExp[6] <= rExp[5] - 5'd23; end
    // Already normalised (2'b01) or no set bit in [45:23]: pass through unchanged.
    else begin Temp[2] <= Temp[1]; rExp[6] <= rExp[5]; end
    isSign[1] <= isSign[0];

    // ---- Stage 7 (former step 7): range checks and result packing ----
    // Clear the flags by default so that they describe only the result registered
    // on this edge. Without these defaults a flag, once set, stayed set for every
    // later result (the FSM version cleared them in its step 0). The assignments
    // in the if-chain below come later and therefore override the defaults.
    isOver  <= 1'b0;
    isUnder <= 1'b0;
    isZero  <= 1'b0;
    if( rExp[6][9:8] == 2'b01 ) begin isOver <= 1'b1; rResult <= {1'b0,8'd127, 23'd0}; end // exponent overflow
    else if( rExp[6][9:8] == 2'b11 ) begin isUnder <= 1'b1; rResult <= {1'b0, 8'd127, 23'd0}; end // exponent underflow
    else if( Temp[2][46:23] == 24'd0 ) begin isZero <= 1'b1; rResult <= {1'b0, 8'd127, 23'd0}; end // mantissa is zero
    else if( Temp[2][22] == 1'b1 ) rResult <= { isSign[1], rExp[6][7:0], Temp[2][45:23] + 1'b1 }; // first dropped bit set: round up
    else rResult <= { isSign[1], rExp[6][7:0], Temp[2][45:23] }; // no rounding needed
    // Step counter left over from the FSM version; no statement in this branch reads it.
    i <= i + 1'b1;
    // The isDone handshake of former steps 8/9 has no counterpart here.
end
仿真分析,结果对了,可以看到总共用了14个clock,其中潜伏期8个clock,跟之前分析保持一致了
附件 完整功能和测试代码
float_add_pip_module
`timescale 1ns / 1ps
//
// Company:
// Engineer:
//
// Create Date: 2019/02/17 10:54:31
// Design Name:
// Module Name: float_add_module
// Project Name:
// Target Devices:
// Tool Versions:
// Description:
//
// Dependencies:
//
// Revision:
// Revision 0.01 - File Created
// Additional Comments:
//
//
// float_add_pip_module
//
// Pipelined single-precision floating-point adder. While Start_Sig is high,
// one operand pair (A, B) is accepted on every rising CLK edge and moves
// through eight register stages (0..7); Result is written by stage 7.
//
// Every operand is unpacked with an implicit hidden 1 (2'b01 in front of the
// fraction); no branch in this module treats exponent 0 or 255 specially.
//
// Done_Sig = { isOver, isUnder, isZero, 1'b1 }. The three flags describe the
// value most recently written to Result; in all three flagged cases Result is
// the fixed pattern { 1'b0, 8'd127, 23'd0 }. Bit 0 is tied high.
module float_add_pip_module
(
    input CLK, RSTn,                          // rising-edge clock, active-low asynchronous reset
    input [31:0]A,B,                          // operands, IEEE 754 single-precision bit patterns
    output [31:0]Result,                      // sum, IEEE 754 single-precision bit pattern
    input Start_Sig,                          // pipeline enable: registers only advance while high
    output [3:0]Done_Sig,                     // { overflow, underflow, zero, 1'b1 }
    /*****************/                       // debug taps into the pipeline
    output [56:0]SQ_rA,SQ_rB,
    output [48:0]SQ_Temp,SQ_TempA,SQ_TempB,
    output [9:0]SQ_rExp,
    output [7:0]SQ_rExpDiff
);

    /**************************************/

    // Pipeline registers. The array index is the pipeline slot, not a bit index.
    reg [56:0]rA[4:0],rB[2:0];        // [56] sign, [55:48] exponent, [47:46] hidden bits, [45:23] fraction, [22:0] guard bits
    reg [48:0]Temp[2:0];              // [48] sign of the sum, [47:46] hidden bits, [45:23] fraction, [22:0] guard bits
    reg [48:0]TempA[1:0],TempB[1:0];  // same layout as Temp; the signed addends
    reg [31:0]rResult;
    reg [9:0]rExp[6:0];               // [9:8] overflow / underflow check, [7:0] exponent
    reg [7:0]rExpDiff[1:0];           // magnitude of A.Exp - B.Exp
    reg isSign[1:0];
    reg isOver;                       // exponent overflow flag
    reg isUnder;                      // exponent underflow flag
    reg isZero;                       // zero-mantissa flag
    reg isDone;                       // cleared by reset; not written by the pipeline

    // Left-shift amount (0..23) that moves the highest set bit of m[45:23] up to
    // bit 46. Returns 0 when none of those bits is set. Only meaningful when
    // m[47:46] == 2'b00; the caller checks that.
    function [4:0] norm_lshift;
        input [48:0] m;
        integer k;
        begin
            norm_lshift = 5'd0;
            // Scan from bit 23 upwards; the last hit (highest set bit) wins.
            for( k = 23; k >= 1; k = k - 1 )
                if( m[46 - k] ) norm_lshift = k;
        end
    endfunction

    always @ ( posedge CLK or negedge RSTn )
        if( !RSTn )
            begin
                // Only the status flags are reset; the data-path registers are
                // left unreset and hold unknown values until the pipeline fills.
                isOver <= 1'b0;
                isUnder <= 1'b0;
                isZero <= 1'b0;
                isDone <= 1'b0;
            end
        else if( Start_Sig ) begin
            // Every stage below executes on every enabled clock edge. A stage only
            // reads registers written by the previous stage, so all assignments are
            // non-blocking and each operand set moves one stage per clock.

            // ---- Stage 0: unpack A/B and register the raw exponent difference ----
            rA[0] <= { A[31], A[30:23], 2'b01, A[22:0], 23'd0 };
            rB[0] <= { B[31], B[30:23], 2'b01, B[22:0], 23'd0 };
            // Non-blocking on purpose: a blocking assignment here made rExp[0] visible
            // one stage too early and broke the alignment with rA/rB.
            rExp[0] <= {2'b0,A[30:23]} - {2'b0,B[30:23]};

            // ---- Stage 1: magnitude of the exponent difference; carry operands along ----
            if( rExp[0][8] == 1 ) rExpDiff[0] <= ~rExp[0][7:0] + 1'b1; // negative difference: negate
            else                  rExpDiff[0] <= rExp[0][7:0];
            rExp[1] <= rExp[0];
            rA[1]   <= rA[0];
            rB[1]   <= rB[0];

            // ---- Stage 2: align the operand with the smaller exponent ----
            if( rExp[1][8] == 1 ) begin
                // A.Exp < B.Exp: shift A's mantissa right, A takes B's exponent.
                rA[2][47:0]  <= rA[1][47:0] >> rExpDiff[0];
                rA[2][55:48] <= rB[1][55:48];
                // Sign must come from the same pipeline slot as the mantissa (rA[1]);
                // rA[0] already holds the NEXT operand.
                rA[2][56]    <= rA[1][56];
                rB[2]        <= rB[1];
            end
            else begin
                // A.Exp >= B.Exp: shift B's mantissa right, B takes A's exponent.
                rB[2][47:0]  <= rB[1][47:0] >> rExpDiff[0];
                rB[2][55:48] <= rA[1][55:48];
                rB[2][56]    <= rB[1][56];
                rA[2]        <= rA[1];
            end
            rExp[2] <= rExp[1];

            // ---- Stage 3: { sign, mantissa } in two's complement ----
            TempA[0] <= rA[2][56] ? { rA[2][56], (~rA[2][47:0] + 1'b1) } : { rA[2][56], rA[2][47:0] };
            TempB[0] <= rB[2][56] ? { rB[2][56], (~rB[2][47:0] + 1'b1) } : { rB[2][56], rB[2][47:0] };
            rExp[3]  <= rExp[2];
            rA[3]    <= rA[2];

            // ---- Stage 4: two's-complement addition ----
            Temp[0] <= TempA[0] + TempB[0];
            rExp[4] <= rExp[3];
            rA[4]   <= rA[3];

            // ---- Stage 5: back to sign + magnitude, reload the exponent ----
            isSign[0] <= Temp[0][48];
            if( Temp[0][48] == 1'b1) Temp[1] <= ~Temp[0] + 1'b1; // negative sum: take the magnitude
            else                     Temp[1] <= Temp[0];
            // After stage 2 rA carries the common exponent of both operands.
            rExp[5] <= {2'b00, rA[4][55:48]};

            // ---- Stage 6: normalise so that Temp[47:46] == 2'b01 ----
            if( Temp[1][47] ) begin
                // 2'b10 / 2'b11: carry out of the hidden-bit position.
                Temp[2] <= Temp[1] >> 1;
                rExp[6] <= rExp[5] + 1'b1;
            end
            else if( Temp[1][46] ) begin
                // 2'b01: already normalised.
                Temp[2] <= Temp[1];
                rExp[6] <= rExp[5];
            end
            else begin
                // 2'b00: shift the highest set bit of [45:23] up to bit 46 and lower
                // the exponent by the same amount (shift 0 = pass through unchanged).
                Temp[2] <= Temp[1] << norm_lshift( Temp[1] );
                rExp[6] <= rExp[5] - norm_lshift( Temp[1] );
            end
            isSign[1] <= isSign[0];

            // ---- Stage 7: range checks and result packing ----
            // Clear the flags by default so that they describe only the result
            // registered on this edge; previously a flag, once set, stayed set for
            // every later result. The assignments in the if-chain below come later
            // and therefore override the defaults.
            isOver  <= 1'b0;
            isUnder <= 1'b0;
            isZero  <= 1'b0;
            if( rExp[6][9:8] == 2'b01 ) begin isOver <= 1'b1; rResult <= {1'b0,8'd127, 23'd0}; end // exponent overflow
            else if( rExp[6][9:8] == 2'b11 ) begin isUnder <= 1'b1; rResult <= {1'b0, 8'd127, 23'd0}; end // exponent underflow
            else if( Temp[2][46:23] == 24'd0 ) begin isZero <= 1'b1; rResult <= {1'b0, 8'd127, 23'd0}; end // mantissa is zero
            else if( Temp[2][22] == 1'b1 ) rResult <= { isSign[1], rExp[6][7:0], Temp[2][45:23] + 1'b1 }; // first dropped bit set: round up
            else rResult <= { isSign[1], rExp[6][7:0], Temp[2][45:23] }; // no rounding needed
        end

    /**************************************/

    assign Done_Sig = { isOver, isUnder, isZero, 1'b1 };
    assign Result = rResult;

    /***************************************/

    // Debug taps (note that they come from different pipeline slots).
    assign SQ_rA = rA[3];
    assign SQ_rB = rB[1];
    assign SQ_Temp = Temp[2];
    assign SQ_TempA = TempA[0];
    assign SQ_TempB = TempB[0];
    assign SQ_rExp = rExp[6];
    assign SQ_rExpDiff = rExpDiff[0];

    /****************************************/

endmodule
testbench
`timescale 1ns / 1ps
//
// Company:
// Engineer:
//
// Create Date: 2019/02/17 10:54:31
// Design Name:
// Module Name: float_add_pip_module_tb
// Project Name:
// Target Devices:
// Tool Versions:
// Description:
//
// Dependencies:
//
// Revision:
// Revision 0.01 - File Created
// Additional Comments:
//
//
// Testbench for float_add_pip_module: after reset it feeds seven operand pairs
// on seven consecutive clocks with Start_Sig held high, then keeps the last
// pair applied. Results are meant to be inspected in the waveform; nothing is
// checked automatically.
module float_add_pip_module_tb();

    reg CLK;
    reg RSTn;
    reg Start_Sig;
    reg [31:0] A;
    reg [31:0] B;
    wire [3:0] Done_Sig;
    wire [31:0] Result;

    /*******************/

    // Debug taps of the device under test.
    wire [56:0]SQ_rA,SQ_rB;
    wire [48:0]SQ_Temp,SQ_TempA,SQ_TempB;
    wire [9:0]SQ_rExp;
    wire [7:0]SQ_rExpDiff;

    /**********************************/

    float_add_pip_module U1
    (
        .CLK( CLK ),
        .RSTn( RSTn ),
        .A( A ),
        .B( B ),
        .Result( Result ),
        .Start_Sig( Start_Sig ),
        .Done_Sig( Done_Sig ),
        .SQ_rA(SQ_rA),
        .SQ_rB(SQ_rB),
        .SQ_Temp( SQ_Temp ),
        .SQ_TempA( SQ_TempA ),
        .SQ_TempB( SQ_TempB ),
        .SQ_rExp( SQ_rExp ),
        .SQ_rExpDiff( SQ_rExpDiff )
    );

    /***********************************/

    // Clock: 10 ns period, driven from time 0. It has its own initial block so
    // that CLK is defined during the reset window (it used to be assigned only
    // after the #10 reset delay and was X until then).
    initial
        begin
            CLK = 1'b0;
            forever #5 CLK = ~CLK;
        end

    // Active-low reset, released after 10 ns. The first rising edge that sees
    // RSTn high is still the one at 15 ns.
    initial
        begin
            RSTn = 1'b0;
            #10 RSTn = 1'b1;
        end

    /***********************************/

    // Stimulus sequencer: one operand pair per clock, Start_Sig high throughout.
    // The decimal values in the case comments are the original author's notes.
    reg [3:0]i;

    always @ ( posedge CLK or negedge RSTn )
        if( !RSTn )
            begin
                A <= 32'd0;
                B <= 32'd0;
                Start_Sig <= 1'b0;
                i <= 4'd0;
            end
        else
            case( i )

                0: // A = 3.65, B = -7.4
                    begin
                        A <= 32'b0_10000000_11010011001100110011010;
                        B <= 32'b1_10000001_11011001100110011001101;
                        Start_Sig <= 1'b1;
                        i <= i+1'b1;
                    end

                1: // exponent underflow check (both exponent fields are 0)
                    begin
                        A <= 32'b0_00000000_01010000101101000101101;
                        B <= 32'b1_00000000_00010000101100001000111;
                        Start_Sig <= 1'b1;
                        i <= i+1'b1;
                    end

                2: // A = 1.9999997, B = -1.9999998: near-total cancellation
                    begin
                        A <= 32'b0_01111111_11111111111111111111110;
                        B <= 32'b1_01111111_11111111111111111111111;
                        Start_Sig <= 1'b1;
                        i <= i+1'b1;
                    end

                3: // exponent overflow check (both exponent fields are 255)
                    begin
                        A <= 32'b0_11111111_11111111111111111111111;
                        B <= 32'b0_11111111_11111111111111111111111;
                        Start_Sig <= 1'b1;
                        i <= i+1'b1;
                    end

                4: // A = -12.558, B = -7.309
                    begin
                        A <= 32'b11000001010010001110110110010001;
                        B <= 32'b11000000111010011110001101010100;
                        Start_Sig <= 1'b1;
                        i <= i+1'b1;
                    end

                5: // A = 111.7762, B = 302.4409
                    begin
                        A <= 32'b01000010110111111000110101101010;
                        B <= 32'b01000011100101110011100001101111;
                        Start_Sig <= 1'b1;
                        i <= i+1'b1;
                    end

                6: // A = 2112.2012, B = -2002.2012
                    begin
                        A <= 32'b01000101000001000000001100111000;
                        B <= 32'b11000100111110100100011001110000;
                        Start_Sig <= 1'b1;
                        i <= i+1'b1;
                    end

                7: // all vectors applied: hold here; A, B and Start_Sig keep their last values
                    i <= i;

                default: // not reached by the sequence above; hold as well
                    i <= i;

            endcase

endmodule