算法原理:
参考《基于FPGA的数字信号处理 [高亚军]》,文中给出的是10进制的计算过程,在verilog中使用2进制更方便,也避免引入缺乏控制的D×10乘法
约定:
- 先确定商和除数都为 n bit,则由乘法规律可知被除数为 2n bit。除法的位宽比较复杂:如果先确定被除数和除数分别为 2n、n bit,则商有可能超过 n bit
- 设求第 $i$ 位商 $q_i$ 迭代时的余数为 $r_i$,除数为 $D$,则有:

$$r_i = r_{i+1} - q_i D 2^i$$
其中 $i$ 在迭代时由 $n-1$ 递减至 0(初值 $r_n$ 取被除数),可求出商的所有位
结构:
外部模块 div_restoring 计算每一轮迭代的 $q_i D 2^i$ 值(按 $q_i = 1$ 计算,即 $D 2^i$),并传递给下级模块 div_iter;2 进制算法中 $q_i$ 只取 0 或 1,故每轮迭代只需在 div_iter 中比较 1 次即可获得正确的商和当前余数
// Sequential restoring divider (radix 2).
//
// Computes quotient = dividend / divisor and remainder = dividend % divisor
// one quotient bit per iteration, MSB first. Each iteration hands the
// current partial remainder and the shifted divisor (divisor * 2^i) to the
// div_iter sub-module, which returns the quotient bit and next remainder.
//
// Parameters:
//   N          width of divisor, quotient and remainder; dividend is 2N bits.
// Ports:
//   dividend   2N-bit unsigned dividend, sampled in the cycle a start is accepted.
//   divisor    N-bit unsigned divisor, sampled in the cycle a start is accepted.
//   quotient   N-bit result, complete when O_valid is high.
//   remainder  N-bit result (low N bits of the final partial remainder).
//   clk        clock, rising-edge active.
//   rstn       asynchronous reset, active low.
//   I_valid    start request; accepted only while the divider is idle
//              (iteration counter == N), ignored otherwise.
//   O_valid    high for the cycle(s) in which quotient/remainder were just
//              completed; cleared again on the next idle or start cycle.
//
// Note: the caller must ensure the true quotient fits in N bits
// (dividend < divisor * 2^N); otherwise the result is not meaningful.
module div_restoring #(
    parameter N = 7
)(
    input  [N+N-1:0]   dividend,
    input  [N-1:0]     divisor,
    output reg [N-1:0] quotient,
    output reg [N-1:0] remainder,
    input              clk,
    input              rstn,
    input              I_valid,
    output             O_valid
);
    reg  [N+N-1:0] r_i;        // partial remainder fed to div_iter
    reg  [N+N-1:0] div_coe;    // divisor * 2^i for the iteration in flight
    reg  [N-1:0]   i_cnt;      // N = idle; otherwise index of the quotient bit being resolved
    wire [N+N-1:0] r_o;        // next partial remainder from div_iter
    wire           q_o;        // quotient bit from div_iter
    wire           o_valid_t;  // div_iter result strobe
    reg            i_valid_t;  // div_iter start strobe
    reg            O_validReg;

    assign O_valid = O_validReg;

    always @(posedge clk or negedge rstn) begin
        if (!rstn) begin
            r_i        <= 'b0;
            div_coe    <= 'b0;
            i_cnt      <= N;
            i_valid_t  <= 1'b0;
            O_validReg <= 1'b0;
        end
        else if (I_valid && i_cnt == N) begin
            // Start: load the dividend and the largest shifted divisor.
            r_i        <= dividend;
            div_coe    <= divisor << (N - 1); // radix 2: multiply by 2^(N-1) is a left shift
            i_cnt      <= i_cnt - 1;
            i_valid_t  <= 1'b1;
            // A start may be accepted in the cycle right after completion;
            // make sure the done pulse does not stretch into the new operation.
            O_validReg <= 1'b0;
        end
        else if (i_cnt < N && i_cnt > 0 && o_valid_t) begin
            // Intermediate iteration: store the bit and launch the next step.
            quotient[i_cnt] <= q_o;
            r_i             <= r_o;
            // div_coe currently holds divisor << i_cnt, so halving it yields
            // divisor << (i_cnt - 1) without re-reading the divisor port.
            div_coe         <= div_coe >> 1;
            i_cnt           <= i_cnt - 1;
            i_valid_t       <= 1'b1;
        end
        else if (i_cnt == 0 && o_valid_t) begin
            // Last iteration: publish the result and return to idle so that a
            // new I_valid is accepted without needing a reset.
            quotient[0] <= q_o;
            remainder   <= r_o[N-1:0];
            i_cnt       <= N;
            i_valid_t   <= 1'b0;
            O_validReg  <= 1'b1;
        end
        else begin
            // Idle, or waiting for div_iter to finish the current step.
            i_valid_t  <= 1'b0;
            O_validReg <= 1'b0;
        end
    end

    div_iter #(.N(N))
    div_iter_u(
        .clk(clk),
        .rstn(rstn),
        .I_valid(i_valid_t),
        .r_i(r_i),
        .div_coe(div_coe),
        .r_o(r_o),
        .q_o(q_o),
        .O_valid(o_valid_t)
    );
endmodule
// Reusable single-step unit of the restoring divider.
//
// Given the current partial remainder r_i and the shifted divisor div_coe,
// it decides whether r_i - div_coe is negative and from that produces one
// quotient bit and the next partial remainder:
//   r_i >= div_coe : q_o = 1, r_o = r_i - div_coe
//   r_i <  div_coe : q_o = 0, r_o = r_i (restored)
//
// Timing: I_valid is accepted when no calculation is pending; the result
// appears two rising clock edges later with O_valid high for one cycle.
// When idle, r_o, q_o and O_valid are driven to 0.
//
// Parameters:
//   N        operand half-width; r_i, div_coe and r_o are 2N bits wide.
// Ports:
//   r_i      unsigned partial remainder in.
//   div_coe  unsigned shifted divisor in.
//   r_o      unsigned partial remainder out (valid with O_valid).
//   q_o      quotient bit out (valid with O_valid).
//   clk      clock, rising-edge active.
//   rstn     asynchronous reset, active low.
//   I_valid  start strobe.
//   O_valid  result strobe.
module div_iter #(
    parameter N = 7
) (
    input  [N+N-1:0]     r_i,
    input  [N+N-1:0]     div_coe,
    output reg [N+N-1:0] r_o,
    output reg           q_o,
    input                clk,
    input                rstn,
    input                I_valid,
    output reg           O_valid
);
    reg [N+N-1:0] r_Reg1;     // copy of r_i, used when the subtraction must be undone
    // Difference with one extra MSB. Operands are unsigned, so testing bit
    // 2N-1 of a 2N-bit difference would misreport large positive results as
    // negative; bit 2N of the widened subtraction is the true borrow.
    reg [N+N:0]   diff_r;
    reg           calc_valid; // a subtraction result is pending

    always @(posedge clk or negedge rstn) begin
        if (!rstn) begin
            r_Reg1     <= 'b0;
            diff_r     <= 'b0;
            O_valid    <= 1'b0;
            calc_valid <= 1'b0;
            r_o        <= 'b0;
            q_o        <= 1'b0;
        end
        else if (I_valid && calc_valid == 0) begin
            // Stage 1: capture the operand and the trial subtraction.
            r_Reg1     <= r_i;
            diff_r     <= {1'b0, r_i} - {1'b0, div_coe};
            calc_valid <= 1'b1;
            O_valid    <= 1'b0;
        end
        else if (calc_valid) begin
            // Stage 2: borrow set means r_i < div_coe, so restore.
            q_o        <= ~diff_r[N+N];
            r_o        <= diff_r[N+N] ? r_Reg1 : diff_r[N+N-1:0];
            O_valid    <= 1'b1;
            calc_valid <= 1'b0;
        end
        else begin
            // Idle: clear state and outputs.
            r_Reg1     <= 'b0;
            diff_r     <= 'b0;
            O_valid    <= 1'b0;
            calc_valid <= 1'b0;
            r_o        <= 'b0;
            q_o        <= 1'b0;
        end
    end
endmodule
testbench: 参考菜鸟教程
// Self-checking testbench for div_restoring.
//
// Drives a handful of dividend/divisor pairs through the divider and compares
// quotient and remainder with the simulator's own / and % operators. Each
// operation is preceded by a reset pulse so the divider is guaranteed to be
// in its idle state before I_valid is asserted.
module div_restoring_tb();
    parameter N = 9;

    reg clk, rstn;

    // Clock: 10 time-unit period, rising edges at 5, 15, 25, ...
    always begin
        clk = 0; #5;
        clk = 1; #5;
    end

    // Power-on reset.
    initial begin
        rstn = 1'b0;
        #8; rstn = 1'b1;
    end

    reg  [N+N-1:0] dividend;
    reg  [N-1:0]   divisor;
    wire [N-1:0]   quotient, remainder;
    reg            I_valid;
    wire           O_valid;

    // Run one division and check its result.
    //   dividend_task : 2N-bit dividend
    //   divisor_task  : N-bit divisor (must be non-zero for the check to be meaningful)
    task div_data_in;
        input [N+N-1:0] dividend_task;
        input [N-1:0]   divisor_task;
        reg   [N+N-1:0] exp_q;
        reg   [N+N-1:0] exp_r;
        begin
            // Wait until the previous done pulse (if any) has ended.
            wait (!div_restoring_tb.div_restoring_u.O_valid);
            // Pulse reset to force the divider back to idle.
            rstn = 1'b0;
            @(negedge clk);
            I_valid  = 1'b1;
            dividend = dividend_task;
            divisor  = divisor_task;
            rstn     = 1'b1;
            @(negedge clk);
            I_valid  = 1'b0;
            wait (div_restoring_tb.div_restoring_u.O_valid);
            // Sample in the middle of the done pulse, after all registers settled.
            @(negedge clk);
            exp_q = dividend_task / divisor_task;
            exp_r = dividend_task % divisor_task;
            if (quotient === exp_q && remainder === exp_r)
                $display("[%0t] PASS %0d / %0d = %0d rem %0d",
                         $time, dividend_task, divisor_task, quotient, remainder);
            else
                $display("[%0t] FAIL %0d / %0d : got %0d rem %0d, expected %0d rem %0d",
                         $time, dividend_task, divisor_task, quotient, remainder, exp_q, exp_r);
        end
    endtask

    // Stimulus.
    initial begin
        // Known values from time 0 so no X reaches the DUT inputs.
        I_valid  = 1'b0;
        dividend = 'b0;
        divisor  = 'b0;
        #55;
        div_data_in(25,   5);
        div_data_in(3927, 15);
        div_data_in(5454, 300);
        div_data_in(215,  9);
    end

    div_restoring #(.N(N))
    div_restoring_u(
        .clk(clk),
        .rstn(rstn),
        .I_valid(I_valid),
        .dividend(dividend),
        .divisor(divisor),
        .quotient(quotient),
        .remainder(remainder),
        .O_valid(O_valid)
    );

    // Simulation finish: stop once simulated time reaches 10000.
    initial begin
        forever begin
            #100;
            if ($time >= 10000) $finish;
        end
    end
endmodule