三、重要特性
(1)级的概念
每分割一次,称为一级运算
设FFT运算点数为N,共有M级运算,则它们满足:$N = 2^M$,即 $M = \log_2 N$
其中,每一级运算的标识为m=0,1,2,……,M-1
(2)蝶形单元
FFT计算结构由若干个蝶形运算单元组成,每个运算单元示意图如下:
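蝶形单元的输入输出满足:
$X_{m+1}(p) = X_m(p) + X_m(q)\,W_N^r$
$X_{m+1}(q) = X_m(p) - X_m(q)\,W_N^r$
其中,$q = p + 2^m$,$W_N^r = e^{-j 2\pi r / N}$ 为旋转因子。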
可得:每一个蝶形单元运算时,进行了1次乘法和2次加法
每一级中,均有N/2个蝶形单元。
故完成一次FFT所需要的(复数)乘法次数和加法次数分别为:
$\frac{N}{2}\log_2 N$、$N\log_2 N$
(3)组的概念
每一级N/2个蝶形单元可分为若干个组,每一组有着相同的结构与 $W_N^r$ 因子分布。
例如m=0时,可以分为N/2=4组(这里N=8)
m=1时,可以分为N/4=2组
m=M-1时,只能分为1组
(4)$W_N^r$ 因子分布
$W_{2^{m+1}}^{r}$ 因子存在于第m级,
其中 $r = 0, 1, \dots, 2^m - 1$。
在8点FFT第二级运算中,即m=1,蝶形运算因子为 $W_4^0, W_4^1$,可以化简(换算到 $W_8$)为 $W_8^0, W_8^2$。
(5)码位倒置
在N=8点的FFT计算中,输入序列的原始顺序为:
X(0), X(1), X(2), X(3), X(4), X(5), X(6), X(7)
第一次分解为:
X(0), X(2), X(4), X(6), X(1), X(3), X(5), X(7)
第二次分解为:
X(0), X(4), X(2), X(6), X(1), X(5), X(3), X(7)
第三次分解为:
X(0), X(4), X(2), X(6), X(1), X(5), X(3), X(7)
为了在编程中易于实现该过程,可令X(0)~X(7)
对应的2进制码为:
X(000), X(001), X(010), X(011), X(100), X(101), X(110), X(111)
将其位置的2进制码进行翻转:
X(000), X(100), X(010), X(110), X(001), X(101), X(011), X(111)
此时位置对应的10进制为:
X(0), X(4), X(2), X(6), X(1), X(5), X(3), X(7)
恰好对应FFT第一级输入数据的顺序。
Verilog代码:
butterfly.v
// Radix-2 butterfly on complex fixed-point samples:
//
//     yp = xp + xq * W
//     yq = xp - xq * W
//
// W (factor_real / factor_imag) is a fixed-point value scaled by 2^13, so xp
// is pre-scaled by 2^13 before the add/subtract and the low 13 bits of the
// result are discarded at the output.
//
// Pipeline: three register stages, each gated by the enable of the previous
// stage; `valid` is `en` delayed by three clocks.
//
// The 24-bit outputs are bits [36:13] of the 40-bit results. No saturation or
// rounding is applied.
module butterfly(
    input                       clk,
    input                       rstn,          // asynchronous reset, active low
    input                       en,            // input samples valid
    input signed [23:0]         xp_real,       // Xm(p)
    input signed [23:0]         xp_imag,
    input signed [23:0]         xq_real,       // Xm(q)
    input signed [23:0]         xq_imag,
    input signed [15:0]         factor_real,   // Wnr, scaled by 2^13
    input signed [15:0]         factor_imag,
    output                      valid,         // outputs valid
    output signed [23:0]        yp_real,       // Xm+1(p)
    output signed [23:0]        yp_imag,
    output signed [23:0]        yq_real,       // Xm+1(q)
    output signed [23:0]        yq_imag
);

    // Number of fractional bits carried by the twiddle factor.
    localparam FRAC_BITS = 13;

    // Enable delay line: en_r[i] is `en` delayed by i+1 clocks.
    // Only three taps are consumed (stage 2, stage 3, valid).
    reg [2:0] en_r;
    always @(posedge clk or negedge rstn) begin
        if (!rstn) begin
            en_r <= 3'b0;
        end
        else begin
            en_r <= {en_r[1:0], en};
        end
    end

    //=====================================================//
    // (1.0) Stage 1: the four partial products of Xm(q) * Wnr, and Xm(p)
    //       aligned to the same 2^13 scale.
    reg signed [39:0] xq_wnr_real0;
    reg signed [39:0] xq_wnr_real1;
    reg signed [39:0] xq_wnr_imag0;
    reg signed [39:0] xq_wnr_imag1;
    reg signed [39:0] xp_real_d;
    reg signed [39:0] xp_imag_d;
    always @(posedge clk or negedge rstn) begin
        if (!rstn) begin
            xp_real_d    <= 'b0;
            xp_imag_d    <= 'b0;
            xq_wnr_real0 <= 'b0;
            xq_wnr_real1 <= 'b0;
            xq_wnr_imag0 <= 'b0;
            xq_wnr_imag1 <= 'b0;
        end
        else if (en) begin
            xq_wnr_real0 <= xq_real * factor_real;
            xq_wnr_real1 <= xq_imag * factor_imag;
            xq_wnr_imag0 <= xq_real * factor_imag;
            xq_wnr_imag1 <= xq_imag * factor_real;
            // Sign-extend Xm(p) and multiply it by 2^13 to match Wnr scaling.
            xp_real_d    <= {{3{xp_real[23]}}, xp_real, {FRAC_BITS{1'b0}}};
            xp_imag_d    <= {{3{xp_imag[23]}}, xp_imag, {FRAC_BITS{1'b0}}};
        end
    end

    // (1.1) Stage 2: combine the partial products into the complex product
    //       and delay Xm(p) by one more clock to stay aligned.
    reg signed [39:0] xp_real_d1;
    reg signed [39:0] xp_imag_d1;
    reg signed [39:0] xq_wnr_real;
    reg signed [39:0] xq_wnr_imag;
    always @(posedge clk or negedge rstn) begin
        if (!rstn) begin
            xp_real_d1  <= 'b0;
            xp_imag_d1  <= 'b0;
            xq_wnr_real <= 'b0;
            xq_wnr_imag <= 'b0;
        end
        else if (en_r[0]) begin
            xp_real_d1  <= xp_real_d;
            xp_imag_d1  <= xp_imag_d;
            // The 40-bit registers leave headroom so these sums do not overflow.
            xq_wnr_real <= xq_wnr_real0 - xq_wnr_real1;
            xq_wnr_imag <= xq_wnr_imag0 + xq_wnr_imag1;
        end
    end

    //======================================================//
    // (2.0) Stage 3: butterfly sum and difference.
    reg signed [39:0] yp_real_r;
    reg signed [39:0] yp_imag_r;
    reg signed [39:0] yq_real_r;
    reg signed [39:0] yq_imag_r;
    always @(posedge clk or negedge rstn) begin
        if (!rstn) begin
            yp_real_r <= 'b0;
            yp_imag_r <= 'b0;
            yq_real_r <= 'b0;
            yq_imag_r <= 'b0;
        end
        else if (en_r[1]) begin
            yp_real_r <= xp_real_d1 + xq_wnr_real;
            yp_imag_r <= xp_imag_d1 + xq_wnr_imag;
            yq_real_r <= xp_real_d1 - xq_wnr_real;
            yq_imag_r <= xp_imag_d1 - xq_wnr_imag;
        end
    end

    // (3) Remove the 2^13 scaling introduced by Wnr: take exactly 24 bits
    //     starting at bit 13. The previous form concatenated bit 39 in front
    //     of a 24-bit slice, producing 25 bits whose MSB was truncated by the
    //     24-bit port; the bits driven onto the port are unchanged.
    assign yp_real = yp_real_r[FRAC_BITS+23:FRAC_BITS];
    assign yp_imag = yp_imag_r[FRAC_BITS+23:FRAC_BITS];
    assign yq_real = yq_real_r[FRAC_BITS+23:FRAC_BITS];
    assign yq_imag = yq_imag_r[FRAC_BITS+23:FRAC_BITS];
    assign valid   = en_r[2];

endmodule
fft8.v
// 8-point radix-2 decimation-in-time FFT built from three stages of four
// `butterfly` units each.
//
// xm_real[s][i] / xm_imag[s][i] hold sample i at the input of stage s;
// index s = 3 holds the final results. Inputs are loaded into stage 0 in
// bit-reversed order and results y0..y7 are read out in natural order.
`timescale 1ns/100ps
module fft8(
    input                    clk,
    input                    rstn,     // asynchronous reset, active low
    input                    en,       // x0..x7 valid
    input signed [23:0]      x0_real,
    input signed [23:0]      x0_imag,
    input signed [23:0]      x1_real,
    input signed [23:0]      x1_imag,
    input signed [23:0]      x2_real,
    input signed [23:0]      x2_imag,
    input signed [23:0]      x3_real,
    input signed [23:0]      x3_imag,
    input signed [23:0]      x4_real,
    input signed [23:0]      x4_imag,
    input signed [23:0]      x5_real,
    input signed [23:0]      x5_imag,
    input signed [23:0]      x6_real,
    input signed [23:0]      x6_imag,
    input signed [23:0]      x7_real,
    input signed [23:0]      x7_imag,
    output                   valid,    // y0..y7 valid
    // The results are driven by this module, so they must be outputs
    // (they were previously declared as inputs).
    output signed [23:0]     y0_real,
    output signed [23:0]     y0_imag,
    output signed [23:0]     y1_real,
    output signed [23:0]     y1_imag,
    output signed [23:0]     y2_real,
    output signed [23:0]     y2_imag,
    output signed [23:0]     y3_real,
    output signed [23:0]     y3_imag,
    output signed [23:0]     y4_real,
    output signed [23:0]     y4_imag,
    output signed [23:0]     y5_real,
    output signed [23:0]     y5_imag,
    output signed [23:0]     y6_real,
    output signed [23:0]     y6_imag,
    output signed [23:0]     y7_real,
    output signed [23:0]     y7_imag
);

    // First index: stage (0..3); second index: sample position (0..7).
    wire signed [23:0] xm_real [3:0] [7:0];
    wire signed [23:0] xm_imag [3:0] [7:0];

    // Enable chain: entries 0..3 feed stage 0; entry (m+1)*4+k is the
    // `valid` output of butterfly k in stage m.
    wire               en_connect [15:0];
    assign en_connect[0] = en;
    assign en_connect[1] = en;
    assign en_connect[2] = en;
    assign en_connect[3] = en;

    // Twiddle factors W8^0 .. W8^3, quantized to 16-bit fixed point:
    // the real-valued coefficient is multiplied by 0x2000 and converted to
    // an integer.
    wire signed [15:0] factor_real [3:0];
    wire signed [15:0] factor_imag [3:0];
    assign factor_real[0] = 16'h2000; //  1
    assign factor_imag[0] = 16'h0000; //  0
    assign factor_real[1] = 16'h16a0; //  sqrt(2)/2
    assign factor_imag[1] = 16'he95f; // -sqrt(2)/2
    assign factor_real[2] = 16'h0000; //  0
    assign factor_imag[2] = 16'he000; // -1
    assign factor_real[3] = 16'he95f; // -sqrt(2)/2
    assign factor_imag[3] = 16'he95f; // -sqrt(2)/2

    // Stage-0 inputs in bit-reversed order: 0, 4, 2, 6, 1, 5, 3, 7.
    assign xm_real[0][0] = x0_real;
    assign xm_real[0][1] = x4_real;
    assign xm_real[0][2] = x2_real;
    assign xm_real[0][3] = x6_real;
    assign xm_real[0][4] = x1_real;
    assign xm_real[0][5] = x5_real;
    assign xm_real[0][6] = x3_real;
    assign xm_real[0][7] = x7_real;
    assign xm_imag[0][0] = x0_imag;
    assign xm_imag[0][1] = x4_imag;
    assign xm_imag[0][2] = x2_imag;
    assign xm_imag[0][3] = x6_imag;
    assign xm_imag[0][4] = x1_imag;
    assign xm_imag[0][5] = x5_imag;
    assign xm_imag[0][6] = x3_imag;
    assign xm_imag[0][7] = x7_imag;

    // Butterfly array.
    genvar m, k;
    generate
        // Three stages.
        for (m = 0; m <= 2; m = m + 1) begin: stage
            // Four butterflies per stage.
            for (k = 0; k <= 3; k = k + 1) begin: unit
                // Distance between the two inputs of a butterfly in stage m;
                // also the number of butterflies per group.
                localparam integer HALF  = 1 << m;
                // Group this butterfly belongs to and its position inside it.
                localparam integer GROUP = k / HALF;
                localparam integer POS   = k % HALF;
                // Sample indices of the upper (p) and lower (q) legs.
                localparam integer P_IDX = GROUP * (2 * HALF) + POS;
                localparam integer Q_IDX = P_IDX + HALF;
                // Stage m needs W_{2^(m+1)}^POS, which in terms of the W8
                // table is entry POS * 2^(2-m). Without this scaling stage 1
                // would use W8^1 where W4^1 = W8^2 is required.
                localparam integer W_IDX = POS << (2 - m);

                butterfly u_butter(
                    .clk         (clk),
                    .rstn        (rstn),
                    .en          (en_connect[m*4 + k]),
                    .xp_real     (xm_real[m][P_IDX]),
                    .xp_imag     (xm_imag[m][P_IDX]),
                    .xq_real     (xm_real[m][Q_IDX]),
                    .xq_imag     (xm_imag[m][Q_IDX]),
                    .factor_real (factor_real[W_IDX]),
                    .factor_imag (factor_imag[W_IDX]),
                    // output data
                    .valid       (en_connect[(m+1)*4 + k]),
                    .yp_real     (xm_real[m+1][P_IDX]),
                    .yp_imag     (xm_imag[m+1][P_IDX]),
                    .yq_real     (xm_real[m+1][Q_IDX]),
                    .yq_imag     (xm_imag[m+1][Q_IDX])
                );
            end
        end
    endgenerate

    // All butterflies of the last stage share the same timing, so the valid
    // flag of the first one is used for the whole result vector.
    assign valid   = en_connect[12];
    assign y0_real = xm_real[3][0];
    assign y0_imag = xm_imag[3][0];
    assign y1_real = xm_real[3][1];
    assign y1_imag = xm_imag[3][1];
    assign y2_real = xm_real[3][2];
    assign y2_imag = xm_imag[3][2];
    assign y3_real = xm_real[3][3];
    assign y3_imag = xm_imag[3][3];
    assign y4_real = xm_real[3][4];
    assign y4_imag = xm_imag[3][4];
    assign y5_real = xm_real[3][5];
    assign y5_imag = xm_imag[3][5];
    assign y6_real = xm_real[3][6];
    assign y6_imag = xm_imag[3][6];
    assign y7_real = xm_real[3][7];
    assign y7_imag = xm_imag[3][7];

endmodule
testbench
`timescale 1ns/100ps
// Stimulus-only testbench for fft8: drives a free-running clock, releases
// reset, feeds continuously changing input samples and stops after 1000 ns.
// Results are observed in the waveform; nothing is checked automatically.
module test ;

    reg                clk, rstn, en;
    reg  signed [23:0] x0_real, x1_real, x2_real, x3_real,
                       x4_real, x5_real, x6_real, x7_real;
    reg  signed [23:0] x0_imag, x1_imag, x2_imag, x3_imag,
                       x4_imag, x5_imag, x6_imag, x7_imag;

    wire               valid;
    wire signed [23:0] y0_real, y1_real, y2_real, y3_real,
                       y4_real, y5_real, y6_real, y7_real;
    wire signed [23:0] y0_imag, y1_imag, y2_imag, y3_imag,
                       y4_imag, y5_imag, y6_imag, y7_imag;

    // Next stimulus value: restart from zero once the current value has
    // grown past 22'h3F_ffff, otherwise advance by `step`.
    function [23:0] advance;
        input [23:0] cur;
        input [23:0] step;
        begin
            if (cur > 24'h3F_ffff)
                advance = 24'd0;
            else
                advance = cur + step;
        end
    endfunction

    // Reset release at 10 ns, then a 50 MHz clock (toggle every 10 ns).
    initial begin
        clk  = 1'b0;
        rstn = 1'b0;
        #10 rstn = 1'b1;
        forever #10 clk = ~clk;
    end

    fft8 u_fft (
        .clk     (clk),     .rstn    (rstn),    .en      (en),
        .x0_real (x0_real), .x0_imag (x0_imag),
        .x1_real (x1_real), .x1_imag (x1_imag),
        .x2_real (x2_real), .x2_imag (x2_imag),
        .x3_real (x3_real), .x3_imag (x3_imag),
        .x4_real (x4_real), .x4_imag (x4_imag),
        .x5_real (x5_real), .x5_imag (x5_imag),
        .x6_real (x6_real), .x6_imag (x6_imag),
        .x7_real (x7_real), .x7_imag (x7_imag),
        .valid   (valid),
        .y0_real (y0_real), .y0_imag (y0_imag),
        .y1_real (y1_real), .y1_imag (y1_imag),
        .y2_real (y2_real), .y2_imag (y2_imag),
        .y3_real (y3_real), .y3_imag (y3_imag),
        .y4_real (y4_real), .y4_imag (y4_imag),
        .y5_real (y5_real), .y5_imag (y5_imag),
        .y6_real (y6_real), .y6_imag (y6_imag),
        .y7_real (y7_real), .y7_imag (y7_imag));

    // Input data: fixed starting values, then a new sample set on every
    // falling clock edge once `en` has been raised.
    initial begin
        en = 1'b0;
        x0_real = 24'd10;  x0_imag = 24'd0;
        x1_real = 24'd20;  x1_imag = 24'd0;
        x2_real = 24'd30;  x2_imag = 24'd0;
        x3_real = 24'd40;  x3_imag = 24'd0;
        x4_real = 24'd10;  x4_imag = 24'd0;
        x5_real = 24'd20;  x5_imag = 24'd0;
        x6_real = 24'd30;  x6_imag = 24'd0;
        x7_real = 24'd40;  x7_imag = 24'd0;

        @(negedge clk) ;
        en = 1'b1;

        forever begin
            @(negedge clk) ;
            x0_real = advance(x0_real, 24'd1);
            x1_real = advance(x1_real, 24'd1);
            x2_real = advance(x2_real, 24'd31);
            x3_real = advance(x3_real, 24'd1);
            x4_real = advance(x4_real, 24'd23);
            x5_real = advance(x5_real, 24'd1);
            x6_real = advance(x6_real, 24'd6);
            x7_real = advance(x7_real, 24'd1);
            x0_imag = advance(x0_imag, 24'd2);
            x1_imag = advance(x1_imag, 24'd5);
            x2_imag = advance(x2_imag, 24'd3);
            x3_imag = advance(x3_imag, 24'd6);
            x4_imag = advance(x4_imag, 24'd4);
            x5_imag = advance(x5_imag, 24'd8);
            x6_imag = advance(x6_imag, 24'd11);
            x7_imag = advance(x7_imag, 24'd7);
        end
    end

    // End the simulation after 1000 ns.
    initial #1000 $finish ;

endmodule
仿真截图:
参考资料: