引言
经过前面一段时间的锻炼和积累,是时候做一个稍微大一点的project了。本小节就以fft256为例,整体感觉一下基于openrisc的SOC的开发过程。
1,整体思想
1>以fft256为运算核心,linux驱动将待计算的fft数据写入RAM,
2>告知mkg_core进行reset 和start,
3>mkg_core控制master,通过DMA形式,读取ram中的数据,
4>送给fft256,
5>然后获得fft256的运算结果,
6>写回ram,
7>最后驱动读取计算结果并打印。
注意:
这次的RAM是自己实现的一块独立的专用的RAM,不是使用的外部的DDR SDRAM。
2,模块的划分和接口定义
2.1 模块划分
整个project共5个叶子module:mkg_wb_ram,mkg_wb_slave,mkg_core,FFT256,mkg_wb_master。
2.2 接口定义,手动用visio绘图:
1>mkg_wb_ram模块
2>mkg_wb_slave模块
3>mkg_core模块
4>FFT256模块
5>mkg_wb_master模块
3,mkg_wb_ram模块的fsm设计,rtl编码,和仿真
1>mkg_wb_ram的fsm设计
2>mkg_wb_ram的rtl编码
/*
*
* rill create 2013-04-18
* rillzhen@gmail.com
*
*/
//`include "mkg_defines.v"
// mkg_wb_ram: Wishbone slave exposing two 256-word windows, one 32-bit word
// per address step of 4:
//   data window   : data_adr_start .. data_adr_end
//   result window : rslt_adr_start .. rslt_adr_end
//
// Every accepted access is answered with a single-cycle wb_ack_o. Accesses
// outside both windows are not acknowledged. wb_err_o / wb_rty_o are only
// ever driven low. wb_sel_i, wb_cti_i and wb_bte_i are not examined, so every
// access is treated as a full-word classic cycle.
//
// Fix over the previous revision: the result window used to be
// 32'h9800_0100 .. 32'h9801_03fc, which overlapped the data window and aliased
// a large address range onto the 256-entry Result array. It is now the
// 256-word range that follows the data window (32'h9800_0400 .. 32'h9800_07fc).
//
// NOTE(review): reads of the data window return samples taken from the
// Wave_ROM256 instance instead of the Data[] array (the array read is
// commented out). This looks like a simulation stimulus - confirm before
// relying on data written through the bus.
module mkg_wb_ram
(
    wb_clk,
    wb_rst,
    wb_dat_i,
    wb_adr_i,
    wb_sel_i,
    wb_cti_i,
    wb_bte_i,
    wb_we_i,
    wb_cyc_i,
    wb_stb_i,
    wb_dat_o,
    wb_ack_o,
    wb_err_o,
    wb_rty_o
);
    input             wb_clk;
    input             wb_rst;
    input      [31:0] wb_adr_i;
    input             wb_stb_i;
    input             wb_cyc_i;
    input      [2:0]  wb_cti_i;
    input      [1:0]  wb_bte_i;
    input      [31:0] wb_dat_i;
    input      [3:0]  wb_sel_i;
    input             wb_we_i;
    output reg [31:0] wb_dat_o;
    output reg        wb_ack_o;
    output reg        wb_err_o;
    output reg        wb_rty_o;

    // Number of bits kept from each 16-bit ROM sample.
    parameter nb = 10;

    wire [15:0]   D_I, D_R;
    wire [nb-1:0] DR, DI;
    wire [7:0]    ADDR;

    // Keep the top nb bits of each sample; add 1 when the sample's MSB is set
    // and the sample is actually being shortened.
    assign DR = (D_R[15] && (nb != 16)) ? (D_R[15:15-nb+1] + 1) : D_R[15:15-nb+1];
    assign DI = (D_I[15] && (nb != 16)) ? (D_I[15:15-nb+1] + 1) : D_I[15:15-nb+1];

    // Sample ROM (module not shown in this file), indexed by the word address.
    Wave_ROM256 my_rom
    (
        .ADDR(ADDR),
        .DATA_RE(D_R),
        .DATA_IM(D_I)
    );
    assign ADDR = wb_adr_i[9:2];

    parameter my_ram_adr     = 8'h98;
    parameter data_adr_start = 32'h9800_0000;
    parameter data_adr_end   = 32'h9800_03fc;
    parameter rslt_adr_start = 32'h9800_0400;   // was 32'h9800_0100 (overlapped data window)
    parameter rslt_adr_end   = 32'h9800_07fc;   // was 32'h9801_03fc (aliased past the array)
    parameter error_code     = 32'habcd_dcba;   // currently unused
    parameter Numb           = 256;

    // One-hot FSM states.
    parameter Idle       = 5'b00001;
    parameter Read_Data  = 5'b00010;
    parameter Read_Rslt  = 5'b00100;
    parameter Write_Data = 5'b01000;
    parameter Write_Rslt = 5'b10000;

    reg [31:0] Data   [Numb-1:0];
    reg [31:0] Result [Numb-1:0];
    reg [4:0]  state, next_state;

    // Request / address-decode helpers.
    wire req      = wb_stb_i && wb_cyc_i;
    wire hit_data = (wb_adr_i >= data_adr_start) && (wb_adr_i <= data_adr_end);
    wire hit_rslt = (wb_adr_i >= rslt_adr_start) && (wb_adr_i <= rslt_adr_end);

    // State register.
    always @(posedge wb_clk)
    begin
        if (wb_rst)
            state <= Idle;
        else
            state <= next_state;
    end

    // Next-state logic: every access state lasts exactly one clock.
    always @(*)
    begin
        case (state)
            Idle:
            begin
                if (req && hit_data)
                    next_state = wb_we_i ? Write_Data : Read_Data;
                else if (req && hit_rslt)
                    next_state = wb_we_i ? Write_Rslt : Read_Rslt;
                else
                    next_state = Idle;
            end
            Write_Data,
            Write_Rslt,
            Read_Data,
            Read_Rslt:
                next_state = Idle;
            default:
                next_state = Idle;
        endcase
    end

    // Registered outputs, decoded from next_state so that the acknowledge is
    // visible during the clock in which the FSM sits in the access state.
    always @(posedge wb_clk)
    begin
        if (wb_rst)
        begin
            wb_dat_o <= 0;
            wb_ack_o <= 0;
            wb_err_o <= 0;
            wb_rty_o <= 0;
        end
        else
        begin
            case (next_state)
                Write_Data:
                begin
                    Data[wb_adr_i[9:2]] <= wb_dat_i;
                    wb_ack_o            <= 1'b1;
                end
                Write_Rslt:
                begin
                    Result[wb_adr_i[9:2]] <= wb_dat_i;
                    wb_ack_o              <= 1'b1;
                end
                Read_Data:
                begin
                    // wb_dat_o <= Data[wb_adr_i[9:2]];
                    wb_dat_o <= {6'b0, DR, 6'b0, DI};
                    wb_ack_o <= 1'b1;
                end
                Read_Rslt:
                begin
                    wb_dat_o <= Result[wb_adr_i[9:2]];
                    wb_ack_o <= 1'b1;
                end
                default:    // Idle and any unexpected encoding
                begin
                    wb_dat_o <= 0;
                    wb_ack_o <= 0;
                    wb_err_o <= 0;
                    wb_rty_o <= 0;
                end
            endcase
        end
    end
endmodule
/************** EOF ****************/
3>mkg_wb_ram仿真
4,mkg_wb_slave模块的fsm设计,rtl编码,和仿真
1>mkg_wb_slave的fsm设计
ram的reset timing:
ram的work timing:
2>mkg_wb_slave的rtl编码
/*
*
* mkg_wb_slave.v
*
* rill create 2013-04-18
*
*/
//`include "mkg_defines.v"
// mkg_wb_slave: Wishbone register slave that forwards configuration commands
// to the core and returns the core status word.
//
//   write, wb_adr_i[8:0] == 0 : value is latched into reg_config and then
//                               - value 32'h1 : reset_o and enable_o are held
//                                               high for the config sequence
//                               - otherwise   : enable_o pulses and the value
//                                               is presented on config_o
//   write, any other offset   : acknowledged and ignored
//   read,  any offset         : returns the registered copy of status_i
//
// wb_err_o / wb_rty_o are tied low. wb_sel_i, wb_cti_i and wb_bte_i are not
// examined.
//
// Fixes over the previous revision:
//   * A new cycle is only accepted while wb_ack_o is low. Previously the idle
//     state could sample the strobe of the cycle it had just acknowledged and
//     run the access a second time (spurious ACK, and for writes a spurious
//     write to the config register).
//   * wb_ack_o is dropped when entering the config sequence, so the
//     acknowledge is a single-cycle pulse instead of staying high for the
//     whole sequence.
//   * config_status, wb_ack_o and wb_dat_o are initialised by wb_rst, and the
//     config sequence recovers from an unexpected config_status value.
module mkg_wb_slave
(
    wb_clk,
    wb_rst,
    wb_dat_i,
    wb_adr_i,
    wb_sel_i,
    wb_cti_i,
    wb_bte_i,
    wb_we_i,
    wb_cyc_i,
    wb_stb_i,
    wb_dat_o,
    wb_ack_o,
    wb_err_o,
    wb_rty_o,
    //internal signals
    status_i,
    reset_o,
    enable_o,
    config_o
);
    input             wb_clk;
    input             wb_rst;
    input      [31:0] wb_adr_i;
    input             wb_stb_i;
    input             wb_cyc_i;
    input      [2:0]  wb_cti_i;
    input      [1:0]  wb_bte_i;
    input      [31:0] wb_dat_i;
    input      [3:0]  wb_sel_i;
    input             wb_we_i;
    output reg [31:0] wb_dat_o;
    output reg        wb_ack_o;
    output            wb_err_o;
    output            wb_rty_o;
    //internal signals
    input      [31:0] status_i;
    output reg        reset_o;
    output reg        enable_o;
    output reg [31:0] config_o;

    //== bus FSM states
    parameter s_idle   = 3'b000;
    parameter s_read   = 3'b001;
    parameter s_write  = 3'b010;
    parameter s_config = 3'b100;
    reg [2:0] state = s_idle;

    //== steps of the reset sequence
    parameter s_config1 = 3'b000;
    parameter s_config2 = 3'b001;
    parameter s_config3 = 3'b010;
    parameter s_config4 = 3'b100;
    reg [2:0] config_status = s_config1;

    reg [31:0] reg_config;  //index:0x0,write
    reg [31:0] reg_status;  //index:0x4,read

    //== local logic
    assign wb_err_o = 1'b0;
    assign wb_rty_o = 1'b0;

    // Register the core status so reads return a stable value.
    always @(posedge wb_clk)
    begin
        if (wb_rst)
            reg_status <= 32'h0;
        else
            reg_status <= status_i;
    end

    // Wishbone interface and core configuration logic.
    always @(posedge wb_clk)
    begin
        if (wb_rst)
        begin
            state         <= s_idle;
            config_status <= s_config1;
            wb_dat_o      <= 32'h0;
            wb_ack_o      <= 1'b0;
            reset_o       <= 1'b0;
            enable_o      <= 1'b0;
            config_o      <= 32'h0;
            reg_config    <= 32'h0;
        end
        else
        begin
            case (state)
                s_idle:   task_idle();
                s_read:   task_read();
                s_write:  task_write();
                s_config: task_config();
                default:  state <= s_idle;
            endcase
        end
    end

    // Idle: clear all outputs and wait for a new bus cycle.
    task automatic task_idle;
    begin
        wb_dat_o <= 32'h0;
        wb_ack_o <= 1'b0;
        reset_o  <= 1'b0;
        enable_o <= 1'b0;
        config_o <= 32'h0;
        // wb_ack_o still holds its previous value here; while it is high the
        // strobe belongs to the cycle that was just acknowledged.
        if (wb_stb_i && wb_cyc_i && !wb_ack_o)
            state <= wb_we_i ? s_write : s_read;
        else
            state <= s_idle;
    end
    endtask

    // Read: return the registered status word and acknowledge.
    task automatic task_read;
    begin
        wb_dat_o <= reg_status;
        wb_ack_o <= 1'b1;
        state    <= s_idle;
    end
    endtask

    // Write: only offset 0 is implemented; other offsets are acknowledged
    // without side effects.
    task automatic task_write;
    begin
        wb_ack_o <= 1'b1;
        case (wb_adr_i[8:0])
            9'h0:
            begin
                reg_config <= wb_dat_i;
                state      <= s_config;
            end
            default:
                state <= s_idle;
        endcase
    end
    endtask

    // Config: drive the core according to the value latched in reg_config.
    task automatic task_config;
    begin
        wb_ack_o <= 1'b0;               // end the acknowledge pulse
        if (32'h1 == reg_config)        // reset mkg_core
        begin
            enable_o <= 1'b1;
            reset_o  <= 1'b1;
            case (config_status)
                s_config1: config_status <= s_config2;
                s_config2: config_status <= s_config3;
                s_config3: config_status <= s_config4;
                s_config4:
                begin
                    config_status <= s_config1;
                    state         <= s_idle;
                end
                default:                // unexpected encoding: end the sequence
                begin
                    config_status <= s_config1;
                    state         <= s_idle;
                end
            endcase
        end
        else
        begin
            enable_o <= 1'b1;
            config_o <= reg_config;
            state    <= s_idle;
        end
    end
    endtask
endmodule
/************** EOF ****************/
3>mkg_wb_slave仿真
reset仿真:
config仿真:
5,mkg_core模块的fsm设计,rtl编码,和仿真及和mkg_wb_slave的联合仿真
1>mkg_core的fsm设计
有点复杂,请阅读rtl code。
2>mkg_core的rtl编码
/*
*
* mkg_core.v
*
* rill create 2013-04-18
*
*/
//`include "mkg_defines.v"
// mkg_core: sequencer between the register slave, the bus master and the FFT.
//
// Top-level flow (core_status):
//   s_idle         : re-initialise everything, wait for start_flag
//   s_read_ram     : for read_index 0..255, read one word through the master
//                    and present bits [25:16] / [9:0] to the FFT as dr / di
//   s_write_buffer : store 256 FFT output samples into result_buffer
//   s_write_ram    : write result_buffer[0..255] through the master to
//                    {8'h98, 14'b1, index, 2'b00}
//   s_done         : stay here until reset
//
// s_status_o reports the four FSM state registers for debugging.
//
// Fixes over the previous revision:
//   * write_index was 8 bits wide but compared against 16'd256, so the
//     comparison was always true and s_done could never be reached. It is now
//     16 bits wide (like read_index / buffer_index) and only its low 8 bits
//     form the address and the buffer index.
//   * task_result_buffer_init used blocking assignments on result_buffer,
//     which is written with non-blocking assignments elsewhere in the same
//     clocked block; it now uses non-blocking assignments.
module mkg_core
(
    clk,
    rst,
    s_enable_i,
    s_config_i,
    s_status_o,
    fft_ovf1_i,
    fft_ovf2_i,
    fft_ready_i,
    fft_addr_i,
    fft_dor_i,
    fft_doi_i,
    fft_en_o,
    fft_start_o,
    fft_shift_o,
    fft_dr_o,
    fft_di_o,
    m_ack_write_i,
    m_ack_read_i,
    m_dat_i,
    m_write_o,
    m_read_o,
    m_addr_o,
    m_dat_o
);
    input             clk;
    input             rst;
    input             s_enable_i;
    input      [31:0] s_config_i;
    output reg [31:0] s_status_o;
    input             fft_ovf1_i;
    input             fft_ovf2_i;
    input             fft_ready_i;
    input      [7:0]  fft_addr_i;
    input      [13:0] fft_dor_i;
    input      [13:0] fft_doi_i;
    output reg        fft_en_o;
    output reg        fft_start_o;
    output reg [3:0]  fft_shift_o;
    output reg [9:0]  fft_dr_o;
    output reg [9:0]  fft_di_o;
    input             m_ack_write_i;
    input             m_ack_read_i;
    input      [31:0] m_dat_i;
    output reg        m_write_o;
    output reg        m_read_o;
    output reg [31:0] m_addr_o;
    output reg [31:0] m_dat_o;

    //=== local FSM state definitions ===
    //= core_status
    parameter s_idle         = 8'b0000_0000;
    parameter s_read_ram     = 8'b0000_0001;
    parameter s_write_buffer = 8'b0000_0010;
    parameter s_write_ram    = 8'b0000_0100;
    parameter s_done         = 8'b0000_1000;
    reg [7:0] core_status = s_idle;

    //= read_ram_status
    parameter s_read_ram_start           = 8'b0000_0000;
    parameter s_read_ram_wait_master_ack = 8'b0000_0001;
    parameter s_read_ram_write_fft       = 8'b0000_0010;
    parameter s_read_ram_change_index    = 8'b0000_0100;
    reg [7:0] read_ram_status = s_read_ram_start;

    //= write_buffer_status
    parameter s_write_buffer_wait_ready = 8'b0000_0000;
    parameter s_write_buffer_write      = 8'b0000_0001;
    reg [7:0] write_buffer_status = s_write_buffer_wait_ready;

    //= write_ram_status
    parameter s_write_ram_start           = 8'b0000_0000;
    parameter s_write_ram_wait_master_ack = 8'b0000_0001;
    parameter s_write_ram_change_index    = 8'b0000_0010;
    reg [7:0] write_ram_status = s_write_ram_start;

    //=== local registers ===
    reg        start_flag;
    reg [15:0] read_index;
    reg [15:0] buffer_index;
    reg [15:0] write_index;            // 16 bits so that the value 256 is representable
    reg [31:0] result_buffer [255:0];
    reg [31:0] dat_read;

    //=== local logic ===
    // Debug interface: return the FSM states to the slave.
    always @(posedge clk)
    begin
        s_status_o <= {core_status, read_ram_status, write_buffer_status, write_ram_status};
    end

    // Latch the start command: bit 0 of the config word while enabled.
    always @(posedge clk)
    begin
        if (rst)
            start_flag <= 1'b0;
        else if (s_enable_i)
            start_flag <= s_config_i[0];
        else
            start_flag <= 1'b0;
    end

    // Top-level core FSM.
    always @(posedge clk)
    begin
        if (rst & s_enable_i)
        begin
            task_reset();
        end
        else
        begin
            case (core_status)
                s_idle:         task_idle();
                s_read_ram:     task_read_ram();
                s_write_buffer: task_write_buffer();   // capture FFT output into the buffer
                s_write_ram:    task_write_ram();
                s_done:         task_done();
                default:        core_status <= s_idle;
            endcase
        end
    end

    //=== reset action ===
    task automatic task_reset;
    begin
        core_status         <= s_idle;
        read_ram_status     <= s_read_ram_start;
        write_buffer_status <= s_write_buffer_wait_ready;
        write_ram_status    <= s_write_ram_start;
        read_index          <= 16'h0;
        buffer_index        <= 16'h0;
        write_index         <= 16'h0;
        fft_en_o            <= 1'b0;
        fft_start_o         <= 1'b0;
        fft_dr_o            <= 10'b0;
        fft_di_o            <= 10'b0;
        fft_shift_o         <= 4'b0;
        m_write_o           <= 1'b0;
        m_read_o            <= 1'b0;
        m_addr_o            <= 32'b0;
        m_dat_o             <= 32'b0;
        task_result_buffer_init();
    end
    endtask //task_reset

    //=== top-level tasks ===
    // Idle: keep everything initialised and wait for the start flag.
    task automatic task_idle;
    begin
        task_reset();   //just for debug
        // This assignment comes after task_reset's, so it decides the next state.
        if (start_flag)
            core_status <= s_read_ram;
        else
            core_status <= s_idle;
    end
    endtask //task_idle

    // Read one word at a time from RAM and feed it to the FFT.
    task automatic task_read_ram;
    begin
        if (fft_ready_i)    // FFT already reports ready: go capture its output
        begin
            core_status         <= s_write_buffer;
            write_buffer_status <= s_write_buffer_write;
        end
        else if (read_index < 16'd256)
        begin
            case (read_ram_status)
                s_read_ram_start:
                begin
                    m_addr_o        <= {8'h98, 6'b0, read_index, 2'b0};
                    m_read_o        <= 1'b1;    // request a master read
                    fft_en_o        <= 1'b0;
                    read_ram_status <= s_read_ram_wait_master_ack;
                end
                s_read_ram_wait_master_ack:
                begin
                    m_read_o <= 1'b0;           // the request is a pulse
                    m_addr_o <= 32'b0;
                    if (16'b0 == read_index)
                        fft_start_o <= 1'b1;    // start the FFT with the first sample
                    if (m_ack_read_i)
                    begin
                        dat_read        <= m_dat_i;
                        read_ram_status <= s_read_ram_write_fft;
                    end
                    else
                    begin
                        read_ram_status <= s_read_ram_wait_master_ack;
                    end
                end
                s_read_ram_write_fft:
                begin
                    fft_en_o        <= 1'b1;
                    fft_dr_o        <= dat_read[25:16];
                    fft_di_o        <= dat_read[9:0];
                    read_ram_status <= s_read_ram_change_index;
                end
                s_read_ram_change_index:
                begin
                    if (16'b0 == read_index)
                        fft_start_o <= 1'b0;
                    fft_en_o        <= 1'b0;
                    read_index      <= read_index + 1'b1;
                    read_ram_status <= s_read_ram_start;
                end
                default:
                    read_ram_status <= s_read_ram_start;
            endcase
        end
        else
        begin
            // All 256 samples delivered: keep the FFT enabled and wait for output.
            read_ram_status <= s_read_ram_start;
            core_status     <= s_write_buffer;
            fft_en_o        <= 1'b1;
        end
    end
    endtask //task_read_ram

    // Capture 256 FFT output samples into result_buffer.
    task automatic task_write_buffer;
    begin
        if (buffer_index < 16'd256)
        begin
            case (write_buffer_status)
                s_write_buffer_wait_ready:
                begin
                    if (fft_ready_i)
                        write_buffer_status <= s_write_buffer_write;
                    else
                        write_buffer_status <= s_write_buffer_wait_ready;
                end
                s_write_buffer_write:
                begin
                    // One sample per clock, stored at the address given by the FFT.
                    result_buffer[fft_addr_i] <= {2'b00, fft_dor_i, 2'b00, fft_doi_i};
                    buffer_index              <= buffer_index + 1'b1;
                end
                default:
                    write_buffer_status <= s_write_buffer_wait_ready;
            endcase
        end
        else
        begin
            write_buffer_status <= s_write_buffer_wait_ready;
            core_status         <= s_write_ram;
        end
    end
    endtask //task_write_buffer

    // Write the buffered results back to RAM through the master.
    task automatic task_write_ram;
    begin
        if (write_index < 16'd256)
        begin
            case (write_ram_status)
                s_write_ram_start:
                begin
                    m_dat_o          <= result_buffer[write_index[7:0]];
                    m_addr_o         <= {8'h98, 14'b1, write_index[7:0], 2'b00};
                    m_write_o        <= 1'b1;   // request a master write
                    write_ram_status <= s_write_ram_wait_master_ack;
                end
                s_write_ram_wait_master_ack:
                begin
                    m_dat_o   <= 32'b0;         // the request is a pulse
                    m_write_o <= 1'b0;
                    m_addr_o  <= 32'b0;
                    if (m_ack_write_i)
                        write_ram_status <= s_write_ram_change_index;
                    else
                        write_ram_status <= s_write_ram_wait_master_ack;
                end
                s_write_ram_change_index:
                begin
                    write_index      <= write_index + 1'b1;
                    write_ram_status <= s_write_ram_start;
                end
                default:
                    write_ram_status <= s_write_ram_start;
            endcase
        end
        else
        begin
            write_ram_status <= s_write_ram_start;
            core_status      <= s_done;
        end
    end
    endtask //task_write_ram

    // Done: self loop until reset.
    task automatic task_done;
    begin
        core_status <= s_done;
    end
    endtask //task_done

    //=== sub tasks ===
    // Clear the first eight buffer entries (the rest are left untouched).
    task automatic task_result_buffer_init;
    begin
        result_buffer[0] <= 32'h0;
        result_buffer[1] <= 32'h0;
        result_buffer[2] <= 32'h0;
        result_buffer[3] <= 32'h0;
        result_buffer[4] <= 32'h0;
        result_buffer[5] <= 32'h0;
        result_buffer[6] <= 32'h0;
        result_buffer[7] <= 32'h0;
    end
    endtask //task_result_buffer_init
endmodule
/************** EOF ****************/
3>mkg_core的仿真
4>mkg_wb_slave和mkg_core的联合仿真
6,FFT256模块的fsm设计,rtl编码,和仿真及和mkg_wb_slave,mkg_core的联合仿真
1>FFT256的fsm设计
可以从opencores的网站download或者从官网下载:http://unicore.co.ua/index.php?page=free-ips&hl=en
2>FFT256的rtl编码
http://unicore.co.ua/index.php?page=free-ips&hl=en
3>FFT256的仿真
fft256的reset和start的timing:
fft256的ready timing:
4>FFT256和mkg_wb_slave,mkg_core的联合仿真
reset和start信号:
ready信号:
write_ram信号:
done信号:
7,mkg_wb_master模块的fsm设计,rtl编码,和仿真及和mkg_wb_slave,mkg_core的联合仿真
1>mkg_wb_master的fsm设计
与mkg_wb_slave类似。
2>mkg_wb_master的rtl编码
`timescale 1ns/1ps
// mkg_wb_master: turns single-pulse read/write requests from the core into
// classic (non-burst) Wishbone cycles.
//
//   write_i / read_i : request (exactly one of them must be high to start)
//   addr_i / dat_i   : sampled when the request is accepted
//   ack_write_o      : one-clock pulse after the write was acknowledged
//   ack_read_o/dat_o : one-clock pulse with the read data
//
// wb_cti_o / wb_bte_o are always zero. wb_err_i and wb_rty_i are not
// examined: a cycle only ends on wb_ack_i.
//
// Fixes over the previous revision:
//   * The combinational next-state case has a default branch, so no latch is
//     inferred and an unexpected state encoding returns to Idle.
//   * wb_sel_o is driven to 4'b1111 for the duration of a cycle. It used to
//     stay at zero, i.e. no byte lane was ever marked valid.
module mkg_wb_master
(
    wb_clk,
    wb_rst,
    wb_adr_o,
    wb_dat_o,
    wb_sel_o,
    wb_we_o,
    wb_cyc_o,
    wb_stb_o,
    wb_cti_o,
    wb_bte_o,
    wb_dat_i,
    wb_ack_i,
    wb_err_i,
    wb_rty_i,
    //internal signals
    write_i,
    read_i,
    addr_i,
    dat_i,
    ack_write_o,
    ack_read_o,
    dat_o
);
    //wishbone interface
    input             wb_clk;
    input             wb_rst;
    input             wb_ack_i;
    input             wb_err_i;
    input             wb_rty_i;
    input      [31:0] wb_dat_i;
    output reg [31:0] wb_adr_o;
    output reg [31:0] wb_dat_o;
    output reg        wb_cyc_o;
    output reg        wb_stb_o;
    output reg [3:0]  wb_sel_o;
    output reg        wb_we_o;
    output reg [2:0]  wb_cti_o;
    output reg [1:0]  wb_bte_o;
    //internal signals
    input             write_i;
    input             read_i;
    input      [31:0] addr_i;
    input      [31:0] dat_i;
    output reg        ack_write_o;
    output reg        ack_read_o;
    output reg [31:0] dat_o;

    // One-hot FSM states.
    parameter Idle    = 12'b0000_0000_0001;
    //parameter R_Idle= 12'b0000_0000_0010;
    parameter R_Ready = 12'b0000_0000_0100;
    parameter R_Wait  = 12'b0000_0000_1000;
    parameter R_Done  = 12'b0000_0001_0000;
    //parameter W_Idle= 12'b0000_0010_0000;
    parameter W_Ready = 12'b0000_0100_0000;
    parameter W_Wait  = 12'b0000_1000_0000;
    parameter W_Done  = 12'b0001_0000_0000;

    reg [11:0] state, next_state;

    // State register.
    always @(posedge wb_clk)
    begin
        if (wb_rst)
            state <= Idle;
        else
            state <= next_state;
    end

    // Next-state logic.
    always @(*)
    begin
        case (state)
            Idle:
            begin
                if (write_i && !read_i)
                    next_state = W_Ready;
                else if (!write_i && read_i)
                    next_state = R_Ready;
                else
                    next_state = Idle;
            end
            W_Ready: next_state = W_Wait;
            W_Wait:  next_state = wb_ack_i ? W_Done : W_Wait;
            W_Done:  next_state = Idle;
            R_Ready: next_state = R_Wait;
            R_Wait:  next_state = wb_ack_i ? R_Done : R_Wait;
            R_Done:  next_state = Idle;
            default: next_state = Idle;
        endcase
    end

    // Registered outputs, decoded from next_state.
    always @(posedge wb_clk)
    begin
        if (wb_rst)
        begin
            wb_we_o     <= 0;
            wb_cyc_o    <= 0;
            wb_stb_o    <= 0;
            wb_adr_o    <= 0;
            wb_dat_o    <= 0;
            wb_sel_o    <= 0;
            wb_cti_o    <= 0;
            wb_bte_o    <= 0;
            ack_write_o <= 0;
            ack_read_o  <= 0;
            dat_o       <= 0;
        end
        else
        begin
            // The completion strobes and read data are low unless a Done state
            // overrides them below.
            ack_write_o <= 1'b0;
            ack_read_o  <= 1'b0;
            dat_o       <= 32'b0;

            case (next_state)
                W_Ready:    // launch a write cycle
                begin
                    wb_we_o  <= 1'b1;
                    wb_cyc_o <= 1'b1;
                    wb_stb_o <= 1'b1;
                    wb_sel_o <= 4'b1111;
                    wb_adr_o <= addr_i;
                    wb_dat_o <= dat_i;
                end
                R_Ready:    // launch a read cycle
                begin
                    wb_we_o  <= 1'b0;
                    wb_cyc_o <= 1'b1;
                    wb_stb_o <= 1'b1;
                    wb_sel_o <= 4'b1111;
                    wb_adr_o <= addr_i;
                    wb_dat_o <= 0;
                end
                W_Wait,
                R_Wait:     // hold the bus signals until the slave acknowledges
                begin
                end
                W_Done:     // release the bus and report the write
                begin
                    wb_we_o     <= 0;
                    wb_cyc_o    <= 0;
                    wb_stb_o    <= 0;
                    wb_sel_o    <= 0;
                    wb_adr_o    <= 0;
                    wb_dat_o    <= 0;
                    ack_write_o <= 1'b1;
                end
                R_Done:     // release the bus and hand over the read data
                begin
                    wb_we_o    <= 0;
                    wb_cyc_o   <= 0;
                    wb_stb_o   <= 0;
                    wb_sel_o   <= 0;
                    wb_adr_o   <= 0;
                    wb_dat_o   <= 0;
                    ack_read_o <= 1'b1;
                    dat_o      <= wb_dat_i;
                end
                default:    // Idle and any unexpected encoding
                begin
                    wb_we_o  <= 0;
                    wb_cyc_o <= 0;
                    wb_stb_o <= 0;
                    wb_adr_o <= 0;
                    wb_dat_o <= 0;
                    wb_sel_o <= 0;
                    wb_cti_o <= 0;
                    wb_bte_o <= 0;
                end
            endcase
        end
    end
endmodule
/************** EOF ****************/
3>mkg_wb_master的仿真
wishbone信号,与slave类似。
8,模块的整体仿真和综合
1>叶子模块的封装
mkg_top模块:见下图:
mkg_test模块:
见下图:
2>整体仿真架构
3>综合
mkg_test模块:
mkg_top模块:
9,linux下的driver的设计和编码
虽然写driver不是硬件人员的本职工作,但是driver对rtl设计者(验证自己的IP)的重要程度是毋庸置疑的。做好人要做到底。
操作过程请参考:
http://blog.csdn.net/rill_zhen/article/details/8700937
现在把code list如下:
ip_mkg.c:
/*
*
* rill mkg driver
*
*/
#include <linux/vmalloc.h>
#include <linux/slab.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/fs.h>
#include <asm/uaccess.h> /* get_user and put_user */
//#include <linux/clk.h>
//#include <linux/ioport.h>
#include <asm/io.h> /*ioremap*/
#include <linux/platform_device.h> /*cleanup_module*/
#include <linux/delay.h>
#include <asm-generic/io.h>
#include "ip_mkg.h"
/* Kernel virtual address of the ioremap()ed MKG_MEM_BASE window; NULL while unmapped. */
void __iomem *g_mkg_mem_base = NULL;
/* Kernel virtual address of the ioremap()ed MKG_CORE_BASE window; NULL while unmapped. */
void __iomem *g_mkg_core_base = NULL;
static int device_open(struct inode *inode, struct file *file)
{
g_mkg_mem_base = ioremap(MKG_MEM_BASE,MKG_MEM_LEN);
g_mkg_core_base = ioremap (MKG_CORE_BASE, MKG_CORE_LEN);
if(NULL == g_mkg_mem_base)
{
printk(KERN_ERR "mkg mem open ioremap error!\n");
return -1;
}
else
{
printk("mkg mem ioremap addr:%d!\n",(int)g_mkg_mem_base);
}
if(NULL == g_mkg_core_base)
{
printk(KERN_ERR "mkg core open ioremap error!\n");
return -1;
}
else
{
printk("mkg core ioremap addr:%d!\n",(int)g_mkg_core_base);
}
return 0;
}
/* release(): nothing to tear down per open file; always reports success. */
static int device_release(struct inode *inode, struct file *file)
{
    const int status = 0;

    return status;
}
/*
 * read(): not implemented - the device offers no data through read().
 *
 * Returns 0 (end of file). The previous version returned 1 without storing
 * anything in the user buffer, so callers were told that one byte had been
 * read although the buffer was untouched.
 *
 * TODO: copy result words to user space with copy_to_user() once a read
 * interface is defined.
 */
static ssize_t device_read(struct file *filp, char *buffer, size_t length, loff_t *offset)
{
    return 0;
}
/*
 * write(): not implemented - the data is discarded.
 *
 * Returns count, i.e. reports the whole buffer as consumed. The previous
 * version always returned 1, which made callers re-issue the write one byte
 * at a time and reported one byte written even for a zero-length write.
 */
static ssize_t device_write(struct file *filp, const char *buffer, size_t count, loff_t *offset)
{
    //iowrite32(2,g_mkg_mem_base);
    return count;
}
long device_ioctl(struct file *file, unsigned int ioctl_num, unsigned long ioctl_param)
{
#if 0
int ret_val = 0;
unsigned int ret = 0;
struct reg_data *new_regs;
printk("ioctl======\n");
switch(ioctl_num)
{
case IOCTL_REG_SET:
{
new_regs = (struct reg_data*)kmalloc(sizeof(struct reg_data), GFP_KERNEL);
if((ret_val = copy_from_user(new_regs, (struct reg_data*)ioctl_param, sizeof(struct reg_data))) != 0)
{
kfree(new_regs);
printk(KERN_ERR " error copy line_datafrom user.\n");
return -1;
}
//iowrite16(new_regs->value,g_mkg_mem_base+new_regs->addr);
kfree(new_regs);
}
break;
case IOCTL_REG_GET:
{
new_regs = (struct reg_data*)kmalloc(sizeof(struct reg_data), GFP_KERNEL);
if((ret_val = copy_from_user(new_regs, (struct reg_data*)ioctl_param, sizeof(struct reg_data))) != 0)
{
kfree(new_regs);
printk(KERN_ERR " error copy line_datafrom user.\n");
return -1;
}
//ret = ioread16(g_mkg_mem_base+new_regs->addr);
kfree(new_regs);
return ret;
}
break;
}
#endif
return -1;
}
/* File operations registered for the ip_mkg character device. */
struct file_operations our_file_ops = {
    .owner          = THIS_MODULE,
    .open           = device_open,
    .release        = device_release,
    .read           = device_read,
    .write          = device_write,
    .unlocked_ioctl = device_ioctl,
};
/*
 * Self test, run once from init_module():
 *   1. write the 256 input words to the start of the memory window,
 *   2. write the control word 0x01000000 to the core window at offset 0x4,
 *   3. wait 100 ms,
 *   4. read 256 result words from offset 0x400 and the word at offset 0x800
 *      (printed as "clock cnt"), and print everything.
 *
 * Changes over the previous version:
 *   - The two 1 KB arrays are static instead of living on the (small) kernel
 *     stack, and are unsigned because many constants exceed INT_MAX.
 *   - Unused locals and the large blocks of commented-out experiments were
 *     removed. One of them polled offset 0x804 for the value 0x10101010
 *     instead of using the fixed delay.
 *
 * Both windows must already be mapped (g_mkg_mem_base / g_mkg_core_base).
 */
void test(void)
{
    static const unsigned int write_data[256]={
        0x69016901, 0x8b014401, 0xa7011a01, 0xbd01ee00, 0xcd01bf00, 0xd7018f00, 0xdb015e00, 0xd8012e00, 0xcf010000,
        0xc101d403, 0xad01ab03, 0x94018603, 0x78016503, 0x58014803, 0x35013203, 0x11012003, 0xec001403, 0xc7000e03,
        0xa3000c03, 0x80001003, 0x5f001903, 0x42002503, 0x28003603, 0x12004803, 0x00005e03, 0xf3037403, 0xe9038b03,
        0xe403a303, 0xe303b903, 0xe603ce03, 0xec03e203, 0xf503f203, 0x00000000, 0x0d000a00, 0x1c001200, 0x2b001700,
        0x3a001800, 0x49001600, 0x56001100, 0x62000900, 0x6c000000, 0x7400f503, 0x7900e803, 0x7c00db03, 0x7b00cd03,
        0x7800c003, 0x7300b303, 0x6b00a803, 0x61009f03, 0x56009703, 0x4a009203, 0x3c008f03, 0x2f008e03, 0x21009003,
        0x15009503, 0x09009c03, 0x0000a403, 0xf803af03, 0xf203ba03, 0xef03c603, 0xee03d303, 0xef03e003, 0xf303ec03,
        0xf903f703, 0x00000000, 0x09000700, 0x14000d00, 0x20001100, 0x2d001200, 0x3a001100, 0x46000e00, 0x52000800,
        0x5c000000, 0x6400f703, 0x6b00eb03, 0x7000df03, 0x7200d103, 0x7100c403, 0x6e00b603, 0x6900aa03, 0x61009f03,
        0x58009503, 0x4d008d03, 0x40008803, 0x33008503, 0x25008403, 0x18008703, 0x0b008c03, 0x00009403, 0xf7039e03,
        0xef03aa03, 0xea03b703, 0xe803c603, 0xe903d503, 0xee03e403, 0xf603f303, 0x00000000, 0x0e000b00, 0x1e001400,
        0x32001a00, 0x47001d00, 0x5d001c00, 0x75001700, 0x8c000d00, 0xa2000000, 0xb800ee03, 0xcb00d803, 0xdb00be03,
        0xe700a103, 0xf0008003, 0xf4005d03, 0xf2003903, 0xec001403, 0xe000ef02, 0xcf00cb02, 0xb800a802, 0x9b008802,
        0x7a006c02, 0x55005302, 0x2c003f02, 0x00003102, 0xd2032802, 0xa2032502, 0x71032902, 0x41033302, 0x12034302,
        0xe6025902, 0xbc027502, 0x97029702, 0x7502bc02, 0x5902e602, 0x43021203, 0x33024103, 0x29027103, 0x2502a203,
        0x2802d203, 0x31020000, 0x3f022c00, 0x53025500, 0x6c027a00, 0x88029b00, 0xa802b800, 0xcb02cf00, 0xef02e000,
        0x1403ec00, 0x3903f200, 0x5d03f400, 0x8003f000, 0xa103e700, 0xbe03db00, 0xd803cb00, 0xee03b800, 0x0000a200,
        0x0d008c00, 0x17007500, 0x1c005d00, 0x1d004700, 0x1a003200, 0x14001e00, 0x0b000e00, 0x00000000, 0xf303f603,
        0xe403ee03, 0xd503e903, 0xc603e803, 0xb703ea03, 0xaa03ef03, 0x9e03f703, 0x94030000, 0x8c030b00, 0x87031800,
        0x84032500, 0x85033300, 0x88034000, 0x8d034d00, 0x95035800, 0x9f036100, 0xaa036900, 0xb6036e00, 0xc4037100,
        0xd1037200, 0xdf037000, 0xeb036b00, 0xf7036400, 0x00005c00, 0x08005200, 0x0e004600, 0x11003a00, 0x12002d00,
        0x11002000, 0x0d001400, 0x07000900, 0x00000000, 0xf703f903, 0xec03f303, 0xe003ef03, 0xd303ee03, 0xc603ef03,
        0xba03f203, 0xaf03f803, 0xa4030000, 0x9c030900, 0x95031500, 0x90032100, 0x8e032f00, 0x8f033c00, 0x92034a00,
        0x97035600, 0x9f036100, 0xa8036b00, 0xb3037300, 0xc0037800, 0xcd037b00, 0xdb037c00, 0xe8037900, 0xf5037400,
        0x00006c00, 0x09006200, 0x11005600, 0x16004900, 0x18003a00, 0x17002b00, 0x12001c00, 0x0a000d00, 0x00000000,
        0xf203f503, 0xe203ec03, 0xce03e603, 0xb903e303, 0xa303e403, 0x8b03e903, 0x7403f303, 0x5e030000, 0x48031200,
        0x36032800, 0x25034200, 0x19035f00, 0x10038000, 0x0c03a300, 0x0e03c700, 0x1403ec00, 0x20031101, 0x32033501,
        0x48035801, 0x65037801, 0x86039401, 0xab03ad01, 0xd403c101, 0x0000cf01, 0x2e00d801, 0x5e00db01, 0x8f00d701,
        0xbf00cd01, 0xee00bd01, 0x1a01a701, 0x44018b01
    };
    /* Static: only used by this one-shot test, keeps 1 KB off the stack. */
    static unsigned int read_rslt[256];
    unsigned int temp = 0;
    int i = 0;

    printk("<----ip_mkg test start---->\n");

    /* Fill the result buffer with a marker so stale values are recognisable. */
    for(i=0;i<256;i++)
        read_rslt[i]=0x98766789;
    printk("<----the initialization of result --->\n");

    /* Input samples go to the start of the memory window. */
    for(i=0;i<256;i++)
    {
        iowrite32(write_data[i],g_mkg_mem_base+(i*4));
    }
    printk("<----write orignal data --->\n");

    /* Control word for the core. */
    iowrite32(0x01000000,g_mkg_core_base+0x4);
    printk("<---write control data --->\n");

    /* Fixed delay instead of polling for completion. */
    mdelay(100);
    printk("<----delay ends ---->\n");

    /* Results are read back from offset 0x400 of the memory window. */
    for(i=0;i<256;i++)
    {
        read_rslt[i]=ioread32(g_mkg_mem_base+0x00000400+(i*4));
    }
    printk("<----read rslt from ram ---->\n");

    temp=ioread32(g_mkg_mem_base+0x00000800);
    printk("<-------my clock cnt:0x%x--->\n",temp);

    for(i=0;i<256;i++)
    {
        printk("====mem read addr==0x%x==mem value:0x%x==\n",i,read_rslt[i]);
    }
}
int init_module()
{
int ret_val;
int ret;
int ret2;
void __iomem *ret_from_request;
void __iomem *ret_from_request2;
//=== Allocate character device
ret_val = register_chrdev(MAJOR_NUM, DEVICE_NAME, &our_file_ops);
if (ret_val < 0)
{
printk(KERN_ALERT " device %s failed(%d)\n", DEVICE_NAME, ret_val);
return ret_val;
}
ret = check_mem_region(MKG_MEM_BASE, MKG_MEM_LEN);
if (ret < 0)
{
printk(KERN_ERR "mkg check_mem_region bussy error!\n");
return -1;
}
ret_from_request = request_mem_region(MKG_MEM_BASE, MKG_MEM_LEN, "ip_mkg");
ret2 = check_mem_region(MKG_CORE_BASE, MKG_CORE_LEN);
if (ret2 < 0)
{
printk(KERN_ERR "mkg check_mem_region bussy error!\n");
return -1;
}
ret_from_request2 = request_mem_region(MKG_CORE_BASE, MKG_CORE_LEN, "ip_mkg");
//===ioremap mkg registers
g_mkg_mem_base = ioremap(MKG_MEM_BASE,MKG_MEM_LEN);
if(NULL == g_mkg_mem_base)
{
printk(KERN_ERR "mkg mem ioremap error!\n");
return -1;
}
else
{
;//printk("mkg ioremap addr:%d!\n",(unsigned int)g_mkg_mem_base);
}
g_mkg_core_base = ioremap(MKG_CORE_BASE,MKG_CORE_LEN);
if(NULL == g_mkg_core_base)
{
printk(KERN_ERR "mkg core ioremap error!\n");
return -1;
}
else
{
;//printk("mkg ioremap addr:%d!\n",(unsigned int)g_mkg_mem_base);
}
printk("mkg module init done!\n");
test();
return 0;
}
void cleanup_module()
{
release_mem_region(MKG_MEM_BASE, MKG_MEM_LEN);
release_mem_region(MKG_CORE_BASE,MKG_CORE_LEN);
unregister_chrdev(MAJOR_NUM, DEVICE_NAME);
}
/* Module metadata: license and author strings. */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Rill zhen:rill_zhen@126.com");
ip_mkg.h
#ifndef __IP_MKG_H__
#define __IP_MKG_H__

/* Character device identity. */
#define MAJOR_NUM       102
#define DEVICE_NAME     "ip_mkg"

/* Physical window of the memory block: base address and length in bytes. */
#define MKG_MEM_BASE    0x98000000
#define MKG_MEM_LEN     3072

/* Physical window of the core registers: base address and length in bytes. */
#define MKG_CORE_BASE   0x97000000
#define MKG_CORE_LEN    64

/* ioctl command numbers. */
#define IOCTL_REG_SET   0
#define IOCTL_REG_GET   1

/* Argument of the register ioctls: a register offset and its value. */
struct reg_data
{
    unsigned short addr;
    int value;
};

#endif
10,FPGA验证
前面只是仿真,要想让它work,还要挂到arbiter_dbus上才行。
请参考:
http://blog.csdn.net/rill_zhen/article/details/8722664
和
http://blog.csdn.net/rill_zhen/article/details/8558463
我将可以work的整个工程也传上来了:
要想让一个工程能够work,道理很简单,但是有很多细节需要处理。上面的内容是在仿真之后写的,
之后又做了很多工作才能在板子上work,因此工程中有些地方与上面的内容稍微有些改动。
这个工程经过了modelsim的仿真、在xilinx ZYNQ板子上用chipscope的在线调试,以及在ORPSoC的altera FPGA板子上的板级验证。
主要做了如下优化:
1,将内部的mem,改为调用altera的库,减少le的使用量。
2,修改arbiter_dbus的仲裁策略为轮转。
3,对mkg_core模块进行了优化。
由于上传文件大小有限,我分成了两部分:
工程的第一部分:
http://download.csdn.net/detail/rill_zhen/5435013
工程的第二部分:
http://download.csdn.net/detail/rill_zhen/5435107
此外还有对应的linux的driver:
http://download.csdn.net/detail/rill_zhen/5435175
验证如下图:
可以与之前仿真时的fft计算结果对比,看到和仿真的结果相同。
注意:此次驱动读取计算结果采用的是延时/轮询的形式,如果采用中断方式,请参考:
http://blog.csdn.net/rill_zhen/article/details/8894856
11,硬件与软件的比较
前面介绍的都是硬件实现FFT运算,如果将这个IP作为一个硬件加速器来用的话,它和软件实现的加速效果如何呢?下面我们就做一个比较。
1>基本思想
同样的计算量,分别统计硬件用时和软件用时,加速比=软件用时 / 硬件用时
2>实现算法
1》硬件:在IP core内部增加一个counter,从reset开始到done结束,记录cycle数量,保存在一个寄存器中,驱动读取这个寄存器的值,并打印出来,然后根据时钟频率(50MHz),计算出硬件用时。
2》软件:两次调用gettimeofday()函数,计算差值,即软件用时。
3》具体操作步骤,请参考:
http://blog.csdn.net/rill_zhen/article/details/8700937
3>结果
1》硬件:“<-------my clock cnt:0x94740200--->”,需要注意的是这个数和硬件内部的大小端不一致,需要转换一下,即正确的数值是0x00027494,十进制是160916,硬件用时(ms)=(160916/50M)*1000=3.218 ms,手动10次运行取平均值为3.12 ms.
2》软件:"Used Time:144.6",运行100次,取平均值,即软件用时为144.6 ms.
3》对比:如下图
4>具体代码
1》硬件部分,
mkg_ram_wb.v:
// mkg_ram_wb
//
// Wishbone slave front-end for two external 256 x 32-bit memories (a "data"
// memory and a "result" memory) plus two read-only registers:
//
//   data_adr_start .. data_adr_end   data memory   (word index = wb_adr_i[9:2])
//   rslt_adr_start .. rslt_adr_end   result memory (word index = wb_adr_i[9:2])
//   status_adr                       status register (0 until done, then 32'h0101_0101)
//   cnt_adr (and any other read      cycle counter
//            address >= cnt_adr)
//
// The cycle counter starts on the first write to data_adr_start and freezes on
// the first write to rslt_adr_end; it is only cleared by wb_rst.
//
// All outputs are registered and are decoded from next_state, so each output
// pattern appears in the same cycle in which `state` takes that value.
// wb_sel_i, wb_cti_i and wb_bte_i are not used: every access is a single
// full-word classic cycle. wb_err_o and wb_rty_o are never asserted.
//
// Fixes relative to the previous revision:
//   * A read of status_adr used to be captured by the `>= cnt_adr` test that
//     preceded it, so Read_Status was unreachable and the counter was
//     returned instead. The status address is now tested first.
//   * The counter FSM's combinational case now has a default, so the unused
//     encoding 2'b11 no longer infers a latch on c_next_state.
module mkg_ram_wb
(
    wb_clk,
    wb_rst,
    wb_dat_i,
    wb_adr_i,
    wb_sel_i,
    wb_cti_i,
    wb_bte_i,
    wb_we_i,
    wb_cyc_i,
    wb_stb_i,
    wb_dat_o,
    wb_ack_o,
    wb_err_o,
    wb_rty_o,
    data_address,
    data_out,
    data_rden,
    data_wren,
    data_q,
    rslt_address,
    rslt_out,
    rslt_rden,
    rslt_wren,
    rslt_q
);

// ---------------------------------------------------------------- Wishbone
input             wb_clk;
input             wb_rst;
input      [31:0] wb_adr_i;
input             wb_stb_i;
input             wb_cyc_i;
input      [2:0]  wb_cti_i;
input      [1:0]  wb_bte_i;
input      [31:0] wb_dat_i;
input      [3:0]  wb_sel_i;
input             wb_we_i;
output reg [31:0] wb_dat_o;
output reg        wb_ack_o;
output reg        wb_err_o;
output reg        wb_rty_o;

// ------------------------------------------------------ data memory port
output reg [7:0]  data_address;
output reg [31:0] data_out;
output reg        data_rden;
output reg        data_wren;
input      [31:0] data_q;

// ---------------------------------------------------- result memory port
output reg [7:0]  rslt_address;
output reg [31:0] rslt_out;
output reg        rslt_rden;
output reg        rslt_wren;
input      [31:0] rslt_q;

// ------------------------------------------------------------ address map
parameter my_ram_adr     = 8'h98;
parameter data_adr_start = 32'h9800_0000;
parameter data_adr_end   = 32'h9800_03fc;
parameter rslt_adr_start = 32'h9800_0400;
parameter rslt_adr_end   = 32'h9800_07fc;
parameter error_code     = 32'habcd_dcba;   // value driven on wb_dat_o while a write is acknowledged
parameter Numb           = 256;
parameter cnt_adr        = 32'h9800_0800;
parameter status_adr     = 32'h9800_0804;

// ------------------------------------------------- bus FSM state encoding
parameter Idle             = 5'b00000;
parameter Read_Data        = 5'b00001;
parameter Read_Rslt        = 5'b00010;
parameter Write_Data       = 5'b00011;
parameter Write_Rslt       = 5'b00100;
parameter Read_Data_Pause1 = 5'b00101;
parameter Read_Data_Pause2 = 5'b00110;
parameter Write_Data_Pause = 5'b00111;
parameter Write_Rslt_Pause = 5'b01000;
parameter Read_Rslt_Pause1 = 5'b01001;
parameter Read_Rslt_Pause2 = 5'b01010;
parameter Read_Data_Pause3 = 5'b01011;
parameter Read_Rslt_Pause3 = 5'b01100;
parameter Read_Cnt         = 5'b01101;
parameter Read_Status      = 5'b01110;
parameter Read_Cnt_Pause   = 5'b01111;
parameter Read_Cnt_Done    = 5'b10000;

// --------------------------------------------- counter FSM state encoding
parameter c_idle = 2'b00;
parameter c_cnt  = 2'b01;
parameter c_done = 2'b10;

reg [4:0]  state, next_state;
reg [1:0]  c_state, c_next_state;
reg [31:0] cnt;
reg [31:0] status;
reg [1:0]  pause_cnt;

// ----------------------------------------------------- request decoding
wire wb_req        = wb_stb_i && wb_cyc_i;
wire wb_rd_req     = wb_req && !wb_we_i;
wire wb_wr_req     = wb_req &&  wb_we_i;
wire in_data_range = (wb_adr_i >= data_adr_start) && (wb_adr_i <= data_adr_end);
wire in_rslt_range = (wb_adr_i >= rslt_adr_start) && (wb_adr_i <= rslt_adr_end);

// ======================================================================
// Cycle counter: c_idle -> c_cnt on a write to data_adr_start,
//                c_cnt  -> c_done on a write to rslt_adr_end,
//                c_done is held until reset.
// ======================================================================
always @(posedge wb_clk)
    if (wb_rst)
        c_state <= c_idle;
    else
        c_state <= c_next_state;

always @(*)
begin
    case (c_state)
        c_idle:
            if (wb_wr_req && wb_adr_i == data_adr_start)
                c_next_state = c_cnt;
            else
                c_next_state = c_idle;
        c_cnt:
            if (wb_wr_req && wb_adr_i == rslt_adr_end)
                c_next_state = c_done;
            else
                c_next_state = c_cnt;
        c_done:
            c_next_state = c_done;
        default:                        // unused encoding 2'b11: recover to idle, no latch
            c_next_state = c_idle;
    endcase
end

always @(posedge wb_clk)
    if (wb_rst)
    begin
        cnt    <= 0;
        status <= 0;
    end
    else
        case (c_next_state)
            c_cnt:
            begin
                cnt    <= cnt + 1;
                status <= 0;
            end
            c_done:
            begin
                cnt    <= cnt;              // frozen: this is the measured cycle count
                status <= 32'h0101_0101;    // "done" pattern returned by a status read
            end
            default:                        // c_idle
            begin
                cnt    <= 0;
                status <= 0;
            end
        endcase

// ======================================================================
// Bus FSM: state register
// ======================================================================
always @(posedge wb_clk)
    if (wb_rst)
        state <= Idle;
    else
        state <= next_state;

// ======================================================================
// Bus FSM: next-state logic
// ======================================================================
always @(*)
begin
    next_state = Idle;
    case (state)
        Idle:
        begin
            if (wb_rd_req && in_data_range)
                next_state = Read_Data;
            else if (wb_rd_req && in_rslt_range)
                next_state = Read_Rslt;
            else if (wb_rd_req && wb_adr_i == status_adr)
                // Must be tested before the cnt_adr catch-all below,
                // otherwise this branch can never be taken.
                next_state = Read_Status;
            else if (wb_rd_req && wb_adr_i >= cnt_adr)
                // Catch-all kept from the original decode: any other read at or
                // above cnt_adr returns the counter, so the bus always gets an ack.
                next_state = Read_Cnt;
            else if (wb_wr_req && in_data_range)
                next_state = Write_Data;
            else if (wb_wr_req && in_rslt_range)
                next_state = Write_Rslt;
            else
                next_state = Idle;
        end

        // Writes: one strobe cycle, then one acknowledge cycle.
        Write_Data:       next_state = Write_Data_Pause;
        Write_Rslt:       next_state = Write_Rslt_Pause;
        Write_Data_Pause: next_state = Idle;
        Write_Rslt_Pause: next_state = Idle;

        // Memory reads: one strobe cycle, two wait cycles, then acknowledge.
        Read_Data:        next_state = Read_Data_Pause1;
        Read_Data_Pause1: next_state = Read_Data_Pause2;
        Read_Data_Pause2: next_state = Read_Data_Pause3;
        Read_Data_Pause3: next_state = Idle;
        Read_Rslt:        next_state = Read_Rslt_Pause1;
        Read_Rslt_Pause1: next_state = Read_Rslt_Pause2;
        Read_Rslt_Pause2: next_state = Read_Rslt_Pause3;
        Read_Rslt_Pause3: next_state = Idle;

        // Counter read: wait until pause_cnt reaches 3, then acknowledge.
        Read_Cnt:         next_state = Read_Cnt_Pause;
        Read_Cnt_Pause:
            if (pause_cnt < 2'b11)
                next_state = Read_Cnt_Pause;
            else
                next_state = Read_Cnt_Done;
        Read_Cnt_Done:    next_state = Idle;

        // Status read is acknowledged in a single cycle.
        Read_Status:      next_state = Idle;

        default:          next_state = Idle;
    endcase
end

// ======================================================================
// Bus FSM: registered outputs, decoded from next_state.
// Every output defaults to 0 each cycle; a state only lists the signals it
// drives to something else. pause_cnt is deliberately NOT defaulted: it
// holds its value except where assigned below.
// ======================================================================
always @(posedge wb_clk)
begin
    wb_dat_o     <= 0;
    wb_ack_o     <= 0;
    wb_err_o     <= 0;
    wb_rty_o     <= 0;
    data_address <= 0;
    data_out     <= 0;
    data_rden    <= 0;
    data_wren    <= 0;
    rslt_address <= 0;
    rslt_out     <= 0;
    rslt_rden    <= 0;
    rslt_wren    <= 0;

    if (wb_rst)
        pause_cnt <= 0;
    else
        case (next_state)
            Write_Data:
            begin
                data_address <= wb_adr_i[9:2];
                data_out     <= wb_dat_i;
                data_wren    <= 1'b1;
            end
            Write_Rslt:
            begin
                rslt_address <= wb_adr_i[9:2];
                rslt_out     <= wb_dat_i;
                rslt_wren    <= 1'b1;
            end
            Read_Data:
            begin
                data_address <= wb_adr_i[9:2];
                data_rden    <= 1'b1;
            end
            Read_Rslt:
            begin
                rslt_address <= wb_adr_i[9:2];
                rslt_rden    <= 1'b1;
            end
            Read_Data_Pause3:
            begin
                wb_ack_o <= 1'b1;
                wb_dat_o <= data_q;
            end
            Read_Rslt_Pause3:
            begin
                wb_ack_o <= 1'b1;
                wb_dat_o <= rslt_q;
            end
            Write_Data_Pause,
            Write_Rslt_Pause:
            begin
                wb_ack_o <= 1'b1;
                wb_dat_o <= error_code;
            end
            Read_Cnt:
                pause_cnt <= 0;
            Read_Cnt_Pause:
                pause_cnt <= pause_cnt + 1;
            Read_Cnt_Done:
            begin
                wb_ack_o  <= 1'b1;
                wb_dat_o  <= cnt;
                pause_cnt <= 0;
            end
            Read_Status:
            begin
                wb_ack_o <= 1'b1;
                wb_dat_o <= status;
            end
            default:
                ;   // Idle and the intermediate pause states: defaults only
        endcase
end

endmodule
2》软件部分代码:
fft256.c:
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include <sys/timeb.h>
#include <time.h>
/* Complex sample: real and imaginary parts, both stored as double. */
typedef struct {
double real;
double img;
} COMPLEX;
/* Local time record holding a seconds field and a microseconds field. */
typedef struct
{
long tv_sec;
long tv_usec;
} timeval;
/* Pi, used to build the butterfly twiddle factors in common_fft(). */
#define PI 3.14159265358979323846
/* Forward transform of x[0..nfft-1], in place (calls common_fft with isign = 1). */
void FFT(COMPLEX*,int nfft);
/* Inverse transform of x[0..nfft-1], in place, including the 1/nfft scaling. */
void IFFT(COMPLEX*,int nfft); //inverse FFT
/* Shared in-place kernel; isign selects the direction (1 for FFT, -1 for IFFT). */
void common_fft(COMPLEX*,int nfft,int isign);
/* Complex product a*b. */
COMPLEX EE(COMPLEX a,COMPLEX b);
/*
 * Software reference benchmark: repeatedly runs a 256-point FFT on the ramp
 * 0,1,2,...,255 and prints the total wall-clock time of all runs, in
 * microseconds, as "Used Time:<value>".
 *
 * Fixes relative to the previous revision:
 *   - the input ramp was never written (`x[i].real== i;` was a comparison),
 *     so the FFT ran on uninitialised memory;
 *   - the buffer allocated in every iteration was never freed;
 *   - malloc() failure was not checked;
 *   - gettimeofday() is now called with a real `struct timeval`
 *     (from <sys/time.h>) instead of a look-alike local type.
 *
 * Returns 0 on success, 1 if the sample buffer cannot be allocated.
 */
int main(int argc,char* argv[]) {
    struct timeval tpstart, tpend;
    int i;
    int Nx;
    int NFFT;
    COMPLEX *x;
    double timeuse;
    int count;

    (void)argc;
    (void)argv;

    Nx=256;
    printf("Nx = %d\n",Nx);

    gettimeofday(&tpstart,NULL);
    /* NOTE(review): `count <= 100` executes the body 101 times -- confirm
     * whether 100 runs were intended before dividing the result by a count. */
    for( count = 0; count <= 100; count++){
        /* calculate NFFT as the next higher power of 2 >= Nx */
        NFFT = (int)pow(2.0,ceil(log((double)Nx)/log(2.0)));

        /* allocate memory for NFFT complex numbers */
        x=(COMPLEX*)malloc(NFFT*sizeof(COMPLEX));
        if(x==NULL)
        {
            fprintf(stderr,"fft256: out of memory\n");
            return 1;
        }

        /* input test data: ramp over the first Nx samples, zero padding after */
        for(i=0;i<NFFT;i++)
        {
            x[i].real = (i<Nx) ? (double)i : 0.0;
            x[i].img = 0.0;
        }

        /* calculate FFT */
        FFT(x,NFFT);

        free(x);
    }
    gettimeofday(&tpend,NULL);

    /* elapsed time over all iterations, in microseconds */
    timeuse=1000000.0*(tpend.tv_sec-tpstart.tv_sec)+(tpend.tv_usec-tpstart.tv_usec);
    printf("Used Time:%lf\n",timeuse);
    return 0;
}
/*
 * Forward transform of x[0..nfft-1], computed in place.
 * Thin wrapper that runs the shared kernel with isign = 1.
 */
void FFT(COMPLEX* x, int nfft)
{
    const int forward = 1;      /* direction selector understood by common_fft() */

    common_fft(x, nfft, forward);
}
/*
 * Inverse transform of x[0..nfft-1], computed in place.
 * Runs the shared kernel with isign = -1, then divides every output sample
 * by nfft.
 */
void IFFT(COMPLEX* x,int nfft)
{
    COMPLEX *sample = x;
    int remaining = nfft;

    common_fft(x, nfft, -1);

    /* normalise: each component is divided by the transform length */
    while (remaining-- > 0)
    {
        sample->real /= nfft;
        sample->img /= nfft;
        ++sample;
    }
}
/*
 * FFT kernel: in-place radix-2 decimation-in-time transform.
 *
 *   x      array of nfft complex samples, overwritten with the result
 *   nfft   transform length; must be a power of two
 *   isign  1 for FFT, -1 for IFFT (no 1/nfft scaling is applied here)
 *
 * Sign convention: the stage twiddle is cos(PI/lei*isign) + i*sin(PI/lei*isign),
 * i.e. isign = 1 uses a positive exponent.
 *
 * Length handling (new in this revision):
 *   - nfft < 2 returns immediately; there is nothing to compute, and the
 *     stage-count loop previously never terminated for these values.
 *   - a length that is not a power of two is rejected with a message on
 *     stderr and x is left untouched; previously the bit-reversal loop
 *     could spin forever or the butterflies could index past the array.
 */
void common_fft(COMPLEX* x,int nfft,int isign)
{
    int i,j=0,k;
    int stage,le,lei,ip;
    COMPLEX t,u,w;

    if(x==NULL || nfft<2)
        return;
    if((nfft & (nfft-1))!=0)
    {
        fprintf(stderr,"common_fft: length %d is not a power of two\n",nfft);
        return;
    }

    /* bit-reversal permutation */
    for(i=0;i<nfft-1;i++)
    {
        if(i<j)
        {
            t=x[j];
            x[j]=x[i];
            x[i]=t;
        }
        k=nfft/2;
        while(k<=j)
        {
            j-=k;
            k/=2;
        }
        j+=k;
    }

    /* number of butterfly stages = log2(nfft) */
    stage=0;
    for(j=nfft;j>1;j/=2)
        stage++;

    for(k=1;k<=stage;k++)
    {
        le=2<<(k-1);            /* butterfly group size in this stage */
        lei=le/2;               /* distance between the two butterfly inputs */
        u.real=1.0;             /* u, butterfly factor initial value */
        u.img=0.0;
        w.real=cos(PI/lei*isign);
        w.img=sin(PI/lei*isign);
        for(j=0;j<=lei-1;j++)
        {
            for(i=j;i<=nfft-1;i+=le)
            {
                ip=i+lei;
                t=EE(x[ip],u);
                x[ip].real=x[i].real-t.real;
                x[ip].img=x[i].img-t.img;
                x[i].real=x[i].real+t.real;
                x[i].img=x[i].img+t.img;
            }
            u=EE(u,w);          /* advance the twiddle for the next j */
        }
    }
}
/*
 * Complex multiplication: returns a*b, where
 *   Re = a.real*b.real - a.img*b.img
 *   Im = a.real*b.img  + a.img*b.real
 */
COMPLEX EE(COMPLEX a,COMPLEX b)
{
    COMPLEX product;
    const double re = a.real*b.real-a.img*b.img;
    const double im = a.real*b.img+a.img*b.real;

    product.real = re;
    product.img = im;
    return product;
}
12,后端ASIC tapeout验证
由于这个工程是一个实验性质的,目的在于说明SOC的开发的关键技术和流程,暂时没做backend flow。
如果感兴趣,可参考:
《Advanced ASIC Chip Synthesis: Using Synopsys Design Compiler, Physical Compiler and PrimeTime 》。
有中文版翻译《高级ASIC芯片综合》。
13,小结
麻雀虽小,五脏俱全。这个project,零零散散,大概用去了我三周的时间。到目前为止,对ORPSoC已经有了一个比较清晰的感觉了,其实,有的时候,感觉很重要。
之前咱们一直在or(OpenRISC)的围墙外徘徊,做这个东西的目的一方面是自己找找感觉,另一方面也希望对同样在围墙外的哥们儿有些帮助。
如果你感觉没问题了,那咱们就找到了围墙的大门口。。。。。。