OpenRisc-23-基于openrisc的SOC开发

引言

经过前面一段时间的锻炼和积累,是时候做一个稍微大一点的project了。
本小节就以fft256为例,整体感觉一下基于openrisc的SOC的开发过程。

1,整体思想

1>以fft256为运算核心,linux驱动将待计算的fft数据写入RAM,

2>告知mkg_core进行reset 和start,

3>mkg_core控制master,通过DMA形式,读取ram中的数据,

4>送给fft256,

5>然后获得fft256的运算结果,

6>写回ram,

7>最后驱动读取计算结果并打印。

注意:

这次的RAM是自己实现的一块独立的专用的RAM,不是使用的外部的DDR SDRAM。

2,模块的划分和接口定义

2.1 模块划分

整个project共5个叶子module:mkg_wb_ram,mkg_wb_slave,mkg_core,FFT256,mkg_wb_master.

2.2 接口定义,手动用visio绘图:

1>mkg_wb_ram模块

2>mkg_wb_slave模块

3>mkg_core模块

 

4>FFT256模块


5>mkg_wb_master模块


3,mkg_wb_ram模块的fsm设计,rtl编码,和仿真

1>mkg_wb_ram的fsm设计

2>mkg_wb_ram的rtl编码

 

 

/*
*
*	rill create 2013-04-18
*	rillzhen@gmail.com
*
*/


//`include "mkg_defines.v"


// mkg_wb_ram: Wishbone slave that stores FFT input words (Data) and FFT
// result words (Result).
//
// An accepted request moves the FSM from Idle into one of four access states
// for exactly one clock and then back to Idle. All outputs are registered and
// decoded from next_state, so wb_ack_o is raised on the same clock edge that
// accepts the request.
//
// Reads that hit the data window do NOT return Data[]; they return the sample
// delivered by Wave_ROM256 at word address wb_adr_i[9:2], packed as
// {6'b0, DR, 6'b0, DI}. Words written to the data window are stored in Data[]
// but are never read back by this module.
// wb_sel_i, wb_cti_i and wb_bte_i are not used; wb_err_o and wb_rty_o are
// only ever driven to 0.
module mkg_wb_ram
(
	input             wb_clk,
	input             wb_rst,

	input      [31:0] wb_dat_i,
	input      [31:0] wb_adr_i,
	input      [3:0]  wb_sel_i,
	input      [2:0]  wb_cti_i,
	input      [1:0]  wb_bte_i,
	input             wb_we_i,
	input             wb_cyc_i,
	input             wb_stb_i,

	output reg [31:0] wb_dat_o,
	output reg        wb_ack_o,
	output reg        wb_err_o,
	output reg        wb_rty_o
);

// Number of ROM sample bits forwarded on a data-window read.
parameter nb=10;

// Address map. The data window is tested before the result window, so the
// overlapping part of the two ranges always resolves to the data window.
parameter my_ram_adr=8'h98;
parameter data_adr_start=32'h9800_0000;
parameter data_adr_end=32'h9800_03fc;
parameter rslt_adr_start=32'h9800_0100;
parameter rslt_adr_end=32'h9801_03fc;
parameter error_code=32'habcd_dcba;
parameter Numb=256;

// One-hot FSM encoding.
parameter Idle=5'b00001;
parameter Read_Data=5'b00010;
parameter Read_Rslt=5'b00100;
parameter Write_Data=5'b01000;
parameter Write_Rslt=5'b10000;

// ---- sample ROM -----------------------------------------------------------
wire [7:0]    ADDR = wb_adr_i[9:2];
wire [15:0]   D_R, D_I;

Wave_ROM256 my_rom
(
	.ADDR    (ADDR),
	.DATA_RE (D_R),
	.DATA_IM (D_I)
);

// Keep the top nb bits of each ROM word; add one when the word's sign bit is
// set (skipped when nb is the full 16 bits).
wire [nb-1:0] DR = (D_R[15]&&(nb!=16))? (D_R[15:15-nb+1]+1) : D_R[15:15-nb+1];
wire [nb-1:0] DI = (D_I[15]&&(nb!=16))? (D_I[15:15-nb+1]+1) : D_I[15:15-nb+1];

// ---- storage and state ----------------------------------------------------
reg [31:0] Data   [Numb-1:0];
reg [31:0] Result [Numb-1:0];
reg [4:0]  state, next_state;

// ---- request decode -------------------------------------------------------
wire rd_req  = wb_stb_i && wb_cyc_i && !wb_we_i;
wire wr_req  = wb_stb_i && wb_cyc_i && wb_we_i;
wire in_data = wb_adr_i >= data_adr_start && wb_adr_i <= data_adr_end;
wire in_rslt = wb_adr_i >= rslt_adr_start && wb_adr_i <= rslt_adr_end;

// State register.
always @(posedge wb_clk)
begin
	if (wb_rst)
		state <= Idle;
	else
		state <= next_state;
end

// Next-state logic: only Idle can start an access; every access state (and
// any unexpected encoding) falls back to Idle.
always @(*)
begin
	next_state = Idle;
	if (state == Idle)
	begin
		if (rd_req && in_data)
			next_state = Read_Data;
		else if (rd_req && in_rslt)
			next_state = Read_Rslt;
		else if (wr_req && in_data)
			next_state = Write_Data;
		else if (wr_req && in_rslt)
			next_state = Write_Rslt;
	end
end

// Registered outputs and memory writes, selected by next_state.
always @(posedge wb_clk)
begin
	if (wb_rst)
	begin
		wb_dat_o <= 0;
		wb_ack_o <= 0;
		wb_err_o <= 0;
		wb_rty_o <= 0;
	end
	else
	begin
		case (next_state)
		Write_Data:
			begin
				Data[wb_adr_i[9:2]] <= wb_dat_i;
				wb_ack_o <= 1'b1;
			end
		Write_Rslt:
			begin
				Result[wb_adr_i[9:2]] <= wb_dat_i;
				wb_ack_o <= 1'b1;
			end
		Read_Data:
			begin
				// ROM sample instead of Data[wb_adr_i[9:2]]
				wb_dat_o <= {6'b0,DR,6'b0,DI};
				wb_ack_o <= 1'b1;
			end
		Read_Rslt:
			begin
				wb_dat_o <= Result[wb_adr_i[9:2]];
				wb_ack_o <= 1'b1;
			end
		default: // Idle, or anything unexpected: release the bus
			begin
				wb_dat_o <= 0;
				wb_ack_o <= 0;
				wb_err_o <= 0;
				wb_rty_o <= 0;
			end
		endcase
	end
end

endmodule


/************** EOF ****************/


 

 

 

 

3>mkg_wb_ram仿真

 

4,mkg_wb_slave模块的fsm设计,rtl编码,和仿真

1>mkg_wb_slave的fsm设计

ram的reset timing:

ram的work timing:

2>mkg_wb_slave的rtl编码

 

 

/*
*
* mkg_wb_slave.v
*
* rill create 2013-04-18
*
*/


//`include "mkg_defines.v"


// mkg_wb_slave: Wishbone register slave that turns bus writes into
// reset/enable/config pulses for the core and returns the core status on reads.
//
// Bus behaviour
//   * write with wb_adr_i[8:0] == 0 : the word is latched into reg_config and
//     the FSM enters s_config
//       - reg_config == 1 : enable_o and reset_o are asserted while
//         config_status steps through its four states, then back to idle
//       - otherwise       : enable_o is asserted and config_o carries
//         reg_config for one pass through s_config
//   * write to any other offset     : acknowledged, no other effect
//   * read (any offset)             : returns the registered copy of status_i
//
// Fixes relative to the previous revision
//   * wb_ack_o is now a single-cycle pulse: it is cleared in s_config, where
//     it previously stayed high for the whole configuration sequence.
//   * s_idle ignores the strobe while wb_ack_o is still asserted, so a master
//     that deasserts STB one clock after sampling ACK no longer triggers a
//     second, spurious access.
//   * wb_ack_o, wb_dat_o and config_status are cleared by wb_rst.
//   * an unexpected config_status value terminates the reset sequence instead
//     of leaving the FSM in s_config forever.
module mkg_wb_slave
(
	input             wb_clk,
	input             wb_rst,

	input      [31:0] wb_dat_i,
	input      [31:0] wb_adr_i,
	input      [3:0]  wb_sel_i,
	input      [2:0]  wb_cti_i,
	input      [1:0]  wb_bte_i,
	input             wb_we_i,
	input             wb_cyc_i,
	input             wb_stb_i,

	output reg [31:0] wb_dat_o,
	output reg        wb_ack_o,
	output            wb_err_o,
	output            wb_rty_o,

	//internal signals
	input      [31:0] status_i,

	output reg        reset_o,
	output reg        enable_o,
	output reg [31:0] config_o
);

//== local defines
// bus FSM
parameter s_idle = 3'b000;
parameter s_read = 3'b001;
parameter s_write = 3'b010;
parameter s_config = 3'b100;
reg [2:0] state = s_idle;
// steps of the core-reset sequence
parameter s_config1 = 3'b000;
parameter s_config2 = 3'b001;
parameter s_config3 = 3'b010;
parameter s_config4 = 3'b100;
reg [2:0] config_status = s_config1;

reg [31:0] reg_config; // written through offset 0x0
reg [31:0] reg_status; // registered status_i, returned by every read

//== local logic
// Error and retry terminations are never generated.
assign wb_err_o = 1'b0;
assign wb_rty_o = 1'b0;

// Register the core status so that reads return a stable value.
always @(posedge wb_clk)
begin
	if (wb_rst)
		reg_status <= 32'h0;
	else
		reg_status <= status_i;
end

// Wishbone interface and core configuration FSM.
always @(posedge wb_clk)
begin
	if (wb_rst)
		begin
			state         <= s_idle;
			config_status <= s_config1;
			wb_dat_o      <= 32'h0;
			wb_ack_o      <= 1'b0;
			reset_o       <= 1'b0;
			enable_o      <= 1'b0;
			config_o      <= 32'h0;
			reg_config    <= 32'h0;
		end
	else
		begin
			case (state)
			s_idle:
				begin
					// Release every output, then look for a new request.
					wb_dat_o <= 32'h0;
					wb_ack_o <= 1'b0;

					reset_o  <= 1'b0;
					enable_o <= 1'b0;
					config_o <= 32'h0;

					// While wb_ack_o is still high the strobe belongs to the
					// access that is being acknowledged right now.
					if (wb_stb_i && wb_cyc_i && wb_we_i && !wb_ack_o)
						state <= s_write;
					else if (wb_stb_i && wb_cyc_i && !wb_we_i && !wb_ack_o)
						state <= s_read;
					else
						state <= s_idle;
				end

			s_read:
				begin
					wb_dat_o <= reg_status;
					wb_ack_o <= 1'b1;
					state    <= s_idle;
				end

			s_write:
				begin
					wb_ack_o <= 1'b1;
					if (wb_adr_i[8:0] == 9'h0)
						begin
							reg_config <= wb_dat_i;
							state      <= s_config;
						end
					else
						begin
							state <= s_idle;
						end
				end

			s_config:
				begin
					wb_ack_o <= 1'b0; // the write was acknowledged in s_write
					enable_o <= 1'b1;

					if (32'h1 == reg_config) // reset mkg_core
						begin
							reset_o <= 1'b1;

							case (config_status)
							s_config1: config_status <= s_config2;
							s_config2: config_status <= s_config3;
							s_config3: config_status <= s_config4;
							default: // s_config4, or an illegal encoding
								begin
									config_status <= s_config1;
									state         <= s_idle;
								end
							endcase
						end
					else
						begin
							config_o <= reg_config;
							state    <= s_idle;
						end
				end

			default:
				begin
					state <= s_idle;
				end

			endcase
		end
end

endmodule


/************** EOF ****************/


 

 

 

 

3>mkg_wb_slave仿真

reset仿真:

config仿真:


5,mkg_core模块的fsm设计,rtl编码,和仿真及和mkg_wb_slave的联合仿真

1>mkg_core的fsm设计

有点复杂,请阅读rtl code。

 

2>mkg_core的rtl编码

 

 

/*
*
* mkg_core.v
*
* rill create 2013-04-18
*
*/


//`include "mkg_defines.v"


// mkg_core: sequencer between the slave (command/status), the bus master
// (memory access) and the FFT block.
//
// Top-level flow (core_status):
//   s_idle         : every register is re-initialised each clock; leaves for
//                    s_read_ram when start_flag is set
//   s_read_ram     : for read_index 0..255, requests one word through the
//                    master at {8'h98,6'b0,read_index,2'b0}, then presents
//                    dat_read[25:16] / dat_read[9:0] to the FFT with fft_en_o
//                    high for one clock; fft_start_o is raised while word 0
//                    is being fetched. Leaves early if fft_ready_i is seen.
//   s_write_buffer : waits for fft_ready_i, then stores
//                    {2'b00,fft_dor_i,2'b00,fft_doi_i} at result_buffer[fft_addr_i]
//                    on 256 consecutive clocks
//   s_write_ram    : writes result_buffer[0..255] through the master to
//                    {8'h98,14'b1,index,2'b00}
//   s_done         : stays here until the next reset (rst & s_enable_i)
//
// Fix relative to the previous revision: write_index was 8 bits wide, so the
// comparison "write_index < 256" was always true; the counter wrapped from
// 255 to 0 and s_done could never be reached. It is now 9 bits wide and only
// its low 8 bits are used as buffer index / address field.
// task_result_buffer_init now uses non-blocking assignments like every other
// write to result_buffer in this clocked process.
//
// fft_ovf1_i and fft_ovf2_i are not used; fft_shift_o is only ever driven to 0.
module mkg_core
(
	input             clk,
	input             rst,

	input             s_enable_i,
	input      [31:0] s_config_i,
	output reg [31:0] s_status_o,

	input             fft_ovf1_i,
	input             fft_ovf2_i,
	input             fft_ready_i,
	input      [7:0]  fft_addr_i,
	input      [13:0] fft_dor_i,
	input      [13:0] fft_doi_i,
	output reg        fft_en_o,
	output reg        fft_start_o,
	output reg [3:0]  fft_shift_o,
	output reg [9:0]  fft_dr_o,
	output reg [9:0]  fft_di_o,

	input             m_ack_write_i,
	input             m_ack_read_i,
	input      [31:0] m_dat_i,
	output reg        m_write_o,
	output reg        m_read_o,
	output reg [31:0] m_addr_o,
	output reg [31:0] m_dat_o
);


//=== local fsm state defines ========
//= core_status
parameter s_idle = 8'b0000_0000;
parameter s_read_ram = 8'b0000_0001;
parameter s_write_buffer = 8'b0000_0010;
parameter s_write_ram = 8'b0000_0100;
parameter s_done = 8'b0000_1000;
reg [7:0] core_status = s_idle;
//= read_ram_status
parameter s_read_ram_start = 8'b0000_0000;
parameter s_read_ram_wait_master_ack = 8'b0000_0001;
parameter s_read_ram_write_fft = 8'b0000_0010;
parameter s_read_ram_change_index = 8'b0000_0100;
reg [7:0] read_ram_status = s_read_ram_start;
//= write_buffer_status
parameter s_write_buffer_wait_ready = 8'b0000_0000;
parameter s_write_buffer_write = 8'b0000_0001;
reg [7:0] write_buffer_status = s_write_buffer_wait_ready;
//= write_ram_status
parameter s_write_ram_start = 8'b0000_0000;
parameter s_write_ram_wait_master_ack = 8'b0000_0001;
parameter s_write_ram_change_index = 8'b0000_0010;
reg [7:0] write_ram_status = s_write_ram_start;

//=== local regs ========
reg 		start_flag;
reg [15:0]	read_index;   // words fetched from RAM so far
reg [15:0]	buffer_index; // FFT output words captured so far
reg [8:0]	write_index;  // words written back so far; 9 bits so that 256 is representable

reg [31:0]	result_buffer[255:0];

reg [31:0]	dat_read;     // last word returned by the master

//============ local logic ====
// Debug interface: the four FSM registers are reported to the slave.
always @(posedge clk)
begin
	s_status_o <= {core_status,read_ram_status,write_buffer_status,write_ram_status};
end

// Start command: bit 0 of the config word, qualified by the slave's enable.
always @(posedge clk)
begin
	if(rst)
		begin
			start_flag <= 1'b0;
		end
	else
		begin
			if(s_enable_i)
				start_flag <= s_config_i[0];
			else
				start_flag <= 1'b0;
		end
end


// Top-level FSM. Note that reset is only honoured together with s_enable_i.
always @(posedge clk)
begin
	if(rst & s_enable_i)
		begin
			task_reset();
		end
	else
		begin
			case (core_status)
			s_idle:         task_idle();
			s_read_ram:     task_read_ram();
			s_write_buffer: task_write_buffer(); // wait for fft ready, then capture its output
			s_write_ram:    task_write_ram();
			s_done:         task_done();
			default:        core_status <= s_idle;
			endcase
		end
end

//=== reset action ===
task automatic task_reset; // bring every FSM, index and output to its initial value
begin
	core_status <= s_idle;
	read_ram_status <= s_read_ram_start;
	write_buffer_status <= s_write_buffer_wait_ready;
	write_ram_status <= s_write_ram_start;

	read_index <= 16'h0;
	buffer_index <= 16'h0;
	write_index <= 9'h0;

	fft_en_o <= 1'b0; // fft side
	fft_start_o <= 1'b0;
	fft_dr_o <= 10'b0;
	fft_di_o <= 10'b0;
	fft_shift_o <= 4'b0;

	m_write_o <= 1'b0; // master side
	m_read_o <= 1'b0;
	m_addr_o <= 32'b0;
	m_dat_o <= 32'b0;

	task_result_buffer_init();
end
endtask //task_reset

//===== top tasks ================================

task automatic task_idle; // wait for the start flag
begin
	task_reset(); // keeps everything initialised while idle; core_status is overridden below
	if(start_flag)
		core_status <= s_read_ram;
	else
		core_status <= s_idle;
end
endtask //task_idle


task automatic task_read_ram; // fetch the input words from RAM and feed them to the fft
begin
	if(fft_ready_i) // fft already reports ready: go straight to capturing its output
		begin
			core_status <= s_write_buffer;
			write_buffer_status <= s_write_buffer_write;
		end
	else if(read_index < 16'd256)
		begin
			case (read_ram_status)
			s_read_ram_start:
				begin
					m_addr_o <= {8'h98,6'b0,read_index,2'b0};
					m_read_o <= 1'b1; // one-clock read request to the master
					fft_en_o <= 1'b0;

					read_ram_status <= s_read_ram_wait_master_ack;
				end

			s_read_ram_wait_master_ack:
				begin
					m_read_o <= 1'b0; // request is a pulse
					m_addr_o <= 32'b0;

					if(16'b0 == read_index)
						begin
							fft_start_o <= 1'b1; // start the fft with the first word
						end

					if(m_ack_read_i)
						begin
							dat_read <= m_dat_i; // capture the word from the master
							read_ram_status <= s_read_ram_write_fft;
						end
					else
						begin
							read_ram_status <= s_read_ram_wait_master_ack;
						end
				end

			s_read_ram_write_fft:
				begin
					fft_en_o <= 1'b1;
					fft_dr_o <= dat_read[25:16];
					fft_di_o <= dat_read[9:0];

					read_ram_status <= s_read_ram_change_index;
				end

			s_read_ram_change_index:
				begin
					if(16'b0 == read_index)
						begin
							fft_start_o <= 1'b0; // end of the start pulse
						end
					fft_en_o <= 1'b0;
					read_index <= read_index + 1'b1;

					read_ram_status <= s_read_ram_start;
				end

			default:
				begin
					read_ram_status <= s_read_ram_start;
				end

			endcase
		end
	else
		begin // all 256 words delivered
			read_ram_status <= s_read_ram_start;
			core_status <= s_write_buffer;
			fft_en_o <= 1'b1;
		end
end
endtask //task_read_ram


task automatic task_write_buffer; // capture the fft output into result_buffer
begin
	if(buffer_index < 16'd256)
		begin
			case (write_buffer_status)
			s_write_buffer_wait_ready:
				begin
					if(fft_ready_i)
						write_buffer_status <= s_write_buffer_write;
					else
						write_buffer_status <= s_write_buffer_wait_ready;
				end

			s_write_buffer_write:
				begin
					// one word per clock, stored at the address supplied by the fft
					result_buffer[fft_addr_i] <= {2'b00,fft_dor_i,2'b00,fft_doi_i};
					buffer_index <= buffer_index + 1'b1;
				end

			default:
				begin
					write_buffer_status <= s_write_buffer_wait_ready;
				end
			endcase
		end
	else
		begin
			write_buffer_status <= s_write_buffer_wait_ready;
			core_status <= s_write_ram;
		end
end
endtask //task_write_buffer

task automatic task_write_ram; // write result_buffer back to RAM through the master
begin
	if(write_index < 9'd256)
		begin
			case (write_ram_status)
			s_write_ram_start:
				begin
					m_dat_o <= result_buffer[write_index[7:0]];
					m_addr_o <= {8'h98,14'b1,write_index[7:0],2'b00};
					m_write_o <= 1'b1; // one-clock write request to the master

					write_ram_status <= s_write_ram_wait_master_ack;
				end

			s_write_ram_wait_master_ack:
				begin
					m_dat_o <= 32'b0; // request is a pulse
					m_write_o <= 1'b0;
					m_addr_o <= 32'b0;

					if(m_ack_write_i)
						write_ram_status <= s_write_ram_change_index;
					else
						write_ram_status <= s_write_ram_wait_master_ack;
				end

			s_write_ram_change_index:
				begin
					write_index <= write_index + 1'b1;

					write_ram_status <= s_write_ram_start;
				end

			default:
				begin
					write_ram_status <= s_write_ram_start;
				end
			endcase
		end
	else
		begin // all 256 results written
			write_ram_status <= s_write_ram_start;
			core_status <= s_done;
		end
end
endtask //task_write_ram


task automatic task_done; // hold until the next reset
begin
	core_status <= s_done;
end
endtask //task_done
//================================= top tasks end ==================

//== sub task used by reset ==============================
// Clears only the first eight buffer entries (as before); the remaining
// entries are overwritten in s_write_buffer before they are read.
task automatic task_result_buffer_init;
begin
	result_buffer[0] <= 32'h0; result_buffer[1] <= 32'h0;
	result_buffer[2] <= 32'h0; result_buffer[3] <= 32'h0;
	result_buffer[4] <= 32'h0; result_buffer[5] <= 32'h0;
	result_buffer[6] <= 32'h0; result_buffer[7] <= 32'h0;
end
endtask //task_result_buffer_init

endmodule


/************** EOF ****************/


 

 

3>mkg_core的仿真

 

4>mkg_wb_slave和mkg_core的联合仿真

 

6,FFT256模块的fsm设计,rtl编码,和仿真及和mkg_wb_slave,mkg_core的联合仿真

1>FFT256的fsm设计

可以从opencores的网站download或者从官网下载:
http://unicore.co.ua/index.php?page=free-ips&hl=en

2>FFT256的rtl编码

http://unicore.co.ua/index.php?page=free-ips&hl=en

3>FFT256的仿真

fft256的reset和start的timing:

fft256的ready timing:

 

4>FFT256和mkg_wb_slave,mkg_core的联合仿真

reset和start信号:

ready信号:

write_ram信号:

done信号:

 


7,mkg_wb_master模块的fsm设计,rtl编码,和仿真及和mkg_wb_slave,mkg_core的联合仿真

1>mkg_wb_master的fsm设计

与mkg_wb_slave类似。

 

2>mkg_wb_master的rtl编码

 

 

`timescale 1ns/1ps
// mkg_wb_master: single-access Wishbone master driven by one-clock
// write_i / read_i request pulses.
//
// A request seen in Idle (exactly one of write_i / read_i high) launches one
// bus cycle with addr_i (and dat_i for writes). The bus outputs are held
// until wb_ack_i is sampled high; the cycle is then dropped and a one-clock
// ack_write_o or ack_read_o pulse is returned (dat_o carries wb_dat_i on
// reads). All outputs are registered and decoded from next_state.
//
// Fixes relative to the previous revision
//   * the next-state process now assigns next_state on every path (the case
//     statement had no default, which infers a latch);
//   * wb_sel_o is driven to 4'b1111 for the duration of a transfer; it used
//     to stay 0, i.e. no byte lane was ever marked valid.
//
// NOTE(review): wb_err_i and wb_rty_i are not evaluated, so a cycle that is
// terminated with ERR or RTY instead of ACK waits forever - confirm that the
// interconnect can never answer that way.
module mkg_wb_master
(
	input             wb_clk,
	input             wb_rst,

	output reg [31:0] wb_adr_o,
	output reg [31:0] wb_dat_o,
	output reg [3:0]  wb_sel_o,
	output reg        wb_we_o,
	output reg        wb_cyc_o,
	output reg        wb_stb_o,
	output reg [2:0]  wb_cti_o,
	output reg [1:0]  wb_bte_o,

	input      [31:0] wb_dat_i,
	input             wb_ack_i,
	input             wb_err_i,
	input             wb_rty_i,

	//internal signals
	input             write_i,
	input             read_i,
	input      [31:0] addr_i,
	input      [31:0] dat_i,
	output reg        ack_write_o,
	output reg        ack_read_o,
	output reg [31:0] dat_o
);

// One-hot FSM encoding.
parameter Idle=     12'b0000_0000_0001;
//parameter R_Idle=   12'b0000_0000_0010;
parameter R_Ready=  12'b0000_0000_0100;
parameter R_Wait=   12'b0000_0000_1000;
parameter R_Done=   12'b0000_0001_0000;
//parameter W_Idle=   12'b0000_0010_0000;
parameter W_Ready=  12'b0000_0100_0000;
parameter W_Wait=   12'b0000_1000_0000;
parameter W_Done=   12'b0001_0000_0000;

reg [11:0] state,next_state;

// State register.
always @(posedge wb_clk)
	if (wb_rst)
		state <= Idle;
	else
		state <= next_state;

// Next-state logic.
always @(*)
begin
	next_state = Idle; // default: Idle itself, both Done states, illegal encodings

	case (state)
	Idle:
		begin
			if (write_i && !read_i)
				next_state = W_Ready;
			else if (!write_i && read_i)
				next_state = R_Ready;
		end
	W_Ready:
		next_state = W_Wait;
	W_Wait:
		begin
			if (wb_ack_i)
				next_state = W_Done;
			else
				next_state = W_Wait;
		end
	R_Ready:
		next_state = R_Wait;
	R_Wait:
		begin
			if (wb_ack_i)
				next_state = R_Done;
			else
				next_state = R_Wait;
		end
	default:
		next_state = Idle;
	endcase
end

// Registered outputs.
always @(posedge wb_clk)
	if (wb_rst)
		begin
			wb_we_o  <= 1'b0;
			wb_cyc_o <= 1'b0;
			wb_stb_o <= 1'b0;
			wb_adr_o <= 32'b0;
			wb_dat_o <= 32'b0;
			wb_sel_o <= 4'b0;
			wb_cti_o <= 3'b0;
			wb_bte_o <= 2'b0;

			ack_write_o <= 1'b0;
			ack_read_o  <= 1'b0;
			dat_o       <= 32'b0;
		end
	else
		begin
			// The internal handshake outputs are pulses: low unless a Done
			// state overrides them below.
			ack_write_o <= 1'b0;
			ack_read_o  <= 1'b0;
			dat_o       <= 32'b0;

			case (next_state)
			W_Ready: // launch a write cycle
				begin
					wb_we_o  <= 1'b1;
					wb_cyc_o <= 1'b1;
					wb_stb_o <= 1'b1;
					wb_adr_o <= addr_i;
					wb_dat_o <= dat_i;
					wb_sel_o <= 4'b1111;
				end
			R_Ready: // launch a read cycle
				begin
					wb_we_o  <= 1'b0;
					wb_cyc_o <= 1'b1;
					wb_stb_o <= 1'b1;
					wb_adr_o <= addr_i;
					wb_dat_o <= 32'b0;
					wb_sel_o <= 4'b1111;
				end
			W_Wait, R_Wait:
				begin
					// bus outputs keep their values until wb_ack_i arrives
				end
			W_Done:
				begin
					wb_we_o  <= 1'b0;
					wb_cyc_o <= 1'b0;
					wb_stb_o <= 1'b0;
					wb_adr_o <= 32'b0;
					wb_dat_o <= 32'b0;
					wb_sel_o <= 4'b0;

					ack_write_o <= 1'b1;
				end
			R_Done:
				begin
					wb_we_o  <= 1'b0;
					wb_cyc_o <= 1'b0;
					wb_stb_o <= 1'b0;
					wb_adr_o <= 32'b0;
					wb_dat_o <= 32'b0;
					wb_sel_o <= 4'b0;

					ack_read_o <= 1'b1;
					dat_o      <= wb_dat_i; // sampled while wb_ack_i is high
				end
			default: // Idle, or anything unexpected
				begin
					wb_we_o  <= 1'b0;
					wb_cyc_o <= 1'b0;
					wb_stb_o <= 1'b0;
					wb_adr_o <= 32'b0;
					wb_dat_o <= 32'b0;
					wb_sel_o <= 4'b0;
					wb_cti_o <= 3'b0;
					wb_bte_o <= 2'b0;
				end
			endcase
		end

endmodule


/************** EOF ****************/


 

3>mkg_wb_master的仿真

wishbone信号,与slave类似。

 

 

8,模块的整体仿真和综合

1>叶子模块的封装

mkg_top模块:

见下图:

mkg_test模块:

见下图:

2>整体仿真架构


3>综合

mkg_test模块:

 

 mkg_top模块:

9,linux下的driver的设计和编码

虽然driver不是硬件人员的工作,但是rtl的设计者对driver的重要程度是毋庸置疑的。
做好人要做到底。
操作过程请参考:
http://blog.csdn.net/rill_zhen/article/details/8700937

现在把code list如下:

ip_mkg.c:



/*
*
* rill mkg driver
*
*/
#include <linux/vmalloc.h>
#include <linux/slab.h>

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/fs.h>
#include <asm/uaccess.h> /* get_user and put_user */
//#include <linux/clk.h>
//#include <linux/ioport.h>
#include <asm/io.h> /*ioremap*/
#include <linux/platform_device.h> /*cleanup_module*/
#include <linux/delay.h>
#include <asm-generic/io.h>

#include "ip_mkg.h"



/* Kernel virtual address of the mapped MKG_MEM_BASE window (NULL while unmapped). */
void	__iomem 	*g_mkg_mem_base = NULL;
/* Kernel virtual address of the mapped MKG_CORE_BASE window (NULL while unmapped). */
void  __iomem   *g_mkg_core_base = NULL;

static int device_open(struct inode *inode, struct file *file)
{
	g_mkg_mem_base = ioremap(MKG_MEM_BASE,MKG_MEM_LEN);
	g_mkg_core_base = ioremap (MKG_CORE_BASE, MKG_CORE_LEN);
	
	if(NULL == g_mkg_mem_base)
	{
		printk(KERN_ERR "mkg mem open ioremap error!\n");
		return -1;
	}
	else
	{
		printk("mkg mem ioremap addr:%d!\n",(int)g_mkg_mem_base);
	} 
		if(NULL == g_mkg_core_base)
	{
		printk(KERN_ERR "mkg core open ioremap error!\n");
		return -1;
	}
	else
	{
		printk("mkg core ioremap addr:%d!\n",(int)g_mkg_core_base);
	}
 
	return 0;
}

/*
 * device_release - release() handler of the ip_mkg character device.
 *
 * Nothing is torn down when the device node is closed; always reports success.
 */
static int device_release(struct inode *inode, struct file *file)
{
	const int status = 0;

	return status;
}


static ssize_t device_read(struct file *filp, char *buffer, size_t length, loff_t *offset)
{
	/*int ret_val = 0;

	char * data = NULL;
	
	data = (char*)kmalloc(4, GFP_KERNEL);
	if((ret_val = copy_from_user(new_regs, (struct reg_data*)ioctl_param, sizeof(struct reg_data))) != 0) 

	ioread32(g_mkg_mem_base+length);
	printk("============read:%d\n",);*/
	
	return 1;
}

static ssize_t device_write(struct file *filp, const char *buffer, size_t count, loff_t *offset)
{
	//iowrite32(2,g_mkg_mem_base);
	return 1;
}

long device_ioctl(struct file *file, unsigned int ioctl_num, unsigned long ioctl_param)
{
#if 0

   int ret_val = 0;
   unsigned int ret = 0;
   struct reg_data *new_regs;
   printk("ioctl======\n");

   switch(ioctl_num)
   {
      case IOCTL_REG_SET:
	  {
		 new_regs = (struct reg_data*)kmalloc(sizeof(struct reg_data), GFP_KERNEL);
		 if((ret_val = copy_from_user(new_regs, (struct reg_data*)ioctl_param, sizeof(struct reg_data))) != 0) 
		 	{
			    kfree(new_regs);
			    printk(KERN_ERR " error copy line_datafrom user.\n");
				return -1;
		 	}

			//iowrite16(new_regs->value,g_mkg_mem_base+new_regs->addr);
		 kfree(new_regs);
     }
	 break;

	case IOCTL_REG_GET:
	{
	 new_regs = (struct reg_data*)kmalloc(sizeof(struct reg_data), GFP_KERNEL);
	 if((ret_val = copy_from_user(new_regs, (struct reg_data*)ioctl_param, sizeof(struct reg_data))) != 0) 
	 	{
		    kfree(new_regs);
		    printk(KERN_ERR " error copy line_datafrom user.\n");
			return -1;
	 	}

		//ret = ioread16(g_mkg_mem_base+new_regs->addr);
	 	kfree(new_regs);
		return ret;
	}
	break;
      
   }
#endif

  return -1;
}

/*
 * File operations of the ip_mkg character device; this is the table that
 * init_module() hands to register_chrdev().
 */
struct file_operations our_file_ops = {
  .owner          = THIS_MODULE,
  .open           = device_open,
  .release        = device_release,
  .read           = device_read,
  .write          = device_write,
  .unlocked_ioctl = device_ioctl,
};



void test(void)
{   
  int write_data[256]={
  	0x69016901, 0x8b014401, 0xa7011a01, 0xbd01ee00, 0xcd01bf00, 0xd7018f00, 0xdb015e00, 0xd8012e00, 0xcf010000, 
  	0xc101d403, 0xad01ab03, 0x94018603, 0x78016503, 0x58014803, 0x35013203, 0x11012003, 0xec001403, 0xc7000e03, 
  	0xa3000c03, 0x80001003, 0x5f001903, 0x42002503, 0x28003603, 0x12004803, 0x00005e03, 0xf3037403, 0xe9038b03,
  	0xe403a303, 0xe303b903, 0xe603ce03, 0xec03e203, 0xf503f203, 0x00000000, 0x0d000a00, 0x1c001200, 0x2b001700, 
  	0x3a001800, 0x49001600, 0x56001100, 0x62000900, 0x6c000000, 0x7400f503, 0x7900e803, 0x7c00db03, 0x7b00cd03, 
  	0x7800c003, 0x7300b303, 0x6b00a803, 0x61009f03, 0x56009703, 0x4a009203, 0x3c008f03, 0x2f008e03, 0x21009003,
  	0x15009503, 0x09009c03, 0x0000a403, 0xf803af03, 0xf203ba03, 0xef03c603, 0xee03d303, 0xef03e003, 0xf303ec03, 
  	0xf903f703, 0x00000000, 0x09000700, 0x14000d00, 0x20001100, 0x2d001200, 0x3a001100, 0x46000e00, 0x52000800, 
  	0x5c000000, 0x6400f703, 0x6b00eb03, 0x7000df03, 0x7200d103, 0x7100c403, 0x6e00b603, 0x6900aa03, 0x61009f03, 
  	0x58009503, 0x4d008d03, 0x40008803, 0x33008503, 0x25008403, 0x18008703, 0x0b008c03, 0x00009403, 0xf7039e03,
  	0xef03aa03, 0xea03b703, 0xe803c603, 0xe903d503, 0xee03e403, 0xf603f303, 0x00000000, 0x0e000b00, 0x1e001400,
  	0x32001a00, 0x47001d00, 0x5d001c00, 0x75001700, 0x8c000d00, 0xa2000000, 0xb800ee03, 0xcb00d803, 0xdb00be03, 
  	0xe700a103, 0xf0008003, 0xf4005d03, 0xf2003903, 0xec001403, 0xe000ef02, 0xcf00cb02, 0xb800a802, 0x9b008802, 
  	0x7a006c02, 0x55005302, 0x2c003f02, 0x00003102, 0xd2032802, 0xa2032502, 0x71032902, 0x41033302, 0x12034302,
  	0xe6025902, 0xbc027502, 0x97029702, 0x7502bc02, 0x5902e602, 0x43021203, 0x33024103, 0x29027103, 0x2502a203,
  	0x2802d203, 0x31020000, 0x3f022c00, 0x53025500, 0x6c027a00, 0x88029b00, 0xa802b800, 0xcb02cf00, 0xef02e000,
  	0x1403ec00, 0x3903f200, 0x5d03f400, 0x8003f000, 0xa103e700, 0xbe03db00, 0xd803cb00, 0xee03b800, 0x0000a200, 
  	0x0d008c00, 0x17007500, 0x1c005d00, 0x1d004700, 0x1a003200, 0x14001e00, 0x0b000e00, 0x00000000, 0xf303f603, 
  	0xe403ee03, 0xd503e903, 0xc603e803, 0xb703ea03, 0xaa03ef03, 0x9e03f703, 0x94030000, 0x8c030b00, 0x87031800, 
  	0x84032500, 0x85033300, 0x88034000, 0x8d034d00, 0x95035800, 0x9f036100, 0xaa036900, 0xb6036e00, 0xc4037100,
  	0xd1037200, 0xdf037000, 0xeb036b00, 0xf7036400, 0x00005c00, 0x08005200, 0x0e004600, 0x11003a00, 0x12002d00, 
  	0x11002000, 0x0d001400, 0x07000900, 0x00000000, 0xf703f903, 0xec03f303, 0xe003ef03, 0xd303ee03, 0xc603ef03, 
  	0xba03f203, 0xaf03f803, 0xa4030000, 0x9c030900, 0x95031500, 0x90032100, 0x8e032f00, 0x8f033c00, 0x92034a00, 
  	0x97035600, 0x9f036100, 0xa8036b00, 0xb3037300, 0xc0037800, 0xcd037b00, 0xdb037c00, 0xe8037900, 0xf5037400, 
  	0x00006c00, 0x09006200, 0x11005600, 0x16004900, 0x18003a00, 0x17002b00, 0x12001c00, 0x0a000d00, 0x00000000, 
  	0xf203f503, 0xe203ec03, 0xce03e603, 0xb903e303, 0xa303e403, 0x8b03e903, 0x7403f303, 0x5e030000, 0x48031200, 
  	0x36032800, 0x25034200, 0x19035f00, 0x10038000, 0x0c03a300, 0x0e03c700, 0x1403ec00, 0x20031101, 0x32033501, 
  	0x48035801, 0x65037801, 0x86039401, 0xab03ad01, 0xd403c101, 0x0000cf01, 0x2e00d801, 0x5e00db01, 0x8f00d701, 
  	0xbf00cd01, 0xee00bd01, 0x1a01a701, 0x44018b01
  	};
  int read_rslt[256];
  int loop1=	0;
  int loop2=	0;
  int loop3=	0;
  int loop4=  0;
  int loop5=  0;
  int temp=		0;
  
  printk("<----ip_mkg test start---->\n");
  for(loop1=0;loop1<256;loop1++)
  read_rslt[loop1]=0x98766789;
  printk("<----the initialization of result --->\n");
  
  for(loop2=0;loop2<256;loop2++)
   {
     iowrite32(write_data[loop2],g_mkg_mem_base+(loop2*4));
   }
   printk("<----write orignal data --->\n");
   iowrite32(0x01000000,g_mkg_core_base+0x4);
   printk("<---write control data --->\n");
   //printk("<----write control data: 0x01000000 end value:0x%x ---->\n",ioread32(g_mkg_core_base+4));
  // iowrite32(0x03000000,g_mkg_core_base+0x8);
  // printk("<----write control data: 0x03000000 end value:0x%x---->\n",ioread32(g_mkg_core_base+8));
   //  mdelay(100);
   //  mdelay(100);
   //  mdelay(100);
   //  mdelay(100);
   //   printk("<----delay ends ---->\n"); 
   // temp=ioread32(g_mkg_core_base);
   // printk("<-------my core status:0x%x--->\n",temp);
   /*
  while(1)
  {
  	temp=ioread32(g_mkg_mem_base+0x804);
  	if(temp==0x10101010)
  		break;
        printk("<-------my core status:0x%x--->\n",temp);
  	mdelay(1);	
  } 
      printk("<----waiting ends ---->\n");
   */
   mdelay(100);   
   printk("<----delay ends ---->\n"); 

  for(loop3=0;loop3<256;loop3++)
  {
  	read_rslt[loop3]=ioread32(g_mkg_mem_base+0x00000400+(loop3*4));
  }

     printk("<----read rslt from ram ---->\n");
      temp=ioread32(g_mkg_mem_base+0x00000800);
    printk("<-------my clock cnt:0x%x--->\n",temp);
    for(loop4=0;loop4<256;loop4++)
  {
  	  printk("====mem read addr==0x%x==mem value:0x%x==\n",loop4,read_rslt[loop4]);
  }

   


  

  
	/*
         for(loop2=0;loop2<256;loop2++)
    {
         temp=ioread32(temp_addr);
        printk("====mem read addr==0x%x==mem value:0x%x==\n",temp_addr,temp);
		temp_addr=temp_addr+4;
    
    }
	 udelay(1000);
    printk("<----ip_mkg read initial value ends---->\n");
	
    temp_addr=g_mkg_mem_base;
    for(loop=0;loop<256;loop++)
    {
    	
    	iowrite32(loop,temp_addr);
    	printk("====mem write addr==0x%x==mem value:0x%x==\n",temp_addr,loop);
		temp_addr=temp_addr+4;
    }
        udelay(1000);
	printk("<----ip_mkg write end---->\n\n\n");
	temp_addr=g_mkg_mem_base;
	
      for(loop1=0;loop1<256;loop1++)
    {
      temp=ioread32(temp_addr);
        printk("====mem read addr==0x%x==mem value:0x%x==\n",temp_addr,temp);
		temp_addr=temp_addr+4;
    
    }
    printk("<----ip_mkg test end---->\n");
    #endif
    
	int loop = 0;
	unsigned int phy_addr1 = 0;
	unsigned int phy_addr2 = 0;
	int * virtual_addr1 = NULL;
	int * virtual_addr2 = NULL;



	printk("<----ip_mkg test start---->\n");

	//=====ip_mkg reg test========================================================
	#if 1
	printk("reg test start==\n");
	iowrite32(0x11223344,g_mkg_mem_base);

	iowrite32(0x00000097,g_mkg_mem_base+0x10);
	iowrite32(0x03000000,g_mkg_mem_base+4);

	printk("reg test start1==\n");
	printk("reg test start2==\n");
	printk("reg test start3==\n");

	for(loop=0;loop<7;loop++)
	printk("====reg addr==0x%x==reg value:0x%x==\n",loop*4,ioread32(g_mkg_mem_base+4*loop));
	#endif
	//=========================================================================

	//============mem write test
	#if 0
	printk("mem write test start==\n");
	iowrite32(0x97000004,g_mkg_mem_base);
	
	iowrite32(0x2,g_mkg_mem_base+0xc);
	printk("======reg:c value:0x%x==\n",ioread32(g_mkg_mem_base+0xc));
	printk("======reg:14 value:0x%x==\n",ioread32(g_mkg_mem_base+0x14));
	printk("======reg:18 value:0x%x==\n",ioread32(g_mkg_mem_base+0x18));
	printk("======reg:1c value:0x%x==\n",ioread32(g_mkg_mem_base+0x1c));
	printk("======reg:20 value:0x%x==\n",ioread32(g_mkg_mem_base+0x20));
	printk("======reg:24 value:0x%x==\n",ioread32(g_mkg_mem_base+0x24));
	
	for(loop = 0;loop<10;loop++)
	printk("wait=write=\n");
	printk("wait=write=\n");
	iowrite32(0x1,g_mkg_mem_base+0xc);
	printk("======reg:c value:0x%x==\n",ioread32(g_mkg_mem_base+0xc));
	for(loop = 0;loop<10;loop++)
	printk("wait=read=\n");
	printk("wait=read=\n");
	
	printk("======reg:10 value:0x%x==\n",ioread32(g_mkg_mem_base+0x10));

	printk("======reg:c value:0x%x==\n\n",ioread32(g_mkg_mem_base+0xc));
	#endif

	//============mem read test
	#if 0
	printk("mem read test start==\n");
	virtual_addr1 = (int *)kmalloc(sizeof(int), GFP_KERNEL);
	virtual_addr2 = (int *)kmalloc(sizeof(int), GFP_KERNEL);
	*virtual_addr1 = 0x55;
	*virtual_addr2 = 0x66;
	
	phy_addr1 = virt_to_phys(virtual_addr1);
	phy_addr2 = virt_to_phys(virtual_addr2);

	
	printk("virtual addr1:0x%x==phy addr1:0x%x==\n",(int)virtual_addr1,phy_addr1);
	printk("virtual addr2:0x%x==phy addr2:0x%x==\n",(int)virtual_addr2,phy_addr2);



	iowrite32(phy_addr1,g_mkg_mem_base);
	iowrite32(0x1,g_mkg_mem_base+0xc);
	printk("wait=read=\n");
	printk("======reg:0 value:0x%x==\n",ioread32(g_mkg_mem_base));
	printk("======reg:c value:0x%x==\n",ioread32(g_mkg_mem_base+0xc));
	printk("====phy addr1==0x%x==ram value:0x%x==\n",phy_addr1,ioread32(g_mkg_mem_base+0x10));
	printk("======reg:c value:0x%x==\n\n",ioread32(g_mkg_mem_base+0xc));



	iowrite32(phy_addr2,g_mkg_mem_base);
	iowrite32(0x1,g_mkg_mem_base+0xc);
	printk("wait=2=\n");
	printk("======reg:0 value:0x%x==\n",ioread32(g_mkg_mem_base));
	printk("======reg:c value:0x%x==\n",ioread32(g_mkg_mem_base+0xc));
	printk("====phy addr2==0x%x==ram value:0x%x==\n",phy_addr2,ioread32(g_mkg_mem_base+0x10));
	printk("======reg:c value:0x%x==\n\n",ioread32(g_mkg_mem_base+0xc));


	
	kfree(virtual_addr1);
	kfree(virtual_addr2);
	#endif
	printk("<----ip_mkg test end---->\n");  */

}






int init_module()
{
	int ret_val;
	int ret;
	int ret2;
	void __iomem *ret_from_request;
	void __iomem *ret_from_request2;


	//=== Allocate character device 
	ret_val = register_chrdev(MAJOR_NUM, DEVICE_NAME, &our_file_ops);
	if (ret_val < 0)
	{
		printk(KERN_ALERT " device %s failed(%d)\n", DEVICE_NAME, ret_val);
		return ret_val;
	}

	ret = check_mem_region(MKG_MEM_BASE, MKG_MEM_LEN);
	if (ret < 0) 
	{
		printk(KERN_ERR "mkg check_mem_region bussy error!\n");
		return -1;
	}

	ret_from_request = request_mem_region(MKG_MEM_BASE, MKG_MEM_LEN, "ip_mkg");
	
	ret2 = check_mem_region(MKG_CORE_BASE, MKG_CORE_LEN);
	if (ret2 < 0) 
	{
		printk(KERN_ERR "mkg check_mem_region bussy error!\n");
		return -1;
	}

	ret_from_request2 = request_mem_region(MKG_CORE_BASE, MKG_CORE_LEN, "ip_mkg");

	//===ioremap mkg registers

	g_mkg_mem_base = ioremap(MKG_MEM_BASE,MKG_MEM_LEN);
	if(NULL == g_mkg_mem_base)
	{
		printk(KERN_ERR "mkg mem ioremap error!\n");
		return -1;
	}
	else
	{
		;//printk("mkg ioremap addr:%d!\n",(unsigned int)g_mkg_mem_base);
	}
	
	g_mkg_core_base = ioremap(MKG_CORE_BASE,MKG_CORE_LEN);
	if(NULL == g_mkg_core_base)
	{
		printk(KERN_ERR "mkg core ioremap error!\n");
		return -1;
	}
	else
	{
		;//printk("mkg ioremap addr:%d!\n",(unsigned int)g_mkg_mem_base);
	}

	
  
	printk("mkg module init done!\n");


	test();

	return 0;
}

void cleanup_module()
{
	release_mem_region(MKG_MEM_BASE, MKG_MEM_LEN);
  release_mem_region(MKG_CORE_BASE,MKG_CORE_LEN);
	unregister_chrdev(MAJOR_NUM, DEVICE_NAME);
}

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Rill zhen:rill_zhen@126.com");




ip_mkg.h


/* ip_mkg.h - constants and shared types for the ip_mkg driver. */
#ifndef __IP_MKG_H__
#define __IP_MKG_H__

/* Character device major number and name passed to register_chrdev(). */
#define MAJOR_NUM	102
#define DEVICE_NAME	"ip_mkg"
/* Physical base and byte length of the RAM window that the driver requests
 * and ioremaps; 0x98000000 is also the data_adr_start of the mkg_ram_wb
 * hardware module, whose highest register sits at offset 0x804. */
#define MKG_MEM_BASE 0x98000000
#define MKG_MEM_LEN	3072
/* Physical base and byte length of the second window the driver maps
 * (g_mkg_core_base). */
#define MKG_CORE_BASE 0x97000000
#define MKG_CORE_LEN 64
/* ioctl command numbers - presumably "write register" / "read register";
 * NOTE(review): confirm against the driver's ioctl handler. */
#define IOCTL_REG_SET 0
#define IOCTL_REG_GET 1



/* Address/value pair - presumably the argument exchanged with user space
 * by the IOCTL_REG_* commands; TODO confirm against the ioctl handler. */
struct reg_data 
{
	unsigned short addr;
	int value;
};

#endif



10,FPGA验证

前面只是仿真,要想让它work,还要挂到arbiter_dbus上才行。
请参考:

http://blog.csdn.net/rill_zhen/article/details/8722664


http://blog.csdn.net/rill_zhen/article/details/8558463


我将可以work的整个工程也传上来了:

要想让一个工程能够work,道理很简单,但是有很多细节需要做,上面的内容是在仿真之后写的,

在做了很多工作后才能在板子上work,有些地方与上面的内容稍微有些改动。

这个工程经过了用modelsim的仿真,在xilinx ZYNQ板子上用chipscope的仿真,经过ORPSoC的altera 的FPGA的板级验证。

主要做了如下优化:

1,将内部的mem,改为调用altera的库,减少le的使用量。

2,修改arbiter_dbus的仲裁策略为轮转。

3,对mkg_core模块进行了优化。


由于上传文件大小有限,我分成了两部分:

工程的第一部分:

http://download.csdn.net/detail/rill_zhen/5435013

工程的第二部分:

http://download.csdn.net/detail/rill_zhen/5435107

此外还有对应的linux的driver:

http://download.csdn.net/detail/rill_zhen/5435175

验证如下图:

可以与之前仿真时的fft计算结果对比,看到和仿真的结果相同。

注意:此次驱动读取计算结果采用的是延时/轮询的形式,如果采用中断方式,请参考:

http://blog.csdn.net/rill_zhen/article/details/8894856



11,硬件与软件的比较

前面介绍的都是硬件实现FFT运算,如果将这个IP作为一个硬件加速器来用的话,它和软件实现的加速效果如何呢?下面我们就做一个比较。

1>基本思想

同样的计算量,分别统计硬件用时和软件用时,加速比=软件用时 / 硬件用时

2>实现算法

1》硬件:在IP core内部增加一个counter,从reset开始到done结束,记录cycle数量,保存在一个寄存器中,驱动读取这个寄存器的值,并打印出来,然后根据时钟频率(50MHz),计算出硬件用时。

2》软件:两次调用gettimeofday()函数,计算差值,即软件用时。

3》具体操作步骤,请参考:

http://blog.csdn.net/rill_zhen/article/details/8700937

3>结果

1》硬件:“<-------my clock cnt:0x94740200--->”,需要注意的是这个数和硬件内部的大小端不一致,需要转换一下,即正确的数值是0x00027494,十进制是160916,硬件用时(ms)=(160916/50M)*1000=3.218 ms,手动10次运行取平均值为3.12 ms.

2》软件:"Used Time:144.6",运行100次,取平均值,即软件用时为144.6 ms.

3》对比:如下图


4>具体代码

1》硬件部分,

mkg_ram_wb.v:


// mkg_ram_wb - Wishbone slave front-end for two external 256 x 32-bit RAMs
// (an input "data" RAM and an output "rslt" RAM) plus two read-only
// registers: a cycle counter (cnt_adr) and a status word (status_adr).
//
// Bus behaviour
//   * Reads of the data/rslt windows take a fixed number of cycles: the RAM
//     read strobe is pulsed for one cycle, two wait states follow, and the
//     RAM output (data_q / rslt_q) is returned together with wb_ack_o.
//   * Writes pulse the RAM write strobe for one cycle and are acknowledged
//     in the following cycle (wb_dat_o carries error_code during the ack).
//   * The RAM word index is wb_adr_i[9:2].
//   * wb_err_o and wb_rty_o are never asserted.
//
// Cycle counter
//   Counting starts with the first bus write to data_adr_start and stops at
//   the first bus write to rslt_adr_end.  After that the counter is frozen
//   and status reads 32'h0101_0101 until the next reset.
module mkg_ram_wb 
(
	wb_clk,			
	wb_rst,		
		
	wb_dat_i,			
	wb_adr_i,			
	wb_sel_i,		
	wb_cti_i,	
	wb_bte_i,		
	wb_we_i,		
	wb_cyc_i,		
	wb_stb_i,	
		
	wb_dat_o,		
	wb_ack_o,		
	wb_err_o,                    
	wb_rty_o,
	data_address,
	data_out,
	data_rden,
	data_wren,
	data_q,
	rslt_address,
	rslt_out,
	rslt_rden,
	rslt_wren,
	rslt_q
);
// ---- Wishbone slave interface ----
input             wb_clk;
input             wb_rst;

input  [31:0]     wb_adr_i;
input             wb_stb_i;
input             wb_cyc_i;
input  [2:0]      wb_cti_i;	// not used by this module
input  [1:0]      wb_bte_i;	// not used by this module
input  [31:0]     wb_dat_i;
input  [3:0]      wb_sel_i;	// not used: writes are always full words
input             wb_we_i;

output reg [31:0] wb_dat_o;
output reg        wb_ack_o;
output reg        wb_err_o;
output reg        wb_rty_o;

// ---- port towards the input-data RAM ----
output reg [7:0]  data_address;
output reg [31:0] data_out;
output reg        data_rden;
output reg        data_wren;
input      [31:0] data_q;

// ---- port towards the result RAM ----
output reg [7:0]  rslt_address;
output reg [31:0] rslt_out;
output reg        rslt_rden;
output reg        rslt_wren;
input      [31:0] rslt_q;

// ---- address map ----
parameter my_ram_adr     = 8'h98;
parameter data_adr_start = 32'h9800_0000;
parameter data_adr_end   = 32'h9800_03fc;
parameter rslt_adr_start = 32'h9800_0400;
parameter rslt_adr_end   = 32'h9800_07fc;
parameter error_code     = 32'habcd_dcba;
parameter Numb           = 256;
parameter cnt_adr        = 32'h9800_0800;
parameter status_adr     = 32'h9800_0804;

// ---- bus FSM state encoding (binary) ----
parameter Idle             = 5'b00000;
parameter Read_Data        = 5'b00001;
parameter Read_Rslt        = 5'b00010;
parameter Write_Data       = 5'b00011;
parameter Write_Rslt       = 5'b00100;
parameter Read_Data_Pause1 = 5'b00101;
parameter Read_Data_Pause2 = 5'b00110;
parameter Write_Data_Pause = 5'b00111;
parameter Write_Rslt_Pause = 5'b01000;
parameter Read_Rslt_Pause1 = 5'b01001;
parameter Read_Rslt_Pause2 = 5'b01010;
parameter Read_Data_Pause3 = 5'b01011;
parameter Read_Rslt_Pause3 = 5'b01100;
parameter Read_Cnt         = 5'b01101;
parameter Read_Status      = 5'b01110;
parameter Read_Cnt_Pause   = 5'b01111;
parameter Read_Cnt_Done    = 5'b10000;

// ---- cycle-counter FSM state encoding ----
parameter c_idle = 2'b00;
parameter c_cnt  = 2'b01;
parameter c_done = 2'b10;

reg [4:0]  state, next_state;
reg [1:0]  c_state, c_next_state;
reg [31:0] cnt;
reg [31:0] status;
reg [1:0]  pause_cnt;

// ---- request decoding shared by both FSMs ----
wire wb_req    = wb_stb_i && wb_cyc_i;
wire wb_rd     = wb_req && !wb_we_i;
wire wb_wr     = wb_req &&  wb_we_i;
wire hit_data  = (wb_adr_i >= data_adr_start) && (wb_adr_i <= data_adr_end);
wire hit_rslt  = (wb_adr_i >= rslt_adr_start) && (wb_adr_i <= rslt_adr_end);

//====================================================================
// Cycle counter
//====================================================================
always @(posedge wb_clk)
	if (wb_rst)
		c_state <= c_idle;
	else
		c_state <= c_next_state;

always @(*)
begin
	case (c_state)
	c_idle:
		// first write to the start of the data window starts the count
		if (wb_wr && wb_adr_i == data_adr_start)
			c_next_state = c_cnt;
		else
			c_next_state = c_idle;
	c_cnt:
		// write to the last result word stops the count
		if (wb_wr && wb_adr_i == rslt_adr_end)
			c_next_state = c_done;
		else
			c_next_state = c_cnt;
	c_done:
		c_next_state = c_done;	// sticky until reset
	default:
		// 2'b11 is not a legal state; the default keeps this block
		// purely combinational (no inferred latch) and recovers to idle.
		c_next_state = c_idle;
	endcase
end

always @(posedge wb_clk)
	if (wb_rst)
	begin
		cnt    <= 0;
		status <= 0;
	end
	else
	case (c_next_state)
	c_cnt:
	begin
		cnt    <= cnt + 1;
		status <= 0;
	end
	c_done:
	begin
		cnt    <= cnt;
		status <= 32'h0101_0101;
	end
	default:	// c_idle
	begin
		cnt    <= 0;
		status <= 0;
	end
	endcase

//====================================================================
// Bus FSM: state register
//====================================================================
always @(posedge wb_clk)
begin
	if (wb_rst)
		state <= Idle;
	else
		state <= next_state;
end

//====================================================================
// Bus FSM: next-state logic
//====================================================================
always @(*)
begin
	case (state)
	Idle:
	begin
		if (wb_rd && hit_data)
			next_state = Read_Data;
		else if (wb_rd && hit_rslt)
			next_state = Read_Rslt;
		// The exact status address has to be tested before the
		// open-ended ">= cnt_adr" range below, otherwise the range
		// would swallow it and Read_Status could never be entered.
		else if (wb_rd && wb_adr_i == status_adr)
			next_state = Read_Status;
		// Any other read at or above cnt_adr returns the counter.
		else if (wb_rd && wb_adr_i >= cnt_adr)
			next_state = Read_Cnt;
		else if (wb_wr && hit_data)
			next_state = Write_Data;
		else if (wb_wr && hit_rslt)
			next_state = Write_Rslt;
		else
			next_state = Idle;
	end

	// writes: strobe -> ack
	Write_Data:       next_state = Write_Data_Pause;
	Write_Rslt:       next_state = Write_Rslt_Pause;
	Write_Data_Pause: next_state = Idle;
	Write_Rslt_Pause: next_state = Idle;

	// RAM reads: strobe -> wait -> wait -> ack
	Read_Data:        next_state = Read_Data_Pause1;
	Read_Data_Pause1: next_state = Read_Data_Pause2;
	Read_Data_Pause2: next_state = Read_Data_Pause3;
	Read_Data_Pause3: next_state = Idle;

	Read_Rslt:        next_state = Read_Rslt_Pause1;
	Read_Rslt_Pause1: next_state = Read_Rslt_Pause2;
	Read_Rslt_Pause2: next_state = Read_Rslt_Pause3;
	Read_Rslt_Pause3: next_state = Idle;

	// counter read: wait until pause_cnt saturates, then ack
	Read_Cnt:         next_state = Read_Cnt_Pause;
	Read_Cnt_Pause:
	begin
		if (pause_cnt < 2'b11)
			next_state = Read_Cnt_Pause;
		else
			next_state = Read_Cnt_Done;
	end
	Read_Cnt_Done:    next_state = Idle;

	// status read is acknowledged immediately
	Read_Status:      next_state = Idle;

	default:          next_state = Idle;
	endcase
end

//====================================================================
// Bus FSM: registered outputs (decoded from next_state so that the
// outputs line up with the state they belong to)
//====================================================================
always @(posedge wb_clk)
begin
	if (wb_rst)
	begin
		wb_dat_o     <= 0;
		wb_ack_o     <= 0;
		wb_err_o     <= 0;
		wb_rty_o     <= 0;

		data_address <= 0;
		data_out     <= 0;
		data_rden    <= 0;
		data_wren    <= 0;

		rslt_address <= 0;
		rslt_out     <= 0;
		rslt_rden    <= 0;
		rslt_wren    <= 0;

		pause_cnt    <= 0;
	end
	else
	begin
		// Defaults: everything idle.  A state only lists the signals it
		// drives differently; the later non-blocking assignment wins.
		// pause_cnt deliberately has no default: it holds its value
		// unless one of the Read_Cnt* states changes it.
		wb_dat_o     <= 0;
		wb_ack_o     <= 0;
		wb_err_o     <= 0;
		wb_rty_o     <= 0;

		data_address <= 0;
		data_out     <= 0;
		data_rden    <= 0;
		data_wren    <= 0;

		rslt_address <= 0;
		rslt_out     <= 0;
		rslt_rden    <= 0;
		rslt_wren    <= 0;

		case (next_state)
		Write_Data:
		begin
			data_address <= wb_adr_i[9:2];
			data_out     <= wb_dat_i;
			data_wren    <= 1'b1;
		end
		Write_Rslt:
		begin
			rslt_address <= wb_adr_i[9:2];
			rslt_out     <= wb_dat_i;
			rslt_wren    <= 1'b1;
		end
		Read_Data:
		begin
			data_address <= wb_adr_i[9:2];
			data_rden    <= 1'b1;
		end
		Read_Rslt:
		begin
			rslt_address <= wb_adr_i[9:2];
			rslt_rden    <= 1'b1;
		end
		Read_Data_Pause3:
		begin
			wb_ack_o <= 1'b1;
			wb_dat_o <= data_q;
		end
		Read_Rslt_Pause3:
		begin
			wb_ack_o <= 1'b1;
			wb_dat_o <= rslt_q;
		end
		Write_Data_Pause,
		Write_Rslt_Pause:
		begin
			wb_ack_o <= 1'b1;
			wb_dat_o <= error_code;
		end
		Read_Cnt:
		begin
			pause_cnt <= 0;
		end
		Read_Cnt_Pause:
		begin
			pause_cnt <= pause_cnt + 1;
		end
		Read_Cnt_Done:
		begin
			wb_dat_o  <= cnt;
			wb_ack_o  <= 1'b1;
			pause_cnt <= 0;
		end
		Read_Status:
		begin
			wb_dat_o <= status;
			wb_ack_o <= 1'b1;
		end
		default:
			;	// Idle and the intermediate wait states: defaults only
		endcase
	end
end
endmodule



2》软件部分代码:

fft256.c:


#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include <sys/timeb.h>
#include <time.h>

/* One complex sample: real and imaginary part, both double precision. */
typedef struct {
	double real;	/* real part */
	double img;	/* imaginary part */
} COMPLEX;
/* Local seconds/microseconds pair, available under the typedef name
 * `timeval` (this is not the `struct timeval` tag). */
typedef struct {
	long tv_sec;	/* whole seconds */
	long tv_usec;	/* microseconds */
} timeval;

#define PI 3.14159265358979323846

void FFT(COMPLEX*,int nfft);
void IFFT(COMPLEX*,int nfft); //inverse FFT
void common_fft(COMPLEX*,int nfft,int isign);
COMPLEX EE(COMPLEX a,COMPLEX b);

/*
 * Software FFT benchmark.
 *
 * Runs a 256-point FFT RUNS times on a ramp input (x[i] = i) and prints the
 * total wall-clock time of all runs in microseconds.  Buffer allocation and
 * input setup are part of the timed loop.
 *
 * Returns 0 on success, 1 if the sample buffer cannot be allocated.
 */
int main(int argc,char* argv[]) {
	enum { RUNS = 100 };	/* number of timed FFT runs */

	struct timeval tpstart, tpend;
	int i;
	int Nx;
	int NFFT;
	COMPLEX  *x;
	double timeuse;
	int count;

	(void)argc;
	(void)argv;

	Nx=256;
	printf("Nx = %d\n",Nx);
	gettimeofday(&tpstart,NULL);
	for( count = 0; count < RUNS; count++){

		/* calculate NFFT as the next higher power of 2 >= Nx
		 * (integer doubling: no floating-point rounding involved) */
		for(NFFT=1;NFFT<Nx;NFFT<<=1)
			;

		/* allocate memory for NFFT complex numbers */
		x=(COMPLEX*)malloc(NFFT*sizeof(COMPLEX));
		if(x==NULL)
		{
			fprintf(stderr,"malloc of %d samples failed\n",NFFT);
			return 1;
		}

		/* input test data: ramp 0..Nx-1 */
		for(i=0;i<Nx;i++)
		{
			x[i].real=i;
			x[i].img=0.0;
		}
		/* zero-pad up to NFFT (a no-op while Nx is a power of two) */
		for(;i<NFFT;i++)
		{
			x[i].real=0.0;
			x[i].img=0.0;
		}

		/* calculate FFT */
		FFT(x,NFFT);

		/* release this run's buffer so the loop does not leak */
		free(x);
	}
	gettimeofday(&tpend,NULL);

	/* elapsed time of all runs, in microseconds */
	timeuse=1000000.0*(tpend.tv_sec-tpstart.tv_sec)+tpend.tv_usec-tpstart.tv_usec;

    printf("Used Time:%lf\n",timeuse);


    //	system("pause");
	return 0;
	
}

/* In-place transform of x[0..nfft-1]: runs the shared kernel with isign = 1. */
void FFT(COMPLEX* x, int nfft)
{
	const int direction = 1;

	common_fft(x, nfft, direction);
}

/* In-place inverse transform of x[0..nfft-1]: runs the shared kernel with
 * isign = -1, then divides every sample by nfft. */
void IFFT(COMPLEX* x,int nfft)
{
	int idx = 0;

	common_fft(x, nfft, -1);

	/* normalisation: scale both components of each sample by 1/nfft */
	while (idx < nfft)
	{
		COMPLEX *sample = &x[idx];

		sample->real /= nfft;
		sample->img  /= nfft;
		++idx;
	}
}

/*
 * fft kernel: in-place radix-2 decimation-in-time transform.
 *
 * x     - array of nfft complex samples, overwritten with the result
 * nfft  - number of samples; must be a power of two
 * isign - 1 for FFT, -1 for IFFT (selects the sign of the twiddle angle:
 *         the per-stage factor is cos(PI/lei*isign) + j*sin(PI/lei*isign))
 *
 * A NULL buffer, nfft < 2 or an nfft that is not a power of two leaves x
 * untouched: a single point is its own transform, and the other cases
 * would otherwise never terminate or index past the end of the buffer.
 */
void common_fft(COMPLEX* x,int nfft,int isign)
{
	int i,j=0,k;
	int stage,le,lei,ip;
	COMPLEX t,u,w;

	if(!x || nfft<2 || (nfft&(nfft-1))!=0)
		return;

	/* bit-reversal permutation of the input */
	for(i=0;i<nfft-1;i++)
	{
		if(i<j)
		{
			t=x[j];
			x[j]=x[i];
			x[i]=t;
		}
		k=nfft/2;
		while(k<=j)
		{
			j-=k;
			k/=2;
		}
		j+=k;
	}

	/* number of butterfly stages = log2(nfft) */
	stage=0;
	for(j=nfft;j>1;j/=2)
		stage++;

	for(k=1;k<=stage;k++)
	{
		le=2<<(k-1);	/* butterfly group size in this stage */
		lei=le/2;	/* distance between the two butterfly inputs */
		u.real=1.0;	/* u, butterfly factor initial value */
		u.img=0.0;
		w.real=cos(PI/lei*isign);	/* per-step rotation of u */
		w.img=sin(PI/lei*isign);
		for(j=0;j<=lei-1;j++)
		{
			for(i=j;i<=nfft-1;i+=le)
			{
				ip=i+lei;
				t=EE(x[ip],u);
				x[ip].real=x[i].real-t.real;
				x[ip].img=x[i].img-t.img;
				x[i].real=x[i].real+t.real;
				x[i].img=x[i].img+t.img;
			}
			u=EE(u,w);
		}
	}
}

/* Complex multiplication: returns a * b. */
COMPLEX EE(COMPLEX a,COMPLEX b)
{
	COMPLEX product = {
		.real = a.real*b.real-a.img*b.img,
		.img  = a.real*b.img+a.img*b.real
	};

	return product;
}



12,后端ASIC tapeout验证

由于这个工程是一个实验性质的,目的在于说明SOC的开发的关键技术和流程,暂时没做backend flow。

如果感兴趣,可参考:

《Advanced ASIC Chip Synthesis: Using Synopsys Design Compiler, Physical Compiler and PrimeTime》。

有中文版翻译《高级ASIC芯片综合》。

13,小结

麻雀虽小,五脏俱全。这个project,零零散散,大概用去了我三周的时间。
到目前为止,对ORPSoC已经有了一个比较清晰的感觉了,其实,有的时候,感觉很重要。
之前咱们一直在or的围墙外徘徊,做这个东西的目的一方面是自己找找感觉,另一方面也希望对同样在围墙外的哥们儿有些帮助。
如果你感觉没问题了,那咱们就找到了围墙的大门口。。。。。。

评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值