FPGA逻辑开发中,访问PS DDR资源的方式很多,除DMA操作外,也可直接通过外部AXI FULL master进行数据搬移。
下面先附上一个网上很常见的master代码,注:该代码非本人原创,非本人原创,非本人原创
// -----------------------------------------------------------------------------
// master: AXI4 (full) master that moves data between two local FIFOs and an
// AXI slave using 64-bit INCR bursts.
//
// Write path: on WR_START, WR_LEN bytes are taken from the write FIFO
//             (WR_FIFO_*) and written starting at WR_ADRS. The transfer is
//             split into bursts of up to 256 beats (2048 bytes); the address
//             advances by 2048 after every non-final burst.
// Read path : on RD_START, RD_LEN bytes are read starting at RD_ADRS and pushed
//             into the read FIFO (RD_FIFO_*), with the same burst splitting.
//
// Lengths are byte counts; only bits [31:3] contribute to the beat count, so
// lengths are effectively handled in units of 8 bytes.
// WR_DONE / RD_DONE are high while the respective FSM sits in its DONE state
// (one ACLK cycle); WR_READY / RD_READY are high while the FSM is idle.
// -----------------------------------------------------------------------------
module master(
    // Reset, Clock
    input           ARESETN,            // asynchronous reset, active low
    input           ACLK,
    // Master write address channel
    output [0:0]    M_AXI_AWID,
    output [31:0]   M_AXI_AWADDR,
    output [7:0]    M_AXI_AWLEN,        // burst length: 0-255 (beats - 1)
    output [2:0]    M_AXI_AWSIZE,       // burst size: fixed 3'b011
    output [1:0]    M_AXI_AWBURST,      // burst type: fixed 2'b01 (incremental burst)
    output          M_AXI_AWLOCK,       // lock: fixed 1'b0
    output [3:0]    M_AXI_AWCACHE,      // cache: fixed 4'b0011
    output [2:0]    M_AXI_AWPROT,       // protect: fixed 3'b000
    output [3:0]    M_AXI_AWQOS,        // QoS: fixed 4'b0000
    output [0:0]    M_AXI_AWUSER,       // user: driven to 1'b1
    output          M_AXI_AWVALID,
    input           M_AXI_AWREADY,
    // Master write data channel
    output [63:0]   M_AXI_WDATA,
    output [7:0]    M_AXI_WSTRB,
    output          M_AXI_WLAST,
    output [0:0]    M_AXI_WUSER,
    output          M_AXI_WVALID,
    input           M_AXI_WREADY,
    // Master write response channel
    input  [0:0]    M_AXI_BID,
    input  [1:0]    M_AXI_BRESP,
    input  [0:0]    M_AXI_BUSER,
    input           M_AXI_BVALID,
    output          M_AXI_BREADY,
    // Master read address channel
    output [0:0]    M_AXI_ARID,
    output [31:0]   M_AXI_ARADDR,
    output [7:0]    M_AXI_ARLEN,
    output [2:0]    M_AXI_ARSIZE,
    output [1:0]    M_AXI_ARBURST,
    output [1:0]    M_AXI_ARLOCK,
    output [3:0]    M_AXI_ARCACHE,
    output [2:0]    M_AXI_ARPROT,
    output [3:0]    M_AXI_ARQOS,
    output [0:0]    M_AXI_ARUSER,
    output          M_AXI_ARVALID,
    input           M_AXI_ARREADY,
    // Master read data channel
    input  [0:0]    M_AXI_RID,
    input  [63:0]   M_AXI_RDATA,
    input  [1:0]    M_AXI_RRESP,
    input           M_AXI_RLAST,
    input  [0:0]    M_AXI_RUSER,
    input           M_AXI_RVALID,
    output          M_AXI_RREADY,
    // Local Bus
    input           MASTER_RST,         // synchronous: forces the write FSM back to idle
    input           WR_START,
    input  [31:0]   WR_ADRS,
    input  [31:0]   WR_LEN,
    output          WR_READY,
    output          WR_FIFO_RE,
    input           WR_FIFO_EMPTY,
    input           WR_FIFO_AEMPTY,
    input  [63:0]   WR_FIFO_DATA,
    output          WR_DONE,
    input           RD_START,
    input  [31:0]   RD_ADRS,
    input  [31:0]   RD_LEN,
    output          RD_READY,
    output          RD_FIFO_WE,
    input           RD_FIFO_FULL,
    input           RD_FIFO_AFULL,
    output [63:0]   RD_FIFO_DATA,
    output          RD_DONE,
    output [31:0]   DEBUG
);

    // -------------------------------------------------------------------------
    // Write side
    // -------------------------------------------------------------------------
    localparam S_WR_IDLE  = 3'd0;
    localparam S_WA_WAIT  = 3'd1;
    localparam S_WA_START = 3'd2;
    localparam S_WD_WAIT  = 3'd3;
    localparam S_WD_PROC  = 3'd4;
    localparam S_WR_WAIT  = 3'd5;
    localparam S_WR_DONE  = 3'd6;

    reg [2:0]  wr_state;
    reg [31:0] reg_wr_adrs;
    reg [31:0] reg_wr_len;
    reg        reg_awvalid, reg_wvalid, reg_w_last;
    reg [7:0]  reg_w_len;
    reg [7:0]  reg_w_stb;
    reg [1:0]  reg_wr_status;
    reg [3:0]  reg_w_count, reg_r_count;
    reg [7:0]  rd_chkdata, wr_chkdata;
    reg [1:0]  resp;
    reg        rd_first_data;
    reg        rd_fifo_enable;
    reg [31:0] rd_fifo_cnt;

    assign WR_DONE = (wr_state == S_WR_DONE);

    // Pop the write FIFO once when a transfer starts (rd_first_data pulse) and
    // then once per accepted W beat, as long as rd_fifo_enable is still set.
    assign WR_FIFO_RE = rd_first_data |
                        (reg_wvalid & ~WR_FIFO_EMPTY & M_AXI_WREADY & rd_fifo_enable);

    // Number of words popped from the write FIFO during the current transfer.
    // Cleared while the write FSM is idle and no pop is in progress.
    always @(posedge ACLK or negedge ARESETN)
    begin
        if (!ARESETN)
            rd_fifo_cnt <= 32'd0;
        else if (WR_FIFO_RE)
            rd_fifo_cnt <= rd_fifo_cnt + 32'd1;
        else if (wr_state == S_WR_IDLE)
            rd_fifo_cnt <= 32'd0;
    end

    // FIFO pops are enabled when a write transfer starts and disabled once
    // WR_LEN/8 words have been popped, so no word beyond this transfer is
    // consumed. The limit is taken from WR_LEN: this counter tracks the WRITE
    // FIFO, so comparing against RD_LEN (as before) was only correct when both
    // lengths happened to be equal.
    always @(posedge ACLK or negedge ARESETN)
    begin
        if (!ARESETN)
            rd_fifo_enable <= 1'b0;
        else if (wr_state == S_WR_IDLE && WR_START)
            rd_fifo_enable <= 1'b1;
        else if (WR_FIFO_RE && (rd_fifo_cnt == WR_LEN[31:3] - 32'd1))
            rd_fifo_enable <= 1'b0;
    end

    // Write state machine
    always @(posedge ACLK or negedge ARESETN) begin
        if (!ARESETN) begin
            wr_state           <= S_WR_IDLE;
            reg_wr_adrs[31:0]  <= 32'd0;
            reg_wr_len[31:0]   <= 32'd0;
            reg_awvalid        <= 1'b0;
            reg_wvalid         <= 1'b0;
            reg_w_last         <= 1'b0;
            reg_w_len[7:0]     <= 8'd0;
            reg_w_stb[7:0]     <= 8'd0;
            reg_wr_status[1:0] <= 2'd0;
            reg_w_count[3:0]   <= 4'd0;
            reg_r_count[3:0]   <= 4'd0;
            wr_chkdata         <= 8'd0;
            rd_chkdata         <= 8'd0;
            resp               <= 2'd0;
            rd_first_data      <= 1'b0;
        end else begin
            if (MASTER_RST) begin
                wr_state <= S_WR_IDLE;
            end else begin
                case (wr_state)
                    S_WR_IDLE: begin
                        // External request: latch address and length.
                        if (WR_START) begin
                            wr_state          <= S_WA_WAIT;
                            reg_wr_adrs[31:0] <= WR_ADRS[31:0];          // start address
                            reg_wr_len[31:0]  <= WR_LEN[31:0] - 32'd1;   // byte length - 1
                            rd_first_data     <= 1'b1;                   // pop the first FIFO word
                        end
                        reg_awvalid        <= 1'b0;
                        reg_wvalid         <= 1'b0;
                        reg_w_last         <= 1'b0;
                        reg_w_len[7:0]     <= 8'd0;
                        reg_w_stb[7:0]     <= 8'd0;
                        reg_wr_status[1:0] <= 2'd0;
                    end

                    // Wait before issuing a write address.
                    S_WA_WAIT: begin
                        // Proceed when the FIFO is not almost-empty, or when
                        // only the final (<= 2048-byte) burst remains.
                        if (!WR_FIFO_AEMPTY | (reg_wr_len[31:11] == 21'd0)) begin
                            wr_state <= S_WA_START;
                        end
                        rd_first_data <= 1'b0;
                    end

                    // Issue the write address.
                    S_WA_START: begin
                        wr_state    <= S_WD_WAIT;
                        reg_awvalid <= 1'b1;
                        // One fewer 2048-byte chunk remaining.
                        reg_wr_len[31:11] <= reg_wr_len[31:11] - 21'd1;
                        if (reg_wr_len[31:11] != 21'd0) begin
                            // Full burst of 256 beats.
                            reg_w_len[7:0] <= 8'hFF;
                            reg_w_last     <= 1'b0;
                            reg_w_stb[7:0] <= 8'hFF;
                        end else begin
                            // Final burst: the remaining beats (fewer than or equal to 256).
                            reg_w_len[7:0] <= reg_wr_len[10:3];
                            reg_w_last     <= 1'b1;
                            reg_w_stb[7:0] <= 8'hFF;
                        end
                    end

                    S_WD_WAIT: begin
                        // Address accepted: drop AWVALID and start the data phase.
                        if (M_AXI_AWREADY) begin
                            wr_state    <= S_WD_PROC;
                            reg_awvalid <= 1'b0;
                            reg_wvalid  <= 1'b1;
                        end
                    end

                    // Data phase: one beat per cycle in which the slave is
                    // ready and the FIFO is not empty.
                    S_WD_PROC: begin
                        if (M_AXI_WREADY & ~WR_FIFO_EMPTY) begin
                            if (reg_w_len[7:0] == 8'd0) begin
                                // Last beat of this burst transferred.
                                wr_state       <= S_WR_WAIT;
                                reg_wvalid     <= 1'b0;
                                reg_w_stb[7:0] <= 8'h00;
                            end else begin
                                reg_w_len[7:0] <= reg_w_len[7:0] - 8'd1;
                            end
                        end
                    end

                    // Wait for the write response.
                    S_WR_WAIT: begin
                        if (M_AXI_BVALID) begin
                            reg_wr_status[1:0] <= reg_wr_status[1:0] | M_AXI_BRESP[1:0];
                            if (reg_w_last) begin
                                // Whole transfer finished.
                                wr_state <= S_WR_DONE;
                            end else begin
                                // More bursts to go: advance by one full burst (2048 bytes).
                                wr_state          <= S_WA_WAIT;
                                reg_wr_adrs[31:0] <= reg_wr_adrs[31:0] + 32'd2048;
                            end
                        end
                    end

                    S_WR_DONE: begin
                        wr_state <= S_WR_IDLE;
                    end

                    default: begin
                        wr_state <= S_WR_IDLE;
                    end
                endcase
            end
        end
    end

    // Master write address / data / response outputs
    assign M_AXI_AWID         = 1'b0;
    assign M_AXI_AWADDR[31:0] = reg_wr_adrs[31:0];
    assign M_AXI_AWLEN[7:0]   = reg_w_len[7:0];
    assign M_AXI_AWSIZE[2:0]  = 3'b011;   // literal width now matches the 3-bit port
    assign M_AXI_AWBURST[1:0] = 2'b01;
    assign M_AXI_AWLOCK       = 1'b0;
    assign M_AXI_AWCACHE[3:0] = 4'b0011;
    assign M_AXI_AWPROT[2:0]  = 3'b000;
    assign M_AXI_AWQOS[3:0]   = 4'b0000;
    assign M_AXI_AWUSER[0]    = 1'b1;
    assign M_AXI_AWVALID      = reg_awvalid;
    assign M_AXI_WDATA[63:0]  = WR_FIFO_DATA[63:0];
    assign M_AXI_WSTRB[7:0]   = (reg_wvalid & ~WR_FIFO_EMPTY) ? 8'hFF : 8'h00;
    assign M_AXI_WLAST        = (reg_w_len[7:0] == 8'd0) ? 1'b1 : 1'b0;
    assign M_AXI_WUSER        = 1'b1;
    assign M_AXI_WVALID       = reg_wvalid & ~WR_FIFO_EMPTY;
    assign M_AXI_BREADY       = M_AXI_BVALID;
    assign WR_READY           = (wr_state == S_WR_IDLE) ? 1'b1 : 1'b0;

    // -------------------------------------------------------------------------
    // Read side
    // -------------------------------------------------------------------------
    localparam S_RD_IDLE  = 3'd0;
    localparam S_RA_WAIT  = 3'd1;
    localparam S_RA_START = 3'd2;
    localparam S_RD_WAIT  = 3'd3;
    localparam S_RD_PROC  = 3'd4;
    localparam S_RD_DONE  = 3'd5;

    reg [2:0]  rd_state;
    reg [31:0] reg_rd_adrs;
    reg [31:0] reg_rd_len;
    reg        reg_arvalid, reg_r_last;
    reg [7:0]  reg_r_len;

    // A read beat is transferred only when RVALID and RREADY are both high;
    // RREADY is RVALID gated by FIFO space, so this is the handshake term.
    wire rd_beat_accepted = M_AXI_RVALID & ~RD_FIFO_FULL;

    assign RD_DONE = (rd_state == S_RD_DONE);

    // Read state machine
    always @(posedge ACLK or negedge ARESETN) begin
        if (!ARESETN) begin
            rd_state          <= S_RD_IDLE;
            reg_rd_adrs[31:0] <= 32'd0;
            reg_rd_len[31:0]  <= 32'd0;
            reg_arvalid       <= 1'b0;
            reg_r_last        <= 1'b0;   // previously left without a reset value
            reg_r_len[7:0]    <= 8'd0;
        end else begin
            case (rd_state)
                S_RD_IDLE: begin
                    // External request: latch address and length.
                    if (RD_START) begin
                        rd_state          <= S_RA_WAIT;
                        reg_rd_adrs[31:0] <= RD_ADRS[31:0];
                        reg_rd_len[31:0]  <= RD_LEN[31:0] - 32'd1;
                    end
                    reg_arvalid    <= 1'b0;
                    reg_r_len[7:0] <= 8'd0;
                end

                // Wait until the read FIFO is not almost-full.
                S_RA_WAIT: begin
                    if (~RD_FIFO_AFULL) begin
                        rd_state <= S_RA_START;
                    end
                end

                // Issue the read address.
                S_RA_START: begin
                    rd_state          <= S_RD_WAIT;
                    reg_arvalid       <= 1'b1;
                    reg_rd_len[31:11] <= reg_rd_len[31:11] - 21'd1;
                    if (reg_rd_len[31:11] != 21'd0) begin
                        // Full burst of 256 beats.
                        reg_r_last     <= 1'b0;
                        reg_r_len[7:0] <= 8'd255;
                    end else begin
                        // Final burst with the remaining beats.
                        reg_r_last     <= 1'b1;
                        reg_r_len[7:0] <= reg_rd_len[10:3];
                    end
                end

                // Wait for the address to be accepted.
                S_RD_WAIT: begin
                    if (M_AXI_ARREADY) begin
                        rd_state    <= S_RD_PROC;
                        reg_arvalid <= 1'b0;
                    end
                end

                // Data phase. Only beats that were actually accepted advance
                // the FSM, so a beat stalled by a full FIFO can no longer end
                // the burst (or raise RD_DONE) before it has been transferred.
                S_RD_PROC: begin
                    if (rd_beat_accepted) begin
                        if (M_AXI_RLAST) begin
                            if (reg_r_last) begin
                                rd_state <= S_RD_DONE;
                            end else begin
                                rd_state          <= S_RA_WAIT;
                                reg_rd_adrs[31:0] <= reg_rd_adrs[31:0] + 32'd2048;
                            end
                        end else begin
                            reg_r_len[7:0] <= reg_r_len[7:0] - 8'd1;
                        end
                    end
                end

                S_RD_DONE: begin
                    rd_state <= S_RD_IDLE;
                end

                default: begin
                    rd_state <= S_RD_IDLE;
                end
            endcase
        end
    end

    // Master read address / data outputs
    assign M_AXI_ARID         = 1'b0;
    assign M_AXI_ARADDR[31:0] = reg_rd_adrs[31:0];
    assign M_AXI_ARLEN[7:0]   = reg_r_len[7:0];
    assign M_AXI_ARSIZE[2:0]  = 3'b011;
    assign M_AXI_ARBURST[1:0] = 2'b01;
    assign M_AXI_ARLOCK       = 2'b00;    // same value as before, width matches the port
    assign M_AXI_ARCACHE[3:0] = 4'b0011;
    assign M_AXI_ARPROT[2:0]  = 3'b000;
    assign M_AXI_ARQOS[3:0]   = 4'b0000;
    assign M_AXI_ARUSER[0]    = 1'b1;
    assign M_AXI_ARVALID      = reg_arvalid;
    assign M_AXI_RREADY       = rd_beat_accepted;
    assign RD_READY           = (rd_state == S_RD_IDLE) ? 1'b1 : 1'b0;
    // Write the FIFO only for accepted beats (never while it reports full).
    assign RD_FIFO_WE         = rd_beat_accepted;
    assign RD_FIFO_DATA[63:0] = M_AXI_RDATA[63:0];

    // Debug word: remaining write length bits, then both FSM states.
    assign DEBUG[31:0] = {reg_wr_len[31:8],
                          1'd0, wr_state[2:0], 1'd0, rd_state[2:0]};

endmodule
上图以写入2KB连续自然数进行行为仿真,可以发现在每次传输过程中,实际只写入了127个时钟周期的数据,而剩下的最后一拍数据在地址信息发生偏移后才拉高valid。以写入1KB数据进行仿真结果如下:
乍一看时序似乎没有什么问题,但仔细观察发现,valid信号在last信号拉高的同时拉低了:
也就是这1KB的数据传输并没有正确写入DDR。
修改方案
经过对代码的研读,主要问题由代码中的 WR_FIFO_EMPTY信号造成,有兴趣的同学可以自行研究一下代码,这里直接给出修改方案:
将该信号延后打一拍(即用寄存器延迟一个时钟周期):
// Copy of WR_FIFO_EMPTY delayed by one ACLK cycle (sampled on the rising edge).
reg WR_FIFO_EMPTY_d1;

always @(posedge ACLK)
    WR_FIFO_EMPTY_d1 <= WR_FIFO_EMPTY;
将原代码中以 WR_FIFO_EMPTY 作为判断条件的部分替换为 WR_FIFO_EMPTY_d1。
修改后仿真时序如下:
可以看到末位数据正确传输(仿真数据为0-127 64bit位宽的递增数):
硬件ila核实测:
Fin
以上为个人对该代码的一些修改建议,可以根据思路自行进行代码修改。这里也附上修改后测试通过的版本链接:AXI master接口访问PS数据。还是建议多思考,共同进步。