基本思路
AXI4协议有5个独立的通道,我们可以在每个时钟周期检测控制信号,从而判断该周期每个通道分别在做什么。
具体代码
noFSM_axi4_slave.v
`timescale 1ns / 1ps
// -----------------------------------------------------------------------------
// noFSM_axi4_slave
//
// AXI4 (full) memory-mapped slave backed by a 1024 x 32-bit word RAM, written
// without an explicit state machine: every channel is handled by looking at the
// VALID/READY handshakes each clock cycle.
//
// Behaviour:
//   * One write burst and one read burst can be in flight at a time (reads and
//     writes are independent of each other).
//   * AWREADY/ARREADY are raised for one cycle after AWVALID/ARVALID is seen
//     while the respective path is idle, so an address handshake takes two
//     cycles.
//   * Write data is committed to memory in the cycle of the W handshake,
//     byte-wise according to WSTRB.
//   * Read data appears one cycle after the AR handshake; RLAST accompanies
//     beat number ARLEN+1.
//   * FIXED, INCR and WRAP bursts are supported; the reserved burst encoding
//     2'b11 is treated like INCR.
//   * Only address bits [11:2] select a memory word; upper address bits are
//     ignored, so the 4 KiB memory aliases over the whole address space.
//   * BRESP/RRESP are always OKAY (2'b00).
//
// Every register is assigned from exactly one always block.
// Reset is synchronous and active low (arstn).
// -----------------------------------------------------------------------------
module noFSM_axi4_slave(
    //global sig
    input  wire        aclk,
    input  wire        arstn,
    //aw channel
    input  wire [3:0]  awid,
    input  wire [31:0] awaddr,
    input  wire [7:0]  awlen,
    input  wire [2:0]  awsize,
    input  wire [1:0]  awburst,
    input  wire        awvalid,
    output reg         awready,
    //w channel
    input  wire [31:0] wdata,
    input  wire [3:0]  wstrb,
    input  wire        wlast,
    input  wire        wvalid,
    output reg         wready,
    //b channel
    input  wire        bready,
    output reg  [3:0]  bid,
    output reg  [1:0]  bresp,
    output reg         bvalid,
    //ar channel
    input  wire [3:0]  arid,
    input  wire [31:0] araddr,
    input  wire [7:0]  arlen,
    input  wire [2:0]  arsize,
    input  wire [1:0]  arburst,
    input  wire        arvalid,
    output reg         arready,
    //r channel
    input  wire        rready,
    output reg  [3:0]  rid,
    output reg  [31:0] rdata,
    output reg  [1:0]  rresp,
    output reg         rlast,
    output reg         rvalid
    //busy sig
    //output reg rsta_busy,
    //output reg rstb_busy
);

    // ------------------------------------------------------------------
    // Storage and bookkeeping registers
    // ------------------------------------------------------------------
    // RAM: word n holds byte addresses 4n .. 4n+3
    reg [31:0] mem [0:1023];

    // Address of the beat that is transferred next
    reg [31:0] waddr_reg;
    reg [31:0] raddr_reg;

    // Number of read beats already placed on the R channel
    reg [7:0]  rcnt;

    // Burst parameters captured at the address handshake
    reg [7:0]  AWLEN;
    reg [7:0]  ARLEN;
    reg [2:0]  AWSIZE;
    reg [2:0]  ARSIZE;
    reg [1:0]  AWBURST;
    reg [1:0]  ARBURST;
    reg [3:0]  AWID;
    reg [3:0]  ARID;

    // High from the address handshake until the burst has fully completed
    // (B handshake for writes, last R handshake for reads)
    reg        wr_busy;
    reg        rd_busy;

    // ------------------------------------------------------------------
    // Handshake strobes (a transfer happens when VALID and READY are both high)
    // ------------------------------------------------------------------
    wire aw_hs  = awvalid & awready;
    wire w_hs   = wvalid  & wready;
    wire b_hs   = bvalid  & bready;
    wire ar_hs  = arvalid & arready;
    wire r_hs   = rvalid  & rready;

    // Present a new read beat: either the first beat of a burst (nothing on
    // the bus yet) or the previous, non-final beat has just been accepted.
    wire r_load = rd_busy & (~rvalid | (r_hs & ~rlast));
    // The final read beat has just been accepted.
    wire r_done = r_hs & rlast;

    // ------------------------------------------------------------------
    // Address of the beat following 'addr' within a burst.
    //   FIXED (00): address does not change.
    //   INCR  (01): aligned address plus the number of bytes per beat.
    //   WRAP  (10): like INCR, but wraps inside the naturally aligned
    //               container of (len+1) * 2**size bytes. The container
    //               size is a power of two for legal WRAP lengths
    //               (2, 4, 8, 16 beats), so it can be handled by masking.
    //   2'b11 is reserved by the protocol and handled like INCR.
    // ------------------------------------------------------------------
    function [31:0] next_addr;
        input [31:0] addr;
        input [7:0]  len;
        input [2:0]  size;
        input [1:0]  burst;
        reg   [31:0] beat_bytes;
        reg   [31:0] incr_addr;
        reg   [31:0] wrap_mask;
        begin
            beat_bytes = 32'd1 << size;
            // Beats after the first are always aligned to the transfer size
            incr_addr  = ((addr >> size) << size) + beat_bytes;
            wrap_mask  = ((({24'd0, len}) + 32'd1) << size) - 32'd1;
            case (burst)
                2'b00   : next_addr = addr;
                2'b10   : next_addr = (addr & ~wrap_mask) | (incr_addr & wrap_mask);
                default : next_addr = incr_addr;
            endcase
        end
    endfunction

    // ==================================================================
    // Write path
    // ==================================================================

    // AW channel: answer a pending AWVALID with a one-cycle AWREADY pulse,
    // but only while no write burst is outstanding.
    always @(posedge aclk) begin
        if (!arstn)
            awready <= 1'b0;
        else
            awready <= awvalid & ~awready & ~wr_busy;
    end

    // Capture the write burst parameters at the AW handshake and track
    // whether a write burst is outstanding.
    always @(posedge aclk) begin
        if (!arstn) begin
            wr_busy <= 1'b0;
            AWLEN   <= 8'b0;
            AWSIZE  <= 3'b0;
            AWBURST <= 2'b01;
            AWID    <= 4'b0;
        end else if (aw_hs) begin
            wr_busy <= 1'b1;
            AWLEN   <= awlen;
            AWSIZE  <= awsize;
            AWBURST <= awburst;
            AWID    <= awid;
        end else if (b_hs) begin
            wr_busy <= 1'b0;
        end
    end

    // W channel: accept data from the AW handshake until the beat flagged
    // with WLAST has been transferred; advance the write address per beat.
    always @(posedge aclk) begin
        if (!arstn) begin
            wready    <= 1'b0;
            waddr_reg <= 32'b0;
        end else if (aw_hs) begin
            wready    <= 1'b1;
            waddr_reg <= awaddr;
        end else if (w_hs) begin
            waddr_reg <= next_addr(waddr_reg, AWLEN, AWSIZE, AWBURST);
            if (wlast)
                wready <= 1'b0;
        end
    end

    // Memory: preload the first eight words during reset, afterwards commit
    // each accepted write beat byte-wise according to WSTRB.
    always @(posedge aclk) begin
        if (!arstn) begin
            mem[0] <= 32'h33221100;
            mem[1] <= 32'h77665544;
            mem[2] <= 32'hBBAA9988;
            mem[3] <= 32'hFFEEDDCC;
            mem[4] <= 32'h00112233;
            mem[5] <= 32'h44556677;
            mem[6] <= 32'h8899AABB;
            mem[7] <= 32'hCCDDEEFF;
        end else if (w_hs) begin
            if (wstrb[0]) mem[waddr_reg[11:2]][7:0]   <= wdata[7:0];
            if (wstrb[1]) mem[waddr_reg[11:2]][15:8]  <= wdata[15:8];
            if (wstrb[2]) mem[waddr_reg[11:2]][23:16] <= wdata[23:16];
            if (wstrb[3]) mem[waddr_reg[11:2]][31:24] <= wdata[31:24];
        end
    end

    // B channel: issue an OKAY response once the last write beat has been
    // accepted and hold it until the master takes it.
    always @(posedge aclk) begin
        if (!arstn) begin
            bvalid <= 1'b0;
            bresp  <= 2'b00;
            bid    <= 4'b0;
        end else if (w_hs && wlast) begin
            bvalid <= 1'b1;
            bresp  <= 2'b00;
            bid    <= AWID;
        end else if (b_hs) begin
            bvalid <= 1'b0;
        end
    end

    // ==================================================================
    // Read path
    // ==================================================================

    // AR channel: answer a pending ARVALID with a one-cycle ARREADY pulse,
    // but only while no read burst is outstanding.
    always @(posedge aclk) begin
        if (!arstn)
            arready <= 1'b0;
        else
            arready <= arvalid & ~arready & ~rd_busy;
    end

    // Capture the read burst parameters at the AR handshake and track
    // whether a read burst is outstanding.
    always @(posedge aclk) begin
        if (!arstn) begin
            rd_busy <= 1'b0;
            ARLEN   <= 8'b0;
            ARSIZE  <= 3'b0;
            ARBURST <= 2'b01;
            ARID    <= 4'b0;
        end else if (ar_hs) begin
            rd_busy <= 1'b1;
            ARLEN   <= arlen;
            ARSIZE  <= arsize;
            ARBURST <= arburst;
            ARID    <= arid;
        end else if (r_done) begin
            rd_busy <= 1'b0;
        end
    end

    // R channel: fetch one word per beat, flag the final beat with RLAST and
    // release the bus after that beat has been accepted. RVALID/RDATA are
    // held stable while the master keeps RREADY low.
    always @(posedge aclk) begin
        if (!arstn) begin
            rvalid    <= 1'b0;
            rlast     <= 1'b0;
            rdata     <= 32'h0;
            rresp     <= 2'b00;
            rid       <= 4'b0;
            rcnt      <= 8'd0;
            raddr_reg <= 32'hFFFFFFFF;
        end else if (ar_hs) begin
            // New burst: remember where it starts, no beat issued yet
            raddr_reg <= araddr;
            rcnt      <= 8'd0;
        end else if (r_load) begin
            rvalid    <= 1'b1;
            rdata     <= mem[raddr_reg[11:2]];
            rresp     <= 2'b00;
            rid       <= ARID;
            // rcnt counts beats issued so far; beat index ARLEN is the last one
            rlast     <= (rcnt == ARLEN);
            rcnt      <= rcnt + 8'd1;
            raddr_reg <= next_addr(raddr_reg, ARLEN, ARSIZE, ARBURST);
        end else if (r_done) begin
            rvalid    <= 1'b0;
            rlast     <= 1'b0;
        end
    end

endmodule
tb.v
`timescale 1ns / 1ps
// -----------------------------------------------------------------------------
// axi_slave_try1_test
//
// Directed testbench for noFSM_axi4_slave.
//   1. Releases reset at 22 ns (clock period is 10 ns, rising edges at 5, 15, ...).
//   2. Issues a 5-beat INCR write burst starting at address 0 with 2-byte
//      beats (awsize = 1); WSTRB alternates between 4'b0011 and 4'b1100.
//   3. Issues a 5-beat INCR read burst starting at address 0 with 4-byte
//      beats (arsize = 2) so the written data can be inspected.
//   4. Logs every B and R handshake and ends the simulation.
// Stimulus is applied with fixed delays, not in response to the slave's
// READY signals (apart from the two wait statements).
// -----------------------------------------------------------------------------
module axi_slave_try1_test();

    // global
    reg         aclk;
    reg         arstn;
    // aw channel
    reg  [3:0]  awid;
    reg  [31:0] awaddr;
    reg  [7:0]  awlen;
    reg  [2:0]  awsize;
    reg  [1:0]  awburst;
    reg         awvalid;
    wire        awready;
    // w channel
    reg  [31:0] wdata;
    reg  [3:0]  wstrb;
    reg         wlast;
    reg         wvalid;
    wire        wready;
    // b channel
    wire [3:0]  bid;
    wire [1:0]  bresp;
    wire        bvalid;
    reg         bready;
    // ar channel
    reg  [3:0]  arid;
    reg  [31:0] araddr;
    reg  [7:0]  arlen;
    reg  [2:0]  arsize;
    reg  [1:0]  arburst;
    reg         arvalid;
    wire        arready;
    // r channel
    wire [3:0]  rid;
    wire [31:0] rdata;
    wire [1:0]  rresp;
    wire        rlast;
    wire        rvalid;
    reg         rready;

    // device under test
    noFSM_axi4_slave my_interface_full(
        .aclk    (aclk),
        .arstn   (arstn),
        .awid    (awid),
        .awaddr  (awaddr),
        .awlen   (awlen),
        .awsize  (awsize),
        .awburst (awburst),
        .awvalid (awvalid),
        .awready (awready),
        .wdata   (wdata),
        .wstrb   (wstrb),
        .wlast   (wlast),
        .wvalid  (wvalid),
        .wready  (wready),
        .bid     (bid),
        .bresp   (bresp),
        .bvalid  (bvalid),
        .bready  (bready),
        .arid    (arid),
        .araddr  (araddr),
        .arlen   (arlen),
        .arsize  (arsize),
        .arburst (arburst),
        .arvalid (arvalid),
        .arready (arready),
        .rid     (rid),
        .rdata   (rdata),
        .rresp   (rresp),
        .rlast   (rlast),
        .rvalid  (rvalid),
        .rready  (rready)
    );

    // Initial values of every stimulus signal (reset asserted)
    initial begin
        aclk    = 1'b0;
        arstn   = 1'b0;
        awid    = 4'b1010;       // compare with the output bid
        awaddr  = 32'hFFFFFF00;
        awlen   = 8'b0;
        awsize  = 3'b0;
        awburst = 2'b01;         // INCR
        awvalid = 1'b0;
        wdata   = 32'h0;         // defined value instead of X before the first beat
        wstrb   = 4'b1111;
        wlast   = 1'b0;
        wvalid  = 1'b0;
        bready  = 1'b1;
        arid    = 4'b0101;       // compare with the output rid
        araddr  = 32'hFFFFFF00;
        arlen   = 8'b0;
        arsize  = 3'b0;
        arburst = 2'b01;         // INCR
        arvalid = 1'b0;
        rready  = 1'b1;
    end

    // 100 MHz clock
    always #5 aclk = ~aclk;

    // Log completed write responses and read beats so the run can be checked
    // without opening the waveform.
    always @(posedge aclk) begin
        if (arstn) begin
            if (bvalid && bready)
                $display("[%0t] B beat: bid=%h bresp=%b", $time, bid, bresp);
            if (rvalid && rready)
                $display("[%0t] R beat: rid=%h rdata=%h rresp=%b rlast=%b",
                         $time, rid, rdata, rresp, rlast);
        end
    end

    // Stimulus
    initial begin
        #22 arstn = 1'b1;        // release reset before anything else

        // ---- burst write: 5 beats of 2 bytes starting at address 0 ----
        #10;
        wstrb   = 4'b0011;
        awaddr  = 32'd0;
        awlen   = 8'd4;
        awsize  = 3'd1;
        awvalid = 1'b1;
        wait (awready == 1'b1 && aclk == 1'b1);
        #1  wvalid  = 1'b1;
        #2  wdata   = 32'hAAAA0001;
        #10 awvalid = 1'b0;
        repeat (3) begin
            #10 wdata   = wdata + 32'h11110001;
                awvalid = 1'b0;
                wstrb   = ~wstrb;   // alternate between lower and upper half-word
        end
        #10 wlast = 1'b1;           // final beat
            wdata = wdata + 32'd1;
            wstrb = ~wstrb;
        #10 wlast  = 1'b0;
            wvalid = 1'b0;

        // ---- burst read: 5 beats of 4 bytes starting at address 0 ----
        #5;
        araddr  = 32'd0;
        arlen   = 8'd4;
        arsize  = 3'd2;
        arvalid = 1'b1;
        wait (arready == 1'b1 && aclk == 1'b1);
        #1;
        #5  rready  = 1'b1;
        #5  araddr  = 32'd0;
            arvalid = 1'b0;
            arid    = 4'd15;
        #45 arvalid = 1'b0;

        // Leave time for the read burst to drain, then stop; without this the
        // free-running clock keeps the simulation alive forever.
        #50 $finish;
    end

endmodule
仿真测试结果
看波形时请注意:对于在时钟上升沿处发生变化的信号,应以边沿右侧(即变化之后)的值为准!
写时序如下:45ns写地址有效,55ns开始写入数据,95ns开始写入最后一个数据;这里测试了byte write功能,即写入时总线宽度只有2字节,而数据是4字节的;
读时序如下:115ns读地址有效,125ns开始读数据,175ns读完最后一个数据;
还有待优化的地方
1、部分信号仍然有依赖关系。真正的AXI4 SLAVE甚至可以先接收数据而后再接收数据的地址,按照本文的程序是不能实现这一点的;
2、鲁棒性较低。本文实现的AXI4 SLAVE需要依赖于主机正确发送信号而工作。若主机发送不合理信号,则会导致错误;
3、采用相对保守的策略,ready信号需要在检测到valid信号之后才拉高,因而整体输出会延迟一个时钟周期。
我近期在阅读Xilinx官方的AXI4FULL SLAVE文件,后续应该会做进一步优化。我在AXI4 FULL SLAVE的Verilog实现(二)中进行了改进。