1、摘要
Verilog、时序优化、Skip_Buffer(即业界通常所说的 Skid Buffer)、valid 和 ready 信号寄存器打拍握手。
2、应用场景
用于切断 stream 握手(valid/ready)信号之间的关键组合逻辑路径,以优化时序,使设计能够运行在更高的时钟频率。具体原理此处不做赘述,详见参考链接。
3、关键截图
图1、状态转移图
图2、级联示意图
4、代码实现
//------------------------------------------------------------------------------------
// skip_buffer : single-stage valid/ready register slice with one extra holding
//               register, so that both din_rdy and dout_vld come straight from
//               flip-flops.
//
// State transition diagram ('+' = a word is pushed, '-' = a word is popped):
//
//                   /---\ +- flow
//                   |   |
//            load   |   V   fill
//   -------    +   -------    +   -------
//  |       | ---> |       | ---> |       |
//  | Empty |      | Busy  |      | Full  |
//  |       | <--- |       | <--- |       |
//   -------    -   -------    -   -------
//           unload          flush
//
// Parameters:
//   simTdly   - intra-assignment delay applied to every register update
//               (simulation only).
//   dataWidth - width of din / dout in bits.
//
// Ports:
//   clk                       - clock; every register updates on its rising edge.
//   din / din_vld / din_rdy   - upstream data, valid and (registered) ready.
//   dout / dout_vld / dout_rdy- downstream data, (registered) valid and ready.
//
// Instantiation template:
//   skip_buffer #(
//       .simTdly   (simTdly   ),
//       .dataWidth (dataWidth )
//   )
//   u_skip_buffer(
//       .clk       (clk       ),
//       .din       (din       ),
//       .din_rdy   (din_rdy   ),
//       .din_vld   (din_vld   ),
//       .dout      (dout      ),
//       .dout_rdy  (dout_rdy  ),
//       .dout_vld  (dout_vld  )
//   );
//------------------------------------------------------------------------------------
module skip_buffer #(
    parameter simTdly   = 1,
    parameter dataWidth = 8
) (
    input  wire                 clk,
    input  wire [dataWidth-1:0] din,
    output wire                 din_rdy,
    input  wire                 din_vld,
    output wire [dataWidth-1:0] dout,
    input  wire                 dout_rdy,
    output wire                 dout_vld
);

    // ------------------------------------------------------------------
    // One-hot state encoding
    // ------------------------------------------------------------------
    localparam StateWidth = 3;
    localparam [StateWidth-1:0] Empty = 3'b001;
    localparam [StateWidth-1:0] Busy  = 3'b010;
    localparam [StateWidth-1:0] Full  = 3'b100;

    // ------------------------------------------------------------------
    // Registers (initial values define the power-up state; there is no reset)
    // ------------------------------------------------------------------
    reg [StateWidth-1:0] state       = Empty;
    reg [StateWidth-1:0] state_next;
    reg                  in_ready_q  = 1'b1;                // drives din_rdy
    reg                  out_valid_q = 1'b0;                // drives dout_vld
    reg [dataWidth-1:0]  out_data_q  = {dataWidth{1'b0}};   // drives dout
    reg [dataWidth-1:0]  hold_data_q = {dataWidth{1'b0}};   // extra holding register

    assign din_rdy  = in_ready_q;
    assign dout_vld = out_valid_q;
    assign dout     = out_data_q;

    // ------------------------------------------------------------------
    // Handshake decode
    // ------------------------------------------------------------------
    wire accept   = din_vld  && din_rdy;    // a word enters this cycle
    wire drain    = dout_vld && dout_rdy;   // a word leaves this cycle

    wire in_only  =  accept && !drain;
    wire out_only = !accept &&  drain;
    wire in_out   =  accept &&  drain;

    wire st_empty = (state == Empty);
    wire st_busy  = (state == Busy);
    wire st_full  = (state == Full);

    // Named transitions, matching the arcs of the diagram above.
    wire load   = st_empty && in_only;    // Empty -> Busy
    wire flow   = st_busy  && in_out;     // Busy  -> Busy (push and pop together)
    wire fill   = st_busy  && in_only;    // Busy  -> Full
    wire unload = st_busy  && out_only;   // Busy  -> Empty
    wire flush  = st_full  && out_only;   // Full  -> Busy

    // ------------------------------------------------------------------
    // Next-state logic
    // ------------------------------------------------------------------
    always @(*) begin
        case (state)
            Empty  : state_next = load   ? Busy  : Empty;
            Busy   : state_next = unload ? Empty : (fill ? Full : Busy);
            Full   : state_next = flush  ? Busy  : Full;
            default: state_next = state;
        endcase
    end

    // ------------------------------------------------------------------
    // Datapath selects
    // ------------------------------------------------------------------
    // The output register takes the held word on a flush, otherwise din.
    wire [dataWidth-1:0] out_data_d  = flush ? hold_data_q : din;
    wire                 out_data_we = load || flow || flush;

    // ------------------------------------------------------------------
    // Clocked updates
    // ------------------------------------------------------------------
    always @(posedge clk) begin
        state       <= #simTdly state_next;
        // Ready / valid are derived from the NEXT state so that they are
        // registered outputs that line up with the state register.
        in_ready_q  <= #simTdly (state_next != Full );
        out_valid_q <= #simTdly (state_next != Empty);
        // The holding register captures din only on a fill.
        hold_data_q <= #simTdly (fill        ? din        : hold_data_q);
        out_data_q  <= #simTdly (out_data_we ? out_data_d : out_data_q );
    end

endmodule
5、仿真
//------------------------------------------------------------------------------------
// tb_skip_buffer : random-handshake testbench for skip_buffer / skip_buffer_vec.
//
// din_vld and dout_rdy are re-randomised every clock. The input data is a counter
// that advances on each accepted input word; dout_compare is a second counter that
// advances on each delivered output word. dout_err flags any delivered word that
// differs from dout_compare, and every such mismatch is printed and counted.
//
// Parameters:
//   PERIOD    - clock period in timescale units.
//   simTdly   - register update delay forwarded to the DUT and used for stimulus.
//   dataWidth - data bus width.
//   simSingle - "true" instantiates a single skip_buffer; any other value
//               instantiates skip_buffer_vec with vecNum = 4.
//------------------------------------------------------------------------------------
`timescale 1ns / 1ps
module tb_skip_buffer;
    // skip_buffer Parameters
    parameter PERIOD    = 10;
    parameter simTdly   = 1;
    parameter dataWidth = 8;
    parameter simSingle = "true";

    // skip_buffer Inputs
    reg                  clk          = 0;
    reg  [dataWidth-1:0] din          = 0;
    reg                  din_vld      = 0;
    reg                  dout_rdy     = 0;
    reg  [dataWidth-1:0] dout_compare = 0;

    // skip_buffer Outputs
    wire                 din_rdy;
    wire [dataWidth-1:0] dout;
    wire                 dout_vld;
    wire                 dout_err;

    // Number of mismatching output words observed so far.
    integer              err_cnt = 0;

    // Random valid / ready stimulus.
    always @(posedge clk) begin
        din_vld  <= #simTdly $random%2;
        dout_rdy <= #simTdly $random%2;
    end

    // Source counter (advances per accepted word) and expected-value counter
    // (advances per delivered word).
    always @(posedge clk) begin
        din          <= #simTdly (din_vld  && din_rdy ) ? din + 'd1          : din;
        dout_compare <= #simTdly (dout_vld && dout_rdy) ? dout_compare + 'd1 : dout_compare;
    end

    assign dout_err = (dout_vld && dout_rdy) && (dout != dout_compare);

    // Report every mismatch in the log so that a failure does not depend on
    // somebody spotting dout_err in the waveform.
    always @(posedge clk) begin
        if (dout_err) begin
            err_cnt = err_cnt + 1;
            $display("[%0t] ERROR: dout = 0x%0h, expected 0x%0h (mismatch #%0d)",
                     $time, dout, dout_compare, err_cnt);
        end
    end

    // Free-running clock.
    initial
    begin
        forever #(PERIOD/2) clk=~clk;
    end

    generate if (simSingle == "true") begin
        skip_buffer #(
            .simTdly   ( simTdly   ),
            .dataWidth ( dataWidth ))
        u_skip_buffer (
            .clk       ( clk                  ),
            .din       ( din  [dataWidth-1:0] ),
            .din_vld   ( din_vld              ),
            .dout_rdy  ( dout_rdy             ),
            .din_rdy   ( din_rdy              ),
            .dout      ( dout [dataWidth-1:0] ),
            .dout_vld  ( dout_vld             )
        );
    end else begin
        skip_buffer_vec #(
            .simTdly   (simTdly   ),
            .dataWidth (dataWidth ),
            .vecNum    (4         )
        )
        u_skip_buffer_vec(
            .clk       (clk       ),
            .din       (din       ),
            .din_rdy   (din_rdy   ),
            .din_vld   (din_vld   ),
            .dout      (dout      ),
            .dout_rdy  (dout_rdy  ),
            .dout_vld  (dout_vld  )
        );
    end endgenerate
endmodule
6、级联实现
//--------------------------------------------------------------------------------------------------------
// skip_buffer_vec : vecNum skip_buffer stages connected in series.
//
// din ---> (din[0], dout[0]) ---> (din[1], dout[1]) ---> (din[2], dout[2]) ---> (din[3], dout[3]) ---> dout
// vld ---> (vld[0], vld[0])  ---> (vld[1], vld[1])  ---> (vld[2], vld[2])  ---> (vld[3], vld[3])  ---> vld
// rdy <--- (rdy[0], rdy[0])  <--- (rdy[1], rdy[1])  <--- (rdy[2], rdy[2])  <--- (rdy[3], rdy[3])  <--- rdy
//
// Parameters:
//   simTdly   - register update delay forwarded to every stage (simulation only).
//   dataWidth - data bus width.
//   vecNum    - number of cascaded skip_buffer stages (the diagram shows 4).
//
// Ports: same stream interface as a single skip_buffer.
//
// Instantiation template:
//   skip_buffer_vec #(
//       .simTdly   (simTdly   ),
//       .dataWidth (dataWidth ),
//       .vecNum    (vecNum    )
//   )
//   u_skip_buffer_vec(
//       .clk       (clk       ),
//       .din       (din       ),
//       .din_rdy   (din_rdy   ),
//       .din_vld   (din_vld   ),
//       .dout      (dout      ),
//       .dout_rdy  (dout_rdy  ),
//       .dout_vld  (dout_vld  )
//   );
//--------------------------------------------------------------------------------------------------------
module skip_buffer_vec #(
    parameter simTdly   = 1,
    parameter dataWidth = 8,
    parameter vecNum    = 4
) (
    input  wire                 clk,
    input  wire [dataWidth-1:0] din,
    output wire                 din_rdy,
    input  wire                 din_vld,
    output wire [dataWidth-1:0] dout,
    input  wire                 dout_rdy,
    output wire                 dout_vld
);

    // Per-stage stream signals; index i belongs to stage i.
    wire [dataWidth-1:0] din_vec  [vecNum-1:0];
    wire [vecNum-1:0]    din_rdy_vec;
    wire [vecNum-1:0]    din_vld_vec;
    wire [dataWidth-1:0] dout_vec [vecNum-1:0];
    wire [vecNum-1:0]    dout_rdy_vec;
    wire [vecNum-1:0]    dout_vld_vec;

    // The genvar lives at module scope and the generate region holds the loop
    // directly: a bare begin/end wrapper inside generate is Verilog-2001-only
    // syntax that later standards removed.
    genvar i;
    generate
        for (i = 0; i < vecNum; i = i + 1) begin : gen_skip_buf_vec
            skip_buffer #(
                .simTdly   (simTdly   ),
                .dataWidth (dataWidth )
            )
            u_skip_buffer(
                .clk       (clk             ),
                .din       (din_vec[i]      ),
                .din_rdy   (din_rdy_vec[i]  ),
                .din_vld   (din_vld_vec[i]  ),
                .dout      (dout_vec[i]     ),
                .dout_rdy  (dout_rdy_vec[i] ),
                .dout_vld  (dout_vld_vec[i] )
            );

            // Forward path: stage 0 takes the module inputs, every later
            // stage takes the previous stage's outputs.
            if (i == 0) begin
                assign din_vec[i]     = din;
                assign din_vld_vec[i] = din_vld;
            end else begin
                assign din_vec[i]     = dout_vec[i-1];
                assign din_vld_vec[i] = dout_vld_vec[i-1];
            end

            // Backward path: the last stage takes the module's dout_rdy,
            // every earlier stage takes the next stage's din_rdy.
            if (i == vecNum-1) begin
                assign dout_rdy_vec[i] = dout_rdy;
            end else begin
                assign dout_rdy_vec[i] = din_rdy_vec[i+1];
            end
        end
    endgenerate

    // Module-level stream ports map to the two ends of the chain.
    assign din_rdy  = din_rdy_vec[0];
    assign dout_vld = dout_vld_vec[vecNum-1];
    assign dout     = dout_vec[vecNum-1];

endmodule
7、级联仿真
将 testbench(tb_skip_buffer)中的参数 simSingle 改为 "true" 以外的值(例如 "false"),使其例化 skip_buffer_vec;修改保存后,按照如下操作,查看仿真波形。