PCIE驱动学习之8 对几个基本的verilog模块的分析

这里开始分析一下VIVADO项目中的几个VERILOG模块:

1,频率估计模块

//
// Copyright (c) 2016-2020 Fairwaves, Inc.
// SPDX-License-Identifier: CERN-OHL-W-2.0
//

// Estimates the frequency of meas_clk relative to clk.
//
// A free-running EST_BITS-wide divider runs on clk and its MSB is brought
// into the meas_clk domain through a sync_reg. Every rising edge of the
// synchronized MSB latches the number of meas_clk cycles counted since the
// previous edge and increments a 4-bit event counter.
//
// cntr_data = { events[3:0], zero padding, latched count[EST_BITS-1:0] }
// cntr_valid is tied high; cntr_ready is not used by this module.
//
// The measurement counter has the same width as the divider, so it wraps
// when more than 2^EST_BITS meas_clk cycles fit between two edges.
module clk_estimator #(
    parameter EST_BITS = 20
)(
    input         rst,
    input         clk,

    input         meas_clk,

    input         cntr_ready,
    output        cntr_valid,
    output [31:0] cntr_data
);

// ---------------------------------------------------------------------
// clk domain: reference divider
// ---------------------------------------------------------------------
reg [EST_BITS-1:0] ref_div;

always @(posedge clk) begin
  if (rst) begin
    ref_div <= 0;
  end else begin
    ref_div <= ref_div + 1'b1;
  end
end

// ---------------------------------------------------------------------
// meas_clk domain: synchronized divider MSB and edge detection
// ---------------------------------------------------------------------
wire ref_msb_sync;

sync_reg  self_estim(
    .clk(meas_clk),
    .rst(rst),
    .in(ref_div[EST_BITS-1]),
    .out(ref_msb_sync)
);

reg                ref_msb_prev;
reg [EST_BITS-1:0] meas_cnt;      // running count of meas_clk cycles
reg [EST_BITS-1:0] meas_latched;  // count captured at the last edge
reg [3:0]          edge_events;   // number of edges seen (wraps)

// Rising edge of the synchronized divider MSB
wire ref_rise = (ref_msb_prev == 0 && ref_msb_sync == 1'b1);

always @(posedge meas_clk) begin
  if (rst) begin
    ref_msb_prev <= 0;
    meas_cnt     <= 0;
    meas_latched <= 0;
    edge_events  <= 0;
  end else begin
    ref_msb_prev <= ref_msb_sync;

    // Count by default; an edge restarts the count (last assignment wins)
    meas_cnt     <= meas_cnt + 1'b1;

    if (ref_rise) begin
      meas_cnt     <= 0;
      meas_latched <= meas_cnt;
      edge_events  <= edge_events + 1'b1;
    end
  end
end

// ---------------------------------------------------------------------
// Result word
// ---------------------------------------------------------------------
assign cntr_data  = { edge_events, {(28 - EST_BITS){1'b0}}, meas_latched };
assign cntr_valid = 1'b1;

endmodule

这里是根据用户时钟频率大致估算另外一个时钟的频率。比如我们有100M的用户接口时钟,还有RF芯片产生的采样率时钟(sample rate clk),这时候就可以使用这个模块进行大致估算。

2,一位同步模块

//
// Copyright (c) 2016-2020 Fairwaves, Inc.
// SPDX-License-Identifier: CERN-OHL-W-2.0
//

// Two flip-flop synchronizer for a single bit.
//
// Parameters:
//   INIT        - value loaded into both stages while rst is asserted
//   ASYNC_RESET - non-zero: rst is in the sensitivity list (asynchronous);
//                 zero: rst is sampled on the rising edge of clk
//
// Both stages carry the ASYNC_REG attribute. `out` is the second stage.
module sync_reg #(
   parameter INIT         = 0,
   parameter ASYNC_RESET  = 0
) (
   input  clk,
   input  rst,
   input  in,
   output out

);

(* ASYNC_REG = "TRUE" *) reg sync1;  // first stage, samples `in`
(* ASYNC_REG = "TRUE" *) reg sync2;  // second stage, drives `out`

generate
if (ASYNC_RESET) begin
  // Asynchronous reset flavour
  always @(posedge clk or posedge rst) begin
    if (rst) begin
      sync1 <= INIT;
      sync2 <= INIT;
    end else begin
      {sync2, sync1} <= {sync1, in};
    end
  end
end else begin
  // Synchronous reset flavour
  always @(posedge clk) begin
    if (rst) begin
      sync1 <= INIT;
      sync2 <= INIT;
    end else begin
      {sync2, sync1} <= {sync1, in};
    end
  end
end
endgenerate

assign out = sync2;

endmodule

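简单说就是用新的时钟去打两个拍子。这里主要要注意 (* ASYNC_REG = "TRUE" *) 这个属性:它告诉工具这两个寄存器用来接收异步信号,综合时不会对它们做优化,布局时会尽量把它们放在一起,以降低亚稳态的影响。注意它本身并不等同于把路径设置为 false path,跨时钟域路径的时序例外(如 set_false_path 或 set_max_delay)仍然需要在约束文件中另行设置。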

3,穿越时钟域的计数器

//
// Copyright (c) 2016-2020 Fairwaves, Inc.
// SPDX-License-Identifier: CERN-OHL-W-2.0
//

// Up/down counter in the inclk domain whose value is made available in the
// outclk domain through a Gray-coded, per-bit synchronized transfer.
//
// Parameters:
//   WIDTH           - width of the counter in the inclk domain
//   GRAY_BITS       - counter bits [GRAY_BITS-1:OUT_LOWER_SKIP] are Gray-encoded
//                     and transferred
//   OUT_WIDTH       - width of the value presented in the outclk domain; when it
//                     differs from GRAY_BITS the bits [OUT_WIDTH-1:GRAY_BITS] are
//                     rebuilt in the outclk domain from detected wraps
//   OUT_LOWER_SKIP  - number of low counter bits that are not transferred
//   OUT_RESET_ASYNC - non-zero: outrst is used as an asynchronous reset
//   OUT_PIPELINED   - non-zero: register the decoded value before output
//
// Ports:
//   inrst/inclk          - reset and clock of the counting domain
//   incmdvalid/incmdinc  - when incmdvalid is high the counter is incremented
//                          (incmdinc = 1) or decremented (incmdinc = 0)
//   incnt                - counter value in the inclk domain
//   outclk/outrst        - clock and reset of the observing domain
//   outcnt               - transferred counter value in the outclk domain
module cross_counter #(
   parameter WIDTH = 8,
   parameter GRAY_BITS = WIDTH,
   parameter OUT_WIDTH = WIDTH,
   parameter OUT_LOWER_SKIP = 0,
   parameter OUT_RESET_ASYNC = 0,
   parameter OUT_PIPELINED = 0
)(
   input                                 inrst,
   input                                 inclk,
   input                                 incmdvalid,
   input                                 incmdinc,
   output [WIDTH - 1:0]                  incnt,

   input                                 outclk,
   input                                 outrst,
   output [OUT_WIDTH - 1:OUT_LOWER_SKIP] outcnt
);

genvar i;

// Binary counter (inclk domain)
reg [WIDTH - 1:0] counter;

// Registered Gray code (inclk domain) and its synchronized copy (outclk domain)
reg  [GRAY_BITS - 1:OUT_LOWER_SKIP] gray_encoded;
wire [GRAY_BITS - 1:OUT_LOWER_SKIP] sync_out;

// Binary -> Gray: slice XOR (slice >> 1). The second operand is one bit
// narrower and is zero-extended at the top by the expression width rules.
wire [GRAY_BITS - 1:OUT_LOWER_SKIP] gcode =
	counter[GRAY_BITS - 1:OUT_LOWER_SKIP] ^ counter[GRAY_BITS - 1:OUT_LOWER_SKIP+1];


always @(posedge inclk) begin
  if (inrst) begin
    counter      <= 0;
    gray_encoded <= 0;
  end else begin
    if (incmdvalid) begin
      if (incmdinc) begin
        counter <= counter + 1;
      end else begin
        counter <= counter - 1;
      end
    end
    // Gray code is registered before crossing, so it lags counter by one cycle
    gray_encoded <= gcode;
  end
end

assign incnt = counter;


// One sync_reg per transferred Gray bit
generate
  for (i = OUT_LOWER_SKIP; i < GRAY_BITS; i=i+1) begin: forreg
    sync_reg #(.ASYNC_RESET(OUT_RESET_ASYNC)) sreg (
      .clk(outclk),
      .rst(outrst),
      .in(gray_encoded[i]),
      .out(sync_out[i])
    );
  end
endgenerate


// Gray -> binary in the outclk domain: the MSB passes through, every lower
// bit is the XOR of the decoded bit above it and its own Gray bit.
wire [GRAY_BITS - 1:OUT_LOWER_SKIP] outgray;
assign outgray[GRAY_BITS - 1] = sync_out[GRAY_BITS - 1];

generate
for (i = GRAY_BITS - 1; i > OUT_LOWER_SKIP; i=i-1) begin
  assign outgray[i - 1] = outgray[i] ^ sync_out[i - 1];
end
endgenerate



generate
if (OUT_PIPELINED || OUT_WIDTH != GRAY_BITS) begin
  // Registered output; the register also serves as the "previous value"
  // used for wrap detection below.
  reg [OUT_WIDTH - 1:OUT_LOWER_SKIP] oval;
  assign outcnt = oval;

  // Low part: registered copy of the decoded value
  if (OUT_RESET_ASYNC) begin
    always @(posedge outclk or posedge outrst) begin
      if (outrst) begin
        oval[GRAY_BITS - 1:OUT_LOWER_SKIP] <= 0;
      end else begin
        oval[GRAY_BITS - 1:OUT_LOWER_SKIP] <= outgray[GRAY_BITS - 1:OUT_LOWER_SKIP];
      end
    end
  end else begin
    always @(posedge outclk) begin
      if (outrst) begin
        oval[GRAY_BITS - 1:OUT_LOWER_SKIP] <= 0;
      end else begin
        oval[GRAY_BITS - 1:OUT_LOWER_SKIP] <= outgray[GRAY_BITS - 1:OUT_LOWER_SKIP];
      end
    end
  end

  if (OUT_WIDTH != GRAY_BITS) begin
    // High part: incremented whenever the previously registered low part is
    // greater than the newly decoded one. Only this upward wrap is handled;
    // the extension is never decremented.
    wire wrap_pos = (oval[GRAY_BITS - 1:OUT_LOWER_SKIP] > outgray[GRAY_BITS - 1:OUT_LOWER_SKIP]);

    if (OUT_RESET_ASYNC) begin
      always @(posedge outclk or posedge outrst) begin
        if (outrst) begin
          oval[OUT_WIDTH - 1:GRAY_BITS]      <= 0;
        end else begin
          oval[OUT_WIDTH - 1:GRAY_BITS]      <= oval[OUT_WIDTH - 1:GRAY_BITS] + wrap_pos;
        end
      end
    end else begin
      always @(posedge outclk) begin
        if (outrst) begin
          oval[OUT_WIDTH - 1:GRAY_BITS]      <= 0;
        end else begin
          oval[OUT_WIDTH - 1:GRAY_BITS]      <= oval[OUT_WIDTH - 1:GRAY_BITS] + wrap_pos;
        end
      end
    end
  end
end else begin
  // Unregistered output: decoded value is driven out combinationally
  assign outcnt = outgray[OUT_WIDTH - 1:OUT_LOWER_SKIP];
end
endgenerate


endmodule

这里分两个时钟域去看,第一个时钟域看到就是一个可以加可以减的计数器。第二个时钟域的输出就是第一个时钟域计数器的输出,这个输出保证了使用格雷码进行了时钟域的穿越。由于使用格雷码,数据一周期最多变化一位,编译软件分析到了后也不会按照穿越时钟域来报路径。

4,AXIS接口的多路器.

//
// Copyright (c) 2016-2020 Fairwaves, Inc.
// SPDX-License-Identifier: CERN-OHL-W-2.0
//

// 4-to-1 AXI-Stream multiplexer with fixed priority arbitration.
//
// One of the four slave streams (s0..s3) is routed to the master stream.
// The routed stream is chosen from the tvalid inputs with s0 having the
// highest and s3 the lowest priority. The selection may change while no
// packet has started, and on the beat that completes a packet
// (tready && tvalid && tlast on the master side).
//
// s_axis_clk - clock; s_arstn - active-low reset, sampled synchronously.
module axis_mux4(
    input         s_axis_clk,
    input         s_arstn,

    input         m_axis_tready,
    output [63:0] m_axis_tdata,
    output [7:0]  m_axis_tkeep,
    output        m_axis_tlast,
    output        m_axis_tvalid,

    output        s0_axis_tready,
    input  [63:0] s0_axis_tdata,
    input  [7:0]  s0_axis_tkeep,
    input         s0_axis_tlast,
    input         s0_axis_tvalid,

    output        s1_axis_tready,
    input  [63:0] s1_axis_tdata,
    input  [7:0]  s1_axis_tkeep,
    input         s1_axis_tlast,
    input         s1_axis_tvalid,

    output        s2_axis_tready,
    input  [63:0] s2_axis_tdata,
    input  [7:0]  s2_axis_tkeep,
    input         s2_axis_tlast,
    input         s2_axis_tvalid,

    output        s3_axis_tready,
    input  [63:0] s3_axis_tdata,
    input  [7:0]  s3_axis_tkeep,
    input         s3_axis_tlast,
    input         s3_axis_tvalid
);


localparam MUX_S0   = 2'h0;
localparam MUX_S1   = 2'h1;
localparam MUX_S2   = 2'h2;
localparam MUX_S3   = 2'h3;

reg [1:0] state;    // index of the currently routed slave stream
reg       started;  // set once a packet has started on the routed stream

// Decoded selection
wire sel_s0 = (state == MUX_S0);
wire sel_s1 = (state == MUX_S1);
wire sel_s2 = (state == MUX_S2);
wire sel_s3 = (state == MUX_S3);

// ---------------------------------------------------------------------
// Slave -> master data path
// ---------------------------------------------------------------------
assign m_axis_tdata  = sel_s0 ? s0_axis_tdata :
                       sel_s1 ? s1_axis_tdata :
                       sel_s2 ? s2_axis_tdata :
                                s3_axis_tdata;

assign m_axis_tkeep  = sel_s0 ? s0_axis_tkeep :
                       sel_s1 ? s1_axis_tkeep :
                       sel_s2 ? s2_axis_tkeep :
                                s3_axis_tkeep;

assign m_axis_tlast  = sel_s0 ? s0_axis_tlast :
                       sel_s1 ? s1_axis_tlast :
                       sel_s2 ? s2_axis_tlast :
                                s3_axis_tlast;

assign m_axis_tvalid = sel_s0 ? s0_axis_tvalid :
                       sel_s1 ? s1_axis_tvalid :
                       sel_s2 ? s2_axis_tvalid :
                       sel_s3 ? s3_axis_tvalid :
                                1'b0;

// ---------------------------------------------------------------------
// Master -> slave back-pressure: only the routed stream sees tready
// ---------------------------------------------------------------------
assign s0_axis_tready = sel_s0 ? m_axis_tready : 1'b0;
assign s1_axis_tready = sel_s1 ? m_axis_tready : 1'b0;
assign s2_axis_tready = sel_s2 ? m_axis_tready : 1'b0;
assign s3_axis_tready = sel_s3 ? m_axis_tready : 1'b0;

// ---------------------------------------------------------------------
// Fixed-priority pick: { any stream valid, index of the winner }
// ---------------------------------------------------------------------
wire [2:0] arb_pick = (s0_axis_tvalid) ? {1'b1, MUX_S0 } :
                      (s1_axis_tvalid) ? {1'b1, MUX_S1 } :
                      (s2_axis_tvalid) ? {1'b1, MUX_S2 } :
                      (s3_axis_tvalid) ? {1'b1, MUX_S3 } : {1'b0, MUX_S0 };

wire       arb_any = arb_pick[2];
wire [1:0] arb_sel = arb_pick[1:0];

// Final beat of the current packet is being accepted
wire last_transfer = m_axis_tready && m_axis_tvalid && m_axis_tlast;

always @(posedge s_axis_clk) begin
  if (~s_arstn) begin
    state   <= MUX_S0;
    started <= 1'b0;
  end else if (last_transfer) begin
    // End of packet takes precedence: re-arbitrate right away. The next
    // packet counts as started only when a different stream has won.
    state   <= arb_sel;
    started <= arb_any && (state != arb_sel);
  end else if (~started) begin
    if (m_axis_tvalid) begin
      // Routed stream has data: hold the selection
      started <= 1'b1;
    end else begin
      // Nothing in flight: follow the arbiter
      state   <= arb_sel;
    end
  end
end


endmodule

这个是四个输入到一个输出的多路器,跟一般多路器相比,它可以自主选择某一路。这个选择是根据这四路的valid信号做出的,并且可以看到这四路的优先级是按照0-3排列的,也就是说s0具备最高的优先级。

5,中断路由器

//
// Copyright (c) 2016-2020 Fairwaves, Inc.
// SPDX-License-Identifier: CERN-OHL-W-2.0
//

// Routes up to COUNT user interrupt sources to the PCIe core interrupt
// interface, either as a legacy interrupt or as MSI vectors.
//
// Parameters:
//   COUNT       - number of user interrupt sources (internal vectors are 32 bit)
//   DELAY_CYCLE - not referenced inside this module
//
// User side:
//   int_valid/int_ready - per-source request / acknowledge
//   int_ctrl_*          - write channel loading the per-source enable mask
//   int_stat_*          - read channel returning the raw int_valid vector; a
//                         read while a legacy interrupt is asserted or while a
//                         shared MSI vector is pending acknowledges sources
//
// MSI vector mapping: interrupt_mmenable selects how many vectors are
// usable. Low-numbered sources get a dedicated vector; all remaining sources
// share the last vector (see msi_msk_and_cap / msi_num_fit).
module int_router #(
    parameter COUNT = 16,
    parameter DELAY_CYCLE = 0
) (
    input clk,
    input reset,

    // PCI-e interface
    input            interrupt_msi_enabled,
    input            interrupt_rdy,

    output reg       interrupt,
    output reg       interrupt_assert,
    output reg [7:0] interrupt_num,

    input            legacy_interrupt_disabled,
    input      [2:0] interrupt_mmenable,

    output     [4:0] cap_interrupt_msgnum,
    output           cap_interrupt_stat,

    // User Interrupt status
    input                 int_stat_ready,
    output                int_stat_valid,
    output [COUNT - 1: 0] int_stat_data,

    // User Interrupt control
    output                int_ctrl_ready,
    input                 int_ctrl_valid,
    input [COUNT - 1: 0]  int_ctrl_data,


    // User Interrupt interface
    input  [COUNT - 1:0]  int_valid,
    output [COUNT - 1:0]  int_ready
);
assign cap_interrupt_stat   = 1'b0;

// Enable mask and the resulting set of active (requested and enabled) sources
reg [COUNT-1:0] int_en;
wire [31:0] int_active = int_valid & int_en;


wire [4:0] msi_num_x;
wire       msi_num_xval;

// Bit-reversed copy of int_active fed to the clz instance.
// NOTE(review): clz is defined elsewhere; presumably it counts leading
// zeros, which would make msi_num_x the index of the lowest active source.
wire [31:0] int_active_r;
genvar i;
generate
for (i = 0; i < 32; i=i+1) begin: rev
    assign int_active_r[31-i] = int_active[i];
end
endgenerate

clz #(.B_WIDTH(5)) clz_decode (
  .data(int_active_r),

  .count(msi_num_x),
  .count_nvalid(msi_num_xval)
);
wire [4:0] msi_num = /*(msi_num_xval) ? 0 :*/ msi_num_x;


// { value reported on cap_interrupt_msgnum, mask of the sources that share
// the last usable vector } for the current interrupt_mmenable setting.
wire [36:0] msi_msk_and_cap =
  (interrupt_mmenable == 3'b000 && (COUNT > 1))  ? { 5'b00001, 32'hffff_ffff } :
  (interrupt_mmenable == 3'b001 && (COUNT > 2))  ? { 5'b00010, 32'hffff_fffe } :
  (interrupt_mmenable == 3'b010 && (COUNT > 4))  ? { 5'b00100, 32'hffff_fff8 } :
  (interrupt_mmenable == 3'b011 && (COUNT > 8))  ? { 5'b01000, 32'hffff_ff80 } :
  (interrupt_mmenable == 3'b100 && (COUNT > 16)) ? { 5'b10000, 32'hffff_8000 } :
                                                   { COUNT[4:0], 32'h0000_0000 };
assign cap_interrupt_msgnum = msi_msk_and_cap[36:32];

// Sources acknowledged by a status read: the shared-vector sources in MSI
// mode, every source in legacy mode.
wire [31:0] msi_ready_clean_msk =
  (interrupt_msi_enabled) ? msi_msk_and_cap[31:0] : 32'hffff_ffff;

// { source had to be folded onto the shared vector, vector number to send }
wire [5:0] msi_num_fit =
  (interrupt_mmenable == 3'b000 && (COUNT > 1))                    ? 6'b1_00000 :
  (interrupt_mmenable == 3'b001 && (COUNT > 2)  && (msi_num > 0))  ? 6'b1_00001 :
  (interrupt_mmenable == 3'b010 && (COUNT > 4)  && (msi_num > 2))  ? 6'b1_00011 :
  (interrupt_mmenable == 3'b011 && (COUNT > 8)  && (msi_num > 6))  ? 6'b1_00111 :
  (interrupt_mmenable == 3'b100 && (COUNT > 16) && (msi_num > 14)) ? 6'b1_01111 :
                                                                     { 1'b0, msi_num };
wire [4:0] msi_num_gen = msi_num_fit[4:0];
wire       msi_no_fit  = msi_num_fit[5];

// Enable mask register, always ready to be written
assign int_ctrl_ready = 1'b1;
always @(posedge clk) begin
  if (reset) begin
    int_en <= 0;
  end else begin
    if (int_ctrl_ready && int_ctrl_valid) begin
      int_en <= int_ctrl_data;
    end
  end
end

reg [COUNT - 1:0]      reg_int_ready;
reg [1:0]              pcie_int_state;

assign int_ready = reg_int_ready;
assign int_stat_valid = 1'b1;
assign int_stat_data  = int_valid;

// Set after a shared-vector MSI has been requested; cleared by a status read.
reg assert_halt;

// A new interrupt may be raised when
//  - no shared-vector MSI is pending and any source is active, or
//  - a shared-vector MSI is pending and a source with a dedicated vector
//    (outside msi_ready_clean_msk) is active.
// The AND is parenthesized explicitly: `!=` binds tighter than `&`, so
// without the parentheses only int_active[0] would be examined.
wire int_new_avail = (~assert_halt && (int_active != 0)) ||
                     (assert_halt && ((int_active & ~msi_ready_clean_msk) != 0));

localparam PCIE_INT_IDLE          = 0;
localparam PCIE_INT_WAIT_USER_ACK = 1; // For legacy PCI interrupts only
localparam PCIE_INT_WAIT_REL_L    = 2; // for MSI interrupt covering multi-vector
localparam PCIE_INT_WAIT_REL_H    = 3; // for MSI interrupt

always @(posedge clk) begin
  if (reset) begin
    pcie_int_state   <= PCIE_INT_IDLE;
    reg_int_ready    <= 0;
    interrupt        <= 0;
    interrupt_assert <= 0;
    assert_halt      <= 0;
    interrupt_num    <= 0;
  end else begin
    // Default: int_ready is a one-cycle pulse. A status read while an
    // interrupt is outstanding acknowledges the masked set of sources.
    if (int_stat_ready && int_stat_valid && (assert_halt || interrupt_assert)) begin
      assert_halt    <= 1'b0;
      reg_int_ready  <= int_valid & msi_ready_clean_msk;
    end else begin
      reg_int_ready  <= 0;
    end

    case (pcie_int_state)
      PCIE_INT_IDLE: begin
      if (int_new_avail && (~legacy_interrupt_disabled || interrupt_msi_enabled)) begin
        interrupt          <= 1'b1;

        if (interrupt_msi_enabled) begin
          interrupt_num      <= msi_num_gen;

          if (msi_no_fit) begin
            // Shared vector: hold off further shared-vector requests until
            // the status register has been read
            pcie_int_state   <= PCIE_INT_WAIT_REL_L;
            assert_halt      <= 1'b1;
          end else begin
            pcie_int_state   <= PCIE_INT_WAIT_REL_H;
          end
        end else begin
          // Legacy interrupt
          interrupt_num    <= 0;
          interrupt_assert <= 1'b1;
          pcie_int_state   <= PCIE_INT_WAIT_USER_ACK;
        end
      end
      end

      PCIE_INT_WAIT_USER_ACK: begin
        if (int_stat_ready && int_stat_valid && (assert_halt || interrupt_assert)) begin
          // Status read: request again with interrupt_assert low
          interrupt_assert <= 0;
          interrupt        <= 1'b1;
        end else begin
          if (interrupt_rdy) begin
            interrupt        <= 1'b0;
            if (~interrupt_assert) begin
              pcie_int_state <= PCIE_INT_IDLE;
            end
          end
        end
      end

      PCIE_INT_WAIT_REL_L, PCIE_INT_WAIT_REL_H: begin
        if (interrupt_rdy) begin
          interrupt                      <= 1'b0;
          if (pcie_int_state == PCIE_INT_WAIT_REL_H) begin
            // Dedicated vector: acknowledge that single source directly
            // (overrides the default assignment above for this bit)
            reg_int_ready[interrupt_num] <= 1'b1;
          end
        end

        if (~interrupt) begin
          pcie_int_state  <= PCIE_INT_IDLE;
        end
      end

    endcase
  end
end


endmodule

PCIE中断接口比较个别,使用这个模块转成跟用户逻辑交互的接口。

6,PCIE到UL接口的转换

//
// Copyright (c) 2016-2019 Fairwaves, Inc.
// SPDX-License-Identifier: CERN-OHL-W-2.0
//
// PCIe to master UL
//
// NOTICE: Read around Write isn't supported right now

// Bridges PCIe memory read/write TLPs received on a 64-bit AXI-Stream
// interface to a simple 32-bit "UL" register bus and sends the completions
// for reads back on the TX AXI-Stream interface.
//
// Parameters:
//   ADDR_WIDTH - width of the UL DWORD address (TLP address bits
//                [ADDR_WIDTH+1:2] are used)
//   HOST_LE    - 1: data words are passed through unchanged; otherwise the
//                bytes of the write data and of the first read DWORD are
//                reversed
//
// Only 3DW-header memory reads and writes (MEM_RD32 / MEM_WR32) with
// first-DW byte enable 4'hF and EP = 0 are handled; every other TLP is
// dropped (STATE_SKIP).
module pcie_to_ul #(
    parameter ADDR_WIDTH = 10,
    parameter HOST_LE = 1
)(
    input             clk,
    input             rst_n,

    // Configuration
    input  [15:0] cfg_completer_id,

    // AXIs PCIe RX
    input  [63:0] m_axis_rx_tdata,
    input  [7:0]  m_axis_rx_tkeep,
    input         m_axis_rx_tlast,
    input         m_axis_rx_tvalid,
    output        m_axis_rx_tready,

    // AXIs PCIe TX
    input              s_axis_tx_tready,
    output  reg [63:0] s_axis_tx_tdata,
    output  reg [7:0]  s_axis_tx_tkeep,
    output  reg        s_axis_tx_tlast,
    output  reg        s_axis_tx_tvalid,

    //
    // UL Write channel
    output reg [ADDR_WIDTH - 1:0]       m_ul_waddr,
    output [31:0]                       m_ul_wdata,
    output reg                          m_ul_wvalid,
    input                               m_ul_wready,

    // AXI Read address channel
    output reg [ADDR_WIDTH - 1:0]       m_ul_araddr,
    output reg                          m_ul_arvalid,
    input                               m_ul_arready,

    // AXI Read data channel signals
    input [31:0]                        m_ul_rdata,
    input                               m_ul_rvalid,
    output reg                          m_ul_rready
);

reg [31:0] m_ul_wdata_t;  // write data as taken from the TLP
reg [31:0] readdata_t;    // first read DWORD as returned by the UL bus

wire [31:0] readdata;

// Optional byte reversal of write data and of the first read DWORD.
// NOTE(review): the following read DWORDs (STATE_RD32_RN / STATE_RD32_RN2)
// are copied from m_ul_rdata without this reversal, so with HOST_LE != 1 a
// multi-DWORD completion mixes both byte orders - confirm whether intended.
genvar i;
generate
  if (HOST_LE == 1) begin
    assign readdata = readdata_t;
    assign m_ul_wdata = m_ul_wdata_t;
  end else begin
    for (i = 0; i < 4; i=i+1) begin
      assign readdata[(i+1)*8-1:i*8]   = readdata_t[(4-i)*8-1:(3-i)*8];
      assign m_ul_wdata[(i+1)*8-1:i*8] = m_ul_wdata_t[(4-i)*8-1:(3-i)*8];
    end
  end
endgenerate


// Memory
localparam MEM_RD32_FMT_TYPE =      7'b00_00000;   // 3DW
localparam MEM_RD64_FMT_TYPE =      7'b01_00000;   // 4DW

localparam MEM_RD32_LOCK_FMT_TYPE = 7'b00_00001;   // 3DW
localparam MEM_RD64_LOCK_FMT_TYPE = 7'b01_00001;   // 4DW

localparam MEM_WR32_FMT_TYPE =      7'b10_00000;   // 3DW + data
localparam MEM_WR64_FMT_TYPE =      7'b11_00000;   // 4DW + data

// IO
localparam IO_RD32_FMT_TYPE  =      7'b00_00010;   // 3DW
localparam IO_WR32_FMT_TYPE  =      7'b10_00010;   // 3DW + data

// Config Type 0/1
localparam CFGT0_RD_FMT_TYPE =      7'b00_00100;   // 3DW
localparam CFGT0_WR_FMT_TYPE =      7'b10_00100;   // 3DW + data
localparam CFGT1_RD_FMT_TYPE =      7'b00_00101;   // 3DW
localparam CFGT1_WR_FMT_TYPE =      7'b10_00101;   // 3DW + data

// Message
localparam MSG_FMT_TYPE  =          7'b00_10xxx;   // 4DW
localparam MSG_DATA_FMT_TYPE  =     7'b10_10xxx;   // 4DW + data

// Completion
localparam CPL_FMT_TYPE =           7'b00_01010;   // 3DW
localparam CPL_DATA_FMT_TYPE =      7'b10_01010;   // 3DW + data
localparam CPL_LOCK_FMT_TYPE =      7'b00_01011;   // 3DW
localparam CPL_LOCK_DATA_FMT_TYPE = 7'b10_01011;   // 3DW + data


wire   sop;         // First TLP QWORD

reg [3:0]   state;
localparam STATE_RESET      = 0; // Initial TLP header parse
localparam STATE_WR32_AD    = 1; // Address & Data on UL
localparam STATE_WR32_D0    = 2; // Next data
localparam STATE_WR32_D1    = 3; // Next data 2

localparam STATE_RD32_A     = 4;
localparam STATE_RD32_STALL = 5;
localparam STATE_RD32_NF    = 6; // wait for notification
localparam STATE_RD32_R1    = 7;
localparam STATE_RD32_R2    = 8;

localparam STATE_SKIP       = 9;

localparam STATE_RD32_ANS   = 10;
localparam STATE_RD32_RN    = 11;
localparam STATE_RD32_ANS2  = 12;
localparam STATE_RD32_RN2   = 13;


// Request attributes latched from the read TLP header, echoed in the completion
reg [15:0] pcie_req_id;
reg [7:0]  pcie_tag;
reg [2:0]  pcie_tc;
reg [1:0]  pcie_attr;
reg [6:2]  pcie_low_addr; // For Completion

reg [7:0]  pcie_len_dw; // max 128 DW -> 512 bytes

  //
  // req_id[63:48] | tag[47:40] | ldwbe[39:36] fdwbe[35:32] || # fmt_type[30:24] | # tc[22:20] #### | td[15] ep[14] attr[13:12] ## length[9:0]
  //             data[63:32]                                ||      addr[31:2]                                                             ##


// only valid when SOP is asserted
wire [9:0] tlp_length = m_axis_rx_tdata[9:0];
//      reserved        m_axis_rx_tdata[11:10]
wire [1:0] tlp_attr   = m_axis_rx_tdata[13:12];
wire       tlp_ep     = m_axis_rx_tdata[14];
wire       tlp_dp     = m_axis_rx_tdata[15];
//      reserved        m_axis_rx_tdata[19:16]
wire [2:0] tlp_tc     = m_axis_rx_tdata[22:20];
//      reserved        m_axis_rx_tdata[23]
wire [4:0] tlp_type   = m_axis_rx_tdata[28:24];
wire [1:0] tlp_fmt    = m_axis_rx_tdata[30:29];
//      reserved        m_axis_rx_tdata[31]
wire [3:0] tlp_ldwbe  = m_axis_rx_tdata[39:36];
wire [3:0] tlp_fdwbe  = m_axis_rx_tdata[35:32];


// RX beats are accepted while parsing a header, fetching the read address or
// skipping a TLP. In the write states a beat is accepted only when the UL
// write channel can take the data; in STATE_WR32_D0 the beat is held when its
// upper DWORD is valid (tkeep[7]) so that STATE_WR32_D1 can consume it.
assign m_axis_rx_tready = (state == STATE_RESET)  ||
                          (state == STATE_RD32_A) ||
                          (state == STATE_SKIP)   ||
                          (state == STATE_WR32_AD && (~m_ul_wvalid || m_ul_wready)) ||
                          ((state == STATE_WR32_D0 && ~m_axis_rx_tkeep[7]) || state == STATE_WR32_D1) && m_ul_wready;

always @( posedge clk ) begin
  if (!rst_n) begin
    s_axis_tx_tvalid <= 1'b0;
    s_axis_tx_tlast  <= 1'b0;
    s_axis_tx_tkeep  <= 8'b0;

    m_ul_wvalid      <= 1'b0;
    m_ul_arvalid     <= 1'b0;
    m_ul_rready      <= 1'b0;

    state            <= STATE_RESET;
  end else begin
    // Outside the write states: drop wvalid once the pending write is accepted
    if (m_ul_wready && m_ul_wvalid && (state != STATE_WR32_AD) && (state != STATE_WR32_D0) &&  (state != STATE_WR32_D1)) begin
      m_ul_wvalid <= 1'b0;
    end

    case (state)
    // First header QWORD: decode fmt/type and latch the request attributes
    STATE_RESET: begin
      if (/*m_axis_rx_tready &&*/ m_axis_rx_tvalid) begin
        case ({tlp_fmt,tlp_type})
        MEM_RD32_FMT_TYPE : begin

          if (/* tlp_length == 10'h1 && */ // 1-DW
                      tlp_ep     == 1'b0 &&   // Data isn't poisoned
                      tlp_fdwbe  == 4'hF)     // 32-bit transfer
            state <= STATE_RD32_A;
          else
            state <= STATE_SKIP;

          pcie_req_id <= m_axis_rx_tdata[63:48];
          pcie_tag    <= m_axis_rx_tdata[47:40];
          pcie_tc     <= tlp_tc;
          pcie_attr   <= tlp_attr;
          pcie_len_dw <= tlp_length[7:0]; // holds up to 128DW == 512 Bytes
        end

        MEM_WR32_FMT_TYPE : begin
                  // we accept only 32 bit 1-DW command only
          if (/* tlp_length == 10'h1 && */  // 1-DW
                      tlp_ep     == 1'b0 &&   // Data isn't poisoned
                      tlp_fdwbe  == 4'hF)     // 32-bit transfer
            state <= STATE_WR32_AD;
          else
            state <= STATE_SKIP;
        end

        default: begin
          state <= STATE_SKIP;
        end
        endcase
      end
    end // STATE_RESET

    // WRITE STATES
    // Second QWORD: address in the low DWORD, first data DWORD in the high one
    STATE_WR32_AD: begin
      if (/*m_axis_rx_tready && */ (~m_ul_wvalid || m_ul_wready) && m_axis_rx_tvalid) begin
        m_ul_wdata_t     <= m_axis_rx_tdata[63:32];
        m_ul_waddr       <= m_axis_rx_tdata[ADDR_WIDTH + 1:2];
        m_ul_wvalid      <= 1'b1;

        if (m_axis_rx_tlast) begin
          state             <= STATE_RESET;
        end else begin
          state             <= STATE_WR32_D0;
        end
      end else if (m_ul_wready && m_ul_wvalid) begin
        m_ul_wvalid      <= 1'b0;
      end
    end

    // Low DWORD of a following data QWORD
    STATE_WR32_D0: begin
      if (/*m_axis_rx_tready && */ m_ul_wready && m_axis_rx_tvalid) begin
        m_ul_waddr       <= m_ul_waddr + 1'b1;
        m_ul_wdata_t     <= m_axis_rx_tdata[31:0];
        m_ul_wvalid      <= 1'b1;

        if (m_axis_rx_tlast && ~m_axis_rx_tkeep[7]) begin
          state            <= STATE_RESET;
        end else begin
          state            <= STATE_WR32_D1;
        end
      end else if (m_ul_wready && m_ul_wvalid) begin
        m_ul_wvalid      <= 1'b0;
      end
    end

    // High DWORD of the same data QWORD
    STATE_WR32_D1: begin
      if (/*m_axis_rx_tready && */ m_ul_wready && m_axis_rx_tvalid) begin
        m_ul_waddr       <= m_ul_waddr + 1'b1;
        m_ul_wdata_t     <= m_axis_rx_tdata[63:32];
        m_ul_wvalid      <= 1'b1;

        if (m_axis_rx_tlast) begin
          state            <= STATE_RESET;
        end else begin
          state            <= STATE_WR32_D0;
        end
      end else if (m_ul_wready && m_ul_wvalid) begin
        m_ul_wvalid      <= 1'b0;
      end
    end

    // READ STATES
    // Second QWORD of the read request: latch the address, start the UL read
    STATE_RD32_A: begin
      if (/*m_axis_rx_tready &&*/ m_axis_rx_tvalid) begin
        m_ul_araddr   <= m_axis_rx_tdata[ADDR_WIDTH + 1:2];
        pcie_low_addr <= m_axis_rx_tdata[6:2];

        if (m_axis_rx_tlast) begin
          m_ul_arvalid <= 1'b1;
          state        <= STATE_RD32_STALL;
        end
      end
    end

    // Wait for the UL read address to be accepted
    STATE_RD32_STALL: begin
      if (m_ul_arvalid && m_ul_arready) begin
        m_ul_arvalid <= 1'b0;
        m_ul_rready  <= 1'b1;

        state        <= STATE_RD32_NF;
      end
    end

    // First read DWORD arrived: emit completion header QWORD 0
    // (byte count field is pcie_len_dw * 4)
    STATE_RD32_NF: begin
      if (m_ul_rvalid && m_ul_rready) begin
        m_ul_rready  <= 1'b0;
        readdata_t   <= m_ul_rdata;
        state        <= STATE_RD32_R1;

        s_axis_tx_tdata  <= {cfg_completer_id, 3'b000, 1'b0, { 2'b0, pcie_len_dw, 2'b0 },
                             1'b0, CPL_DATA_FMT_TYPE, 1'b0, pcie_tc, 4'b0, 1'b0, 1'b0, pcie_attr, 2'b0, { 2'b0, pcie_len_dw } };
        s_axis_tx_tkeep  <= 8'hFF;
        s_axis_tx_tvalid <= 1'b1;
      end
    end

    // Header QWORD 0 accepted: emit QWORD 1 (requester id, tag, lower
    // address) together with the first data DWORD
    STATE_RD32_R1: begin
      if (s_axis_tx_tready) begin
        s_axis_tx_tdata <= { readdata,
                             pcie_req_id, pcie_tag, 1'b0, pcie_low_addr, 2'b0 };
        state           <= STATE_RD32_R2;
        pcie_len_dw     <= pcie_len_dw - 1'b1;
        if (pcie_len_dw == 8'h1) begin
          s_axis_tx_tlast <= 1'b1;
        end
      end
    end

    // A TX beat is pending: once accepted either finish the completion or
    // fetch the next DWORD from the UL bus
    STATE_RD32_R2: begin
      if (s_axis_tx_tready) begin
        if (s_axis_tx_tlast) begin
          state            <= STATE_RESET;
        end else begin
          m_ul_araddr     <= m_ul_araddr + 1'b1;
          m_ul_arvalid    <= 1'b1;
          state           <= STATE_RD32_ANS;
        end

        s_axis_tx_tlast  <= 1'b0;
        s_axis_tx_tvalid <= 1'b0;
      end
    end

    // Address phase for the low DWORD of the next TX beat
    STATE_RD32_ANS: begin
      if (m_ul_arvalid && m_ul_arready) begin
        m_ul_arvalid <= 1'b0;
        m_ul_rready  <= 1'b1;
        state        <= STATE_RD32_RN;
      end
    end

    // Low DWORD received: send a half-filled last beat, or go on to fetch
    // the high DWORD
    STATE_RD32_RN: begin
      if (m_ul_rvalid && m_ul_rready) begin
        m_ul_rready           <= 1'b0;
        s_axis_tx_tdata[31:0] <= m_ul_rdata;
        s_axis_tx_tkeep       <= 8'h0f;
        pcie_len_dw           <= pcie_len_dw - 1'b1;
        if (pcie_len_dw == 8'h1) begin
          state            <= STATE_RD32_R2;
          s_axis_tx_tlast  <= 1'b1;
          s_axis_tx_tvalid <= 1'b1;
        end else begin
          state            <= STATE_RD32_ANS2;
          m_ul_araddr      <= m_ul_araddr + 1'b1;
          m_ul_arvalid     <= 1'b1;
        end
      end
    end

    // Address phase for the high DWORD of the TX beat
    STATE_RD32_ANS2: begin
      if (m_ul_arvalid && m_ul_arready) begin
        m_ul_arvalid <= 1'b0;
        m_ul_rready  <= 1'b1;
        state        <= STATE_RD32_RN2;
      end
    end

    // High DWORD received: send the full beat
    STATE_RD32_RN2: begin
      if (m_ul_rvalid && m_ul_rready) begin
        m_ul_rready            <= 1'b0;
        s_axis_tx_tdata[63:32] <= m_ul_rdata;
        s_axis_tx_tkeep        <= 8'hff;
        state                  <= STATE_RD32_R2;
        s_axis_tx_tvalid       <= 1'b1;
        pcie_len_dw            <= pcie_len_dw - 1'b1;
        if (pcie_len_dw == 8'h1) begin
          s_axis_tx_tlast      <= 1'b1;
        end
      end
    end

    // Drop the remainder of an unsupported TLP
    STATE_SKIP: begin
      if (m_axis_rx_tlast && m_axis_rx_tvalid) begin
        state <= STATE_RESET;
      end
    end
    endcase

  end
end

endmodule

PCIE模块出来的是AXIS接口,这里面转成了UL接口。这里注意PCIE的AXIS接口还连接着其他模块。这里UL主要是进行寄存器设置,大块的数据传输不是从这里走。这个UL接口在FPGA逻辑内部反复出现、大量使用,我琢磨UL应该是作者规定的USER LOCAL的缩写:用户本地接口。看上去跟AXI_LITE差不多。注意这个模块是双向的:从PCIE收数据,也发数据给PCIE;发数据给用户,也从用户那里收数据再发给PCIE。我在这里设置过ILA,看到write32和read32实际上就是对这些寄存器的操作。

7,收数据的PCIE请求模块:

//
// Copyright (c) 2016-2020 Fairwaves, Inc.
// SPDX-License-Identifier: CERN-OHL-W-2.0
//

module 
 #(
    parameter LOW_ADDDR_BITS = 14
)(
    // UL
    input             s_ul_clk,
    input             s_ul_aresetn,

    input             txdma_active,

    input   [15:0]    cfg_pcie_reqid,

    // AXIs PCIe TX (completion)
    output            s_axis_rx_tready,
    input   [63:0]    s_axis_rx_tdata,
    input   [7:0]     s_axis_rx_tkeep,
    input             s_axis_rx_tlast,
    input             s_axis_rx_tvalid,

    input             m_axis_tx_tready,
    output reg [63:0] m_axis_tx_tdata,
    output reg [7:0]  m_axis_tx_tkeep,
    output reg        m_axis_tx_tlast,
    output reg        m_axis_tx_tvalid,

    // RAM interface
    output [63:0]                m_ram_tdata,
    output [LOW_ADDDR_BITS-1:0]  m_ram_taddr,
    output                       m_ram_tvalid,

    // Request & notify
    input                        ul_ml_rvalid,
    output                       ul_ml_rready,
    input [LOW_ADDDR_BITS+3-1:3] ul_ml_rlocaddr,
    input [31:3]                 ul_ml_rbusaddr,
    input [8:0]                  ul_ml_rlength,
    input [4:0]                  ul_ml_rtag,

    output                       ul_ml_tvalid,
    input                        ul_ml_tready,
    output [4:0]                 ul_ml_ttag
);

localparam REQ_TAG_BITS = 5;

wire [LOW_ADDDR_BITS-1:0] buffer_burst_cpld_addr;

wire        pcie_rtag_rd_latch;
wire [4:0]  pcie_rtag_rd;         // cpld TAG

ram32xsdp #(.WIDTH(LOW_ADDDR_BITS)) tag_waddr(
    .wclk(s_ul_clk),
    .we(ul_ml_rvalid),
    .waddr(ul_ml_rtag),
    .datai(ul_ml_rlocaddr),
    .raddr(pcie_rtag_rd),
    .datao(buffer_burst_cpld_addr)
);

localparam ST_W0 = 0;
localparam ST_W1 = 1;
reg state;

assign ul_ml_rready = m_axis_tx_tready && state;

wire [31:2] pcie_addr   = { ul_ml_rbusaddr, 1'b0 };
wire [7:0]  pcie_tag    =   ul_ml_rtag;
wire [9:0]  pcie_length = { ul_ml_rlength + 1'b1, 1'b0 };

wire [1:0]  cfg_pcie_attr = 0;

always @(posedge s_ul_clk) begin
  if (~s_ul_aresetn) begin
    state            <= 1'b0;
    m_axis_tx_tvalid <= 1'b0;
  end else begin

    case (state)
      ST_W0: begin
        if (ul_ml_rvalid && (!m_axis_tx_tvalid || m_axis_tx_tvalid && m_axis_tx_tready) ) begin
          m_axis_tx_tdata[63:32] <= { cfg_pcie_reqid, pcie_tag, 8'hff};
          m_axis_tx_tdata[31:0]  <= { 16'h00_00,      2'b00, cfg_pcie_attr, pcie_length };
          m_axis_tx_tkeep        <= 8'hff;
          m_axis_tx_tlast        <= 1'b0;
          m_axis_tx_tvalid       <= 1'b1;
          state                  <= state + 1;
        end else if (m_axis_tx_tvalid && m_axis_tx_tready) begin
          m_axis_tx_tvalid <= 1'b0;
        end
      end

      ST_W1: begin
        if (m_axis_tx_tready) begin
          m_axis_tx_tdata[31:0] <= {pcie_addr, 2'b0};
          m_axis_tx_tkeep       <= 8'h0f;
          m_axis_tx_tlast       <= 1'b1;
          m_axis_tx_tvalid      <= 1'b1;
          state                 <= state + 1;
        end
      end

    endcase

  end
end


assign ul_ml_tvalid = pcie_rtag_rd_latch;
assign ul_ml_ttag   = pcie_rtag_rd;


localparam CPL_FMT_TYPE =           7'b00_01010;   // 3DW
localparam CPL_DATA_FMT_TYPE =      7'b10_01010;   // 3DW + data
localparam MEM_WR32_FMT_TYPE =      7'b10_00000;   // 3DW + data

localparam ST_RX_TLP_HDR    = 0;
localparam ST_RX_TLP_W0     = 1;
localparam ST_RX_TLP_WBULK  = 2;
localparam ST_RX_TLP_SKIP   = 3;


reg [1:0]  dma_rx_state;

reg [31:0] tmp_axis_rx_data_wrap;

wire [6:0] tlp_type   = s_axis_rx_tdata[30:24];
wire       tlp_ep     = s_axis_rx_tdata[14];
wire       tlp_dp     = s_axis_rx_tdata[15];
wire [3:0] tlp_ldwbe  = s_axis_rx_tdata[39:36];
wire [3:0] tlp_fdwbe  = s_axis_rx_tdata[35:32];
wire [2:0] cpld_status_bits = s_axis_rx_tdata[47:45];

reg        first_word;

assign s_axis_rx_tready = 1'b1;


reg       pcie_last_cpld_packet;

assign pcie_rtag_rd       = s_axis_rx_tdata[REQ_TAG_BITS - 1 + 8:8];
wire   pcie_rtag_rd_pres  = (dma_rx_state == ST_RX_TLP_W0) && s_axis_rx_tvalid && s_axis_rx_tready;
assign pcie_rtag_rd_latch = pcie_rtag_rd_pres && pcie_last_cpld_packet;

//reg       buffer_burst_cpld_lastreq_reg;
reg [10:1] data_remain;
reg       last_burst_in_buffer;

wire [63:0] pcie_to_fifo;
bsswap bs0(.in(s_axis_rx_tdata[31:0]),  .out(pcie_to_fifo[31:0]));
bsswap bs1(.in(s_axis_rx_tdata[63:32]), .out(pcie_to_fifo[63:32]));


reg [7:0] invalid_cpld;
reg [7:0] cpl_stat_ur;
reg [7:0] cpl_stat_csr;
reg [7:0] cpl_stat_ca;

localparam CPL_UR  = 3'b001;
localparam CPL_CSR = 3'b010;
localparam CPL_CA  = 3'b100;

reg [LOW_ADDDR_BITS-1:0]    fifo_wr_addr;
reg [63:0]                  fifo_data_in;
reg                         fifo_wr_en_rx;

reg pcie_cpl_trans;

always @(posedge s_ul_clk) begin
  if (txdma_active) begin
    fifo_wr_addr         <= 0;

    fifo_wr_en_rx        <= 1'b0;

    dma_rx_state         <= ST_RX_TLP_HDR;

    invalid_cpld <= 0;
    cpl_stat_ur  <= 0;
    cpl_stat_csr <= 0;
    cpl_stat_ca  <= 0;
  end else begin
    case (dma_rx_state)
      ST_RX_TLP_HDR: begin
        //pcie_rx_valid            <= 1'b0;
        fifo_wr_en_rx            <= 1'b0;

        if (s_axis_rx_tready && s_axis_rx_tvalid) begin
          // Last CplD packet is when Length == ByteCount >> 2, garbage on MemWr
          pcie_last_cpld_packet <= (s_axis_rx_tdata[9:0] == s_axis_rx_tdata[32+2+9:32+2]);
          data_remain[9:1]      <=  s_axis_rx_tdata[32+2+9:32+2+1];
          data_remain[10]       <=  (s_axis_rx_tdata[32+2+9:32+2+1] == 9'b0) ? 1'b1 : 1'b0;
          //pcie_rx_length_qw[9:1]<= s_axis_rx_tdata[9:1];

          if (tlp_ep == 1'b0 &&
               (/*fifo_dma_en &&*/ tlp_type == CPL_DATA_FMT_TYPE || /*~fifo_dma_en &&*/ tlp_fdwbe == 4'hF && tlp_type == MEM_WR32_FMT_TYPE)) begin
            pcie_cpl_trans <= (tlp_type == CPL_DATA_FMT_TYPE);
            dma_rx_state <= ST_RX_TLP_W0;
          end else begin
            // Log type of abortion
            if (tlp_type == CPL_FMT_TYPE) begin
              if (cpld_status_bits & CPL_UR) begin
                cpl_stat_ur <= cpl_stat_ur + 1'b1;
              end
              if (cpld_status_bits & CPL_CSR) begin
                cpl_stat_csr <= cpl_stat_csr + 1'b1;
              end
              if (cpld_status_bits & CPL_CA) begin
                cpl_stat_ca <= cpl_stat_ca + 1'b1;
              end
            end

            invalid_cpld <= invalid_cpld + 1'b1;
            dma_rx_state <= ST_RX_TLP_SKIP;
          end

        end
      end

      ST_RX_TLP_W0: begin
        if (s_axis_rx_tready && s_axis_rx_tvalid) begin
          tmp_axis_rx_data_wrap    <= pcie_to_fifo[63:32];
          dma_rx_state             <= ST_RX_TLP_WBULK;

          // For PCIe write
          // load fifo_wr_addr
          if (/*~fifo_dma_en*/ ~pcie_cpl_trans) begin
            fifo_wr_addr                <= s_axis_rx_tdata[LOW_ADDDR_BITS+2:3];
          end else begin
            fifo_wr_addr                <= buffer_burst_cpld_addr - data_remain;
          end

          first_word                    <= 1'b0;
        end
      end

      ST_RX_TLP_WBULK: begin
        if (s_axis_rx_tready && s_axis_rx_tvalid) begin
          fifo_data_in[31:0]       <= tmp_axis_rx_data_wrap;
          fifo_data_in[63:32]      <= pcie_to_fifo[31:0];
          tmp_axis_rx_data_wrap    <= pcie_to_fifo[63:32];

          fifo_wr_en_rx            <= 1'b1;
          fifo_wr_addr             <= fifo_wr_addr + first_word;
          first_word               <= 1'b1;
          if (s_axis_rx_tlast) begin
            dma_rx_state           <= ST_RX_TLP_HDR;
          end
        end else begin
          fifo_wr_en_rx            <= 1'b0;
        end
      end

      ST_RX_TLP_SKIP: begin
        // Write metada for next burst
        if (s_axis_rx_tready && s_axis_rx_tvalid && s_axis_rx_tlast) begin
          dma_rx_state           <= ST_RX_TLP_HDR;
        end
      end
    endcase

  end
end


assign m_ram_tdata = fifo_data_in;
assign m_ram_taddr = fifo_wr_addr;
assign m_ram_tvalid = fifo_wr_en_rx;

endmodule

这个模块负责接收方向的处理:它可以向PC发出读数据请求,也可以接收返回的数据,并以带地址的形式输出(在这个设计中输出给了fifo_mem_tx模块)。

下图是fifo_mem_tx

这是一个双端口的存储器,作用是实现跨时钟域传输,同时作为缓冲器。但问题是,两侧的同步是如何实现的?

8,字转换模块

//
// Copyright (c) 2016-2020 Fairwaves, Inc.
// SPDX-License-Identifier: CERN-OHL-W-2.0
//

module bsswap #(
    parameter BYTES = 4
)(
    input  [BYTES*8 - 1:0] in,
    output [BYTES*8 - 1:0] out
);

// Byte-order reversal of a BYTES-wide word.
//
// Input byte k (bits [8*k+7:8*k]) is wired to output byte (BYTES - 1 - k).
// The bit order inside each byte is left untouched. The module is pure
// combinational wiring; there is no clock and no state.
//
// Lane mapping note kept from the original author (two 4-byte swaps
// side by side on a 64-bit word):
//   AXI   7   6   5   4   3   2   1   0
//   PCIe  1.0 1.1 1.2 1.3 0.0 0.1 0.2 0.3

genvar lane;
generate
for (lane = 0; lane < BYTES; lane = lane + 1) begin: swap_lane
  // Move one whole byte at a time using indexed part-selects.
  assign out[8*(BYTES - 1 - lane) +: 8] = in[8*lane +: 8];
end
endgenerate

endmodule

这里算法很简单:把输入按字节颠倒顺序,实现PCIe和AXI之间数据格式的转换。

9,无延迟读的32个元素的双口RAM

//
// Copyright (c) 2016-2020 Fairwaves, Inc.
// SPDX-License-Identifier: CERN-OHL-W-2.0
//

// 32-entry simple dual-port RAM with one write port and one read port.
//
// Parameters:
//   WIDTH - data width in bits.
//
// Ports:
//   wclk, we, waddr, datai - write port; datai is stored at waddr on the
//                            rising edge of wclk when we is high.
//   raddr, datao           - read port; in the SYM model datao is a purely
//                            combinational function of raddr and the stored
//                            words (no clock on the read side).
//
// Two implementations are selected at compile time:
//   `SYM defined   - behavioural model built from 32 individual registers.
//   `SYM undefined - RAM32M primitives, each contributing 6 data bits.
module ram32xsdp #(
    parameter WIDTH = 6
)(
   input        wclk,
   input        we,
   input [4:0]  waddr,
   input [WIDTH-1:0]  datai,

   input [4:0]  raddr,
   output [WIDTH-1:0]  datao
);

`ifdef SYM
// Make debug friendly
// One named register per entry (instead of a memory array) so that every
// word shows up as its own signal in a waveform viewer.

reg [WIDTH-1:0] r0;
reg [WIDTH-1:0] r1;
reg [WIDTH-1:0] r2;
reg [WIDTH-1:0] r3;
reg [WIDTH-1:0] r4;
reg [WIDTH-1:0] r5;
reg [WIDTH-1:0] r6;
reg [WIDTH-1:0] r7;
reg [WIDTH-1:0] r8;
reg [WIDTH-1:0] r9;
reg [WIDTH-1:0] r10;
reg [WIDTH-1:0] r11;
reg [WIDTH-1:0] r12;
reg [WIDTH-1:0] r13;
reg [WIDTH-1:0] r14;
reg [WIDTH-1:0] r15;
reg [WIDTH-1:0] r16;
reg [WIDTH-1:0] r17;
reg [WIDTH-1:0] r18;
reg [WIDTH-1:0] r19;
reg [WIDTH-1:0] r20;
reg [WIDTH-1:0] r21;
reg [WIDTH-1:0] r22;
reg [WIDTH-1:0] r23;
reg [WIDTH-1:0] r24;
reg [WIDTH-1:0] r25;
reg [WIDTH-1:0] r26;
reg [WIDTH-1:0] r27;
reg [WIDTH-1:0] r28;
reg [WIDTH-1:0] r29;
reg [WIDTH-1:0] r30;
reg [WIDTH-1:0] r31;

// Synchronous write: only the register addressed by waddr is updated.
always @(posedge wclk) begin
  if (we) begin
    case (waddr)
      0:  r0 <= datai;
      1:  r1 <= datai;
      2:  r2 <= datai;
      3:  r3 <= datai;
      4:  r4 <= datai;
      5:  r5 <= datai;
      6:  r6 <= datai;
      7:  r7 <= datai;
      8:  r8 <= datai;
      9:  r9 <= datai;
      10: r10<= datai;
      11: r11<= datai;
      12: r12<= datai;
      13: r13<= datai;
      14: r14<= datai;
      15: r15<= datai;
      16: r16<= datai;
      17: r17<= datai;
      18: r18<= datai;
      19: r19<= datai;
      20: r20<= datai;
      21: r21<= datai;
      22: r22<= datai;
      23: r23<= datai;
      24: r24<= datai;
      25: r25<= datai;
      26: r26<= datai;
      27: r27<= datai;
      28: r28<= datai;
      29: r29<= datai;
      30: r30<= datai;
      31: r31<= datai;
    endcase
  end
end

// Combinational read mux: datao changes as soon as raddr or the selected
// register changes.
assign datao = 
    (raddr == 0) ? r0 :
    (raddr == 1) ? r1 :
    (raddr == 2) ? r2 :
    (raddr == 3) ? r3 :
    (raddr == 4) ? r4 :
    (raddr == 5) ? r5 :
    (raddr == 6) ? r6 :
    (raddr == 7) ? r7 :
    (raddr == 8) ? r8 :
    (raddr == 9) ? r9 :
    (raddr == 10) ? r10 :
    (raddr == 11) ? r11 :
    (raddr == 12) ? r12 :
    (raddr == 13) ? r13 :
    (raddr == 14) ? r14 :
    (raddr == 15) ? r15 :
    (raddr == 16) ? r16 :
    (raddr == 17) ? r17 :
    (raddr == 18) ? r18 :
    (raddr == 19) ? r19 :
    (raddr == 20) ? r20 :
    (raddr == 21) ? r21 :
    (raddr == 22) ? r22 :
    (raddr == 23) ? r23 :
    (raddr == 24) ? r24 :
    (raddr == 25) ? r25 :
    (raddr == 26) ? r26 :
    (raddr == 27) ? r27 :
    (raddr == 28) ? r28 :
    (raddr == 29) ? r29 :
    (raddr == 30) ? r30 :
                    r31;

`else
// Number of RAM32M primitives needed: each one supplies 6 data bits
// (2 bits on each of ports A, B and C), so round WIDTH up to a multiple of 6.
localparam COUNT = (WIDTH + 5) / 6;

// Padded read bus; only the low WIDTH bits are exposed on datao.
wire [6*COUNT-1:0] xdatao;
assign datao = xdatao[WIDTH-1:0];

// Padded write bus; datai is extended to a multiple of 6 bits.
wire [6*COUNT-1:0] xdatai = datai;

genvar i;
generate
for (i = 0; i < COUNT; i=i+1) begin: part

// Ports A/B/C are addressed with raddr and carry the 6 data bits.
// Port D is addressed with waddr, its data input is tied to zero and its
// output is left unconnected.
// NOTE(review): relies on the RAM32M convention that ADDRD is also the write
// address for all four ports - confirm against the vendor library guide.
RAM32M #(
  .INIT_A(64'h0000000000000000), // Initial contents of A Port
  .INIT_B(64'h0000000000000000), // Initial contents of B Port
  .INIT_C(64'h0000000000000000), // Initial contents of C Port
  .INIT_D(64'h0000000000000000)  // Initial contents of D Port
) RAM32X6SDP (
  .DOA(xdatao[6*i+1:6*i+0]),
  .DOB(xdatao[6*i+3:6*i+2]),
  .DOC(xdatao[6*i+5:6*i+4]),
  .DOD(),

  .ADDRA(raddr),
  .ADDRB(raddr),
  .ADDRC(raddr),
  .ADDRD(waddr),

  .DIA(xdatai[6*i+1:6*i+0]),
  .DIB(xdatai[6*i+3:6*i+2]),
  .DIC(xdatai[6*i+5:6*i+4]),
  .DID(2'b0),

  .WCLK(wclk),
  .WE(we)
);

end
endgenerate
`endif


endmodule

通过之前可仿真代码分析,这个RAM最大特点是读无延迟。

10,UL转AXI_LITE读

//
// Copyright (c) 2016-2020 Fairwaves, Inc.
// SPDX-License-Identifier: CERN-OHL-W-2.0
//
// UL GPIO 32bit
//       READ
// x: IN DATA (not sampled)

// UL read-channel bridge: turns one UL read request into a single
// valid/ready transfer from one of N stream-style input ports.
//
// Parameters:
//   DATA_WIDTH - width of each port's data word and of s_ul_rdata.
//   NBITS      - width of the read address.
//   N          - number of ports (defaults to 2**NBITS).
//
// Sequence of one read:
//   1. While waiting for an address, a request is accepted when
//      s_ul_arvalid && s_ul_arready. The address is stored in `selector`
//      and axis_port_ready[address] is raised.
//   2. The module waits until axis_port_valid[selector] is high, then
//      captures that port's slice of axis_port_data into s_ul_rdata,
//      clears every axis_port_ready bit and raises s_ul_rvalid.
//   3. s_ul_rvalid is cleared when s_ul_rready is seen.
//
// Only one read is in flight at a time. s_ul_arready is therefore held low
// while a transfer is pending, so a master that honours the valid/ready
// handshake never has a request silently dropped.
module ul_read_axis #(
    parameter DATA_WIDTH = 32,
    parameter NBITS      = 4,
    parameter N = (1 << NBITS)
)(
    // UL clocks
    input                         s_ul_clk,
    input                         s_ul_aresetn,

    // UL Read address channel 0
    input  [NBITS - 1:0]          s_ul_araddr,
    input                         s_ul_arvalid,
    output                        s_ul_arready,
    // UL Read data channel 0 signals
    output reg [DATA_WIDTH - 1:0] s_ul_rdata,
    output reg                    s_ul_rvalid,
    input                         s_ul_rready,

    // read port 0..N-1
    output reg [N - 1:0]          axis_port_ready,
    input  [N - 1:0]              axis_port_valid,
    input  [DATA_WIDTH*N - 1:0]   axis_port_data,

    output [NBITS - 1:0]          axis_port_addr,
    output                        axis_port_addr_valid
);

localparam ST_WAIT_READ_ADDR = 1'b0;
localparam ST_WAIT_TRANSFER  = 1'b1;

reg                     state;
reg [NBITS - 1:0]       selector;

// Data word and valid flag of the currently selected port.
wire [DATA_WIDTH - 1:0] axis_data  = axis_port_data[DATA_WIDTH*selector +: DATA_WIDTH];
wire                    axis_valid = axis_port_valid[selector];

// A new address can be taken only when no transfer is pending and the
// response register is free (empty, or being drained this very cycle).
// Previously the state term was missing, so arready stayed high during
// ST_WAIT_TRANSFER although the state machine ignores arvalid there.
assign s_ul_arready = (state == ST_WAIT_READ_ADDR) &&
                      ((s_ul_rvalid && s_ul_rready) || ~s_ul_rvalid);

always @(posedge s_ul_clk) begin
  if (~s_ul_aresetn) begin
    axis_port_ready <= 0;
    s_ul_rvalid     <= 0;
    selector        <= 0;
    state           <= ST_WAIT_READ_ADDR;
  end else begin
    if (state == ST_WAIT_READ_ADDR) begin
      // Accept a read address and ask the addressed port for one word.
      if (s_ul_arvalid && s_ul_arready) begin
        selector                      <= s_ul_araddr;
        state                         <= ST_WAIT_TRANSFER;
        axis_port_ready[s_ul_araddr]  <= 1'b1;
      end

      // Retire the previous response once the master has taken it.
      if (s_ul_rvalid && s_ul_rready) begin
        s_ul_rvalid                  <= 1'b0;
      end
    end else begin
      // Wait for the selected port to present data, then latch it.
      if (axis_valid) begin
        axis_port_ready <= 0;
        s_ul_rdata      <= axis_data;
        s_ul_rvalid     <= 1'b1;
        state           <= ST_WAIT_READ_ADDR;
      end
    end
  end
end

// Expose which port is being waited on, and whether a wait is in progress.
assign axis_port_addr = selector;
assign axis_port_addr_valid = (state == ST_WAIT_TRANSFER);


endmodule

这里是UL的从接口转成AXI_LITE读接口。但是读地址无须用ready确认,这可能也就是为什么模块名字里有axis的原因。

11,UL接口转通用GPO输出

//
// Copyright (c) 2016-2020 Fairwaves, Inc.
// SPDX-License-Identifier: CERN-OHL-W-2.0
//
// UL GPIO 32bit
//
//        WRITE
// 0: OUT DATA (not sampled)
//

// UL write channel to general-purpose output fan-out.
//
// Parameters:
//   DATA_WIDTH - width of the write data and of gp_out.
//   ADDR_WIDTH - width of the write address.
//   ADDR_TOTAL - number of output targets (defaults to 2**ADDR_WIDTH).
//
// The module is purely combinational: the write data is presented to all
// targets at once, a one-hot strobe marks the addressed target, and the
// ready of the addressed target is returned to the UL master.
module ul_go_base #(
    parameter DATA_WIDTH = 32,
    parameter ADDR_WIDTH = 1,
    parameter ADDR_TOTAL = ( 1 << (ADDR_WIDTH))
)(
    // UL Write channel
    input [ADDR_WIDTH - 1:0]  s_ul_waddr,
    input [DATA_WIDTH - 1:0]  s_ul_wdata,
    input                     s_ul_wvalid,
    output                    s_ul_wready,

    // GPO
    output [DATA_WIDTH - 1:0]  gp_out,
    output [ADDR_TOTAL - 1:0]  gp_out_strobe,
    input  [ADDR_TOTAL - 1:0]  gp_in_ready
);

// Data is shared by every target; only the strobe tells them apart.
assign gp_out      = s_ul_wdata;

// Back-pressure comes from whichever target the current address selects.
assign s_ul_wready = gp_in_ready[s_ul_waddr];

// Strobe bit k is high while a write to address k is being presented.
// It does not depend on gp_in_ready.
genvar target;
generate
  for (target = 0; target < ADDR_TOTAL; target = target + 1) begin: strobe_gen
    assign gp_out_strobe[target] = ((s_ul_waddr == target) && s_ul_wvalid);
  end
endgenerate

endmodule

12,单时钟的AXIS_FIFO

//
// Copyright (c) 2016-2020 Fairwaves, Inc.
// SPDX-License-Identifier: CERN-OHL-W-2.0
//

// Single-clock stream FIFO built on an addressable shift register.
//
// Parameters:
//   WIDTH     - data width in bits.
//   DEEP_BITS - log2 of the depth. The primitive-based implementation only
//               generates storage for DEEP_BITS == 5 or DEEP_BITS == 4.
//
// Storage model: every accepted input word is shifted in at tap 0, pushing
// older words to higher taps. `rpos` is the tap holding the oldest word, so
// the output simply reads tap `rpos`.
//
// fifo_used mirrors rpos: it is 0 both when the FIFO is empty and when it
// holds exactly one word; fifo_empty distinguishes the two.
module axis_fifo32 #(
  parameter WIDTH = 32,
  parameter DEEP_BITS = 5
) (
  input clk,
  input axisrst,

  input [WIDTH-1:0]  axis_rx_tdata,
  input              axis_rx_tvalid,
  output             axis_rx_tready,

  output [WIDTH-1:0] axis_tx_tdata,
  output             axis_tx_tvalid,
  input              axis_tx_tready,

  output [DEEP_BITS-1:0] fifo_used,

  output reg             fifo_empty
);

// Highest tap index; when rpos reaches it every slot is occupied.
localparam FIFO_FULL = ((1 << DEEP_BITS) - 1);

// Tap index of the oldest stored word.
reg [DEEP_BITS-1:0] rpos;

assign fifo_used      = rpos;
assign axis_rx_tready = (rpos != FIFO_FULL);
assign axis_tx_tvalid = (~fifo_empty);

// Handshake completions on the input and output side.
wire fifo_wr_strobe = axis_rx_tvalid && axis_rx_tready;
wire fifo_rd_strobe = axis_tx_tvalid && axis_tx_tready;

`ifdef SYM
// Behavioural shift register for simulation.
localparam MAX_DEEP = 1 << DEEP_BITS;

reg [WIDTH-1:0] fifo[MAX_DEEP - 1:0];

integer tap;
always @(posedge clk) begin
  if (fifo_wr_strobe) begin
    // New word enters at tap 0, everything else moves one tap up.
    fifo[0] <= axis_rx_tdata;
    for (tap = 1; tap < MAX_DEEP; tap = tap + 1)
      fifo[tap] <= fifo[tap - 1];
  end
end

assign axis_tx_tdata = fifo[rpos];

`else
// One shift-register primitive per data bit, all sharing the tap address.
genvar b;
generate
if (DEEP_BITS == 5) begin
  for (b = 0; b < WIDTH; b = b + 1) begin : srl32
   SRLC32E #(
     .INIT(32'h00000000)
   ) fifo32(
        .CLK(clk),
        .CE(fifo_wr_strobe),
        .D(axis_rx_tdata[b]),
        .A(rpos),
        .Q(axis_tx_tdata[b]),
        .Q31()
   );
  end
end else if (DEEP_BITS == 4) begin
  for (b = 0; b < WIDTH; b = b + 1) begin : srl16
   SRL16E #(
     .INIT(16'h0000)
   ) fifo16 (
        .CLK(clk),
        .CE(fifo_wr_strobe),
        .D(axis_rx_tdata[b]),
        .A0(rpos[0]),
        .A1(rpos[1]),
        .A2(rpos[2]),
        .A3(rpos[3]),
        .Q(axis_tx_tdata[b])
   );
  end
end
endgenerate
`endif

// Occupancy tracking.
always @(posedge clk) begin
  if (axisrst) begin
    rpos       <= 0;
    fifo_empty <= 1;
  end else if (fifo_wr_strobe && fifo_rd_strobe) begin
    // Push and pop in the same cycle: the oldest word stays at the same tap.
  end else if (fifo_wr_strobe) begin
    // Push only: the first word just clears the empty flag, later words
    // move the oldest entry one tap further away.
    if (fifo_empty)
      fifo_empty <= 1'b0;
    else
      rpos <= rpos + 1;
  end else if (fifo_rd_strobe) begin
    // Pop only: removing the word at tap 0 empties the FIFO, otherwise the
    // next-oldest word sits one tap closer.
    if (rpos == 0)
      fifo_empty <= 1'b1;
    else
      rpos <= rpos - 1;
  end
end


endmodule

13,dma配置保存表

//
// Copyright (c) 2016-2020 Fairwaves, Inc.
// SPDX-License-Identifier: CERN-OHL-W-2.0
//

// DMA buffer descriptor table.
//
// Parameters:
//   DMA_BUFFS_BITS - width of the buffer index / table address.
//
// One 32-bit word per buffer is kept in a ram32xsp instance. The word is
// split on the output into dma_addr_out (bits 31:12) and dma_buflen_out
// (bits 11:0). Per the original author's note the layout is
// "20bit physical addr + 12bit length of buffer in 16-bytes values".
//
// The single RAM address is shared between the two users:
//   dma_en = 0 : the UL write channel owns the table and may write entries.
//   dma_en = 1 : the table is addressed by dma_bufno and writes are blocked.
module dma_config #(
    parameter DMA_BUFFS_BITS = 5
)(
    // UL
    input           s_ul_clk,

    // UL Write channel
    input [DMA_BUFFS_BITS - 1:0]  s_ul_waddr,
    input [31:0]                  s_ul_wdata,
    input                         s_ul_wvalid,

    // Control IF
    input                         dma_en,
    input [DMA_BUFFS_BITS - 1:0]  dma_bufno,

    // Output config
    output [31:12]                dma_addr_out,
    output [11:0]                 dma_buflen_out
);

wire [DMA_BUFFS_BITS - 1:0]  table_addr;
wire                         table_we;

// Address source follows dma_en; host writes are honoured only while the
// DMA engine is disabled.
assign table_addr = dma_en ? dma_bufno : s_ul_waddr;
assign table_we   = (s_ul_wvalid && ~dma_en);

ram32xsp #(.WIDTH(32)) dmacfg(
    .wclk(s_ul_clk),
    .we(table_we),
    .addr(table_addr),
    .datai(s_ul_wdata),
    .datao({dma_addr_out, dma_buflen_out})
);

endmodule

可以看到,这个表与我们之前分析驱动时提到的32个缓冲区相对应:每个表项的低12位保存数据长度,其余高位保存地址。这里例化的ram32xsp在前面也已经介绍过。

14,UL接口桥接DRP

//
// Copyright (c) 2016-2020 Fairwaves, Inc.
// SPDX-License-Identifier: CERN-OHL-W-2.0
//

// UL command port to four DRP ports plus four 4-bit GPIO groups.
//
// Parameters:
//   PORTS            - declared but not referenced inside this module.
//   GPIO_RESET_P0..3 - reset value of the GPIO output group of each port.
//
// Every accepted 32-bit command word is one of two kinds, chosen by the
// GP_PORT_DRP_REGEN bit (field positions come from xtrxll_regs.vh):
//   * GPIO command (REGEN = 0): the 4-bit GPIO field is stored in the GPIO
//     output register of the port picked by bits [31:30].
//   * DRP command  (REGEN = 1): address, 16-bit data and the write flag are
//     registered, the enable of the picked port is pulsed for one cycle and
//     axis_in_ready stays low until that port answers with drp_drdy.
//
// The status word (axis_out_data) is always valid: the low 16 bits show
// drp_do of the most recently selected port, the upper 16 bits show the
// four GPIO input groups. axis_out_ready is not used.
module ul_drp_cfg #(
  parameter PORTS = 4,
  parameter GPIO_RESET_P0 = 0,
  parameter GPIO_RESET_P1 = 0,
  parameter GPIO_RESET_P2 = 0,
  parameter GPIO_RESET_P3 = 0
)(
    input reset,
    input axis_clk,

    input [31:0] axis_in_data,
    input        axis_in_valid,
    output       axis_in_ready,

    output [31:0] axis_out_data,
    output        axis_out_valid,
    input         axis_out_ready,


    output        drp_clk,

    // DRP port0
    output [15:0] drp_di_0,
    output [6:0]  drp_daddr_0,
    output        drp_den_0,
    output        drp_dwe_0,
    input  [15:0] drp_do_0,
    input         drp_drdy_0,

    output [3:0]  drp_gpio_out_0,
    input  [3:0]  drp_gpio_in_0,

    // DRP port1
    output [15:0] drp_di_1,
    output [6:0]  drp_daddr_1,
    output        drp_den_1,
    output        drp_dwe_1,
    input  [15:0] drp_do_1,
    input         drp_drdy_1,

    output [3:0]  drp_gpio_out_1,
    input  [3:0]  drp_gpio_in_1,

    // DRP port2
    output [15:0] drp_di_2,
    output [6:0]  drp_daddr_2,
    output        drp_den_2,
    output        drp_dwe_2,
    input  [15:0] drp_do_2,
    input         drp_drdy_2,

    output [3:0]  drp_gpio_out_2,
    input  [3:0]  drp_gpio_in_2,

    // DRP port3
    output [15:0] drp_di_3,
    output [6:0]  drp_daddr_3,
    output        drp_den_3,
    output        drp_dwe_3,
    input  [15:0] drp_do_3,
    input         drp_drdy_3,

    output [3:0]  drp_gpio_out_3,
    input  [3:0]  drp_gpio_in_3
);

`include "xtrxll_regs.vh"

// ---------------------------------------------------------------------------
// Registered DRP request, shared by all four ports
// ---------------------------------------------------------------------------
reg [1:0]  drp_selector;   // port targeted by the last accepted command
reg [15:0] drp_di;
reg [6:0]  drp_daddr;
reg        drp_dwe;
reg [3:0]  drp_den;        // one enable bit per port

// Data, address and write flag are broadcast; only the enable is per port.
assign {drp_di_3,    drp_di_2,    drp_di_1,    drp_di_0}    = {4{drp_di}};
assign {drp_daddr_3, drp_daddr_2, drp_daddr_1, drp_daddr_0} = {4{drp_daddr}};
assign {drp_dwe_3,   drp_dwe_2,   drp_dwe_1,   drp_dwe_0}   = {4{drp_dwe}};
assign {drp_den_3,   drp_den_2,   drp_den_1,   drp_den_0}   = drp_den;

// The DRP ports run on the command clock.
assign drp_clk = axis_clk;

// Read data and ready of the currently selected port.
wire [15:0] drp_do = (drp_selector == 2'b00) ? drp_do_0 :
                     (drp_selector == 2'b01) ? drp_do_1 :
                     (drp_selector == 2'b10) ? drp_do_2 : drp_do_3;
wire        drp_drdy = (drp_selector == 2'b00) ? drp_drdy_0 :
                       (drp_selector == 2'b01) ? drp_drdy_1 :
                       (drp_selector == 2'b10) ? drp_drdy_2 : drp_drdy_3;

// ---------------------------------------------------------------------------
// Command word fields
// ---------------------------------------------------------------------------
wire [15:0] cmd_wdata     = axis_in_data[15:0];
wire [6:0]  cmd_addr      = axis_in_data[GP_PORT_DRP_ADDR_BITS - 1 + GP_PORT_DRP_ADDR_OFF:GP_PORT_DRP_ADDR_OFF];
wire        cmd_regacc    = axis_in_data[GP_PORT_DRP_REGEN];
wire        cmd_regacc_wr = axis_in_data[GP_PORT_DRP_REGWR];
wire [3:0]  cmd_gpio      = axis_in_data[4 - 1 + GP_PORT_DRP_GPIO_OFF:GP_PORT_DRP_GPIO_OFF];

// Bit 29 is reserved for a future port extension.
wire [1:0]  cmd_port      = axis_in_data[31:30];

// ---------------------------------------------------------------------------
// Status word
// ---------------------------------------------------------------------------
assign axis_out_valid = 1'b1;
assign axis_out_data  = {drp_gpio_in_3, drp_gpio_in_2, drp_gpio_in_1, drp_gpio_in_0, drp_do};

// ---------------------------------------------------------------------------
// GPIO output registers
// ---------------------------------------------------------------------------
reg [3:0] gpio_out_0;
reg [3:0] gpio_out_1;
reg [3:0] gpio_out_2;
reg [3:0] gpio_out_3;

assign drp_gpio_out_0 = gpio_out_0;
assign drp_gpio_out_1 = gpio_out_1;
assign drp_gpio_out_2 = gpio_out_2;
assign drp_gpio_out_3 = gpio_out_3;

// ---------------------------------------------------------------------------
// Command handling
// ---------------------------------------------------------------------------
reg  state_rdy;            // low while a DRP access is outstanding
assign axis_in_ready = state_rdy;

wire cmd_accept = axis_in_valid && axis_in_ready;

always @(posedge axis_clk) begin
  if (reset) begin
    gpio_out_0 <= GPIO_RESET_P0;
    gpio_out_1 <= GPIO_RESET_P1;
    gpio_out_2 <= GPIO_RESET_P2;
    gpio_out_3 <= GPIO_RESET_P3;

    drp_di     <= 0;
    drp_daddr  <= 0;
    drp_dwe    <= 1'b0;
    drp_den    <= 4'b0;

    state_rdy  <= 1'b1;
  end else if (cmd_accept && ~cmd_regacc) begin
    // GPIO command: update one GPIO group, no DRP activity.
    case (cmd_port)
      2'b00: gpio_out_0 <= cmd_gpio;
      2'b01: gpio_out_1 <= cmd_gpio;
      2'b10: gpio_out_2 <= cmd_gpio;
      2'b11: gpio_out_3 <= cmd_gpio;
    endcase
    drp_selector <= cmd_port;

    drp_den      <= 4'b0;
    state_rdy    <= 1'b1;
  end else if (cmd_accept) begin
    // DRP command: latch the request and raise the enable of one port.
    drp_selector <= cmd_port;
    drp_den      <= (4'b0001 << cmd_port);
    drp_dwe      <= cmd_regacc_wr;
    drp_di       <= cmd_wdata;
    drp_daddr    <= cmd_addr;
    state_rdy    <= 1'b0;
  end else begin
    // No new command: finish whatever is in progress.
    if (~state_rdy && drp_drdy) begin
      state_rdy <= 1'b1;       // selected port answered, accept commands again
    end

    if (|drp_den) begin
      drp_den <= 4'b0;         // enable is a single-cycle pulse; drp_dwe is left as is
    end
  end
end



endmodule

15, UL转DRP配置

//
// Copyright (c) 2016-2020 Fairwaves, Inc.
// SPDX-License-Identifier: CERN-OHL-W-2.0
//

// UL command port to four DRP ports plus four 4-bit GPIO groups.
//
// Parameters:
//   PORTS            - declared but not referenced inside this module.
//   GPIO_RESET_P0..3 - reset value of the GPIO output group of each port.
//
// Every accepted 32-bit command word is one of two kinds, chosen by the
// GP_PORT_DRP_REGEN bit:
//   * GPIO command (REGEN = 0): the 4-bit GPIO field is stored in the GPIO
//     output register of the port picked by bits [31:30].
//   * DRP command  (REGEN = 1): address, 16-bit data and the write flag are
//     registered, the enable of the picked port is pulsed for one cycle and
//     axis_in_ready stays low until that port answers with drp_drdy.
//
// The status word (axis_out_data) is always valid: the low 16 bits show
// drp_do of the most recently selected port, the upper 16 bits show the
// four GPIO input groups. axis_out_ready is not used.
module ul_drp_cfg #(
  parameter PORTS = 4,
  parameter GPIO_RESET_P0 = 0,
  parameter GPIO_RESET_P1 = 0,
  parameter GPIO_RESET_P2 = 0,
  parameter GPIO_RESET_P3 = 0
)(
    input reset,
    input axis_clk,

    input [31:0] axis_in_data,
    input        axis_in_valid,
    output       axis_in_ready,

    output [31:0] axis_out_data,
    output        axis_out_valid,
    input         axis_out_ready,


    output        drp_clk,

    // DRP port0
    output [15:0] drp_di_0,
    output [6:0]  drp_daddr_0,
    output        drp_den_0,
    output        drp_dwe_0,
    input  [15:0] drp_do_0,
    input         drp_drdy_0,

    output [3:0]  drp_gpio_out_0,
    input  [3:0]  drp_gpio_in_0,

    // DRP port1
    output [15:0] drp_di_1,
    output [6:0]  drp_daddr_1,
    output        drp_den_1,
    output        drp_dwe_1,
    input  [15:0] drp_do_1,
    input         drp_drdy_1,

    output [3:0]  drp_gpio_out_1,
    input  [3:0]  drp_gpio_in_1,

    // DRP port2
    output [15:0] drp_di_2,
    output [6:0]  drp_daddr_2,
    output        drp_den_2,
    output        drp_dwe_2,
    input  [15:0] drp_do_2,
    input         drp_drdy_2,

    output [3:0]  drp_gpio_out_2,
    input  [3:0]  drp_gpio_in_2,

    // DRP port3
    output [15:0] drp_di_3,
    output [6:0]  drp_daddr_3,
    output        drp_den_3,
    output        drp_dwe_3,
    input  [15:0] drp_do_3,
    input         drp_drdy_3,

    output [3:0]  drp_gpio_out_3,
    input  [3:0]  drp_gpio_in_3
);

`include "xtrxll_regs.vh"

// ---------------------------------------------------------------------------
// Registered DRP request, shared by all four ports
// ---------------------------------------------------------------------------
reg [1:0]  drp_selector;   // port targeted by the last accepted command
reg [15:0] drp_di;
reg [6:0]  drp_daddr;
reg        drp_dwe;
reg [3:0]  drp_den;        // one enable bit per port

// Data, address and write flag are broadcast; only the enable is per port.
assign {drp_di_3,    drp_di_2,    drp_di_1,    drp_di_0}    = {4{drp_di}};
assign {drp_daddr_3, drp_daddr_2, drp_daddr_1, drp_daddr_0} = {4{drp_daddr}};
assign {drp_dwe_3,   drp_dwe_2,   drp_dwe_1,   drp_dwe_0}   = {4{drp_dwe}};
assign {drp_den_3,   drp_den_2,   drp_den_1,   drp_den_0}   = drp_den;

// The DRP ports run on the command clock.
assign drp_clk = axis_clk;

// Read data and ready of the currently selected port.
wire [15:0] drp_do = (drp_selector == 2'b00) ? drp_do_0 :
                     (drp_selector == 2'b01) ? drp_do_1 :
                     (drp_selector == 2'b10) ? drp_do_2 : drp_do_3;
wire        drp_drdy = (drp_selector == 2'b00) ? drp_drdy_0 :
                       (drp_selector == 2'b01) ? drp_drdy_1 :
                       (drp_selector == 2'b10) ? drp_drdy_2 : drp_drdy_3;

// ---------------------------------------------------------------------------
// Command word fields
// ---------------------------------------------------------------------------
// Bits in GP_PORT_WR_TXMMCM. The constants are supplied by xtrxll_regs.vh;
// the author's reference copy of their values is kept here, disabled:

/*
localparam GP_PORT_DRP_ADDR_OFF  = 16;
localparam GP_PORT_DRP_ADDR_BITS = 7;
localparam GP_PORT_DRP_REGEN     = 23;
localparam GP_PORT_DRP_REGWR     = 24;
localparam GP_PORT_DRP_GPIO_OFF  = 25;
localparam GP_PORT_DRP_NUM_OFF   = 30;
*/

wire [15:0] cmd_wdata     = axis_in_data[15:0];
wire [6:0]  cmd_addr      = axis_in_data[GP_PORT_DRP_ADDR_BITS - 1 + GP_PORT_DRP_ADDR_OFF:GP_PORT_DRP_ADDR_OFF];
wire        cmd_regacc    = axis_in_data[GP_PORT_DRP_REGEN];
wire        cmd_regacc_wr = axis_in_data[GP_PORT_DRP_REGWR];
wire [3:0]  cmd_gpio      = axis_in_data[4 - 1 + GP_PORT_DRP_GPIO_OFF:GP_PORT_DRP_GPIO_OFF];

// Bit 29 is reserved for a future port extension.
wire [1:0]  cmd_port      = axis_in_data[31:30];

// ---------------------------------------------------------------------------
// Status word
// ---------------------------------------------------------------------------
assign axis_out_valid = 1'b1;
assign axis_out_data  = {drp_gpio_in_3, drp_gpio_in_2, drp_gpio_in_1, drp_gpio_in_0, drp_do};

// ---------------------------------------------------------------------------
// GPIO output registers
// ---------------------------------------------------------------------------
reg [3:0] gpio_out_0;
reg [3:0] gpio_out_1;
reg [3:0] gpio_out_2;
reg [3:0] gpio_out_3;

assign drp_gpio_out_0 = gpio_out_0;
assign drp_gpio_out_1 = gpio_out_1;
assign drp_gpio_out_2 = gpio_out_2;
assign drp_gpio_out_3 = gpio_out_3;

// ---------------------------------------------------------------------------
// Command handling
// ---------------------------------------------------------------------------
reg  state_rdy;            // low while a DRP access is outstanding
assign axis_in_ready = state_rdy;

wire cmd_accept = axis_in_valid && axis_in_ready;

always @(posedge axis_clk) begin
  if (reset) begin
    gpio_out_0 <= GPIO_RESET_P0;
    gpio_out_1 <= GPIO_RESET_P1;
    gpio_out_2 <= GPIO_RESET_P2;
    gpio_out_3 <= GPIO_RESET_P3;

    drp_di     <= 0;
    drp_daddr  <= 0;
    drp_dwe    <= 1'b0;
    drp_den    <= 4'b0;

    state_rdy  <= 1'b1;
  end else if (cmd_accept && ~cmd_regacc) begin
    // GPIO command: update one GPIO group, no DRP activity.
    case (cmd_port)
      2'b00: gpio_out_0 <= cmd_gpio;
      2'b01: gpio_out_1 <= cmd_gpio;
      2'b10: gpio_out_2 <= cmd_gpio;
      2'b11: gpio_out_3 <= cmd_gpio;
    endcase
    drp_selector <= cmd_port;

    drp_den      <= 4'b0;
    state_rdy    <= 1'b1;
  end else if (cmd_accept) begin
    // DRP command: latch the request and raise the enable of one port.
    drp_selector <= cmd_port;
    drp_den      <= (4'b0001 << cmd_port);
    drp_dwe      <= cmd_regacc_wr;
    drp_di       <= cmd_wdata;
    drp_daddr    <= cmd_addr;
    state_rdy    <= 1'b0;
  end else begin
    // No new command: finish whatever is in progress.
    if (~state_rdy && drp_drdy) begin
      state_rdy <= 1'b1;       // selected port answered, accept commands again
    end

    if (|drp_den) begin
      drp_den <= 4'b0;         // enable is a single-cycle pulse; drp_dwe is left as is
    end
  end
end



endmodule

这里我们看到,实际上是把一个不带地址的32位接口用于两种用途(GPIO输出和DRP寄存器访问),转成了一个具备多种功能的接口。这其实是把32位接口的数据部分退化成了16位,其余位用来产生各种自定义的信号。这就要求通过UL发来的数据必须按约定的位域拼凑出有意义的内容。

  • 3
    点赞
  • 5
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值