Xilinx FPGA乘法的实现

最新推荐文章于 2024-05-27 16:53:38 发布

MmikerR

最新推荐文章于 2024-05-27 16:53:38 发布

阅读量1w

点赞数 12

分类专栏： FPGA 文章标签： fpga verilog

本文链接：https://blog.csdn.net/MmikerR/article/details/106465513

版权

FPGA 专栏收录该内容

5 篇文章 4 订阅

订阅专栏

要在Xilinx FPGA中实现乘法运算有以下几种方法：

1、直接用*表示

// Two 8-bit operands; the full product of 8 x 8 bits needs 16 bits.
wire [7:0]  a;
wire [7:0]  b;

// Net declaration assignment: equivalent to a separate "assign c = a * b;".
wire [15:0] c = a * b;

这种情况下，乘法可能被综合为LUT逻辑实现，也可能会调用FPGA内部的DSP硬核实现，具体由综合工具决定。如果在代码中给信号加上综合属性（attribute）use_dsp：

wire [7:0]  a, b;

// Synthesis attribute asking the tool to implement the multiplier in a DSP slice.
// The value must be enclosed in plain ASCII double quotes; typographic quotes
// are not valid Verilog string delimiters.
(* use_dsp = "yes" *) wire [15:0] c;

assign c = a * b;

那会调用DSP来实现乘法。

2、调用DSP硬核

2.1 例化DSP原语

例如7系列FPGA的原语为：

   // Instantiation template for the 7-series DSP48E1 primitive.
   // Every port is connected to a signal of the same name (e.g. .A(A)); those
   // signals are placeholders that the surrounding module must declare with the
   // widths given in the per-port comments and replace or tie off as needed.
   // As configured here the multiplier is enabled (USE_MULT = "MULTIPLY"), the D
   // port/pre-adder is unused (USE_DPORT = "FALSE"), pattern detection is off and
   // every pipeline-register attribute is set to 1.
   // NOTE(review): the arithmetic actually performed is also selected by the
   // OPMODE/ALUMODE/INMODE inputs - take the values for a plain A*B from the
   // DSP48E1 user guide.
   DSP48E1 #(
      // Feature Control Attributes: Data Path Selection
      .A_INPUT("DIRECT"),               // Selects A input source, "DIRECT" (A port) or "CASCADE" (ACIN port)
      .B_INPUT("DIRECT"),               // Selects B input source, "DIRECT" (B port) or "CASCADE" (BCIN port)
      .USE_DPORT("FALSE"),              // Select D port usage (TRUE or FALSE)
      .USE_MULT("MULTIPLY"),            // Select multiplier usage ("MULTIPLY", "DYNAMIC", or "NONE")
      .USE_SIMD("ONE48"),               // SIMD selection ("ONE48", "TWO24", "FOUR12")
      // Pattern Detector Attributes: Pattern Detection Configuration
      .AUTORESET_PATDET("NO_RESET"),    // "NO_RESET", "RESET_MATCH", "RESET_NOT_MATCH"
      .MASK(48'h3fffffffffff),          // 48-bit mask value for pattern detect (1=ignore)
      .PATTERN(48'h000000000000),       // 48-bit pattern match for pattern detect
      .SEL_MASK("MASK"),                // "C", "MASK", "ROUNDING_MODE1", "ROUNDING_MODE2"
      .SEL_PATTERN("PATTERN"),          // Select pattern value ("PATTERN" or "C")
      .USE_PATTERN_DETECT("NO_PATDET"), // Enable pattern detect ("PATDET" or "NO_PATDET")
      // Register Control Attributes: Pipeline Register Configuration
      .ACASCREG(1),                     // Number of pipeline stages between A/ACIN and ACOUT (0, 1 or 2)
      .ADREG(1),                        // Number of pipeline stages for pre-adder (0 or 1)
      .ALUMODEREG(1),                   // Number of pipeline stages for ALUMODE (0 or 1)
      .AREG(1),                         // Number of pipeline stages for A (0, 1 or 2)
      .BCASCREG(1),                     // Number of pipeline stages between B/BCIN and BCOUT (0, 1 or 2)
      .BREG(1),                         // Number of pipeline stages for B (0, 1 or 2)
      .CARRYINREG(1),                   // Number of pipeline stages for CARRYIN (0 or 1)
      .CARRYINSELREG(1),                // Number of pipeline stages for CARRYINSEL (0 or 1)
      .CREG(1),                         // Number of pipeline stages for C (0 or 1)
      .DREG(1),                         // Number of pipeline stages for D (0 or 1)
      .INMODEREG(1),                    // Number of pipeline stages for INMODE (0 or 1)
      .MREG(1),                         // Number of multiplier pipeline stages (0 or 1)
      .OPMODEREG(1),                    // Number of pipeline stages for OPMODE (0 or 1)
      .PREG(1)                          // Number of pipeline stages for P (0 or 1)
   )
   DSP48E1_inst (
      // Cascade outputs (width differs per port, see each line)
      .ACOUT(ACOUT),                   // 30-bit output: A port cascade output
      .BCOUT(BCOUT),                   // 18-bit output: B port cascade output
      .CARRYCASCOUT(CARRYCASCOUT),     // 1-bit output: Cascade carry output
      .MULTSIGNOUT(MULTSIGNOUT),       // 1-bit output: Multiplier sign cascade output
      .PCOUT(PCOUT),                   // 48-bit output: Cascade output
      // Status outputs (1 bit each)
      .OVERFLOW(OVERFLOW),             // 1-bit output: Overflow in add/acc output
      .PATTERNBDETECT(PATTERNBDETECT), // 1-bit output: Pattern bar detect output
      .PATTERNDETECT(PATTERNDETECT),   // 1-bit output: Pattern detect output
      .UNDERFLOW(UNDERFLOW),           // 1-bit output: Underflow in add/acc output
      // Data outputs (width differs per port, see each line)
      .CARRYOUT(CARRYOUT),             // 4-bit output: Carry output
      .P(P),                           // 48-bit output: Primary data output
      // Cascade inputs (width differs per port, see each line)
      .ACIN(ACIN),                     // 30-bit input: A cascade data input
      .BCIN(BCIN),                     // 18-bit input: B cascade input
      .CARRYCASCIN(CARRYCASCIN),       // 1-bit input: Cascade carry input
      .MULTSIGNIN(MULTSIGNIN),         // 1-bit input: Multiplier sign input
      .PCIN(PCIN),                     // 48-bit input: P cascade input
      // Control inputs (width differs per port, see each line)
      .ALUMODE(ALUMODE),               // 4-bit input: ALU control input
      .CARRYINSEL(CARRYINSEL),         // 3-bit input: Carry select input
      .CLK(CLK),                       // 1-bit input: Clock input
      .INMODE(INMODE),                 // 5-bit input: INMODE control input
      .OPMODE(OPMODE),                 // 7-bit input: Operation mode input
      // Data inputs (width differs per port, see each line)
      .A(A),                           // 30-bit input: A data input
      .B(B),                           // 18-bit input: B data input
      .C(C),                           // 48-bit input: C data input
      .CARRYIN(CARRYIN),               // 1-bit input: Carry input signal
      .D(D),                           // 25-bit input: D data input
      // Reset/Clock Enable: 1-bit (each) input: Reset/Clock Enable Inputs
      .CEA1(CEA1),                     // 1-bit input: Clock enable input for 1st stage AREG
      .CEA2(CEA2),                     // 1-bit input: Clock enable input for 2nd stage AREG
      .CEAD(CEAD),                     // 1-bit input: Clock enable input for ADREG
      .CEALUMODE(CEALUMODE),           // 1-bit input: Clock enable input for ALUMODE
      .CEB1(CEB1),                     // 1-bit input: Clock enable input for 1st stage BREG
      .CEB2(CEB2),                     // 1-bit input: Clock enable input for 2nd stage BREG
      .CEC(CEC),                       // 1-bit input: Clock enable input for CREG
      .CECARRYIN(CECARRYIN),           // 1-bit input: Clock enable input for CARRYINREG
      .CECTRL(CECTRL),                 // 1-bit input: Clock enable input for OPMODEREG and CARRYINSELREG
      .CED(CED),                       // 1-bit input: Clock enable input for DREG
      .CEINMODE(CEINMODE),             // 1-bit input: Clock enable input for INMODEREG
      .CEM(CEM),                       // 1-bit input: Clock enable input for MREG
      .CEP(CEP),                       // 1-bit input: Clock enable input for PREG
      .RSTA(RSTA),                     // 1-bit input: Reset input for AREG
      .RSTALLCARRYIN(RSTALLCARRYIN),   // 1-bit input: Reset input for CARRYINREG
      .RSTALUMODE(RSTALUMODE),         // 1-bit input: Reset input for ALUMODEREG
      .RSTB(RSTB),                     // 1-bit input: Reset input for BREG
      .RSTC(RSTC),                     // 1-bit input: Reset input for CREG
      .RSTCTRL(RSTCTRL),               // 1-bit input: Reset input for OPMODEREG and CARRYINSELREG
      .RSTD(RSTD),                     // 1-bit input: Reset input for DREG and ADREG
      .RSTINMODE(RSTINMODE),           // 1-bit input: Reset input for INMODEREG
      .RSTM(RSTM),                     // 1-bit input: Reset input for MREG
      .RSTP(RSTP)                      // 1-bit input: Reset input for PREG
   );

可以实现最大25bit*18bit有符号数乘法运算。直接使用原语比较复杂，需要了解DSP硬核的各项参数设置，适合进阶开发，不适用于初学者。

2.2 通过vivado调用DSP48 IP

vivado软件提供了DSP48的IP，便于用户配置和使用。若要实现乘法，则将其配置为A*B：

pipeline选项，用户可以自定义也可以使用默认配置。就实现A*B来说，pipeline仅仅影响延时的时钟周期数，默认使能DSP内部所有pipeline register。pipeline register也有助于提高逻辑的时序性能。

实现18bit*18bit有符号数的乘法，配置如下：

2.3 使用宏

由于原语的使用较为复杂，调用IP核也相对麻烦，所以Xilinx提供了调用DSP实现乘法的宏：

   // Instantiation template for the Xilinx MULT_MACRO multiplier macro,
   // configured here for an 18 x 18 multiply on a 7-series device with a
   // requested latency of 3 clock cycles. P, A, B, CE, CLK and RST are
   // placeholder signal names to be declared by the surrounding module.
   MULT_MACRO #(
      .DEVICE("7SERIES"), // Target Device: "7SERIES"
      .LATENCY(3),        // Desired clock cycle latency, 0-4
      .WIDTH_A(18),       // Multiplier A-input bus width, 1-25
      .WIDTH_B(18)        // Multiplier B-input bus width, 1-18
   ) MULT_MACRO_inst (
      .P(P),     // Multiplier output bus; no WIDTH_P is set here - per the Xilinx libraries guide its width is WIDTH_A + WIDTH_B
      .A(A),     // Multiplier input A bus, width determined by WIDTH_A parameter
      .B(B),     // Multiplier input B bus, width determined by WIDTH_B parameter
      .CE(CE),   // 1-bit active high input clock enable
      .CLK(CLK), // 1-bit positive edge clock input
      .RST(RST)  // 1-bit input active high reset
   );

MULT_MACRO的参数设置和端口简洁了很多，它实际上是对DSP原语进行了二次封装，仅实现其乘法的功能，最大实现25bit*18bit有符号数乘法运算。

使用方法：

// Testbench fragment exercising MULT_MACRO with signed 18-bit operands.
// It is meant to be placed inside a testbench module.
reg                clk;
reg                reset;
reg  signed [17:0] a;
reg  signed [17:0] b;
wire signed [35:0] c;      // 18 + 18 = 36-bit product

// Clock generator: starts low and toggles every 10 time units (period 20).
initial begin
  clk = 1'b0;
  forever #10 clk = ~clk;
end

// Stimulus: hold reset for 200 time units, then apply two operand pairs
// 200 time units apart.
initial begin
  reset = 1'b1;
  a     = 18'sd0;
  b     = 18'sd0;

  #200 reset = 1'b0;

  #200;
  a = -18'sd19;
  b =  18'sd253;           // expected product: -4807

  #200;
  a = -18'sd22;
  b = -18'sd33;            // expected product: 726
end

// Device under test: clock enable tied high, 3 clock cycles of latency requested.
MULT_MACRO #(
  .DEVICE  ("7SERIES"),    // Target device family
  .LATENCY (3),            // Desired clock cycle latency, 0-4
  .WIDTH_A (18),           // A-input bus width, 1-25
  .WIDTH_B (18)            // B-input bus width, 1-18
) MULT_MACRO_inst (
  .P   (c),                // Product output, connected to the 36-bit wire c
  .A   (a),                // Operand A, width set by WIDTH_A
  .B   (b),                // Operand B, width set by WIDTH_B
  .CE  (1'b1),             // Active-high clock enable, always enabled
  .CLK (clk),              // Positive-edge clock
  .RST (reset)             // Active-high reset
);

仿真结果：