Xilinx FPGA乘法的实现

        要在Xilinx FPGA中实现乘法运算有以下几种方法:

1、直接用*表示

// Unsigned 8-bit x 8-bit multiply written with the plain `*` operator.
// The 16-bit result is wide enough for the largest product (255 * 255).
wire [7:0]  a;
wire [7:0]  b;
wire [15:0] c = a * b;

        这种情况下,乘法可能被综合为LUT逻辑实现,也可能会调用FPGA内部的DSP硬核实现。如果给乘积信号加上一条综合属性(use_dsp):

// Unsigned 8-bit x 8-bit multiply; the use_dsp attribute on the product net
// asks the synthesis tool to implement it in a DSP slice.
// The attribute value must be written with plain ASCII double quotes:
// typographic quotes are not valid Verilog string delimiters.
wire [7:0]  a,b;
(* use_dsp = "yes" *) wire [15:0] c;

assign c = a * b;

        那么综合工具会调用DSP来实现该乘法。

2、调用DSP硬核

2.1 例化DSP原语

        例如7系列FPGA的原语为:

   // DSP48E1 primitive instantiation template (7-series DSP slice).
   // As configured here: A and B are taken from the direct ports, the D port
   // is not used, the multiplier is enabled, pattern detection is off, and
   // every pipeline-register attribute is set to 1.
   // Each port is tied to a same-named placeholder net; those nets must be
   // declared and driven by the surrounding design before this will elaborate.
   DSP48E1 #(
      // Feature Control Attributes: Data Path Selection
      .A_INPUT("DIRECT"),               // Selects A input source, "DIRECT" (A port) or "CASCADE" (ACIN port)
      .B_INPUT("DIRECT"),               // Selects B input source, "DIRECT" (B port) or "CASCADE" (BCIN port)
      .USE_DPORT("FALSE"),              // Select D port usage (TRUE or FALSE)
      .USE_MULT("MULTIPLY"),            // Select multiplier usage ("MULTIPLY", "DYNAMIC", or "NONE")
      .USE_SIMD("ONE48"),               // SIMD selection ("ONE48", "TWO24", "FOUR12")
      // Pattern Detector Attributes: Pattern Detection Configuration
      .AUTORESET_PATDET("NO_RESET"),    // "NO_RESET", "RESET_MATCH", "RESET_NOT_MATCH"
      .MASK(48'h3fffffffffff),          // 48-bit mask value for pattern detect (1=ignore)
      .PATTERN(48'h000000000000),       // 48-bit pattern match for pattern detect
      .SEL_MASK("MASK"),                // "C", "MASK", "ROUNDING_MODE1", "ROUNDING_MODE2"
      .SEL_PATTERN("PATTERN"),          // Select pattern value ("PATTERN" or "C")
      .USE_PATTERN_DETECT("NO_PATDET"), // Enable pattern detect ("PATDET" or "NO_PATDET")
      // Register Control Attributes: Pipeline Register Configuration
      .ACASCREG(1),                     // Number of pipeline stages between A/ACIN and ACOUT (0, 1 or 2)
      .ADREG(1),                        // Number of pipeline stages for pre-adder (0 or 1)
      .ALUMODEREG(1),                   // Number of pipeline stages for ALUMODE (0 or 1)
      .AREG(1),                         // Number of pipeline stages for A (0, 1 or 2)
      .BCASCREG(1),                     // Number of pipeline stages between B/BCIN and BCOUT (0, 1 or 2)
      .BREG(1),                         // Number of pipeline stages for B (0, 1 or 2)
      .CARRYINREG(1),                   // Number of pipeline stages for CARRYIN (0 or 1)
      .CARRYINSELREG(1),                // Number of pipeline stages for CARRYINSEL (0 or 1)
      .CREG(1),                         // Number of pipeline stages for C (0 or 1)
      .DREG(1),                         // Number of pipeline stages for D (0 or 1)
      .INMODEREG(1),                    // Number of pipeline stages for INMODE (0 or 1)
      .MREG(1),                         // Number of multiplier pipeline stages (0 or 1)
      .OPMODEREG(1),                    // Number of pipeline stages for OPMODE (0 or 1)
      .PREG(1)                          // Number of pipeline stages for P (0 or 1)
   )
   DSP48E1_inst (
      // Cascade: 30-bit (each) output: Cascade Ports
      .ACOUT(ACOUT),                   // 30-bit output: A port cascade output
      .BCOUT(BCOUT),                   // 18-bit output: B port cascade output
      .CARRYCASCOUT(CARRYCASCOUT),     // 1-bit output: Cascade carry output
      .MULTSIGNOUT(MULTSIGNOUT),       // 1-bit output: Multiplier sign cascade output
      .PCOUT(PCOUT),                   // 48-bit output: Cascade output
      // Control: 1-bit (each) output: Control Inputs/Status Bits
      .OVERFLOW(OVERFLOW),             // 1-bit output: Overflow in add/acc output
      .PATTERNBDETECT(PATTERNBDETECT), // 1-bit output: Pattern bar detect output
      .PATTERNDETECT(PATTERNDETECT),   // 1-bit output: Pattern detect output
      .UNDERFLOW(UNDERFLOW),           // 1-bit output: Underflow in add/acc output
      // Data: 4-bit (each) output: Data Ports
      .CARRYOUT(CARRYOUT),             // 4-bit output: Carry output
      .P(P),                           // 48-bit output: Primary data output
      // Cascade: 30-bit (each) input: Cascade Ports
      .ACIN(ACIN),                     // 30-bit input: A cascade data input
      .BCIN(BCIN),                     // 18-bit input: B cascade input
      .CARRYCASCIN(CARRYCASCIN),       // 1-bit input: Cascade carry input
      .MULTSIGNIN(MULTSIGNIN),         // 1-bit input: Multiplier sign input
      .PCIN(PCIN),                     // 48-bit input: P cascade input
      // Control: 4-bit (each) input: Control Inputs/Status Bits
      .ALUMODE(ALUMODE),               // 4-bit input: ALU control input
      .CARRYINSEL(CARRYINSEL),         // 3-bit input: Carry select input
      .CLK(CLK),                       // 1-bit input: Clock input
      .INMODE(INMODE),                 // 5-bit input: INMODE control input
      .OPMODE(OPMODE),                 // 7-bit input: Operation mode input
      // Data: 30-bit (each) input: Data Ports
      .A(A),                           // 30-bit input: A data input
      .B(B),                           // 18-bit input: B data input
      .C(C),                           // 48-bit input: C data input
      .CARRYIN(CARRYIN),               // 1-bit input: Carry input signal
      .D(D),                           // 25-bit input: D data input
      // Reset/Clock Enable: 1-bit (each) input: Reset/Clock Enable Inputs
      .CEA1(CEA1),                     // 1-bit input: Clock enable input for 1st stage AREG
      .CEA2(CEA2),                     // 1-bit input: Clock enable input for 2nd stage AREG
      .CEAD(CEAD),                     // 1-bit input: Clock enable input for ADREG
      .CEALUMODE(CEALUMODE),           // 1-bit input: Clock enable input for ALUMODE
      .CEB1(CEB1),                     // 1-bit input: Clock enable input for 1st stage BREG
      .CEB2(CEB2),                     // 1-bit input: Clock enable input for 2nd stage BREG
      .CEC(CEC),                       // 1-bit input: Clock enable input for CREG
      .CECARRYIN(CECARRYIN),           // 1-bit input: Clock enable input for CARRYINREG
      .CECTRL(CECTRL),                 // 1-bit input: Clock enable input for OPMODEREG and CARRYINSELREG
      .CED(CED),                       // 1-bit input: Clock enable input for DREG
      .CEINMODE(CEINMODE),             // 1-bit input: Clock enable input for INMODEREG
      .CEM(CEM),                       // 1-bit input: Clock enable input for MREG
      .CEP(CEP),                       // 1-bit input: Clock enable input for PREG
      .RSTA(RSTA),                     // 1-bit input: Reset input for AREG
      .RSTALLCARRYIN(RSTALLCARRYIN),   // 1-bit input: Reset input for CARRYINREG
      .RSTALUMODE(RSTALUMODE),         // 1-bit input: Reset input for ALUMODEREG
      .RSTB(RSTB),                     // 1-bit input: Reset input for BREG
      .RSTC(RSTC),                     // 1-bit input: Reset input for CREG
      .RSTCTRL(RSTCTRL),               // 1-bit input: Reset input for OPMODEREG and CARRYINSELREG
      .RSTD(RSTD),                     // 1-bit input: Reset input for DREG and ADREG
      .RSTINMODE(RSTINMODE),           // 1-bit input: Reset input for INMODEREG
      .RSTM(RSTM),                     // 1-bit input: Reset input for MREG
      .RSTP(RSTP)                      // 1-bit input: Reset input for PREG
   );

        可以实现最大25bit*18bit有符号数乘法运算。直接使用原语比较复杂,需要了解DSP硬核的各项参数设置,适合进阶开发,不适用于初学者。

2.2 通过vivado调用DSP48 IP

        vivado软件提供了DSP48的IP,便于用户配置和使用。若要实现乘法,则将其配置为A*B:

        pipeline选项,用户可以自定义也可以使用默认配置。就实现A*B来说,pipeline仅仅影响延时的时钟周期数,默认使能DSP内部所有pipeline register。pipeline register也有助于提高逻辑的时序性能。

        实现18bit*18bit有符号数的乘法,配置如下:

 

2.3 使用宏

        由于原语的使用较为复杂,调用IP核也相对麻烦,所以Xilinx提供了调用DSP实现乘法的宏:

   // MULT_MACRO instantiation template: a clocked multiplier with only data,
   // clock, clock-enable and reset ports. Here it is set up for an 18 x 18
   // multiply with a latency of 3 on a 7-series device.
   // Ports are tied to same-named placeholder nets that the user must declare.
   MULT_MACRO #(
      .DEVICE("7SERIES"), // Target Device: "7SERIES" 
      .LATENCY(3),        // Desired clock cycle latency, 0-4
      .WIDTH_A(18),       // Multiplier A-input bus width, 1-25
      .WIDTH_B(18)        // Multiplier B-input bus width, 1-18
   ) MULT_MACRO_inst (
      .P(P),     // Multiplier output bus; no WIDTH_P is set in this template - per the Xilinx libraries guide the width is WIDTH_A + WIDTH_B (36 bits here)
      .A(A),     // Multiplier input A bus, width determined by WIDTH_A parameter
      .B(B),     // Multiplier input B bus, width determined by WIDTH_B parameter
      .CE(CE),   // 1-bit active high input clock enable
      .CLK(CLK), // 1-bit positive edge clock input
      .RST(RST)  // 1-bit input active high reset
   );

        MULT_MACRO的参数设置和端口简洁了很多,它实际上是对DSP原语进行了二次封装,仅实现其乘法的功能,最大实现25bit*18bit有符号数乘法运算。

        使用方法:

// Testbench fragment for an 18 x 18 signed MULT_MACRO.
// Meant to be placed inside a simulation module; it has no $finish, so the
// simulator's run time decides when the simulation stops.

// Signed operands and their 36-bit (18 + 18) product.
reg  signed [17:0] a;
reg  signed [17:0] b;
wire signed [35:0] c;

reg clk;
reg reset;

// Free-running clock: toggles every 10 time units (period = 20).
always #10 clk = ~clk;

// Stimulus: start in reset with zero operands, release reset after 200 time
// units, then apply two operand pairs spaced 200 time units apart.
initial begin
  reset = 1;
  clk   = 0;
  a     = 0;
  b     = 0;

  #200 reset = 0;

  #200 a = -19;
       b = 253;     // -19 * 253 = -4807

  #200 a = -22;
       b = -33;     // -22 * -33 = 726
end

// Device under test: clock enable is held high, reset is active high.
MULT_MACRO #(
  .DEVICE   ("7SERIES"),  // target device family
  .LATENCY  (3),          // requested clock-cycle latency (allowed range 0-4)
  .WIDTH_A  (18),         // A operand width (allowed range 1-25)
  .WIDTH_B  (18)          // B operand width (allowed range 1-18)
) MULT_MACRO_inst (
  .CLK  (clk),    // positive-edge clock
  .RST  (reset),  // active-high reset
  .CE   (1'b1),   // clock enable, always asserted
  .A    (a),      // operand A
  .B    (b),      // operand B
  .P    (c)       // product
);

        仿真结果:

        LATENCY设置为3,对应DSP内部的3级pipeline register,所以乘法的输出比输入延时3个时钟周期。

3、调用乘法器IP

        由于DSP硬核实现的乘法位宽受限,当使用位宽较大的数据进行乘法运算时,就需要借助乘法器IP了。例如,用LUT实现1个40bit*40bit的有符号数乘法:

         用多个DSP级联实现32bit*32bit的无符号数乘法:

 

  • 12
    点赞
  • 90
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

MmikerR

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值