(学习笔记!只是笔记)
实验十三 流水线加法器
实验目的
1、掌握寄存器的设计与应用。
2、学习和掌握流水线加法器的设计与实现。
实验内容
1、设计并实现一个 32 位 2 级流水线加法器,并下板验证。
2、主要从 RTL 电路分析、仿真波形、开发板资源使用情况等方面与实验六中
的加法器和系统自带“+”加法器进行比较。
实验原理
32 位的 2 级流水线加法器使用两个 16 位加法器,其中第一个加法器执
行低 16 位的加法,而第二个加法器执行高 16 位的加法,电路结构图如下图
所示(供参考)。
实验步骤(请同学们自行完成)
仿真比较时,需比较当输入的值每个周期都在变化并要求每个周期产生其和
值的情况。
顶层模块,scan_led_hex_disp
将结果显示在开发板(BASYS 3)的七段数码管上。
将 SW3 ~ SW0 作为第一操作数(可显示在左起第一个 7 段数码管上),SW7 ~ SW4 作为第二操作数(可显示在左起第二个 7 段数码管上),SW15=0(1)时做加(减)法,将结果显示在最右侧的 7 段数码管上,进位和溢出标志可通过 LED 灯显示。
`timescale 1ns / 1ps
// Top level: feeds two 4-bit operands into the 32-bit adder/subtractor and
// time-multiplexes both operands and the low nibble of the result onto a
// 4-digit seven-segment display.
module scan_led_hex_disp(
    input            clk,
    input            reset,   // asynchronous, active-high (see the counter below)
    input      [3:0] hex0,    // first operand, shown on the digit enabled by an[3]
    input      [3:0] hex1,    // second operand, shown on the digit enabled by an[2]
    //input [3:0] dp_in,      // decimal-point control (disabled)
    output reg [3:0] an,      // digit enables, active low
    output reg [7:0] sseg,    // sseg[7] = decimal point, sseg[6:0] = segment pattern
    input            c0,      // 0: add, 1: subtract (wired to add_sub of the adder)
    output           C32,     // carry out of the 32-bit operation
    output           Gm,      // group carry-generate flag
    output           Pm       // group carry-propagate flag
);

    // Width of the free-running scan counter: its two MSBs pick the active
    // digit, so each digit slot lasts 2^(N-2) clock cycles.
    localparam N = 18;

    reg  [N-1:0] scan_cnt;
    wire [1:0]   slot = scan_cnt[N-1:N-2];

    wire [32:1]  result;      // full 32-bit result; only result[4:1] is displayed
    reg  [3:0]   digit;       // nibble currently routed to the segment decoder
    reg          dot;         // 1 = light the decimal point of the active digit
    reg  [6:0]   seg;         // decoded segment pattern for `digit`

    // Decimal points double as status marks: the second digit's point is lit
    // while subtracting, the result digit's point is lit when subtracting and
    // no carry came out (the adder then outputs the negated sum).
    wire dot_operator = c0;
    wire dot_result   = c0 & ~C32;

    adder_substractor_32 adder_substractor_32(
        .clk    (clk),
        .rst_n  (reset),
        .in1    ({28'b0, hex0}),
        .in2    ({28'b0, hex1}),
        .add_sub(c0),
        .sum    (result),
        .c32    (C32),
        .Gm     (Gm),
        .Pm     (Pm)
    );

    // Free-running scan counter with asynchronous active-high reset.
    always @(posedge clk, posedge reset) begin
        if (reset)
            scan_cnt <= 0;
        else
            scan_cnt <= scan_cnt + 1'b1;
    end

    // Digit multiplexer. Slots 2 and 3 both drive the result digit, so the
    // digit enabled by an[1] is never lit.
    always @* begin
        case (slot)
            2'b00: begin
                an    = 4'b0111;        // an[3] low: first operand
                digit = hex0;
                dot   = 1'b0;
            end
            2'b01: begin
                an    = 4'b1011;        // an[2] low: second operand
                digit = hex1;
                dot   = dot_operator;
            end
            default: begin
                an    = 4'b1110;        // an[0] low: result digit
                digit = result[4:1];
                dot   = dot_result;
            end
        endcase
    end

    // Hex to seven-segment decoder; a 0 bit lights the segment (the original
    // note calls this a common-anode display).
    always @* begin
        case (digit)
            4'h0:    seg = 7'b0000001;
            4'h1:    seg = 7'b1001111;
            4'h2:    seg = 7'b0010010;
            4'h3:    seg = 7'b0000110;
            4'h4:    seg = 7'b1001100;
            4'h5:    seg = 7'b0100100;
            4'h6:    seg = 7'b0100000;
            4'h7:    seg = 7'b0001111;
            4'h8:    seg = 7'b0000000;
            4'h9:    seg = 7'b0001100;
            4'ha:    seg = 7'b0001000;
            4'hb:    seg = 7'b1100000;
            4'hc:    seg = 7'b1110010;
            4'hd:    seg = 7'b1000010;
            4'he:    seg = 7'b0110000;
            4'hf:    seg = 7'b0111000;
            default: seg = 7'b0111000;
        endcase
        sseg = {~dot, seg};             // the decimal point is active low too
    end

endmodule
adder_substractor_32模块
通过对第二个输入 in2 进行 2 的补码转换(按位取反再加 1)来实现减法操作。当add_sub=1时,是减法,add_sub=0时,是加法。
`timescale 1ns / 1ps
// 32-bit adder/subtractor built around the pipelined adder32.
//
//   add_sub = 0 : sum = in1 + in2
//   add_sub = 1 : in1 + ~in2 + 1 is computed (two's-complement subtraction).
//                 When that produces no carry out (c32 = 0) the difference is
//                 negative and `sum` carries its magnitude instead.
//
// The sum leaves adder32 two clock cycles after the operands were applied
// (its pipeline shifts the low half through Lsum_d1 and Lsum_d2), so the
// add_sub flag used for the magnitude selection is delayed by the same two
// cycles. Previously the live add_sub was used, which paired the selection
// with the wrong operation whenever add_sub changed between cycles.
module adder_substractor_32 (
    input  [32:1] in1,
    input  [32:1] in2,
    input         add_sub,
    output        Gm,
    output        Pm,
    output [32:1] sum,
    output        c32,
    input         clk,
    input         rst_n
);

    // Subtraction = add the one's complement of in2 with carry-in 1.
    wire [32:1] operand_b = add_sub ? ~in2 : in2;
    wire        carry_in  = add_sub;

    wire [32:1] raw_sum;   // pipelined result of in1 + operand_b + carry_in
    wire [32:1] neg_sum;   // two's-complement negation of raw_sum

    // add_sub delayed to line up with raw_sum.
    // NOTE: despite its name, rst_n is treated as active-high here because
    // the rest of the design does the same: the top level connects its
    // active-high `reset` to this port and the pipeline tests `if (reset)`.
    reg add_sub_d1, add_sub_d2;

    always @(posedge clk or posedge rst_n) begin
        if (rst_n) begin
            add_sub_d1 <= 1'b0;
            add_sub_d2 <= 1'b0;
        end
        else begin
            add_sub_d1 <= add_sub;
            add_sub_d2 <= add_sub_d1;
        end
    end

    adder32 adder32(
        .A    (in1),
        .B    (operand_b),
        .c0   (carry_in),
        .Gm   (Gm),
        .Pm   (Pm),
        .S    (raw_sum),
        .C32  (c32),
        .clk  (clk),
        .rst_n(rst_n)
    );

    // Combinational negation of the raw sum: ~raw_sum + 0 + 1.
    adder32_carry_lookahead_unit adder32_carry_lookahead_unit_temp(
        .A  (~raw_sum),
        .B  (32'b0),
        .c0 (1'b1),
        .Gm (),
        .Pm (),
        .S  (neg_sum),
        .C32()
    );

    // A subtraction without carry out is negative: output the magnitude.
    // NOTE(review): assumes c32 describes the same operation as raw_sum in
    // this cycle - confirm against adder32's carry timing.
    assign sum = (add_sub_d2 & ~c32) ? neg_sum : raw_sum;

endmodule
adder32模块:
`timescale 1ns / 1ps
//32位并行进位加法器顶层模块
// 32-bit adder wrapper: delegates the addition to the 2-stage pipelined adder
// and derives the 32-bit group generate/propagate flags from the two 16-bit
// group signals the pipeline exposes.
//
//   A, B   : operands
//   c0     : carry in
//   S      : sum, produced by the pipeline
//   C32    : carry out, produced by the pipeline
//   Gm     : group generate  - a carry out is produced regardless of c0
//   Pm     : group propagate - a carry out is produced only if c0 is 1
//   clk    : clock for the pipeline registers
//   rst_n  : forwarded unchanged to the pipeline's `reset` port
module adder32(A,B,c0,Gm,Pm,S,C32,clk,rst_n);
input [32:1] A;
input [32:1] B;
input c0;
output Gm;
output Pm;
output [32:1] S;
output C32;
input clk;
input rst_n;

// 16-bit group signals: index 1 = low half, index 2 = high half.
wire px1,gx1,px2,gx2;

adder_pipeline_32bit_2level adder32_to_pipeline_32bit(
    .FinalSum(S),
    .co(C32),
    .A(A),
    .B(B),
    .ci(c0),
    .clk(clk),
    .reset(rst_n),
    .px1(px1),
    .gx1(gx1),
    .px2(px2),
    .gx2(gx2)
);

// Group generate must not depend on the carry in: the upper half generates a
// carry itself, or it propagates one generated by the lower half. The former
// expression also XOR-ed in (px1 & c0), which made Gm depend on c0 and could
// cancel a genuine generate.
assign Gm = gx2 | (px2 & gx1);

// The whole word propagates a carry only when both halves do.
assign Pm = px1 & px2;

endmodule
adder32_carry_lookahead_unit模块
普通的32位并行加法器
`timescale 1ns / 1ps
//32位并行进位加法器顶层模块
// Purely combinational 32-bit carry-lookahead adder made of two 16-bit CLA
// blocks.
//
//   A, B : operands
//   c0   : carry in
//   S    : sum
//   C32  : carry out
//   Gm   : group generate  - a carry out is produced regardless of c0
//   Pm   : group propagate - a carry out is produced only if c0 is 1
module adder32_carry_lookahead_unit(A,B,c0,Gm,Pm,S,C32);
input [32:1] A;
input [32:1] B;
input c0;
output Gm;
output Pm;
output [32:1] S;
output C32;

// 16-bit group signals: index 1 = low half, index 2 = high half.
wire px1,gx1,px2,gx2;
// Carry between the two halves.
wire c16;

CLA_16 CLA1(
    .A(A[16:1]),
    .B(B[16:1]),
    .c0(c0),
    .S(S[16:1]),
    .px(px1),
    .gx(gx1)
);

CLA_16 CLA2(
    .A(A[32:17]),
    .B(B[32:17]),
    .c0(c16),
    .S(S[32:17]),
    .px(px2),
    .gx(gx2)
);

// Lookahead carries. A block's generate and propagate outputs are never 1 at
// the same time (propagate needs every bit pair to differ, generate needs at
// least one pair of ones), so OR gives the same value as the former XOR.
assign c16 = gx1 | (px1 & c0),
       C32 = gx2 | (px2 & c16);

// Group generate must not depend on c0; the former expression XOR-ed in
// (px1 & c0), which is neither a generate term nor a complete carry term.
assign Gm = gx2 | (px2 & gx1),
       Pm = px1 & px2;

endmodule
流水线加法器模块 adder_pipeline_32bit_2level。
不用系统自带的加法器,通过调用16位的CLA(carry lookahead unit)实现加法。
`timescale 1ns / 1ps
// 32-bit adder split into two pipeline stages, each built from a 16-bit
// carry-lookahead block (no use of the "+" operator).
//
//   stage 1 : low halves are added; the low sum, the carry out of the low
//             half and the untouched high halves are registered.
//   stage 2 : the registered high halves are added using the REGISTERED carry
//             of the same operand pair; the result is registered.
//
// Every output (FinalSum, co, px1, gx1, px2, gx2) refers to the operands and
// carry-in that were sampled two rising clock edges earlier. A new operand
// pair can be accepted on every clock.
//
//   A, B       : operands
//   ci         : carry in
//   clk        : clock, registers update on the rising edge
//   reset      : asynchronous, active-high; clears every pipeline register
//   FinalSum   : A + B + ci (low 32 bits)
//   co         : carry out of bit 31
//   px1, gx1   : propagate / generate of the low 16-bit group
//   px2, gx2   : propagate / generate of the high 16-bit group
//
// Fixes relative to the previous version:
//   * the sensitivity list said `negedge reset` while the body tested
//     `if(reset)`, so releasing reset triggered a spurious register load;
//   * the carry between the halves was a plain wire from the CURRENT low
//     half but fed the high-half adder working on operands one cycle old;
//   * co and the px/gx outputs were taken from different pipeline stages
//     than FinalSum.
module adder_pipeline_32bit_2level(A,B,clk,reset,FinalSum,ci,co,px1,gx1,px2,gx2);
input clk,reset;
input [31:0] A,B;
output [31:0] FinalSum;
input ci;
output co;
output wire px1,gx1,px2,gx2;

wire [31:0] FinalSum;

/************************ stage 1 ************************/
wire [15:0] Lsum_d1_nxt;          // low-half sum of the incoming operands
wire        px1_nxt, gx1_nxt;     // low-half group propagate / generate
wire        carry_d1_nxt;         // carry out of the low half

reg  [15:0] Lsum_d1;
reg  [15:0] Aup_d1, Bup_d1;       // high halves waiting for their carry
reg         carry_d1;
reg         px1_d1, gx1_d1;

/************************ stage 2 ************************/
wire [15:0] Usum_d2_nxt;          // high-half sum of the stage-1 operands
wire        px2_nxt, gx2_nxt;     // high-half group propagate / generate
wire        co_nxt;               // carry out of the high half

reg  [15:0] Lsum_d2;
reg  [15:0] Usum_d2;
reg         co_d2;
reg         px1_d2, gx1_d2;
reg         px2_d2, gx2_d2;

// Stage 1: low 16 bits.
CLA_16 CLA1(
    .A(A[15:0]),
    .B(B[15:0]),
    .c0(ci),
    .S(Lsum_d1_nxt),
    .px(px1_nxt),
    .gx(gx1_nxt)
);

// A CLA_16 never raises gx and px together (px needs every bit pair to
// differ, gx needs at least one pair of ones), so OR is the exact carry.
assign carry_d1_nxt = gx1_nxt | (px1_nxt & ci);

// Stage 2: high 16 bits plus the carry registered alongside them.
CLA_16 CLA2(
    .A(Aup_d1),
    .B(Bup_d1),
    .c0(carry_d1),
    .S(Usum_d2_nxt),
    .px(px2_nxt),
    .gx(gx2_nxt)
);

assign co_nxt = gx2_nxt | (px2_nxt & carry_d1);

// Outputs come straight from the stage-2 registers so they stay aligned.
assign FinalSum = {Usum_d2, Lsum_d2};
assign co  = co_d2;
assign px1 = px1_d2;
assign gx1 = gx1_d2;
assign px2 = px2_d2;
assign gx2 = gx2_d2;

always @(posedge clk or posedge reset) begin
    if (reset) begin
        Lsum_d1  <= 16'b0;
        Aup_d1   <= 16'b0;
        Bup_d1   <= 16'b0;
        carry_d1 <= 1'b0;
        px1_d1   <= 1'b0;
        gx1_d1   <= 1'b0;

        Lsum_d2  <= 16'b0;
        Usum_d2  <= 16'b0;
        co_d2    <= 1'b0;
        px1_d2   <= 1'b0;
        gx1_d2   <= 1'b0;
        px2_d2   <= 1'b0;
        gx2_d2   <= 1'b0;
    end
    else begin
        // stage 1 registers
        Lsum_d1  <= Lsum_d1_nxt;
        Aup_d1   <= A[31:16];
        Bup_d1   <= B[31:16];
        carry_d1 <= carry_d1_nxt;
        px1_d1   <= px1_nxt;
        gx1_d1   <= gx1_nxt;

        // stage 2 registers
        Lsum_d2  <= Lsum_d1;
        Usum_d2  <= Usum_d2_nxt;
        co_d2    <= co_nxt;
        px1_d2   <= px1_d1;
        gx1_d2   <= gx1_d1;
        px2_d2   <= px2_nxt;
        gx2_d2   <= gx2_nxt;
    end
end

endmodule
16位CLA部件:
`timescale 1ns / 1ps
//16位CLA部件
// 16-bit carry-lookahead block: four 4-bit adders whose carry inputs are
// computed in parallel from the 4-bit group generate/propagate signals.
//
//   A, B : operands
//   c0   : carry in
//   S    : sum
//   px   : 16-bit group propagate (all four nibbles propagate)
//   gx   : 16-bit group generate
module CLA_16(A,B,c0,S,px,gx);
input  [16:1] A;
input  [16:1] B;
input         c0;
output        gx,px;
output [16:1] S;

// Per-nibble group signals; index 1 is the least significant nibble.
wire [4:1] grp_g;
wire [4:1] grp_p;
// Carry fed into each nibble.
wire [4:1] grp_cin;

// Second-level lookahead: every nibble's carry-in depends only on c0 and on
// the group signals of the nibbles below it.
assign grp_cin[1] = c0;
assign grp_cin[2] = grp_g[1] ^ (grp_p[1] & c0);
assign grp_cin[3] = grp_g[2] ^ (grp_p[2] & grp_g[1])
                             ^ (grp_p[2] & grp_p[1] & c0);
assign grp_cin[4] = grp_g[3] ^ (grp_p[3] & grp_g[2])
                             ^ (grp_p[3] & grp_p[2] & grp_g[1])
                             ^ (grp_p[3] & grp_p[2] & grp_p[1] & c0);

// Nibble k covers bits [4k : 4k-3]; the adders' own carry outputs are unused.
genvar k;
generate
    for (k = 1; k <= 4; k = k + 1) begin : nibble
        adder_4 u_add4 (
            .x (A[4*k -: 4]),
            .y (B[4*k -: 4]),
            .c0(grp_cin[k]),
            .c4(),
            .F (S[4*k -: 4]),
            .Gm(grp_g[k]),
            .Pm(grp_p[k])
        );
    end
endgenerate

// 16-bit group signals handed to the next lookahead level.
assign px = &grp_p;
assign gx = grp_g[4] ^ (grp_p[4] & grp_g[3])
                     ^ (grp_p[4] & grp_p[3] & grp_g[2])
                     ^ (grp_p[4] & grp_p[3] & grp_p[2] & grp_g[1]);

endmodule
四位并行进位加法器:
`timescale 1ns / 1ps
//四位并行进位加法器
// 4-bit carry-lookahead adder: four 1-bit full adders produce the sum bits,
// while the CLA block computes every carry in parallel from the per-bit
// propagate/generate signals.
//
//   x, y : operands
//   c0   : carry in
//   c4   : carry out
//   F    : sum
//   Gm   : 4-bit group generate
//   Pm   : 4-bit group propagate
module adder_4(x,y,c0,c4,F,Gm,Pm);
input  [4:1] x;
input  [4:1] y;
input        c0;
output       c4,Gm,Pm;
output [4:1] F;

// Per-bit propagate (bits differ) and generate (both bits set).
wire [4:1] p = x ^ y;
wire [4:1] g = x & y;

// Lookahead carries into bits 2..4, and the carry-in seen by each bit.
wire       c1, c2, c3;
wire [4:1] bit_cin = {c3, c2, c1, c0};

// Sum bits only; the ripple carry outputs of the full adders are unused.
genvar k;
generate
    for (k = 1; k <= 4; k = k + 1) begin : bit_slice
        adder u_fa (
            .X   (x[k]),
            .Y   (y[k]),
            .Cin (bit_cin[k]),
            .F   (F[k]),
            .Cout()
        );
    end
endgenerate

CLA u_cla (
    .c0(c0),
    .p1(p[1]), .p2(p[2]), .p3(p[3]), .p4(p[4]),
    .g1(g[1]), .g2(g[2]), .g3(g[3]), .g4(g[4]),
    .c1(c1),   .c2(c2),   .c3(c3),   .c4(c4)
);

// Group signals for the next lookahead level.
assign Pm = &p;
assign Gm = g[4] ^ (p[4] & g[3])
                 ^ (p[4] & p[3] & g[2])
                 ^ (p[4] & p[3] & p[2] & g[1]);

endmodule
一位全加器:
`timescale 1ns / 1ps
//一位全加器
// 1-bit full adder.
//   X, Y : operand bits
//   Cin  : carry in
//   F    : sum bit
//   Cout : carry out
module adder(X,Y,Cin,F,Cout);
input  X,Y,Cin;
output F,Cout;

// XOR of the two operand bits, shared by the sum and the carry.
wire half_sum = X ^ Y;

assign F    = half_sum ^ Cin;
// Carry out: the operands differ and a carry arrives, or both operands are 1.
assign Cout = (half_sum & Cin) | (X & Y);

endmodule
4位CLA部件:
`timescale 1ns / 1ps
/******************4位CLA部件************************/
// 4-bit carry-lookahead unit: derives the four carries directly from the
// carry-in and the per-bit propagate (p) / generate (g) inputs.
//   c0      : carry in
//   p1..p4  : per-bit propagate, bit 1 least significant
//   g1..g4  : per-bit generate
//   c1..c4  : carry out of bits 1..4
module CLA(c0,c1,c2,c3,c4,p1,p2,p3,p4,g1,g2,g3,g4);
input  c0,g1,g2,g3,g4,p1,p2,p3,p4;
output c1,c2,c3,c4;

// Each carry is the XOR of "generated at bit k and propagated up to the top"
// terms, plus the carry-in propagated through every bit below.
assign c1 = g1
          ^ (p1 & c0);

assign c2 = g2
          ^ (p2 & g1)
          ^ (p2 & p1 & c0);

assign c3 = g3
          ^ (p3 & g2)
          ^ (p3 & p2 & g1)
          ^ (p3 & p2 & p1 & c0);

assign c4 = g4
          ^ (p4 & g3)
          ^ (p4 & p3 & g2)
          ^ (p4 & p3 & p2 & g1)
          ^ (p4 & p3 & p2 & p1 & c0);

endmodule
.xdc 引脚分配:
## Pin and timing constraints for scan_led_hex_disp.
## Every port uses LVCMOS33; pin locations are unchanged from the original
## assignment, only grouped per port.

## Clock input.
## The design previously had no clock period constraint, so static timing
## analysis never checked the pipeline. 10 ns (100 MHz) is the value used in
## Digilent's Basys 3 master XDC for pin W5 - adjust if another clock is used.
set_property -dict { PACKAGE_PIN W5 IOSTANDARD LVCMOS33 } [get_ports clk]
create_clock -add -name sys_clk_pin -period 10.00 -waveform {0 5} [get_ports clk]
set_property CLOCK_DEDICATED_ROUTE FALSE [get_nets clk]

## Asynchronous reset input.
set_property -dict { PACKAGE_PIN U1 IOSTANDARD LVCMOS33 } [get_ports reset]

## Add (0) / subtract (1) select.
set_property -dict { PACKAGE_PIN R2 IOSTANDARD LVCMOS33 } [get_ports {c0}]

## Status flag outputs.
set_property -dict { PACKAGE_PIN P1 IOSTANDARD LVCMOS33 } [get_ports {C32}]
set_property -dict { PACKAGE_PIN P3 IOSTANDARD LVCMOS33 } [get_ports {Gm}]
set_property -dict { PACKAGE_PIN N3 IOSTANDARD LVCMOS33 } [get_ports {Pm}]

## First operand.
set_property -dict { PACKAGE_PIN V17 IOSTANDARD LVCMOS33 } [get_ports {hex0[0]}]
set_property -dict { PACKAGE_PIN V16 IOSTANDARD LVCMOS33 } [get_ports {hex0[1]}]
set_property -dict { PACKAGE_PIN W16 IOSTANDARD LVCMOS33 } [get_ports {hex0[2]}]
set_property -dict { PACKAGE_PIN W17 IOSTANDARD LVCMOS33 } [get_ports {hex0[3]}]

## Second operand.
set_property -dict { PACKAGE_PIN W15 IOSTANDARD LVCMOS33 } [get_ports {hex1[0]}]
set_property -dict { PACKAGE_PIN V15 IOSTANDARD LVCMOS33 } [get_ports {hex1[1]}]
set_property -dict { PACKAGE_PIN W14 IOSTANDARD LVCMOS33 } [get_ports {hex1[2]}]
set_property -dict { PACKAGE_PIN W13 IOSTANDARD LVCMOS33 } [get_ports {hex1[3]}]

## Seven-segment digit enables.
set_property -dict { PACKAGE_PIN U2 IOSTANDARD LVCMOS33 } [get_ports {an[0]}]
set_property -dict { PACKAGE_PIN U4 IOSTANDARD LVCMOS33 } [get_ports {an[1]}]
set_property -dict { PACKAGE_PIN V4 IOSTANDARD LVCMOS33 } [get_ports {an[2]}]
set_property -dict { PACKAGE_PIN W4 IOSTANDARD LVCMOS33 } [get_ports {an[3]}]

## Seven-segment segment lines (sseg[7] is the decimal point).
set_property -dict { PACKAGE_PIN U7 IOSTANDARD LVCMOS33 } [get_ports {sseg[0]}]
set_property -dict { PACKAGE_PIN V5 IOSTANDARD LVCMOS33 } [get_ports {sseg[1]}]
set_property -dict { PACKAGE_PIN U5 IOSTANDARD LVCMOS33 } [get_ports {sseg[2]}]
set_property -dict { PACKAGE_PIN V8 IOSTANDARD LVCMOS33 } [get_ports {sseg[3]}]
set_property -dict { PACKAGE_PIN U8 IOSTANDARD LVCMOS33 } [get_ports {sseg[4]}]
set_property -dict { PACKAGE_PIN W6 IOSTANDARD LVCMOS33 } [get_ports {sseg[5]}]
set_property -dict { PACKAGE_PIN W7 IOSTANDARD LVCMOS33 } [get_ports {sseg[6]}]
set_property -dict { PACKAGE_PIN V7 IOSTANDARD LVCMOS33 } [get_ports {sseg[7]}]
仿真文件:
(我不知道为什么仿真的时候,在加法减法切换的那一个时钟周期,会出现计算错误,不懂。但是上板做减法没发现错误)
`timescale 1ns / 1ps
//
// Company:
// Engineer:
//
// Create Date: 2022/11/12 21:53:18
// Design Name:
// Module Name: adder_substractor_32_tb
// Project Name:
// Target Devices:
// Tool Versions:
// Description:
//
// Dependencies:
//
// Revision:
// Revision 0.01 - File Created
// Additional Comments:
//
//
`timescale 1ns/1ns
// Testbench for adder_substractor_32: applies a new operand pair on every
// clock cycle so the pipelined result stream can be inspected in the waveform.
//
// Stimulus and the reset release are driven on the FALLING clock edge. The
// design samples its inputs on the rising edge, so changing them with
// blocking assignments on that same rising edge (as before) left the order
// of "testbench writes" versus "design reads" to the simulator.
module adder_substractor_32_tb;

reg  [32:1] A;
reg  [32:1] B;
reg         c0;      // 0: add, 1: subtract
wire        Gm;
wire        Pm;
wire [32:1] S;
wire        c32;
reg         clk;
reg         rst_n;   // driven HIGH to reset: the pipeline tests `if (reset)`

adder_substractor_32 adder_substractor_32(
    .in1(A),
    .in2(B),
    .add_sub(c0),
    .Gm(Gm),
    .Pm(Pm),
    .sum(S),
    .c32(c32),
    .clk(clk),
    .rst_n(rst_n)
);

// 20 ns clock period.
always #10 clk = ~clk;

// Wait for the next falling edge, then present one operation.
task apply;
    input        op;
    input [32:1] a;
    input [32:1] b;
    begin
        @(negedge clk);
        c0 = op;
        A  = a;
        B  = b;
    end
endtask

initial begin
    clk   = 0;
    rst_n = 1;       // hold the design in reset across the first rising edge
    c0    = 0;
    A     = 32'd0;
    B     = 32'd0;

    // Release reset together with the first operand pair.
    @(negedge clk);
    rst_n = 0;
    c0 = 0; A = 32'd1; B = 32'd1;

    apply(1'b0, 32'd25,      32'd78);
    apply(1'b0, 32'd3432523, 32'd3245325);
    apply(1'b0, 32'd11111,   32'd11111);
    apply(1'b0, 32'd9999,    32'd9999);
    apply(1'b0, 32'd456,     32'd234);
    apply(1'b0, 32'd245,     32'd678);
    apply(1'b0, 32'd0,       32'd0);
    apply(1'b0, 32'd25,      32'd78);
    apply(1'b0, 32'd3423,    32'd3245325);
    apply(1'b0, 32'd11111,   32'd11111);
    apply(1'b0, 32'd99,      32'd9999);
    apply(1'b0, 32'd45,      32'd234);
    apply(1'b0, 32'd245,     32'd678);

    // Let the last results drain out of the pipeline.
    repeat(10) @(posedge clk);
    $finish;
end

endmodule
仿真结果如下:
因为是两级流水线,每级消耗一个时钟周期,所以得到第一个结果需要两个时钟周期;但是在连续输入操作数的时候,之后每个时钟周期都可以得到一个新的计算结果。
当输入的值每个时钟周期变化一次时,计算结果也是每个时钟周期产生相应的变化,但是需要延迟两个时钟周期才能得到对应的计算结果。
遇到的低级错误:模块之间 reset 的有效电平和触发沿不一致。
vivado中,因模块之间的reset不一致,导致运行结果与预期不同。