一、单周期CPU
顶层文件cpu_1
// Single-cycle top level: connects the instruction memory, the CPU core and
// the data memory. Only clock and reset come from outside.
module cpu_1(
    input clk,
    input rst
);
    // Fetch path: the core drives addr, the instruction memory answers with instr.
    wire [31:0] addr;
    wire [31:0] instr;

    // Core <-> data memory signals.
    wire [31:0] ext_imm;
    wire [31:0] data1;
    wire [31:0] data2;
    wire [31:0] data_mem;
    wire        lw_en;
    wire        sw_en;

    instr_mem instr_mem_cpu(
        .addr (addr),
        .instr(instr)
    );

    cpu cpu_cpu(
        .clk     (clk),
        .rst     (rst),
        .data_mem(data_mem),
        .instr   (instr),
        .addr    (addr),
        .data1   (data1),
        .data2   (data2),
        .ext_imm (ext_imm),
        .lw_en   (lw_en),
        .sw_en   (sw_en)
    );

    data_mem data_mem_cpu(
        .ext_imm (ext_imm),
        .data1   (data1),
        .data2   (data2),
        .lw_en   (lw_en),
        .sw_en   (sw_en),
        .data_mem(data_mem)
    );
endmodule
次级顶层文件cpu
// Single-cycle CPU core. Instantiates the program counter, decoder, register
// file, ALU, control unit, immediate extender and the selector muxes, and
// exposes the signals the external instruction/data memories need.
module cpu(
    input         clk,
    input         rst,
    input  [31:0] data_mem,
    input  [31:0] instr,
    output [31:0] addr,
    output [31:0] data1,
    output [31:0] data2,
    output [31:0] ext_imm,
    output        lw_en,
    output        sw_en
);
    // Fields produced by the decoder.
    wire [6:0]  op;
    wire [4:0]  rd;
    wire [2:0]  func3;
    wire [4:0]  rs1;
    wire [4:0]  rs2;
    wire        func;
    wire [11:0] imm;
    wire [31:0] jmp;

    // Control outputs used inside the core.
    wire        wr_en;
    wire        sub_en;
    wire        jmp_en;
    wire        jmpr_en;
    wire        jmpb_en;
    wire        pass;

    // Datapath values.
    wire [31:0] offset;
    wire [31:0] wr_data;
    wire [31:0] data_in2;
    wire [31:0] data_out;

    pc pc_cpu(
        .clk    (clk),
        .rst    (rst),
        .jmp_en (jmp_en),
        .jmpr_en(jmpr_en),
        .jmpb_en(jmpb_en),
        .offset (offset),
        .addr   (addr)
    );

    decode decode_cpu(
        .instr(instr),
        .op   (op),
        .rd   (rd),
        .func3(func3),
        .rs1  (rs1),
        .rs2  (rs2),
        .func (func),
        .imm  (imm),
        .jmp  (jmp)
    );

    register register_cpu(
        .clk    (clk),
        .wr_en  (wr_en),
        .rs1    (rs1),
        .rs2    (rs2),
        .rd     (rd),
        .wr_data(wr_data),
        .data1  (data1),
        .data2  (data2)
    );

    alu alu_cpu(
        .data_in1(data1),
        .data_in2(data_in2),
        .func3   (func3),
        .func    (func),
        .sub_en  (sub_en),
        .data_out(data_out)
    );

    control control_cpu(
        .op     (op),
        .func   (func),
        .pass   (pass),
        .wr_en  (wr_en),
        .sub_en (sub_en),
        .jmp_en (jmp_en),
        .jmpr_en(jmpr_en),
        .jmpb_en(jmpb_en),
        .lw_en  (lw_en),
        .sw_en  (sw_en)
    );

    ext32 ext32_cpu(
        .imm    (imm),
        .ext_imm(ext_imm)
    );

    // The selectors below are steered directly by individual opcode bits.
    data_in2_sel data_in2_sel_cpu(
        .data2   (data2),
        .ext_imm (ext_imm),
        .sel     (op[5]),
        .data_in2(data_in2)
    );

    wr_data_sel wr_data_sel_cpu(
        .data_out(data_out),
        .addr    (addr),
        .data_mem(data_mem),
        .sel1    (op[4]),
        .sel2    (op[6]),
        .wr_data (wr_data)
    );

    offset_sel offset_sel_cpu(
        .jmp    (jmp),
        .ext_imm(ext_imm),
        .data1  (data1),
        .sel    (op[3:2]),
        .offset (offset)
    );

    pass_jud pass_jud_cpu(
        .data1(data1),
        .data2(data2),
        .func3(func3),
        .pass (pass)
    );
endmodule
PC
// Program counter for the single-cycle core.
// next_addr is computed on the falling clock edge and copied into addr on the
// following rising edge. rst asynchronously clears both registers.
//   jmp_en  / jmpb_en : next = addr + (offset << 1)
//   jmpr_en           : next = offset
//   otherwise         : next = addr + 4
// Priority when several enables are high: jmp_en, then jmpr_en, then jmpb_en.
module pc(
    input             clk,
    input             rst,
    input             jmp_en,
    input             jmpr_en,
    input             jmpb_en,
    input      [31:0] offset,
    output reg [31:0] addr
);
    reg [31:0] next_addr;

    initial begin
        addr      = 32'd0;
        next_addr = 32'd0;
    end

    // Falling edge: pick the next address.
    always @(negedge clk or posedge rst) begin
        if (rst) begin
            next_addr <= 32'd0;
        end else begin
            // Items are tried top to bottom, which reproduces the enable priority.
            case (1'b1)
                jmp_en:  next_addr <= addr + (offset << 1);
                jmpr_en: next_addr <= offset;
                jmpb_en: next_addr <= addr + (offset << 1);
                default: next_addr <= addr + 32'd4;
            endcase
        end
    end

    // Rising edge: commit it.
    always @(posedge clk or posedge rst) begin
        if (rst) begin
            addr <= 32'd0;
        end else begin
            addr <= next_addr;
        end
    end
endmodule
ALU
// Combinational ALU selected by func3.
//   000 : add, or subtract when sub_en is set
//   001 : logical shift left by data_in2[4:0]
//   010 : signed   set-less-than (result is 1 or 0)
//   011 : unsigned set-less-than (result is 1 or 0)
//   100 : xor
//   101 : shift right by data_in2[4:0]; arithmetic when func is set, else logical
//   110 : or
//   111 : and
// Fix: the signed compare used to test the single bit data_in1[31] against the
// whole of data_in2 when both signs matched, so e.g. 5 < 3 reported 1.
module alu(
    input [31:0]data_in1,
    input [31:0]data_in2,
    input [2:0]func3,
    input func,
    input sub_en,
    output reg [31:0]data_out
);
    wire [4:0] shamt;
    assign shamt = data_in2[4:0];

    // Blocking assignments: this is purely combinational logic.
    always @(*) begin
        case (func3)
            3'b000: begin
                if (sub_en) data_out = data_in1 - data_in2;
                else        data_out = data_in1 + data_in2;
            end
            3'b001: data_out = data_in1 << shamt;
            3'b010: begin
                if ($signed(data_in1) < $signed(data_in2)) data_out = 32'd1;
                else                                       data_out = 32'd0;
            end
            3'b011: begin
                if (data_in1 < data_in2) data_out = 32'd1;
                else                     data_out = 32'd0;
            end
            3'b100: data_out = data_in1 ^ data_in2;
            3'b101: begin
                // Kept as separate statements: mixing the signed and unsigned
                // shifts in one ?: expression would make the whole expression
                // unsigned and silently turn >>> into a logical shift.
                if (func) data_out = $signed(data_in1) >>> shamt;
                else      data_out = data_in1 >> shamt;
            end
            3'b110: data_out = data_in1 | data_in2;
            3'b111: data_out = data_in1 & data_in2;
            default: data_out = 32'd0;
        endcase
    end
endmodule
Control
// Combinational control unit for the single-cycle core. Each output is a pure
// function of the 7-bit opcode, plus func for sub_en and pass for jmpb_en.
// NOTE(review): the opcode values look like the RV32I base opcode map - confirm.
module control(
    input  [6:0] op,
    input        func,
    input        pass,
    output       wr_en,
    output       sub_en,
    output       jmp_en,
    output       jmpr_en,
    output       jmpb_en,
    output       lw_en,
    output       sw_en
);
    // Opcodes, named after the control signal each one raises.
    localparam [6:0] OP_REG  = 7'b0_110_011;  // register-register ALU (may set sub_en)
    localparam [6:0] OP_IMM  = 7'b0_010_011;  // register-immediate ALU
    localparam [6:0] OP_JMP  = 7'b1_101_111;  // jmp_en
    localparam [6:0] OP_JMPR = 7'b1_100_111;  // jmpr_en
    localparam [6:0] OP_JMPB = 7'b1_100_011;  // jmpb_en (when pass)
    localparam [6:0] OP_LW   = 7'b0_000_011;  // lw_en
    localparam [6:0] OP_SW   = 7'b0_100_011;  // sw_en

    wire is_reg  = (op == OP_REG);
    wire is_imm  = (op == OP_IMM);
    wire is_jmp  = (op == OP_JMP);
    wire is_jmpr = (op == OP_JMPR);
    wire is_jmpb = (op == OP_JMPB);
    wire is_lw   = (op == OP_LW);
    wire is_sw   = (op == OP_SW);

    // Register file is written by ALU ops, both jump kinds and loads.
    assign wr_en   = is_reg || is_imm || is_jmp || is_jmpr || is_lw;
    assign sub_en  = is_reg & func;
    assign jmp_en  = is_jmp;
    assign jmpr_en = is_jmpr;
    assign jmpb_en = is_jmpb & pass;
    assign lw_en   = is_lw;
    assign sw_en   = is_sw;
endmodule
data_in2_sel
// Two-way mux for the ALU's second operand.
//   sel = 1 : data2   (register value)
//   sel = 0 : ext_imm (extended immediate)
module data_in2_sel(
    input  [31:0] data2,
    input  [31:0] ext_imm,
    input         sel,
    output [31:0] data_in2
);
    assign data_in2 = sel ? data2
                          : ext_imm;
endmodule
次级顶层文件Data_mem
// Byte-addressed data memory (32 bytes) for the single-cycle core.
// The access address is ext_imm + data1. Words are stored most-significant
// byte first: data[addr] holds bits [31:24].
//
// Fixes relative to the previous version:
//  * Loads now use the same byte order as stores. Previously a load returned
//    data[addr] in bits [7:0] while a store put bits [31:24] there, so a word
//    written and read back came out byte-swapped.
//  * The load is combinational while lw_en is high, so it follows address
//    changes instead of sampling only when lw_en toggles.
//  * The store is guarded by sw_en. Previously any change of sw_en, including
//    its falling edge, wrote memory using whatever address/data were present.
//
// NOTE(review): the module has no clock, so the store is still triggered by
// the rising transition of sw_en only; two stores in consecutive cycles keep
// sw_en high and the second one is not captured. Fixing that needs a clk port.
module data_mem(
    input [31:0]ext_imm,
    input [31:0]data1,
    input [31:0]data2,
    input lw_en,
    input sw_en,
    output reg [31:0]data_mem
);
    reg [7:0] data [0:31];

    wire [31:0] addr;
    assign addr = ext_imm + data1;

    // Load: output holds its last value while lw_en is low.
    always @(*) begin
        if (lw_en)
            data_mem = {data[addr], data[addr + 1], data[addr + 2], data[addr + 3]};
    end

    // Store: only when sw_en has just become active.
    always @(sw_en) begin
        if (sw_en) begin
            data[addr]     <= data2[31:24];
            data[addr + 1] <= data2[23:16];
            data[addr + 2] <= data2[15:8];
            data[addr + 3] <= data2[7:0];
        end
    end
endmodule
Decode
// Instruction field extractor.
//   op    = instr[6:0]     rd  = instr[11:7]    func3 = instr[14:12]
//   rs1   = instr[19:15]   rs2 = instr[24:20]   func  = instr[30]
//   jmp   = sign-extended {instr[31], instr[19:12], instr[20], instr[30:21]}
//   imm   = 12-bit immediate chosen by opcode (0 for any other opcode):
//             1100111, 0000011, 0010011 : instr[31:20]
//             1100011                   : {instr[31], instr[7], instr[30:25], instr[11:8]}
//             0100011                   : {instr[31:25], instr[11:7]}
//
// The immediate used to be built as a sum of (1-bit opcode match) * field
// products. That only worked because every match term contains un-negated
// op[1]&op[0], which masks the upper bits produced when ~op[n] is widened to
// 12 bits. A case statement states the same selection directly.
module decode(
    input [31:0]instr,
    output [6:0]op,
    output [4:0]rd,
    output [2:0]func3,
    output [4:0]rs1,
    output [4:0]rs2,
    output func,
    output [11:0]imm,
    output [31:0]jmp
);
    assign op    = instr[6:0];
    assign rd    = instr[11:7];
    assign func3 = instr[14:12];
    assign rs1   = instr[19:15];
    assign rs2   = instr[24:20];
    assign func  = instr[30];
    assign jmp   = {{12{instr[31]}}, instr[31], instr[19:12], instr[20], instr[30:21]};

    reg [11:0] imm_sel;
    always @(*) begin
        case (instr[6:0])
            7'b1100111,
            7'b0000011,
            7'b0010011: imm_sel = instr[31:20];
            7'b1100011: imm_sel = {instr[31], instr[7], instr[30:25], instr[11:8]};
            7'b0100011: imm_sel = {instr[31:25], instr[11:7]};
            default:    imm_sel = 12'd0;
        endcase
    end
    assign imm = imm_sel;
endmodule
ext32
// Sign-extends a 12-bit immediate to 32 bits by replicating imm[11].
module ext32(
    input  [11:0] imm,
    output [31:0] ext_imm
);
    wire sign_bit = imm[11];

    assign ext_imm[11:0]  = imm;
    assign ext_imm[31:12] = {20{sign_bit}};
endmodule
次级顶层文件Instr_mem
// Byte-addressed instruction memory, loaded once from a binary text file.
// A 32-bit word is assembled most-significant byte first:
//   instr = {mem[addr], mem[addr+1], mem[addr+2], mem[addr+3]}
//
// Parameters (defaults reproduce the previous hard-coded values):
//   INIT_FILE : file passed to $readmemb
//   DEPTH     : number of bytes in the memory
//
// The read is a continuous assignment rather than an always @(addr) block, so
// the output is also re-evaluated when the memory contents are loaded; the
// old form could miss the first fetch if it ran before $readmemb at time 0.
module instr_mem #(
    parameter INIT_FILE = "C:/Users/86136/Desktop/vivado/Quartus/cpu/cpu.txt",
    parameter DEPTH     = 32
)(
    input [31:0]addr,
    output [31:0]instr
);
    reg [7:0] instr_mem [0:DEPTH-1];

    initial $readmemb(INIT_FILE, instr_mem);

    assign instr = {instr_mem[addr],
                    instr_mem[addr + 1],
                    instr_mem[addr + 2],
                    instr_mem[addr + 3]};
endmodule
offset_sel
// Combinational selector for the value handed to the PC as "offset".
//   sel = 11 : jmp
//   sel = 01 : ext_imm + data1
//   sel = 00 : ext_imm
//   sel = 10 : 0
// Changes: blocking assignments for combinational logic, and the default
// literal is now 32 bits wide to match the output (it was 31'd0).
module offset_sel(
    input [31:0]jmp,
    input [31:0]ext_imm,
    input [31:0]data1,
    input [1:0]sel,
    output reg [31:0]offset
);
    always @(*) begin
        case (sel)
            2'b11:   offset = jmp;
            2'b01:   offset = ext_imm + data1;
            2'b00:   offset = ext_imm;
            default: offset = 32'd0;
        endcase
    end
endmodule
pass_jud
// Branch condition evaluator. pass is 1 when the comparison chosen by func3
// holds for data1 versus data2:
//   000 : equal                 001 : not equal
//   100 : signed   less-than    101 : signed   greater-or-equal
//   110 : unsigned less-than    111 : unsigned greater-or-equal
//   other func3 values          : 0
//
// Fix: for equal sign bits the signed cases compared the single bit data1[31]
// against the full data2, so e.g. "5 < 3" passed. They now use real signed
// comparisons. The 32-bit literals assigned to the 1-bit output are gone too.
module pass_jud(
    input [31:0]data1,
    input [31:0]data2,
    input [2:0]func3,
    output reg pass
);
    always @(*) begin
        case (func3)
            3'b000:  pass = (data1 == data2);
            3'b001:  pass = (data1 != data2);
            3'b100:  pass = ($signed(data1) <  $signed(data2));
            3'b101:  pass = ($signed(data1) >= $signed(data2));
            3'b110:  pass = (data1 <  data2);
            3'b111:  pass = (data1 >= data2);
            default: pass = 1'b0;
        endcase
    end
endmodule
Register
// Register file with 31 storage words (indices 1..31). Index 0 has no storage
// and always reads as zero.
//   Reads  : combinational, two ports (rs1 -> data1, rs2 -> data2).
//   Writes : on the falling clock edge when wr_en is high.
// Fix: a write with rd == 0 used to index outside the declared [1:31] array.
// It is now skipped explicitly, which matches the read-as-zero behaviour and
// does not rely on how a tool treats out-of-range array writes.
module register(
    input clk,
    input wr_en,
    input [4:0]rs1,
    input [4:0]rs2,
    input [4:0]rd,
    input [31:0]wr_data,
    output [31:0]data1,
    output [31:0]data2
);
    reg [31:0] register [1:31];

    assign data1 = (rs1 != 0) ? register[rs1] : 0;
    assign data2 = (rs2 != 0) ? register[rs2] : 0;

    always @(negedge clk) begin
        if (wr_en && (rd != 5'd0))
            register[rd] <= wr_data;
    end
endmodule
wr_data_sel
// Chooses the value written back to the register file.
//   sel1 = 1            : data_out
//   sel1 = 0, sel2 = 1  : addr + 4
//   sel1 = 0, sel2 = 0  : data_mem
module wr_data_sel(
    input  [31:0] data_out,
    input  [31:0] addr,
    input  [31:0] data_mem,
    input         sel1,
    input         sel2,
    output [31:0] wr_data
);
    wire [31:0] addr_plus4 = addr + 4;
    wire [31:0] non_alu    = sel2 ? addr_plus4 : data_mem;

    assign wr_data = sel1 ? data_out : non_alu;
endmodule
仿真文件sim
`timescale 1ns/1ns
`define clk_period 20
// Testbench for the single-cycle CPU.
// Clock: starts high, toggles every half clk_period.
// Reset: low for 100 ns, high for 100 ns, then low for the rest of the run.
// The simulation is not stopped from here.
module sim;
    reg clk;
    reg rst;

    cpu_1 cpu_1(
        .clk(clk),
        .rst(rst)
    );

    // Clock generator.
    initial begin
        clk = 1'b1;
        forever #(`clk_period/2) clk = ~clk;
    end

    // Reset pulse.
    initial begin
        rst = 1'b0;
        #100 rst = 1'b1;
        #100 rst = 1'b0;
    end
endmodule
二、五级流水线
顶层文件cpu_2
// Pipelined top level: connects the instruction memory, the pipelined CPU core
// and the clocked data memory. Only clock and reset come from outside.
module cpu_2(
    input clk,
    input rst
);
    // Fetch path.
    wire [31:0] addr;
    wire [31:0] instr;

    // Core <-> data memory signals (the "_s" outputs of the core).
    wire [31:0] wr_addr_s;
    wire [31:0] data2_s;
    wire        lw_en_s;
    wire        sw_en_s;
    wire [31:0] data_mem;

    instr_mem instr_mem_cpu(
        .addr (addr),
        .instr(instr)
    );

    cpu cpu_cpu(
        .clk      (clk),
        .rst      (rst),
        .instr    (instr),
        .data_mem (data_mem),
        .addr     (addr),
        .wr_addr_s(wr_addr_s),
        .data2_s  (data2_s),
        .lw_en_s  (lw_en_s),
        .sw_en_s  (sw_en_s)
    );

    data_mem data_mem_cpu(
        .clk     (clk),
        .addr    (wr_addr_s),
        .data2   (data2_s),
        .lw_en   (lw_en_s),
        .sw_en   (sw_en_s),
        .data_mem(data_mem)
    );
endmodule
cpu
// Pipelined CPU core. Values move between steps through three kinds of
// registers instantiated below:
//   receive : captures on the rising clock edge
//   delay   : captures on the falling clock edge
//   shift   : M rising-edge stages in series
// The decode-side helper blocks (control, data_in2_sel, offset_sel, wr_addr,
// wr_data_sel) register their own outputs on the falling edge.
//
// Fix: lw_en, sw_en and the lw_en_s / sw_en_s outputs were declared 32 bits
// wide although control and shift #(2,1) drive a single bit and the parent
// connects 1-bit wires. They are now 1 bit, so no bits are left floating and
// every connection has matching width.
module cpu(
    input clk,
    input rst,
    input [31:0]instr,
    input [31:0]data_mem,
    output [31:0]addr,
    output [31:0]wr_addr_s,
    output [31:0]data2_s,
    output lw_en_s,
    output sw_en_s
);
    // Fetched address / instruction after the first receive register.
    wire [31:0] addr_r;
    wire [31:0] instr_r;

    // Decoded fields and their delayed copies.
    wire [6:0]  op;
    wire [4:0]  rd;
    wire [4:0]  rd_d;
    wire [4:0]  rd_s;
    wire [2:0]  func3;
    wire [2:0]  func3_d;
    wire [2:0]  func3_r;
    wire [4:0]  rs1;
    wire [4:0]  rs2;
    wire        func;
    wire        func_d;
    wire        func_r;
    wire [11:0] imm;
    wire [31:0] jmp;
    wire [31:0] ext_imm;

    // Control signals.
    wire        sub_en;
    wire        sub_en_r;
    wire        wr_en;
    wire        wr_en_s;
    wire        pass;
    wire        jmp_en;
    wire        cancel;      // registered jmp_en; redirects the PC and clears control
    wire        lw_en;
    wire        sw_en;

    // Register-file operands and ALU inputs/outputs.
    wire [31:0] data1;
    wire [31:0] data1_d;
    wire [31:0] data1_r;
    wire [31:0] data2;
    wire [31:0] data2_d;
    wire [31:0] data_in2;
    wire [31:0] data_in2_r;
    wire [31:0] data_out;

    // Jump target, memory address and write-back data.
    wire [31:0] offset;
    wire [31:0] offset_r;
    wire [31:0] wr_addr;
    wire [31:0] wr_data;
    wire [31:0] wr_data_r;
    wire [31:0] data;
    wire [31:0] data_r;

    // Program counter and fetch registers.
    pc pc_cpu(clk,rst,cancel,offset_r,addr);
    receive #32 r_instr(clk,instr,instr_r);
    receive #32 r_addr(clk,addr,addr_r);

    // Decode, immediate extension, branch test and control.
    decode decode_cpu(instr_r,op,rd,func3,rs1,rs2,func,imm,jmp);
    ext32 ext32_cpu(imm,ext_imm);
    pass_jud pass_jud_cpu(data1,data2,func3,pass);
    control control_cpu(clk,op,func,pass,cancel,sub_en,wr_en,jmp_en,lw_en,sw_en);
    data_in2_sel data_in2_sel_cpu(clk,data2,ext_imm,op[5],data_in2);
    offset_sel offset_sel_cpu(clk,jmp,addr_r,ext_imm,data1,op[3:2],offset);
    wr_addr wr_addr_cpu(clk,ext_imm,data1,wr_addr);

    // Falling-edge copies of values that are not registered by a helper block.
    delay #32 d_data1(clk,data1,data1_d);
    delay #32 d_data2(clk,data2,data2_d);
    delay #5 d_rd(clk,rd,rd_d);
    delay #3 d_func3(clk,func3,func3_d);
    delay #1 d_func(clk,func,func_d);

    // Multi-stage delays towards the data memory (2 stages) and the
    // register-file write port (3 stages).
    shift #(2,32) s_wr_addr(clk,wr_addr,wr_addr_s);
    shift #(2,32) s_data2(clk,data2_d,data2_s);
    shift #(2,1) s_lw_en(clk,lw_en,lw_en_s);
    shift #(2,1) s_sw_en(clk,sw_en,sw_en_s);
    shift #(3,5) s_rd(clk,rd_d,rd_s);
    shift #(3,1) s_wr_en(clk,wr_en,wr_en_s);

    // Rising-edge registers feeding the ALU and the PC redirect.
    receive #32 r_offset(clk,offset,offset_r);
    receive #32 r_data1(clk,data1_d,data1_r);
    receive #32 r_data_in2(clk,data_in2,data_in2_r);
    receive #3 r_func3(clk,func3_d,func3_r);
    receive #1 r_func(clk,func_d,func_r);
    receive #1 r_sub_en(clk,sub_en,sub_en_r);
    receive #1 r_jmp_en(clk,jmp_en,cancel);

    alu alu(data1_r,data_in2_r,func3_r,func_r,sub_en_r,data_out);

    // Write-back selection: first ALU result vs. addr_r (when cancel is set),
    // then that value vs. the data-memory output (when lw_en_s is set).
    wr_data_sel wr_data_sel1(clk,data_out,addr_r,cancel,wr_data);
    receive #32 r_wr_data(clk,wr_data,wr_data_r);
    wr_data_sel wr_data_sel2(clk,wr_data_r,data_mem,lw_en_s,data);
    receive #32 r_data(clk,data,data_r);

    register register_cpu(clk,rs1,rs2,rd_s,data_r,wr_en_s,data1,data2);
endmodule
ALU
// Combinational ALU selected by func3.
//   000 : add, or subtract when sub_en is set
//   001 : logical shift left by data_in2[4:0]
//   010 : signed   set-less-than (result is 1 or 0)
//   011 : unsigned set-less-than (result is 1 or 0)
//   100 : xor
//   101 : shift right by data_in2[4:0]; arithmetic when func is set, else logical
//   110 : or
//   111 : and
// Fix: the signed compare used to test the single bit data_in1[31] against the
// whole of data_in2 when both signs matched, so e.g. 5 < 3 reported 1.
module alu(
    input [31:0]data_in1,
    input [31:0]data_in2,
    input [2:0]func3,
    input func,
    input sub_en,
    output reg [31:0]data_out
);
    wire [4:0] shamt;
    assign shamt = data_in2[4:0];

    // Blocking assignments: this is purely combinational logic.
    always @(*) begin
        case (func3)
            3'b000: begin
                if (sub_en) data_out = data_in1 - data_in2;
                else        data_out = data_in1 + data_in2;
            end
            3'b001: data_out = data_in1 << shamt;
            3'b010: begin
                if ($signed(data_in1) < $signed(data_in2)) data_out = 32'd1;
                else                                       data_out = 32'd0;
            end
            3'b011: begin
                if (data_in1 < data_in2) data_out = 32'd1;
                else                     data_out = 32'd0;
            end
            3'b100: data_out = data_in1 ^ data_in2;
            3'b101: begin
                // Kept as separate statements: mixing the signed and unsigned
                // shifts in one ?: expression would make the whole expression
                // unsigned and silently turn >>> into a logical shift.
                if (func) data_out = $signed(data_in1) >>> shamt;
                else      data_out = data_in1 >> shamt;
            end
            3'b110: data_out = data_in1 | data_in2;
            3'b111: data_out = data_in1 & data_in2;
            default: data_out = 32'd0;
        endcase
    end
endmodule
Control
// Registered control unit for the pipelined core. Outputs update on the
// falling clock edge. When cancel is high every output is cleared; otherwise
// the opcode selects which outputs are raised (all others are 0):
//   0110011 : wr_en, sub_en follows func
//   0010011 : wr_en
//   1101111 : wr_en, jmp_en
//   1100111 : wr_en, jmp_en
//   1100011 : jmp_en follows pass
//   0000011 : wr_en, lw_en
//   0100011 : sw_en
module control(
    input      clk,
    input [6:0] op,
    input      func,
    input      pass,
    input      cancel,
    output reg sub_en,
    output reg wr_en,
    output reg jmp_en,
    output reg lw_en,
    output reg sw_en
);
    localparam [6:0] OP_REG  = 7'b0_110_011;
    localparam [6:0] OP_IMM  = 7'b0_010_011;
    localparam [6:0] OP_JMP  = 7'b1_101_111;
    localparam [6:0] OP_JMPR = 7'b1_100_111;
    localparam [6:0] OP_JMPB = 7'b1_100_011;
    localparam [6:0] OP_LW   = 7'b0_000_011;
    localparam [6:0] OP_SW   = 7'b0_100_011;

    always @(negedge clk) begin
        // Everything inactive unless overridden below; the last nonblocking
        // assignment to a signal in this block is the one that takes effect.
        sw_en  <= 1'b0;
        lw_en  <= 1'b0;
        jmp_en <= 1'b0;
        sub_en <= 1'b0;
        wr_en  <= 1'b0;

        if (cancel) begin
            // Cancelled: keep the all-zero pattern.
        end else begin
            case (op)
                OP_REG: begin
                    sub_en <= func ? 1'b1 : 1'b0;
                    wr_en  <= 1'b1;
                end
                OP_IMM: wr_en <= 1'b1;
                OP_JMP, OP_JMPR: begin
                    jmp_en <= 1'b1;
                    wr_en  <= 1'b1;
                end
                OP_JMPB: jmp_en <= pass;
                OP_LW: begin
                    lw_en <= 1'b1;
                    wr_en <= 1'b1;
                end
                OP_SW: sw_en <= 1'b1;
                default: ;
            endcase
        end
    end
endmodule
data_in2_sel
// Registered two-way mux for the ALU's second operand.
// On each falling clock edge: data_in2 takes data2 when sel is 1, else ext_imm.
module data_in2_sel(
    input             clk,
    input      [31:0] data2,
    input      [31:0] ext_imm,
    input             sel,
    output reg [31:0] data_in2
);
    always @(negedge clk) begin
        data_in2 <= sel ? data2
                        : ext_imm;
    end
endmodule
Data_mem
// Byte-addressed data memory (256 bytes) for the pipelined core.
// Words are stored most-significant byte first: data[addr] holds bits [31:24].
//   Load  : combinational while lw_en is high; the output holds its last
//           value while lw_en is low.
//   Store : on the falling clock edge when sw_en is high.
//
// Fix: the load block was sensitive only to lw_en, so it sampled memory once
// when lw_en rose. A second load in the following cycle (lw_en still high) or
// an address that settled after lw_en returned stale data. The read now
// follows addr and the memory contents for as long as lw_en is asserted.
module data_mem(
    input clk,
    input [31:0]addr,
    input [31:0]data2,
    input lw_en,
    input sw_en,
    output reg [31:0]data_mem
);
    reg [7:0] data [0:255];

    always @(*) begin
        if (lw_en)
            data_mem = {data[addr], data[addr + 1], data[addr + 2], data[addr + 3]};
    end

    always @(negedge clk) begin
        if (sw_en) begin
            data[addr]     <= data2[31:24];
            data[addr + 1] <= data2[23:16];
            data[addr + 2] <= data2[15:8];
            data[addr + 3] <= data2[7:0];
        end
    end
endmodule
Decode
// Instruction field extractor.
//   op    = instr[6:0]     rd  = instr[11:7]    func3 = instr[14:12]
//   rs1   = instr[19:15]   rs2 = instr[24:20]   func  = instr[30]
//   jmp   = sign-extended {instr[31], instr[19:12], instr[20], instr[30:21]}
//   imm   = 12-bit immediate chosen by opcode (0 for any other opcode):
//             1100111, 0000011, 0010011 : instr[31:20]
//             1100011                   : {instr[31], instr[7], instr[30:25], instr[11:8]}
//             0100011                   : {instr[31:25], instr[11:7]}
//
// The immediate used to be built as a sum of (1-bit opcode match) * field
// products. That only worked because every match term contains un-negated
// op[1]&op[0], which masks the upper bits produced when ~op[n] is widened to
// 12 bits. A case statement states the same selection directly.
module decode(
    input [31:0]instr,
    output [6:0]op,
    output [4:0]rd,
    output [2:0]func3,
    output [4:0]rs1,
    output [4:0]rs2,
    output func,
    output [11:0]imm,
    output [31:0]jmp
);
    assign op    = instr[6:0];
    assign rd    = instr[11:7];
    assign func3 = instr[14:12];
    assign rs1   = instr[19:15];
    assign rs2   = instr[24:20];
    assign func  = instr[30];
    assign jmp   = {{12{instr[31]}}, instr[31], instr[19:12], instr[20], instr[30:21]};

    reg [11:0] imm_sel;
    always @(*) begin
        case (instr[6:0])
            7'b1100111,
            7'b0000011,
            7'b0010011: imm_sel = instr[31:20];
            7'b1100011: imm_sel = {instr[31], instr[7], instr[30:25], instr[11:8]};
            7'b0100011: imm_sel = {instr[31:25], instr[11:7]};
            default:    imm_sel = 12'd0;
        endcase
    end
    assign imm = imm_sel;
endmodule
delay
// N-bit register that captures its input on the falling clock edge.
module delay #(parameter N = 32)(
    input              clk,
    input      [N-1:0] data,
    output reg [N-1:0] data_d
);
    always @(negedge clk) begin
        data_d <= data;
    end
endmodule
ext32
// Sign-extends an N-bit immediate to 32 bits by replicating its top bit.
module ext32 #(parameter N = 12)(
    input  [N-1:0] imm,
    output [31:0]  ext_imm
);
    wire sign_bit = imm[N-1];

    assign ext_imm = {{(32-N){sign_bit}}, imm};
endmodule
Instr_mem
// Byte-addressed instruction memory, loaded once from a binary text file.
// A 32-bit word is assembled most-significant byte first:
//   instr = {mem[addr], mem[addr+1], mem[addr+2], mem[addr+3]}
//
// Parameters (defaults reproduce the previous hard-coded values):
//   INIT_FILE : file passed to $readmemb
//   DEPTH     : number of bytes in the memory
//
// The read is a continuous assignment rather than an always @(addr) block, so
// the output is also re-evaluated when the memory contents are loaded; the
// old form could miss the first fetch if it ran before $readmemb at time 0.
module instr_mem #(
    parameter INIT_FILE = "C:/Users/86136/Desktop/vivado/Quartus/cpu/cpu.txt",
    parameter DEPTH     = 256
)(
    input [31:0]addr,
    output [31:0]instr
);
    reg [7:0] instr_mem [0:DEPTH-1];

    initial $readmemb(INIT_FILE, instr_mem);

    assign instr = {instr_mem[addr],
                    instr_mem[addr + 1],
                    instr_mem[addr + 2],
                    instr_mem[addr + 3]};
endmodule
offset_sel
// Registered jump-target selector; updates on the falling clock edge.
//   sel = 11 : addr + (jmp << 1)
//   sel = 01 : ext_imm + data1
//   sel = 00 : addr + (ext_imm << 1)
//   sel = 10 : 0
module offset_sel(
    input             clk,
    input      [31:0] jmp,
    input      [31:0] addr,
    input      [31:0] ext_imm,
    input      [31:0] data1,
    input      [1:0]  sel,
    output reg [31:0] offset
);
    always @(negedge clk) begin
        if (sel == 2'b11) begin
            offset <= addr + (jmp << 1);
        end else if (sel == 2'b01) begin
            offset <= ext_imm + data1;
        end else if (sel == 2'b00) begin
            offset <= addr + (ext_imm << 1);
        end else begin
            offset <= 32'd0;
        end
    end
endmodule
pass_jud
// Branch condition evaluator. pass is 1 when the comparison chosen by func3
// holds for data1 versus data2:
//   000 : equal                 001 : not equal
//   100 : signed   less-than    101 : signed   greater-or-equal
//   110 : unsigned less-than    111 : unsigned greater-or-equal
//   other func3 values          : 0
//
// Fix: for equal sign bits the signed cases compared the single bit data1[31]
// against the full data2, so e.g. "5 < 3" passed. They now use real signed
// comparisons. The 32-bit literals assigned to the 1-bit output are gone too.
module pass_jud(
    input [31:0]data1,
    input [31:0]data2,
    input [2:0]func3,
    output reg pass
);
    always @(*) begin
        case (func3)
            3'b000:  pass = (data1 == data2);
            3'b001:  pass = (data1 != data2);
            3'b100:  pass = ($signed(data1) <  $signed(data2));
            3'b101:  pass = ($signed(data1) >= $signed(data2));
            3'b110:  pass = (data1 <  data2);
            3'b111:  pass = (data1 >= data2);
            default: pass = 1'b0;
        endcase
    end
endmodule
PC
// Program counter for the pipelined core; updates on the falling clock edge.
//   rst (asynchronous) : addr = 0
//   cancel             : addr = offset
//   otherwise          : addr = addr + 4
module pc(
    input             clk,
    input             rst,
    input             cancel,
    input      [31:0] offset,
    output reg [31:0] addr
);
    initial begin
        addr = 32'd0;
    end

    always @(negedge clk or posedge rst) begin
        if (rst) begin
            addr <= 32'd0;
        end else begin
            if (cancel) begin
                addr <= offset;
            end else begin
                addr <= addr + 32'd4;
            end
        end
    end
endmodule
receive
// N-bit register that captures its input on the rising clock edge.
module receive #(parameter N = 32)(
    input              clk,
    input      [N-1:0] data,
    output reg [N-1:0] data_r
);
    always @(posedge clk) begin
        data_r <= data;
    end
endmodule
Register
// Register file with 31 storage words (indices 1..31). Index 0 has no storage
// and always reads as zero.
//   Reads  : combinational, two ports (rs1 -> data1, rs2 -> data2).
//   Writes : on the falling clock edge when wr_en is high.
// Fix: a write with rd == 0 used to index outside the declared [1:31] array.
// It is now skipped explicitly, which matches the read-as-zero behaviour and
// does not rely on how a tool treats out-of-range array writes.
module register(
    input clk,
    input [4:0]rs1,
    input [4:0]rs2,
    input [4:0]rd,
    input [31:0]wr_data,
    input wr_en,
    output [31:0]data1,
    output [31:0]data2
);
    reg [31:0] register [1:31];

    assign data1 = (rs1 != 0) ? register[rs1] : 0;
    assign data2 = (rs2 != 0) ? register[rs2] : 0;

    always @(negedge clk) begin
        if (wr_en && (rd != 5'd0))
            register[rd] <= wr_data;
    end
endmodule
shift
// M-stage, N-bit wide delay line clocked on the rising edge.
// data enters stage 0; data_s is the value that entered M clock edges earlier.
//
// The stages are kept in an array instead of one packed M*N-bit vector. The
// packed form used the part-select shift[(M-1)*N-1:0], which is illegal for
// M = 1; this form works for any M >= 1 and is cycle-identical for M >= 2.
module shift #(parameter M = 3, N = 32)(
    input clk,
    input [N-1:0]data,
    output [N-1:0]data_s
);
    reg [N-1:0] stage [0:M-1];
    integer i;

    always @(posedge clk) begin
        stage[0] <= data;
        for (i = 1; i < M; i = i + 1)
            stage[i] <= stage[i - 1];
    end

    assign data_s = stage[M - 1];
endmodule
wr_addr
// Registers the sum ext_imm + data1 on the falling clock edge.
module wr_addr(
    input             clk,
    input      [31:0] ext_imm,
    input      [31:0] data1,
    output reg [31:0] wr_addr
);
    always @(negedge clk) begin
        wr_addr <= ext_imm + data1;
    end
endmodule
wr_data_sel
// Registered two-way mux; updates on the falling clock edge.
//   sel = 1 : wr_data takes addr
//   sel = 0 : wr_data takes data_out
module wr_data_sel(
    input             clk,
    input      [31:0] data_out,
    input      [31:0] addr,
    input             sel,
    output reg [31:0] wr_data
);
    always @(negedge clk) begin
        wr_data <= sel ? addr
                       : data_out;
    end
endmodule
仿真文件sim
`timescale 1ns/1ns
`define clk_period 20
// Testbench for the pipelined CPU.
// Clock: starts high, toggles every half clk_period.
// Reset: low for 100 ns, high for 100 ns, then low for the rest of the run.
// The simulation is not stopped from here.
module sim;
    reg clk;
    reg rst;

    // Clock generator.
    initial begin
        clk = 1'b1;
        forever #(`clk_period/2) clk = ~clk;
    end

    // Reset pulse.
    initial begin
        rst = 0;
        #100 rst = 1;
        #100 rst = 0;
    end

    cpu_2 cpux(
        .clk(clk),
        .rst(rst)
    );
endmodule