一个单周期riscvCPU
什么是单周期CPU?
单周期CPU是指在一个时钟周期内,CPU完成以下五个步骤:
1、IF-取指令。根据PC中的地址在指令存储器中取出一条指令。
2、ID-指令译码。由取出的指令生成各种控制信号,明确该指令要进行的行为。
3、EX-执行。对指令的各种操作数进行运算。
4、MEM-存储器访问。将数据写入存储器或从存储器中读出数据。
5、WB-写回。将指令运算结果存入指定的寄存器。
IF-取指令:
在取指令阶段需要CPU根据此时的PC值进行取指令。PC可能存在的情况:
1、分支指令 beq bne
2、跳跃指令 jal jalr
3、正常指令
为了更好的理解上述过程我们可以通过两个always块进行实现。
第一个always块实现:在时钟上升沿将当前PC更新为下一个PC值(next_pc):
第二个always块实现对下一次pc的取值
`include "defines.v"
// Instruction fetch unit: holds the program counter (PC) and selects its
// next value.
//
// Ports:
//   rst       - when 1, the next PC is forced to 64'h80000000
//   clk       - the PC register is updated on the rising edge
//   jump_flag - when 1 (and rst is not 1), the next PC is jump_addr
//   jump_addr - jump target supplied from outside this module
//   in_addr   - current PC value
module ifu (
    input              rst,
    input              clk,
    input              jump_flag,
    input  [`REG_BUS]  jump_addr,
    output [`REG_BUS]  in_addr
);

    reg [63:0] pc;
    reg [63:0] next_pc;

    // Combinational next-PC selection. The assignments are ordered from
    // lowest to highest priority, so the last condition that holds wins:
    // sequential (pc + 4) < jump < reset.
    always @(*) begin
        next_pc = pc + 4;
        if (jump_flag == 1'b1) begin
            next_pc = jump_addr;
        end
        if (rst == 1) begin
            next_pc = 64'h80000000;
        end
    end

    // PC register: captures the selected next PC on every rising clock edge.
    always @(posedge clk) begin
        pc <= next_pc;
    end

    assign in_addr = pc;

endmodule
指令译码
根据PC得到的inst我们需要对其进行译码,实现判断该指令的类型,本人采用的译码思路如下:
1、先将所有的域提取出来,包括:立即数域、funct域、opcode域,部分代码展示:
指令域解析
// ---- Instruction fields, listed from the low bits of inst to the high bits ----
wire [6:0]  opcode = inst[6:0];
wire [4:0]  rd     = inst[11:7];
wire [2:0]  funct3 = inst[14:12];
wire [4:0]  rs1    = inst[19:15];
wire [4:0]  rs2    = inst[24:20];
wire [6:0]  funct7 = inst[31:25];

// ---- Raw immediate slices per instruction format (not yet reassembled) ----
wire [11:0] type_i_imm_11_0    = inst[31:20];
wire [6:0]  type_s_imm_11_5    = inst[31:25];
wire [4:0]  type_s_imm_4_0     = inst[11:7];
wire [6:0]  type_b_imm_12_10_5 = inst[31:25];
wire [4:0]  type_b_imm_4_1_11  = inst[11:7];
wire [19:0] type_u_imm_31_12   = inst[31:12];
wire [19:0] type_j_imm_31_12   = inst[31:12];

// ---- One-hot match flags for the opcode field ----
wire opcode_0110111 = (opcode == 7'b0110111);
wire opcode_0010111 = (opcode == 7'b0010111);
wire opcode_1101111 = (opcode == 7'b1101111);
wire opcode_1100111 = (opcode == 7'b1100111);
wire opcode_1100011 = (opcode == 7'b1100011);
wire opcode_0000011 = (opcode == 7'b0000011);
wire opcode_0100011 = (opcode == 7'b0100011);
wire opcode_0010011 = (opcode == 7'b0010011);
wire opcode_0110011 = (opcode == 7'b0110011);
wire opcode_0001111 = (opcode == 7'b0001111);
wire opcode_1110011 = (opcode == 7'b1110011);

// ---- One-hot match flags for the funct3 field (all eight values) ----
wire funct3_000 = (funct3 == 3'b000);
wire funct3_001 = (funct3 == 3'b001);
wire funct3_010 = (funct3 == 3'b010);
wire funct3_011 = (funct3 == 3'b011);
wire funct3_100 = (funct3 == 3'b100);
wire funct3_101 = (funct3 == 3'b101);
wire funct3_110 = (funct3 == 3'b110);
wire funct3_111 = (funct3 == 3'b111);

// ---- Match flags for the three funct7 values used by the decoder ----
wire funct7_0000000 = (funct7 == 7'b0000000);
wire funct7_0100000 = (funct7 == 7'b0100000);
wire funct7_0000001 = (funct7 == 7'b0000001);
根据域解析指令
可以通过上述的域进行指令的解析,部分代码如下所示:
// Per-instruction decode flags. Each flag is the AND of the opcode match and,
// where needed, the funct3 / funct7 matches derived from the instruction word.

// Decoded from the opcode alone.
wire inst_lui = opcode_0110111;
wire inst_auipc = opcode_0010111;
wire inst_jal = opcode_1101111;
wire inst_jalr = opcode_1100111 & funct3_000;
// Conditional branches: opcode 1100011, selected by funct3.
wire inst_beq = opcode_1100011 & funct3_000;
wire inst_bne = opcode_1100011 & funct3_001;
wire inst_blt = opcode_1100011 & funct3_100;
wire inst_bge = opcode_1100011 & funct3_101;
wire inst_bltu = opcode_1100011 & funct3_110;
wire inst_bgeu = opcode_1100011 & funct3_111;
// Loads: opcode 0000011, selected by funct3.
wire inst_lb = opcode_0000011 & funct3_000;
wire inst_lh = opcode_0000011 & funct3_001;
wire inst_lw = opcode_0000011 & funct3_010;
wire inst_lbu = opcode_0000011 & funct3_100;
wire inst_lhu = opcode_0000011 & funct3_101;
// Stores: opcode 0100011, selected by funct3.
wire inst_sb = opcode_0100011 & funct3_000;
wire inst_sh = opcode_0100011 & funct3_001;
wire inst_sw = opcode_0100011 & funct3_010;
// Register-immediate ALU operations: opcode 0010011.
wire inst_addi = opcode_0010011 & funct3_000;
wire inst_slti = opcode_0010011 & funct3_010;
wire inst_sltiu = opcode_0010011 & funct3_011;
wire inst_xori = opcode_0010011 & funct3_100;
wire inst_ori = opcode_0010011 & funct3_110;
wire inst_andi = opcode_0010011 & funct3_111;
// Shift-immediate forms additionally require a funct7 match on inst[31:25].
// NOTE(review): in RV64I, inst[25] is shamt[5] for these encodings, so shift
// amounts of 32..63 would not match the 7-bit funct7 check below - confirm
// whether this decoder is meant to accept 6-bit shift amounts.
wire inst_slli = opcode_0010011 & funct3_001 & funct7_0000000;
wire inst_srli = opcode_0010011 & funct3_101 & funct7_0000000;
wire inst_srai = opcode_0010011 & funct3_101 & funct7_0100000;
// Register-register ALU operations: opcode 0110011, funct7 0000000 or 0100000.
wire inst_add = opcode_0110011 & funct3_000 & funct7_0000000;
wire inst_sub = opcode_0110011 & funct3_000 & funct7_0100000;
wire inst_sll = opcode_0110011 & funct3_001 & funct7_0000000;
wire inst_slt = opcode_0110011 & funct3_010 & funct7_0000000;
wire inst_sltu = opcode_0110011 & funct3_011 & funct7_0000000;
wire inst_xor = opcode_0110011 & funct3_100 & funct7_0000000;
wire inst_srl = opcode_0110011 & funct3_101 & funct7_0000000;
wire inst_sra = opcode_0110011 & funct3_101 & funct7_0100000;
wire inst_or = opcode_0110011 & funct3_110 & funct7_0000000;
wire inst_and = opcode_0110011 & funct3_111 & funct7_0000000;
// Fence and system instructions. ECALL and EBREAK are matched against the
// whole instruction word using constants defined outside this snippet.
wire inst_fence = opcode_0001111 & funct3_000;
wire inst_ecall = (inst == `INST_ECALL);
wire inst_ebreak = (inst == `INST_EBREAK);
wire inst_fence_i = opcode_0001111 & funct3_001;
// CSR accesses: opcode 1110011, selected by funct3.
wire inst_csrrw = opcode_1110011 & funct3_001;
wire inst_csrrs = opcode_1110011 & funct3_010;
wire inst_csrrc = opcode_1110011 & funct3_011;
wire inst_csrrwi = opcode_1110011 & funct3_101;
wire inst_csrrsi = opcode_1110011 & funct3_110;
wire inst_csrrci = opcode_1110011 & funct3_111;
// Multiply / divide: opcode 0110011 with funct7 0000001, selected by funct3.
wire inst_mul = opcode_0110011 & funct3_000 & funct7_0000001;
wire inst_mulh = opcode_0110011 & funct3_001 & funct7_0000001;
wire inst_mulhsu = opcode_0110011 & funct3_010 & funct7_0000001;
wire inst_mulhu = opcode_0110011 & funct3_011 & funct7_0000001;
wire inst_div = opcode_0110011 & funct3_100 & funct7_0000001;
wire inst_divu = opcode_0110011 & funct3_101 & funct7_0000001;
wire inst_rem = opcode_0110011 & funct3_110 & funct7_0000001;
wire inst_remu = opcode_0110011 & funct3_111 & funct7_0000001;
// NOP and MRET are also matched against whole-instruction constants.
wire inst_nop = (inst == `INST_NOP);
wire inst_mret = (inst == `INST_MRET);
将指令进行分类
由于不同的指令执行的方式不同,我们将指令分为:
1、ALU类指令:
inst_lui inst_auipc inst_add inst_addi inst_sub inst_sll inst_slli inst_slt inst_slti inst_sltu inst_sltiu inst_xor inst_xori inst_srl inst_srli inst_sra inst_srai inst_or inst_ori inst_and inst_andi
2、乘除指令
inst_mul inst_mulh inst_mulhsu inst_mulhu inst_div inst_divu inst_rem inst_remu
3、跳跃指令
inst_jal inst_jalr inst_beq inst_bne inst_blt inst_bge inst_bltu inst_bgeu
4、存储指令
inst_lb inst_lh inst_lw inst_lbu inst_lhu inst_sb inst_sh inst_sw
5、系统特殊指令
inst_ecall inst_ebreak inst_nop inst_mret inst_fence inst_fence_i
指令数据线
通过一根数据线将分类指令的值进行存储
// Per-group decode-info buses. Each bus carries a group code in the
// `DECINFO_GRP_BUS field plus one flag per operation of that group.
// The bit positions come from macros defined outside this snippet.

// ALU group.
wire[`DECINFO_ALU_BUS_WIDTH-1:0] dec_alu_info_bus;
assign dec_alu_info_bus[`DECINFO_GRP_BUS] = `DECINFO_GRP_ALU;
assign dec_alu_info_bus[`DECINFO_ALU_LUI] = inst_lui;
assign dec_alu_info_bus[`DECINFO_ALU_AUIPC] = inst_auipc;
assign dec_alu_info_bus[`DECINFO_ALU_ADD] = inst_add | inst_addi; // register and immediate forms share one ALU operation flag
assign dec_alu_info_bus[`DECINFO_ALU_SUB] = inst_sub;
assign dec_alu_info_bus[`DECINFO_ALU_SLL] = inst_sll | inst_slli;
assign dec_alu_info_bus[`DECINFO_ALU_SLT] = inst_slt | inst_slti;
assign dec_alu_info_bus[`DECINFO_ALU_SLTU] = inst_sltu | inst_sltiu;
assign dec_alu_info_bus[`DECINFO_ALU_XOR] = inst_xor | inst_xori;
assign dec_alu_info_bus[`DECINFO_ALU_SRL] = inst_srl | inst_srli;
assign dec_alu_info_bus[`DECINFO_ALU_SRA] = inst_sra | inst_srai;
assign dec_alu_info_bus[`DECINFO_ALU_OR] = inst_or | inst_ori;
assign dec_alu_info_bus[`DECINFO_ALU_AND] = inst_and | inst_andi;
// Set for every opcode-0010011 instruction and for LUI / AUIPC.
assign dec_alu_info_bus[`DECINFO_ALU_OP2IMM] = opcode_0010011 | inst_lui | inst_auipc;
// Set only for AUIPC.
assign dec_alu_info_bus[`DECINFO_ALU_OP1PC] = inst_auipc;
// Branch / jump group.
wire[`DECINFO_BJP_BUS_WIDTH-1:0] dec_bjp_info_bus;
assign dec_bjp_info_bus[`DECINFO_GRP_BUS] = `DECINFO_GRP_BJP;
assign dec_bjp_info_bus[`DECINFO_BJP_JUMP] = inst_jal | inst_jalr;
assign dec_bjp_info_bus[`DECINFO_BJP_BEQ] = inst_beq;
assign dec_bjp_info_bus[`DECINFO_BJP_BNE] = inst_bne;
assign dec_bjp_info_bus[`DECINFO_BJP_BLT] = inst_blt;
assign dec_bjp_info_bus[`DECINFO_BJP_BGE] = inst_bge;
assign dec_bjp_info_bus[`DECINFO_BJP_BLTU] = inst_bltu;
assign dec_bjp_info_bus[`DECINFO_BJP_BGEU] = inst_bgeu;
// Set only for JALR.
assign dec_bjp_info_bus[`DECINFO_BJP_OP1RS1] = inst_jalr;
// Multiply / divide group: one flag per instruction.
wire[`DECINFO_MULDIV_BUS_WIDTH-1:0] dec_muldiv_info_bus;
assign dec_muldiv_info_bus[`DECINFO_GRP_BUS] = `DECINFO_GRP_MULDIV;
assign dec_muldiv_info_bus[`DECINFO_MULDIV_MUL] = inst_mul;
assign dec_muldiv_info_bus[`DECINFO_MULDIV_MULH] = inst_mulh;
assign dec_muldiv_info_bus[`DECINFO_MULDIV_MULHSU] = inst_mulhsu;
assign dec_muldiv_info_bus[`DECINFO_MULDIV_MULHU] = inst_mulhu;
assign dec_muldiv_info_bus[`DECINFO_MULDIV_DIV] = inst_div;
assign dec_muldiv_info_bus[`DECINFO_MULDIV_DIVU] = inst_divu;
assign dec_muldiv_info_bus[`DECINFO_MULDIV_REM] = inst_rem;
assign dec_muldiv_info_bus[`DECINFO_MULDIV_REMU] = inst_remu;
// CSR group: the register and immediate variants share one operation flag;
// RS1IMM is set for the three immediate variants.
wire[`DECINFO_CSR_BUS_WIDTH-1:0] dec_csr_info_bus;
assign dec_csr_info_bus[`DECINFO_GRP_BUS] = `DECINFO_GRP_CSR;
assign dec_csr_info_bus[`DECINFO_CSR_CSRRW] = inst_csrrw | inst_csrrwi;
assign dec_csr_info_bus[`DECINFO_CSR_CSRRS] = inst_csrrs | inst_csrrsi;
assign dec_csr_info_bus[`DECINFO_CSR_CSRRC] = inst_csrrc | inst_csrrci;
assign dec_csr_info_bus[`DECINFO_CSR_RS1IMM] = inst_csrrwi | inst_csrrsi | inst_csrrci;
// The CSR address field is filled from inst[31:20].
assign dec_csr_info_bus[`DECINFO_CSR_CSRADDR] = inst[31:20];
// Memory group: one flag per load / store instruction.
wire[`DECINFO_MEM_BUS_WIDTH-1:0] dec_mem_info_bus;
assign dec_mem_info_bus[`DECINFO_GRP_BUS] = `DECINFO_GRP_MEM;
assign dec_mem_info_bus[`DECINFO_MEM_LB] = inst_lb;
assign dec_mem_info_bus[`DECINFO_MEM_LH] = inst_lh;
assign dec_mem_info_bus[`DECINFO_MEM_LW] = inst_lw;
assign dec_mem_info_bus[`DECINFO_MEM_LBU] = inst_lbu;
assign dec_mem_info_bus[`DECINFO_MEM_LHU] = inst_lhu;
assign dec_mem_info_bus[`DECINFO_MEM_SB] = inst_sb;
assign dec_mem_info_bus[`DECINFO_MEM_SH] = inst_sh;
assign dec_mem_info_bus[`DECINFO_MEM_SW] = inst_sw;
// System group: FENCE and FENCE.I share one flag.
wire[`DECINFO_SYS_BUS_WIDTH-1:0] dec_sys_info_bus;
assign dec_sys_info_bus[`DECINFO_GRP_BUS] = `DECINFO_GRP_SYS;
assign dec_sys_info_bus[`DECINFO_SYS_ECALL] = inst_ecall;
assign dec_sys_info_bus[`DECINFO_SYS_EBREAK] = inst_ebreak;
assign dec_sys_info_bus[`DECINFO_SYS_NOP] = inst_nop;
assign dec_sys_info_bus[`DECINFO_SYS_MRET] = inst_mret;
assign dec_sys_info_bus[`DECINFO_SYS_FENCE] = inst_fence | inst_fence_i;
该总线的输出指令为:
对上述指令进行判断,实现输出数据的提取
// Zero-extend every per-group bus to the common `DECINFO_WIDTH.
wire [`DECINFO_WIDTH-1:0] dec_alu_info_ext    = {{`DECINFO_WIDTH-`DECINFO_ALU_BUS_WIDTH{1'b0}}, dec_alu_info_bus};
wire [`DECINFO_WIDTH-1:0] dec_bjp_info_ext    = {{`DECINFO_WIDTH-`DECINFO_BJP_BUS_WIDTH{1'b0}}, dec_bjp_info_bus};
wire [`DECINFO_WIDTH-1:0] dec_muldiv_info_ext = {{`DECINFO_WIDTH-`DECINFO_MULDIV_BUS_WIDTH{1'b0}}, dec_muldiv_info_bus};
wire [`DECINFO_WIDTH-1:0] dec_csr_info_ext    = {{`DECINFO_WIDTH-`DECINFO_CSR_BUS_WIDTH{1'b0}}, dec_csr_info_bus};
wire [`DECINFO_WIDTH-1:0] dec_mem_info_ext    = {{`DECINFO_WIDTH-`DECINFO_MEM_BUS_WIDTH{1'b0}}, dec_mem_info_bus};
wire [`DECINFO_WIDTH-1:0] dec_sys_info_ext    = {{`DECINFO_WIDTH-`DECINFO_SYS_BUS_WIDTH{1'b0}}, dec_sys_info_bus};

// AND-OR selection: each extended bus is masked by its group-select flag and
// the masked values are ORed together to form the output bus.
assign dec_info_bus_o = ({`DECINFO_WIDTH{op_alu}}    & dec_alu_info_ext)    |
                        ({`DECINFO_WIDTH{op_bjp}}    & dec_bjp_info_ext)    |
                        ({`DECINFO_WIDTH{op_muldiv}} & dec_muldiv_info_ext) |
                        ({`DECINFO_WIDTH{op_csr}}    & dec_csr_info_ext)    |
                        ({`DECINFO_WIDTH{op_mem}}    & dec_mem_info_ext)    |
                        ({`DECINFO_WIDTH{op_sys}}    & dec_sys_info_ext);
对不同类型的立即数进行扩展并得到输出
// Immediate generation: every candidate immediate is built as a 64-bit value.
// The U/J/B/S/I forms are sign-extended from inst[31]; the CSR and shift
// forms are zero-extended.
wire[63:0] inst_u_type_imm = {{32{inst[31]}}, inst[31:12], 12'b0};
wire[63:0] inst_j_type_imm = {{44{inst[31]}}, inst[19:12], inst[20], inst[30:21], 1'b0};
wire[63:0] inst_b_type_imm = {{52{inst[31]}}, inst[7], inst[30:25], inst[11:8], 1'b0};
wire[63:0] inst_s_type_imm = {{52{inst[31]}}, inst[31:25], inst[11:7]};
wire[63:0] inst_i_type_imm = {{52{inst[31]}}, inst[31:20]};
// 5-bit zero-extended immediate taken from inst[19:15].
wire[63:0] inst_csr_type_imm = {59'h0, inst[19:15]};
// Shift amount: with 64-bit operands the RV64I shamt field is 6 bits wide
// (inst[25:20]); taking only inst[24:20] would truncate shifts of 32..63.
wire[63:0] inst_shift_type_imm = {58'h0, inst[25:20]};
// AND-OR selection of the immediate that matches the decoded format; the
// inst_sel_* flags are produced outside this snippet.
assign dec_imm_o = ({64{inst_sel_u_imm}} & inst_u_type_imm) |
                   ({64{inst_sel_j_imm}} & inst_j_type_imm) |
                   ({64{inst_sel_b_imm}} & inst_b_type_imm) |
                   ({64{inst_sel_s_imm}} & inst_s_type_imm) |
                   ({64{inst_sel_i_imm}} & inst_i_type_imm) |
                   ({64{inst_sel_csr_imm}} & inst_csr_type_imm) |
                   ({64{inst_sel_shift_imm}} & inst_shift_type_imm);
根据指令的译码我们得到如下的数据:
1、数据总线的值(包含所取指令的类型)
2、输出立即数的值
3、是否访问rs1 是否访问rs2 是否进行写操作 (三个数据信号) 并且 得到rs1地址 rs2地址 rd的地址
EXE执行
在执行操作中,我们需要执行ALU类型指令与跳跃分支类型的指令,因此需要通过数据总线判断当前指令是分支指令还是ALU类型指令:
// Group detection: compare the group field of the decode-info bus against
// the ALU and branch/jump group codes.
assign alu_flag = (dec_info_bus[`DECINFO_GRP_BUS] == `DECINFO_GRP_ALU);
assign bjp_flag = (dec_info_bus[`DECINFO_GRP_BUS] == `DECINFO_GRP_BJP);
得到控制信号实现不同的指令判断
Regfile写回
需要实现对数据寄存器进行初始化,并且在写使能有效时将数据写入目标寄存器
部分代码如下所示:
// General-purpose register file: 32 entries of 64 bits.
reg [63:0] gpr[0 : 31];
integer i;
// Synchronous reset clears every entry to `ZERO_WORD; otherwise a single
// write port stores w_data at waddr when en_w is 1.
always @(posedge clk)
begin
    if(rst == 1'b1) begin
        for(i = 0; i < 32; i++)
            begin gpr[i] <= `ZERO_WORD; end
    end else begin
        // RISC-V register x0 is hard-wired to zero, so a write whose
        // destination is register 0 must be discarded.
        if(en_w == 1'b1 && waddr != 0)
            gpr[waddr] <= w_data;
    end
end
// Two combinational read ports.
assign rdata_1 = gpr[raddr1];
assign rdata_2 = gpr[raddr2];
出现的问题总结
本人在linux环境下,利用verilator进行仿真
1、指令与PC岔开一个周期
出现问题的原因:由于在verilator中无法在时钟上升沿后对pc进行读取
解决的方案:通过DPI-C的机制实现数据的提取,具体实现方式如下:
// _mem_data is written procedurally, as the output argument of pmem_read
// inside the always block below, so it is declared as a variable (reg)
// rather than a net: a net is not a legal target of a procedural assignment.
reg [64-1:0] _mem_data;
// DPI-C import: the function body is provided by the simulation environment.
import "DPI-C" function void pmem_read(
    input longint raddr,
    output longint rdata
);
/* Simulation only: pass the current PC to the simulation environment, which returns the instruction at that address */
always @(*) begin
    pmem_read(inst_addr, _mem_data);
end
// Only the low 32 bits of the 64-bit read data are used as the instruction.
assign inst_data = _mem_data[31:0];
整个单周期CPU执行指令的顺序
在时钟上升沿实现PC的跳转,随后依次进行取指令、解析指令、执行指令;写回则在下一个时钟周期的上升沿到来时完成。