整理《Verilog数字系统设计教程》第17章“简化的RISC_CPU设计”所遇到的问题以及解决办法,并附带仿真图。
RISC_CPU可以分为以下8个部分:
(1)时钟发生器:clk_gen(clk,rst_n,fetch,alu_ena);
(2)指令寄存器:register(clk,rst_n,ena,data,opc_iraddr);
(3)累加器:accum(accum,data,ena,clk,rst_n);
(4)算术逻辑运算单元:alu(clk,alu_out,zero,data,accum,alu_ena,opcode);
(5)数据控制器:datactl(data,in,data_ena);
(6)状态控制器:machinectl(clk,rst_n,ena,fetch);
(7)程序计数器:counter(clk,rst_n,load,ir_addr,pc_addr);
(8)地址多路器:addr(ir_addr,pc_addr,fetch,addr);
1.时钟发生器:clk_gen(clk,rst_n,fetch,alu_ena);其中fetch信号为时钟信号clk的八分频输出,用于控制地址多路器输出指令地址还是数据地址,alu_ena用于控制算术逻辑运算单元。
`timescale 1ns/1ns
// clk_gen: produces the `fetch` and `alu_ena` strobes from `clk` with a
// one-hot sequencer that walks S1 -> S2 -> ... -> S8 -> S1 (eight clocks
// per round).
//   clk     - clock; all registers update on its rising edge
//   rst_n   - synchronous, active-low reset (sampled on the rising edge)
//   fetch   - set on the edge that leaves S3, cleared on the edge that
//             leaves S7, so it is high for four of every eight clocks
//   alu_ena - set on the edge that leaves S1, cleared on the edge that
//             leaves S2, i.e. a one-clock pulse per round
module clk_gen(
    input  wire clk,
    input  wire rst_n,
    output reg  fetch,
    output reg  alu_ena
);

    // One-hot sequencer position.
    reg [7:0] state;

    parameter S1   = 8'b0000_0001,
              S2   = 8'b0000_0010,
              S3   = 8'b0000_0100,
              S4   = 8'b0000_1000,
              S5   = 8'b0001_0000,
              S6   = 8'b0010_0000,
              S7   = 8'b0100_0000,
              S8   = 8'b1000_0000,
              IDLE = 8'b0000_0000;

    // Sequencer: IDLE is only visited after reset (or after an illegal
    // code); from there the state rotates through S1..S8 forever.
    always @(posedge clk) begin
        if (!rst_n)
            state <= IDLE;
        else begin
            case (state)
                IDLE:    state <= S1;
                S1:      state <= S2;
                S2:      state <= S3;
                S3:      state <= S4;
                S4:      state <= S5;
                S5:      state <= S6;
                S6:      state <= S7;
                S7:      state <= S8;
                S8:      state <= S1;
                default: state <= IDLE;
            endcase
        end
    end

    // fetch: goes high leaving S3, goes low leaving S7, holds otherwise.
    always @(posedge clk) begin
        if (!rst_n)
            fetch <= 1'b0;
        else begin
            case (state)
                S3:      fetch <= 1'b1;
                S7:      fetch <= 1'b0;
                default: ;
            endcase
        end
    end

    // alu_ena: goes high leaving S1, goes low leaving S2, holds otherwise.
    always @(posedge clk) begin
        if (!rst_n)
            alu_ena <= 1'b0;
        else begin
            case (state)
                S1:      alu_ena <= 1'b1;
                S2:      alu_ena <= 1'b0;
                default: ;
            endcase
        end
    end

endmodule
2.指令寄存器:register(clk,rst_n,ena,data,opc_iraddr);每条指令为两个字节共16位,数据总线宽度为8位,所以一条指令需要分两次读取。一条指令高3位为操作码,低13位为地址。
`timescale 1ns/1ns
// register: instruction register. Assembles a 16-bit word from two
// consecutive bytes presented on the 8-bit `data` input.
//   clk        - clock; all updates happen on its rising edge
//   rst_n      - synchronous, active-low reset; clears the word and the
//                byte selector
//   ena        - load enable; while high, one byte is captured per clock.
//                When low, the byte selector is forced back to "high byte"
//   data       - byte to capture
//   opc_iraddr - assembled word: first byte lands in [15:8], second byte
//                in [7:0]
module register(
    input  wire        clk,
    input  wire        rst_n,
    input  wire        ena,
    input  wire [7:0]  data,
    output reg  [15:0] opc_iraddr
);

    // Byte selector: 0 -> next byte is the high byte, 1 -> the low byte.
    reg state;

    always @(posedge clk) begin
        if (!rst_n) begin
            opc_iraddr <= 16'h0000;
            state      <= 1'b0;
        end
        else if (ena) begin
            // Plain `case` instead of `casex`: with `casex` an X/Z selector
            // is a wildcard that always matches the first arm, which makes
            // the default arm unreachable and hides an unknown selector.
            // With `case`, an unknown selector reaches the default arm and
            // is propagated as X so it shows up in simulation.
            case (state)
                1'b0: begin
                    opc_iraddr[15:8] <= data;   // capture the high byte
                    state            <= 1'b1;
                end
                1'b1: begin
                    opc_iraddr[7:0]  <= data;   // capture the low byte
                    state            <= 1'b0;
                end
                default: begin
                    opc_iraddr <= 16'hxxxx;
                    state      <= 1'bx;
                end
            endcase
        end
        else begin
            // Not loading: always restart with the high byte.
            state <= 1'b0;
        end
    end

endmodule
3.累加器:accum(accum,data,ena,clk,rst_n);用来存放算术逻辑运算单元alu的运算结果。
// accum: 8-bit accumulator register.
//   accum - registered output value
//   data  - value to store
//   ena   - load enable; `data` is stored on a rising clock edge while high
//   clk   - clock, rising-edge active
//   rst_n - synchronous, active-low reset; clears the register to 0
module accum(
    output reg  [7:0] accum,
    input  wire [7:0] data,
    input  wire       ena,
    input  wire       clk,
    input  wire       rst_n
);

    always @(posedge clk) begin
        if (!rst_n) begin
            accum <= 8'h00;
        end
        else if (ena) begin
            accum <= data;
        end
        // otherwise: hold the current value
    end

endmodule
4.算术逻辑运算单元:alu(clk,alu_out,zero,data,accum,alu_ena,opcode);实现8种操作码的运算。
`timescale 1ns/1ns
// alu: arithmetic/logic unit with a registered result.
//   clk     - clock; alu_out updates on its rising edge
//   alu_out - registered result; only written while alu_ena is high, and
//             has no reset, so it is unknown until the first enabled clock
//   zero    - combinational flag, 1 when accum is all zeros
//   data    - operand from the data bus
//   accum   - operand from the accumulator
//   alu_ena - result-register enable
//   opcode  - 3-bit operation select (see the parameters below)
module alu(
    input  wire       clk,
    output reg  [7:0] alu_out,
    output wire       zero,
    input  wire [7:0] data,
    input  wire [7:0] accum,
    input  wire       alu_ena,
    input  wire [2:0] opcode
);

    // Opcode encodings.
    parameter HLT  = 3'b000,
              SKZ  = 3'b001,
              ADD  = 3'b010,
              ANDD = 3'b011,
              XORR = 3'b100,
              LDA  = 3'b101,
              STO  = 3'b110,
              JMP  = 3'b111;

    assign zero = !accum;

    always @(posedge clk) begin
        if (alu_ena) begin
            // Plain `case` instead of `casex`: with `casex`, X/Z bits in
            // opcode act as wildcards, so an unknown opcode is silently
            // decoded as the first matching instruction and the default
            // arm can never be taken. With `case`, an unknown opcode
            // yields an X result that is visible in simulation.
            case (opcode)
                HLT:     alu_out <= accum;          // halt: pass the accumulator through
                SKZ:     alu_out <= accum;          // skip next instruction if accum is zero: pass through
                ADD:     alu_out <= data + accum;   // add the addressed RAM data to the accumulator
                ANDD:    alu_out <= data & accum;   // bitwise AND
                XORR:    alu_out <= data ^ accum;   // bitwise XOR
                LDA:     alu_out <= data;           // load the addressed data into the accumulator
                STO:     alu_out <= accum;          // store the accumulator at the given address
                JMP:     alu_out <= accum;          // unconditional jump: pass through
                default: alu_out <= 8'bxxxx_xxxx;
            endcase
        end
    end

endmodule
5.数据控制器:datactl(data,in,data_ena);数据控制器的作用是控制算术运算器的结果何时输出到总线上。总线上不同时刻传送的内容也不相同:有时传送rom指令,有时传送ram数据,有时传送算术运算器的输出数据alu_out。
6.地址多路器:addr(ir_addr,pc_addr,fetch,addr);用于选择输出的是程序计数器地址还是数据地址。前四个周期读ROM,后四个周期读RAM。
// addr: 2-to-1 address multiplexer.
//   ir_addr - address selected while fetch is 0
//   pc_addr - address selected while fetch is 1
//   fetch   - select input
//   addr    - selected 13-bit address (purely combinational)
module addr(
    input  wire [12:0] ir_addr,
    input  wire [12:0] pc_addr,
    input  wire        fetch,
    output wire [12:0] addr
);

    assign addr = fetch ? pc_addr : ir_addr;

endmodule
7.程序计数器:counter(clk,rst_n,load,ir_addr,pc_addr);程序一般按照顺序执行,当遇到JMP跳转语句时程序地址指向ir_addr的地址即指令跳转。
// counter: 13-bit program counter.
//   clk     - clock; the counter updates on its rising edge
//   rst_n   - asynchronous, active-low reset; clears pc_addr to 0
//   load    - when high at a clock edge, pc_addr takes ir_addr;
//             when low, pc_addr increments by one (wrapping at 13 bits)
//   ir_addr - value to load
//   pc_addr - current counter value
module counter(
    input  wire        clk,
    input  wire        rst_n,
    input  wire        load,
    input  wire [12:0] ir_addr,
    output reg  [12:0] pc_addr
);

    always @(posedge clk or negedge rst_n) begin
        if (!rst_n) begin
            pc_addr <= {13{1'b0}};
        end
        else begin
            if (load)
                pc_addr <= ir_addr;           // jump: take the new address
            else
                pc_addr <= pc_addr + 13'd1;   // sequential execution
        end
    end

endmodule
8.状态控制器:machinectl(clk,rst_n,ena,fetch);分为两部分:状态机和状态控制器。此处和书中略有不同。
`timescale 1ns/1ns
// machinectl: generates the enable for the control state machine.
//   clk   - clock; ena updates on its rising edge
//   rst_n - synchronous, active-low reset; clears ena
//   ena   - cleared by reset, set the first time fetch is sampled high,
//           and then held at 1 until the next reset
//   fetch - fetch strobe that arms the enable
// (The unused internal `state` register of the earlier version was removed;
// it was never read or written.)
module machinectl(
    input  wire clk,
    input  wire rst_n,
    output reg  ena,
    input  wire fetch
);

    always @(posedge clk) begin
        if (!rst_n)
            ena <= 1'b0;
        else if (fetch)
            ena <= 1'b1;
        // otherwise: hold (ena is sticky once set)
    end

endmodule
`timescale 1ns/1ns
// machine: control sequencer. While ena is high it steps through eight
// states (3'b000 .. 3'b111, then back to 3'b000), one state per rising
// clock edge, and drives the eight control outputs according to the
// current state, opcode and the zero flag.
//   inc_pc, load_acc, load_pc, rd    - first group of control outputs
//   wr, load_ir, datactl_ena, halt   - second group of control outputs
//   clk    - clock, rising-edge active
//   zero   - zero flag; only consulted together with opcode SKZ
//   ena    - when low, the sequencer is held in state 0 with all outputs 0
//   opcode - 3-bit opcode of the current instruction
// The two concatenations assigned in every branch are, in bit order,
//   {inc_pc,load_acc,load_pc,rd} and {wr,load_ir,datactl_ena,halt}.
module machine(inc_pc,load_acc,load_pc,rd,wr,load_ir,datactl_ena,halt,clk,zero,ena,opcode);
output reg inc_pc,load_acc,load_pc,rd,wr,load_ir;
output reg datactl_ena,halt;
input clk,zero,ena;
input [2:0]opcode;
// Current step of the eight-step control cycle.
reg [2:0]state;
// Opcode encodings.
parameter HLT =3'B000,
SKZ =3'B001,
ADD =3'B010,
ANDD=3'B011,
XORR=3'B100,
LDA =3'B101,
STO =3'B110,
JMP =3'B111;
// ena low acts as a synchronous reset of the sequencer and all outputs;
// otherwise one step of the control cycle is executed per clock.
always@(posedge clk)begin
if(!ena)begin
state<=3'b000;
{inc_pc,load_acc,load_pc,rd}<=4'b0000;
{wr,load_ir,datactl_ena,halt}<=4'b0000;
end
else
ctl_cycle;
end
// Task: performs one step of the control cycle (outputs + next state).
// NOTE(review): casex treats X/Z bits of state as don't-care, so an unknown
// state matches the first arm and the default arm looks unreachable - confirm
// this is intended.
task ctl_cycle;
begin
casex(state)
// Step 0: rd=1, load_ir=1.
3'b000:begin
{inc_pc,load_acc,load_pc,rd}<=4'b0001;
{wr,load_ir,datactl_ena,halt}<=4'b0100;
state<=3'b001;
end
// Step 1: inc_pc=1, rd=1, load_ir=1.
3'b001:begin
{inc_pc,load_acc,load_pc,rd}<=4'b1001;
{wr,load_ir,datactl_ena,halt}<=4'b0100;
state<=3'b010;
end
// Step 2: all outputs 0.
3'b010:begin
{inc_pc,load_acc,load_pc,rd}<=4'b0000;
{wr,load_ir,datactl_ena,halt}<=4'b0000;
state<=3'b011;
end
// Step 3: inc_pc=1 for every opcode; HLT additionally raises halt.
3'b011:begin
if(opcode==HLT)begin
{inc_pc,load_acc,load_pc,rd}<=4'b1000;
{wr,load_ir,datactl_ena,halt}<=4'b0001;
end
else begin
{inc_pc,load_acc,load_pc,rd}<=4'b1000;
{wr,load_ir,datactl_ena,halt}<=4'b0000;
end
state<=3'b100;
end
// Step 4: JMP -> load_pc=1; ADD/ANDD/XORR/LDA -> rd=1;
// STO -> datactl_ena=1; anything else -> all 0.
3'b100:begin
if(opcode==JMP)begin
{inc_pc,load_acc,load_pc,rd}<=4'b0010;
{wr,load_ir,datactl_ena,halt}<=4'b0000;
end
else if(opcode==ADD||opcode==ANDD||opcode==XORR||opcode==LDA)begin
{inc_pc,load_acc,load_pc,rd}<=4'b0001;
{wr,load_ir,datactl_ena,halt}<=4'b0000;
end
else if(opcode==STO)begin
{inc_pc,load_acc,load_pc,rd}<=4'b0000;
{wr,load_ir,datactl_ena,halt}<=4'b0010;
end
else begin
{inc_pc,load_acc,load_pc,rd}<=4'b0000;
{wr,load_ir,datactl_ena,halt}<=4'b0000;
end
state<=3'b101;
end
// Step 5: ADD/ANDD/XORR/LDA -> load_acc=1, rd=1; SKZ with zero set ->
// inc_pc=1; JMP -> inc_pc=1, load_pc=1; STO -> wr=1, datactl_ena=1;
// anything else -> all 0.
3'b101:begin
if(opcode==ANDD||opcode==ADD||opcode==XORR||opcode==LDA)begin
{inc_pc,load_acc,load_pc,rd}<=4'b0101;
{wr,load_ir,datactl_ena,halt}<=4'b0000;
end
else if(opcode==SKZ&&zero==1)begin
{inc_pc,load_acc,load_pc,rd}<=4'b1000;
{wr,load_ir,datactl_ena,halt}<=4'b0000;
end
else if(opcode==JMP)begin
{inc_pc,load_acc,load_pc,rd}<=4'b1010;
{wr,load_ir,datactl_ena,halt}<=4'b0000;
end
else if(opcode==STO)begin
{inc_pc,load_acc,load_pc,rd}<=4'b0000;
{wr,load_ir,datactl_ena,halt}<=4'b1010;
end
else begin
{inc_pc,load_acc,load_pc,rd}<=4'b0000;
{wr,load_ir,datactl_ena,halt}<=4'b0000;
end
state<=3'b110;
end
// Step 6: STO keeps datactl_ena=1 (wr drops back to 0); every other opcode
// drives all outputs to 0 (the ADD/ANDD/XORR/LDA arm and the final else
// arm assign the same values).
3'b110:begin
if(opcode==STO)begin
{inc_pc,load_acc,load_pc,rd}<=4'b0000;
{wr,load_ir,datactl_ena,halt}<=4'b0010;
end
else if(opcode==ANDD||opcode==ADD||opcode==XORR||opcode==LDA)begin
{inc_pc,load_acc,load_pc,rd}<=4'b0000;
{wr,load_ir,datactl_ena,halt}<=4'b0000;
end
else begin
{inc_pc,load_acc,load_pc,rd}<=4'b0000;
{wr,load_ir,datactl_ena,halt}<=4'b0000;
end
state<=3'b111;
end
// Step 7: SKZ with zero set -> inc_pc=1; otherwise all 0. Wraps to step 0.
3'b111:begin
if(opcode==SKZ&&zero==1)begin
{inc_pc,load_acc,load_pc,rd}<=4'b1000;
{wr,load_ir,datactl_ena,halt}<=4'b0000;
end
else begin
{inc_pc,load_acc,load_pc,rd}<=4'b0000;
{wr,load_ir,datactl_ena,halt}<=4'b0000;
end
state<=3'b000;
end
// Fallback: clear everything and restart at step 0.
default:begin
{inc_pc,load_acc,load_pc,rd}<=4'b0000;
{wr,load_ir,datactl_ena,halt}<=4'b0000;
state<=3'b000;
end
endcase
end
endtask
endmodule
将以上8部分组合起来即是一个简易cpu。
// cpu: top level of the simplified RISC CPU. It contains no logic of its
// own; it only instantiates the sub-modules and wires them together.
//   clk, rst_n        - clock and active-low reset, fanned out to the sub-modules
//   halt, rd, wr      - control outputs produced by the `machine` sequencer
//   addr              - address selected by the address multiplexer
//   data              - bidirectional 8-bit data bus
//   opcode, ir_addr   - upper 3 bits / lower 13 bits of the instruction register
//   fetch             - fetch strobe from the clock generator
//   pc_addr           - program counter value
module cpu(
    input  wire        clk,
    input  wire        rst_n,
    output wire        halt,
    output wire        rd,
    output wire        wr,
    output wire [12:0] addr,
    inout  wire [7:0]  data,
    output wire [2:0]  opcode,
    output wire        fetch,
    output wire [12:0] ir_addr,
    output wire [12:0] pc_addr
);

    // Datapath nets.
    wire [7:0] alu_out;
    wire [7:0] accum;
    wire       zero;

    // Control nets.
    wire       alu_ena;
    wire       inc_pc;
    wire       load_acc;
    wire       load_pc;
    wire       load_ir;
    wire       data_ena;
    wire       contr_ena;

    // ---- timing and control ------------------------------------------
    clk_gen clk_gen(
        .clk    (clk),
        .rst_n  (rst_n),
        .fetch  (fetch),
        .alu_ena(alu_ena)
    );

    machinectl machinectl(
        .clk  (clk),
        .rst_n(rst_n),
        .ena  (contr_ena),
        .fetch(fetch)
    );

    machine machine(
        .inc_pc     (inc_pc),
        .load_acc   (load_acc),
        .load_pc    (load_pc),
        .rd         (rd),
        .wr         (wr),
        .load_ir    (load_ir),
        .datactl_ena(data_ena),
        .halt       (halt),
        .clk        (clk),
        .zero       (zero),
        .ena        (contr_ena),
        .opcode     (opcode)
    );

    // ---- instruction register: {opcode, ir_addr} is the 16-bit word ----
    register register(
        .clk       (clk),
        .rst_n     (rst_n),
        .ena       (load_ir),
        .data      (data),
        .opc_iraddr({opcode, ir_addr})
    );

    // ---- datapath ------------------------------------------------------
    alu alu(
        .clk    (clk),
        .alu_out(alu_out),
        .zero   (zero),
        .data   (data),
        .accum  (accum),
        .alu_ena(alu_ena),
        .opcode (opcode)
    );

    accum accum1(
        .accum(accum),
        .data (alu_out),
        .ena  (load_acc),
        .clk  (clk),
        .rst_n(rst_n)
    );

    datactl datactl(
        .data    (data),
        .in      (alu_out),
        .data_ena(data_ena)
    );

    // ---- addressing: the program counter is clocked by inc_pc ----------
    counter counter(
        .clk    (inc_pc),
        .rst_n  (rst_n),
        .load   (load_pc),
        .ir_addr(ir_addr),
        .pc_addr(pc_addr)
    );

    addr addr1(
        .ir_addr(ir_addr),
        .pc_addr(pc_addr),
        .fetch  (fetch),
        .addr   (addr)
    );

endmodule
单单有一个cpu核是不行的,我们还需要一个RAM和一个ROM,分别用来存放数据和程序。此处和书中略有不同。
// ram: byte-wide memory on a shared bidirectional data bus.
//   data  - bidirectional data bus; driven by the RAM only while both
//           read and ena are high, high-impedance otherwise
//   addr  - 13-bit address; the array covers 13'h1800 .. 13'h1fff
//   ena   - chip select, qualifies reads
//   read  - read strobe
//   write - write strobe; the bus value is stored on its rising edge
module ram(
    inout  wire [7:0]  data,
    input  wire [12:0] addr,
    input  wire        ena,
    input  wire        read,
    input  wire        write
);

    reg [7:0] memory_ram [13'h1fff:13'h1800];

    // Drive the bus only for a selected read.
    wire drive_bus = read && ena;
    assign data = drive_bus ? memory_ram[addr] : 8'hzz;

    // Capture the bus value on the rising edge of the write strobe.
    always @(posedge write) begin
        memory_ram[addr] <= data;
    end

endmodule
// rom: byte-wide read-only memory.
//   data - output bus; driven only while both read and ena are high,
//          high-impedance otherwise
//   addr - 13-bit address; the array covers 13'h0000 .. 13'h1fff
//   read - read strobe
//   ena  - chip select
// The array is not initialised inside this module.
module rom(
    output wire [7:0]  data,
    input  wire [12:0] addr,
    input  wire        read,
    input  wire        ena
);

    reg [7:0] memory_rom [13'h1fff:13'h0000];

    // Drive the bus only for a selected read.
    wire drive_bus = read && ena;
    assign data = drive_bus ? memory_rom[addr] : 8'hzz;

endmodule
有了RAM和ROM以后我们还需要有一个控制器用来产生片选信号。
// addr_decode: combinational chip-select decoder.
//   addr    - 13-bit address
//   ram_sel - high for addresses 13'h1800 .. 13'h1fff (addr[12:11] == 2'b11)
//   rom_sel - high for addresses 13'h0000 .. 13'h17ff
// If the deciding address bits are unknown (X), neither select is asserted.
module addr_decode(
    input  wire [12:0] addr,
    output reg         ram_sel,
    output reg         rom_sel
);

    // Combinational logic: `always @*` with blocking assignments, so the
    // selects settle in the same simulation step as the address.
    // `casez` with `?` marks the ignored low address bits explicitly;
    // unlike `casex`, an X on a deciding address bit is not a wildcard and
    // falls through to the default arm instead of selecting a memory.
    always @* begin
        casez (addr)
            13'b1_1???_????_????: {rom_sel, ram_sel} = 2'b01;
            13'b0_????_????_????: {rom_sel, ram_sel} = 2'b10;
            13'b1_0???_????_????: {rom_sel, ram_sel} = 2'b10;
            default:              {rom_sel, ram_sel} = 2'b00;
        endcase
    end

endmodule
至此我们已经完成了一个简化RISC_CPU的设计。后面我们将编写测试文件进行测试。
注:本程序基本上和书中程序一致只有部分地方稍作修改,这是因为我在运行书上原程序时,发现并没有得到预想的结果,然后自己通过仿真波形查找了问题原因,对原程序进行了修改,修改后的程序经仿真验证后达到了设计要求,后续我也会将自己调试过程写出来。