对比复习一下阻塞赋值和非阻塞赋值:
always @ (*),描述的是组合逻辑,里面不管用阻塞赋值(=)还是非阻塞赋值(<=),都是阻塞的效果
always @ (posedge clk),描述的是时序逻辑,阻塞赋值和非阻塞赋值是起作用的,阻塞赋值可以当作一堆组合逻辑连接了一个触发器,非阻塞可以看做流水结构
为什么需要cache?
要想运行一段程序,需要将可执行程序从flash中load到主存,然后在主存中执行,执行完了还会把执行结果再store到内存
CPU的性能也取决于相关存储部件的存取速度,而实际上CPU工作速度很高,内存存取速度很慢,就会造成CPU等待,降低了处理速度,也就拉低了CPU执行效率
CPU register的速度一般小于1ns,主存的速度一般是65ns左右。速度差异近百倍。因此,当CPU试图从主存中load/store 操作时,由于主存的速度限制,CPU不得不等待这漫长的65ns时间
为了减少CPU与内存的速度差异,可以在慢速的RAM和快速的CPU之间插入一至多级的速度较快、容量较小的SRAM起到缓冲作用;使CPU既可以较快地存取主存中的数据,又不使系统成本上升过高,这就是Cache法
cache的局部性原理
- 程序访问的局部性:在一个较短的时间间隔内,由程序产生的地址往往集中在存储器逻辑地址空间的很小范围内,这种对局部范围的存储器地址频繁访问,而对此范围以外的地址则访问甚少的一种现象。
- 时间局部性:当程序访问一个存储位置时,有很大可能性程序在不久的将来还会再次访问同一位置,程序的循环结构和过程调用体现了时间局部性
- 空间局部性:当程序访问了某个存储单元后,则不久之后其位置附近的存储单元也将被访问,因为计算机中的数据通常被连续的存放在主存,程序的数据结构体现了空间局部性
- 程序访问的局部性原理和Cache的关系:利用程序访问的局部性原理,可以在主存和CPU的通用寄存器之间设置Cache,把正在执行的指令地址附近的一部分指令或数据从主存调入Cache,供CPU在一段时间内使用,从而提高CPU访问存储系统的速度。当程序继续执行时,程序访问的局部范围也不断变化,新的数据将不断地从主存调入Cache中替换掉原有的数据
cache工作过程
CPU发出一个地址,cache经过地址映射,得到对应的块地址,根据地址索引在cache中查找CPU要访问的数据是否在cache的块中,如果命中,从cache中找到对应的数据完成读写,如果没有命中,就需要从内存中读取数据,其中牵涉到脏位和cache替换
cache写回策略
- 写回 write back
只有在一个cache行被选中要替换回主存时,如果cache行的脏位为1,才将它写回主存
脏位用来标志缓存中的cache行是否被修改过
如果一个内存块在加载到cache后未被修改过,那么对这个cache行进行替换时,就不需要把数据写回主存,可以降低从cache到主存的写次数
- 写通 write through
每当cache收到写数据(store)指令时,如果写命中,CPU会同时将数据写到cache和主存
写分配:当数据写不命中时,给数据分配一个cache行:先把该cache行中原有的(脏)数据写回主存,再把对应的主存块调入该行,最后将数据写到cache
非写分配:直接把数据写回主存而不加载到cache,这样可以减少读主存的时间,没有利用好空间局部性
写回写分配:CPU先对cache发起读写访问,若读写命中,要么返回给CPU对应的数据,要么在cache中完成写操作,若读写不命中,并且对应块脏位为1,那么需要把这个块写回主存,然后再从主存读出一个块的数据更新cache,之后再对更新的这个块进行读写操作
直接相连direct mapped cache
一个主存块只能拷贝到cache的一个特定行位置,cache的行号i和主存的块号j满足i = j mod m,m是cache的总行数
对应的颜色只能缓存放到对应颜色的cache行中,容易产生cache颠簸
// 直接相联cache direct mapped cache
// cache大小为32块,主存大小为1024块,1块=4字,1字=32bit
// 主存地址为12bit,其中[1:0]是块内偏移,[6:2]是索引,[11:7]是tag
假设我想要访问黄色部分的数据,手里现在有对应的地址00010_00001_10,先用index找到cache中对应的行,也就是cache中的line1,然后把line1的tag和地址高位的tag作比较,如果相同则命中,表示cache的line1中缓存了内存中包含黄色数据的那个块(tag2对应的那个块),然后就可以根据offset块内偏移,确定要返回的一个字数据
特点:不灵活,每个主存块只能用固定对应的某个缓存块,即使还空着许多位置也不能用
// 直接相联cache direct mapped cache
// cache大小为32块,主存大小为1024块,1块=4字,1字=32bit
// 主存地址为12bit,其中[1:0]是块内偏移,[6:2]是索引,[11:7]是tag
// V+D+Tag+Data = 1+1+5+128 = 135
// 【 5bit | 5bit | 2bit】
// tag index offset
// Direct-mapped, write-back / write-allocate cache.
//   * 32 lines, one line = 4 words of 32 bits.
//   * 12-bit word address = { tag[11:7], index[6:2], offset[1:0] }.
//   * Line layout (135 bits) = { V, D, tag[4:0], data[127:0] }.
//
// CPU side: a request is picked up when cpu_req_valid is high while the FSM
//           is in IDLE; cpu_ready is registered high for the clock after a
//           hit in CompareTag. cpu_req_addr / cpu_req_rw / cpu_data_write are
//           used directly in every state (they are not captured), so the
//           requester must hold them stable until cpu_ready.
// MEM side: mem_req_valid is kept asserted in Allocate / WriteBack until
//           mem_ready is sampled high.
module cache (
    input  wire          clk,
    input  wire          rst,              // active-low, asynchronous (resets the FSM state only)
    // cpu <-> cache
    input  wire [ 11: 0] cpu_req_addr   ,
    input  wire          cpu_req_rw     ,  // 0 = read, 1 = write
    input  wire          cpu_req_valid  ,
    input  wire [ 31: 0] cpu_data_write ,
    output reg  [ 31: 0] cpu_data_read  ,
    output reg           cpu_ready      ,
    // cache <-> mem
    output reg  [ 11: 0] mem_req_addr   ,
    output reg           mem_req_rw     ,  // 0 = read block, 1 = write block
    output reg           mem_req_valid  ,
    output reg  [127: 0] mem_data_write ,
    input  wire [127: 0] mem_data_read  ,
    input  wire          mem_ready
);

    // Bit positions inside one cache line.
    parameter V        = 134 ;   // valid bit
    parameter D        = 133 ;   // dirty bit
    parameter TagMSB   = 132 ;
    parameter TagLSB   = 128 ;
    parameter BlockMSB = 127 ;
    parameter BlockLSB = 0   ;

    // FSM states.
    parameter IDLE       = 2'd0;
    parameter CompareTag = 2'd1;
    parameter Allocate   = 2'd2;
    parameter WriteBack  = 2'd3;

    reg [134:0] cache_data [0:31];
    reg [1:0]   curr_state, next_state;
    reg         hit;

    wire [4:0]  cpu_req_tag;
    wire [4:0]  cpu_req_index;
    wire [1:0]  cpu_req_offset;

    assign cpu_req_tag    = cpu_req_addr[11: 7];
    assign cpu_req_index  = cpu_req_addr[ 6: 2];
    assign cpu_req_offset = cpu_req_addr[ 1: 0];

    integer i;

    // Start with every line invalid and clean.
    initial begin
        for (i = 0; i < 32; i = i + 1)
            cache_data[i] = 135'b0;
    end

    // State register.
    always @ (posedge clk or negedge rst)
        if (rst == 1'b0)
            curr_state <= IDLE;
        else
            curr_state <= next_state;

    // Next-state logic (combinational, hence blocking assignments).
    always @ (*)
        case (curr_state)
            IDLE:
                if (cpu_req_valid)                 // CPU starts a read/write request
                    next_state = CompareTag;
                else
                    next_state = IDLE;
            CompareTag:
                if (hit)
                    next_state = IDLE;
                else if (cache_data[cpu_req_index][V:D] == 2'b11)
                    next_state = WriteBack;        // victim is valid and dirty: write it back first
                else
                    next_state = Allocate;         // fetch the block from memory
            Allocate:
                if (mem_ready)
                    next_state = CompareTag;       // block has been filled, re-check the tag
                else
                    next_state = Allocate;
            WriteBack:
                if (mem_ready)
                    next_state = Allocate;         // victim written, now fetch the new block
                else
                    next_state = WriteBack;
            default:
                next_state = IDLE;
        endcase

    // Hit detection. Assigned on every path so no latch is inferred; hit is
    // only ever 1 while the FSM is in CompareTag.
    always @ (*)
        if (curr_state == CompareTag
            && cache_data[cpu_req_index][V] == 1'b1
            && cache_data[cpu_req_index][TagMSB:TagLSB] == cpu_req_tag)
            hit = 1'b1;
        else
            hit = 1'b0;

    // All clocked datapath activity lives in one block so that cache_data has
    // a single writer. The states are mutually exclusive, so this is
    // cycle-for-cycle the same as handling the memory side and the CPU side
    // in two separate blocks.
    always @ (posedge clk) begin
        // Defaults: both handshakes are idle unless a branch below says otherwise.
        mem_req_valid <= 1'b0;
        cpu_ready     <= 1'b0;

        case (curr_state)
            CompareTag:
                if (hit) begin
                    cpu_ready <= 1'b1;
                    if (cpu_req_rw == 1'b0)        // read hit: return the addressed word
                        cpu_data_read <= cache_data[cpu_req_index][cpu_req_offset*32 +: 32];
                    else begin                     // write hit: update the word, mark line dirty
                        cache_data[cpu_req_index][cpu_req_offset*32 +: 32] <= cpu_data_write;
                        cache_data[cpu_req_index][D] <= 1'b1;
                    end
                end

            Allocate:
                if (mem_ready == 1'b0) begin       // block not delivered yet: keep the read request up
                    mem_req_valid <= 1'b1;
                    mem_req_addr  <= {cpu_req_addr[11:2], 2'b00};
                    mem_req_rw    <= 1'b0;
                end
                else begin                         // block delivered: install it as valid and clean
                    cache_data[cpu_req_index][BlockMSB:BlockLSB] <= mem_data_read;
                    cache_data[cpu_req_index][V:D]               <= 2'b10;
                    cache_data[cpu_req_index][TagMSB:TagLSB]     <= cpu_req_tag;
                end

            WriteBack:
                if (mem_ready == 1'b0) begin       // write not accepted yet: keep the write request up
                    mem_req_valid  <= 1'b1;
                    // The victim's address is rebuilt from its stored tag and the index.
                    mem_req_addr   <= {cache_data[cpu_req_index][TagMSB:TagLSB], cpu_req_index, 2'b00};
                    mem_req_rw     <= 1'b1;
                    mem_data_write <= cache_data[cpu_req_index][BlockMSB:BlockLSB];
                end

            default: ;                             // IDLE: nothing beyond the defaults
        endcase
    end

endmodule
// Behavioural main-memory model: 4096 words of 32 bits, accessed one block
// (4 consecutive words starting at mem_req_addr) per request.
// Handshake: when a request is seen with mem_ready low, the block is written
// or read and mem_ready goes high; when a request is seen with mem_ready
// high, mem_ready goes low again.
module mem (
    input  wire          clk,
    input  wire          rst,              // active-low, asynchronous; clears mem_ready only
    input  wire [ 11: 0] mem_req_addr   ,  // word address of the first word of the block
    input  wire          mem_req_rw     ,  // 1 = write block, 0 = read block
    input  wire          mem_req_valid  ,
    input  wire [127: 0] mem_data_write ,
    output reg  [127: 0] mem_data_read  ,
    output reg           mem_ready
);

    reg [31: 0] mem [0:4095];
    integer i;          // used by the initialisation loop only
    integer w;          // word-in-block counter for the access loops

    // Zero the whole array at time 0.
    initial begin
        for (i = 0; i < 4096; i = i + 1)
            mem[i] = 32'd0;
    end

    always @ (posedge clk or negedge rst) begin
        if (rst == 1'b0)
            mem_ready <= 1'b0;
        else if (mem_req_valid) begin
            if (mem_ready == 1'b0) begin
                // New request: serve it and raise mem_ready.
                if (mem_req_rw == 1'b1) begin
                    // Word w of the block comes from bits [32*w +: 32].
                    for (w = 0; w < 4; w = w + 1)
                        mem[mem_req_addr + w] <= mem_data_write[32*w +: 32];
                    mem_ready <= 1'b1;
                end
                else if (mem_req_rw == 1'b0) begin
                    for (w = 0; w < 4; w = w + 1)
                        mem_data_read[32*w +: 32] <= mem[mem_req_addr + w];
                    mem_ready <= 1'b1;
                end
            end
            else if (mem_ready == 1'b1)
                // Request still up after completion: drop mem_ready.
                mem_ready <= 1'b0;
        end
    end

endmodule
// Testbench for the direct-mapped cache: instantiates `cache` and `mem` and
// drives a fixed sequence of CPU requests, advancing to the next request each
// time cpu_ready is seen high.
//
// NOTE(review): cpu_req_valid is assigned from two always blocks (the
// combinational next-state block and the clocked stimulus block), and the
// combinational block uses non-blocking assignments without assigning
// cpu_req_valid on every path. The stimulus relies on that ordering, so it
// is intentionally left as written; this style is simulation-only.
module sim_direct_cache ();

    reg clk ;
    reg rst ;

    // Stimulus steps, in the order they are issued.
    parameter WriteMiss      = 0;   // write to a block that is not cached
    parameter WriteHit       = 1;   // write to the block that was just cached
    parameter ReadMiss       = 2;   // read from a block that is not cached
    parameter ReadHit        = 3;   // read from a cached block
    parameter WriteDirtyMiss = 4;   // write that misses on a dirty line
    parameter ReadDirtyMiss  = 5;   // read that misses on a dirty line

    reg  [  3: 0] curr_state, next_state;

    reg  [ 11: 0] cpu_req_addr   ;
    reg           cpu_req_rw     ;
    reg           cpu_req_valid  ;
    reg  [ 31: 0] cpu_data_write ;
    wire [ 31: 0] cpu_data_read  ;
    wire          cpu_ready      ;

    wire [ 11: 0] mem_req_addr   ;
    wire          mem_req_rw     ;
    wire          mem_req_valid  ;
    wire [127: 0] mem_data_write ;
    wire [127: 0] mem_data_read  ;
    wire          mem_ready      ;

    // Reset is active low and released after 10 time units.
    initial begin
        clk = 0;
        rst = 0;
        #10
        rst = 1;
    end

    // Clock with a period of 10 time units.
    always #5 clk = ~clk;

    // Stimulus step register.
    always @ (posedge clk or negedge rst)
        if (rst == 1'b0)
            curr_state <= WriteMiss;
        else
            curr_state <= next_state;

    // Advance to the next step when the cache reports cpu_ready, and pull
    // cpu_req_valid low at the same moment.
    always @ (*)
        case (curr_state)
            WriteMiss:
                if (cpu_ready == 1'b1)
                    begin
                        next_state    <= WriteHit;
                        cpu_req_valid <= 1'b0;
                    end
                else
                    next_state <= WriteMiss;
            WriteHit:
                if (cpu_ready == 1'b1)
                    begin
                        next_state    <= ReadMiss;
                        cpu_req_valid <= 1'b0;
                    end
                else
                    next_state <= WriteHit;
            ReadMiss:
                if (cpu_ready == 1'b1)
                    begin
                        next_state    <= ReadHit;
                        cpu_req_valid <= 1'b0;
                    end
                else
                    next_state <= ReadMiss;
            ReadHit:
                if (cpu_ready == 1'b1)
                    begin
                        next_state    <= WriteDirtyMiss;
                        cpu_req_valid <= 1'b0;
                    end
                else
                    next_state <= ReadHit;
            WriteDirtyMiss:
                if (cpu_ready == 1'b1)
                    begin
                        next_state    <= ReadDirtyMiss;
                        cpu_req_valid <= 1'b0;
                    end
                else
                    next_state <= WriteDirtyMiss;
            ReadDirtyMiss: begin   // last step: still drop valid on ready, but stay here
                if (cpu_ready == 1'b1)
                    cpu_req_valid <= 1'b0;
                next_state <= ReadDirtyMiss;
            end
            default:
                next_state <= ReadDirtyMiss;
        endcase

    // Request issued for each step. Address fields are tag[11:7] | index[6:2] | offset[1:0].
    always @ (posedge clk or negedge rst)
        if (rst == 1'b0)
            begin
                cpu_req_addr   <= 12'd0;
                cpu_req_rw     <= 1'b0;
                cpu_req_valid  <= 1'b0;
                cpu_data_write <= 32'd0;
            end
        else
            case (curr_state)
                WriteMiss:      // write 8 to address 0 while line 0 is still empty
                    begin
                        cpu_req_addr   <= 12'd0;
                        cpu_req_rw     <= 1'b1;
                        cpu_data_write <= 32'd8;
                        cpu_req_valid  <= 1'b1;
                    end
                WriteHit:       // write 9 to address 0, which is now cached
                    begin
                        cpu_req_addr   <= 12'd0;
                        cpu_req_rw     <= 1'b1;
                        cpu_data_write <= 32'd9;
                        cpu_req_valid  <= 1'b1;
                    end
                ReadMiss:       // read address 4 (index 1), not cached yet
                    begin
                        cpu_req_addr   <= 12'd4;
                        cpu_req_rw     <= 1'b0;
                        cpu_data_write <= cpu_data_write;
                        cpu_req_valid  <= 1'b1;
                    end
                ReadHit:        // read address 0, cached by the earlier writes
                    begin
                        cpu_req_addr   <= 12'd0;
                        cpu_req_rw     <= 1'b0;
                        cpu_data_write <= cpu_data_write;
                        cpu_req_valid  <= 1'b1;
                    end
                WriteDirtyMiss: // write 6 to address 128: same index 0, different tag, line 0 is dirty
                    begin
                        cpu_req_addr   <= 12'd128;   // 00001|00000|00
                        cpu_req_rw     <= 1'b1;
                        cpu_data_write <= 32'd6;
                        cpu_req_valid  <= 1'b1;
                    end
                ReadDirtyMiss:  // read address 0 again: line 0 now holds the dirty tag-1 block
                    begin
                        cpu_req_addr   <= 12'd0;     // was 32'd0: literal width now matches the 12-bit register
                        cpu_req_rw     <= 1'b0;
                        cpu_data_write <= cpu_data_write;
                        cpu_req_valid  <= 1'b1;
                    end
                default:
                    begin
                        cpu_req_addr   <= 12'd0;
                        cpu_req_rw     <= 1'b0;
                        cpu_data_write <= 32'd8;
                        cpu_req_valid  <= 1'b0;
                    end
            endcase

    cache cache_inst (
        .clk            (clk            ),
        .rst            (rst            ),
        // cpu <-> cache
        .cpu_req_addr   (cpu_req_addr   ),
        .cpu_req_rw     (cpu_req_rw     ),
        .cpu_req_valid  (cpu_req_valid  ),
        .cpu_data_write (cpu_data_write ),
        .cpu_data_read  (cpu_data_read  ),
        .cpu_ready      (cpu_ready      ),
        // cache <-> mem
        .mem_req_addr   (mem_req_addr   ),
        .mem_req_rw     (mem_req_rw     ),
        .mem_req_valid  (mem_req_valid  ),
        .mem_data_write (mem_data_write ),
        .mem_data_read  (mem_data_read  ),
        .mem_ready      (mem_ready      )
    );

    mem mem_inst (
        .clk            (clk            ),
        .rst            (rst            ),
        .mem_req_addr   (mem_req_addr   ),
        .mem_req_rw     (mem_req_rw     ),
        .mem_req_valid  (mem_req_valid  ),
        .mem_data_write (mem_data_write ),
        .mem_data_read  (mem_data_read  ),
        .mem_ready      (mem_ready      )
    );

endmodule
两路组相联 two way set
cache分成u组,每组v行,主存块放到哪个组是固定的,至于存到该组的哪一行是灵活的,cache的组号q和主存的块号j满足q = j mod u
两路组相联的硬件成本相对于直接映射缓存更高,因为每次比较tag的时候需要多比较一组,两路的方式有助于降低cache颠簸可能性
// 两路组相联cache two way set associative cache
// cache大小为32块,分为16组,每组两块,主存大小为1024块,1块=4字,1字=32bit
// 主存地址为12bit,其中[1:0]是块内偏移,[5:2]是索引,[11:6]是tag
// 两路组相联cache two way set associative cache
// cache大小为32块,分为16组,每组两块,主存大小为1024块,1块=4字,1字=32bit
// 主存地址为12bit,其中[1:0]是块内偏移,[5:2]是索引,[11:6]是tag
// V+D+Tag+Data = 1+1+6+128 = 136
// 【 6bit | 4bit | 2bit 】
// tag set index offset
// Two-way set-associative, write-back / write-allocate cache.
//   * 32 lines organised as 16 sets of 2 ways; set s uses entries 2*s and 2*s+1.
//   * One line = 4 words of 32 bits.
//   * 12-bit word address = { tag[11:6], set index[5:2], offset[1:0] }.
//   * Line layout (136 bits) = { V, D, tag[5:0], data[127:0] }.
//
// CPU side: a request is picked up when cpu_req_valid is high while the FSM
//           is in IDLE; cpu_ready is registered high for the clock after a
//           hit in CompareTag. cpu_req_addr / cpu_req_rw / cpu_data_write are
//           used directly in every state (they are not captured), so the
//           requester must hold them stable until cpu_ready.
// MEM side: mem_req_valid is kept asserted in Allocate / WriteBack until
//           mem_ready is sampled high.
module two_way_set (
    input  wire          clk,
    input  wire          rst,              // active-low, asynchronous
    // cpu <-> cache
    input  wire [ 11: 0] cpu_req_addr   ,
    input  wire          cpu_req_rw     ,  // 0 = read, 1 = write
    input  wire          cpu_req_valid  ,
    input  wire [ 31: 0] cpu_data_write ,
    output reg  [ 31: 0] cpu_data_read  ,
    output reg           cpu_ready      ,
    // cache <-> mem
    output reg  [ 11: 0] mem_req_addr   ,
    output reg           mem_req_rw     ,  // 0 = read block, 1 = write block
    output reg           mem_req_valid  ,
    output reg  [127: 0] mem_data_write ,
    input  wire [127: 0] mem_data_read  ,
    input  wire          mem_ready
);

    // Bit positions inside one cache line.
    parameter V        = 135 ;   // valid bit
    parameter D        = 134 ;   // dirty bit
    parameter TagMSB   = 133 ;
    parameter TagLSB   = 128 ;
    parameter BlockMSB = 127 ;
    parameter BlockLSB = 0   ;

    // FSM states.
    parameter IDLE       = 2'd0;
    parameter CompareTag = 2'd1;
    parameter Allocate   = 2'd2;
    parameter WriteBack  = 2'd3;

    reg [135:0] cache_data [0:31];
    reg [1:0]   curr_state, next_state;
    reg         hit;
    reg         hit1, hit2;   // hit in way 0 / way 1 of the addressed set
    reg         victim;       // way a miss on the addressed set would allocate
    reg         way_q;        // victim captured when the miss was detected
    reg         way;          // way in use for the current miss (meaningless on a hit)

    wire [5:0]  cpu_req_tag;
    wire [3:0]  cpu_req_index;
    wire [1:0]  cpu_req_offset;

    assign cpu_req_tag    = cpu_req_addr[11: 6];
    assign cpu_req_index  = cpu_req_addr[ 5: 2];
    assign cpu_req_offset = cpu_req_addr[ 1: 0];

    integer i;

    // Start with every line invalid and clean.
    initial begin
        for (i = 0; i < 32; i = i + 1)
            cache_data[i] = 136'b0;
    end

    // State register.
    always @ (posedge clk or negedge rst)
        if (rst == 1'b0)
            curr_state <= IDLE;
        else
            curr_state <= next_state;

    // Next-state logic (combinational, hence blocking assignments).
    always @ (*)
        case (curr_state)
            IDLE:
                if (cpu_req_valid)
                    next_state = CompareTag;
                else
                    next_state = IDLE;
            CompareTag:
                if (hit)
                    next_state = IDLE;
                else if (cache_data[2*cpu_req_index+way][V:D] == 2'b11)
                    next_state = WriteBack;        // victim is valid and dirty: write it back first
                else
                    next_state = Allocate;
            Allocate:
                if (mem_ready)
                    next_state = CompareTag;
                else
                    next_state = Allocate;
            WriteBack:
                if (mem_ready)
                    next_state = Allocate;
                else
                    next_state = WriteBack;
            default:
                next_state = IDLE;
        endcase

    // Per-way hit detection, only meaningful in CompareTag.
    always @ (*)
        if (curr_state == CompareTag
            && cache_data[2*cpu_req_index    ][V] == 1'b1
            && cache_data[2*cpu_req_index    ][TagMSB:TagLSB] == cpu_req_tag)
            hit1 = 1'b1;
        else
            hit1 = 1'b0;

    always @ (*)
        if (curr_state == CompareTag
            && cache_data[2*cpu_req_index + 1][V] == 1'b1
            && cache_data[2*cpu_req_index + 1][TagMSB:TagLSB] == cpu_req_tag)
            hit2 = 1'b1;
        else
            hit2 = 1'b0;

    // hit1 / hit2 are already 0 outside CompareTag.
    always @ (*)
        hit = hit1 | hit2;

    // Victim choice from the two valid bits {V(way0), V(way1)}:
    //   00 -> way 0 (both free), 01 -> way 0 (only way 0 free),
    //   10 -> way 1 (only way 1 free), 11 -> way 0 (both occupied).
    // NOTE(review): with both ways occupied way 0 is always evicted; there is
    // no LRU / round-robin state.
    always @ (*)
        case ({cache_data[2*cpu_req_index][V], cache_data[2*cpu_req_index+1][V]})
            2'b10:   victim = 1'b1;
            default: victim = 1'b0;
        endcase

    // `way` must keep the value chosen at the miss for the whole WriteBack /
    // Allocate sequence. A flip-flop captures it when the miss is detected;
    // in CompareTag the fresh choice is used directly. This gives the same
    // value in every state as a transparent latch would, without inferring one.
    always @ (posedge clk or negedge rst)
        if (rst == 1'b0)
            way_q <= 1'b0;
        else if (curr_state == CompareTag && hit == 1'b0)
            way_q <= victim;

    always @ (*)
        if (curr_state == CompareTag && hit == 1'b0)
            way = victim;
        else
            way = way_q;

    // All clocked datapath activity lives in one block so that cache_data has
    // a single writer. The states are mutually exclusive, so this is
    // cycle-for-cycle the same as handling the memory side and the CPU side
    // in two separate blocks.
    always @ (posedge clk) begin
        // Defaults: both handshakes are idle unless a branch below says otherwise.
        mem_req_valid <= 1'b0;
        cpu_ready     <= 1'b0;

        case (curr_state)
            CompareTag:
                if (hit) begin
                    cpu_ready <= 1'b1;
                    if (cpu_req_rw == 1'b0) begin          // read hit
                        if (hit1)
                            cpu_data_read <= cache_data[2*cpu_req_index  ][32*cpu_req_offset +: 32];
                        else
                            cpu_data_read <= cache_data[2*cpu_req_index+1][32*cpu_req_offset +: 32];
                    end
                    else begin                             // write hit: update word, mark dirty
                        if (hit1) begin
                            cache_data[2*cpu_req_index  ][32*cpu_req_offset +: 32] <= cpu_data_write;
                            cache_data[2*cpu_req_index  ][D] <= 1'b1;
                        end
                        else begin
                            cache_data[2*cpu_req_index+1][32*cpu_req_offset +: 32] <= cpu_data_write;
                            cache_data[2*cpu_req_index+1][D] <= 1'b1;
                        end
                    end
                end

            Allocate:
                if (mem_ready == 1'b0) begin               // keep the block read request up
                    mem_req_valid <= 1'b1;
                    mem_req_addr  <= {cpu_req_addr[11:2], 2'b00};
                    mem_req_rw    <= 1'b0;
                end
                else                                       // install the block as valid and clean
                    cache_data[2*cpu_req_index+way] <= {2'b10, cpu_req_tag, mem_data_read};

            WriteBack:
                if (mem_ready == 1'b0) begin               // keep the block write request up
                    mem_req_valid  <= 1'b1;
                    // The victim's address is rebuilt from its stored tag and the set index.
                    mem_req_addr   <= {cache_data[2*cpu_req_index+way][TagMSB:TagLSB], cpu_req_index, 2'b00};
                    mem_req_rw     <= 1'b1;
                    mem_data_write <= cache_data[2*cpu_req_index+way][BlockMSB:BlockLSB];
                end

            default: ;                                     // IDLE: nothing beyond the defaults
        endcase
    end

endmodule
// Testbench for the two-way set-associative cache: instantiates `two_way_set`
// and `mem` and issues four writes followed by four reads, moving to the next
// request each time cpu_ready is seen high.
// Address fields used in the comments below: tag[11:6] | set index[5:2] | offset[1:0].
//
// NOTE(review): cpu_req_valid is assigned both in the combinational
// next-state block and in the clocked stimulus block, and the combinational
// block uses non-blocking assignments without assigning cpu_req_valid on
// every path. The stimulus depends on that ordering, so the code is left
// untouched; this style is simulation-only.
module sim_two_way ( );
reg clk;
reg rst;
// Stimulus steps, in the order they are issued.
parameter Write1 = 3'd0;
parameter Write2 = 3'd1;
parameter Write3 = 3'd2;
parameter Write4 = 3'd3;
parameter Read1 = 3'd4;
parameter Read2 = 3'd5;
parameter Read3 = 3'd6;
parameter Read4 = 3'd7;
reg [ 3: 0] curr_state, next_state;
reg [ 11: 0] cpu_req_addr ;
reg cpu_req_rw ;
reg cpu_req_valid ;
reg [ 31: 0] cpu_data_write ;
wire [ 31: 0] cpu_data_read ;
wire cpu_ready ;
wire [ 11: 0] mem_req_addr ;
wire mem_req_rw ;
wire mem_req_valid ;
wire [127: 0] mem_data_write ;
wire [127: 0] mem_data_read ;
wire mem_ready ;
// Reset is active low and released after 10 time units.
initial begin
clk = 0;
rst = 0;
#10
rst = 1;
end
// Clock with a period of 10 time units.
always #5 clk = ~clk;
// Stimulus step register.
always @ (posedge clk or negedge rst)
if (rst == 1'b0)
curr_state <= Write1;
else
curr_state <= next_state;
// Advance to the next step when cpu_ready is high, and pull cpu_req_valid
// low at the same moment.
always @ (*)
case (curr_state)
Write1:
if (cpu_ready == 1'b1)
begin
next_state <= Write2;
cpu_req_valid <= 1'b0;
end
else
next_state <= Write1;
Write2:
if (cpu_ready == 1'b1)
begin
next_state <= Write3;
cpu_req_valid <= 1'b0;
end
else
next_state <= Write2;
Write3:
if (cpu_ready == 1'b1)
begin
next_state <= Write4;
cpu_req_valid <= 1'b0;
end
else
next_state <= Write3;
Write4:
if (cpu_ready == 1'b1)
begin
next_state <= Read1;
cpu_req_valid <= 1'b0;
end
else
next_state <= Write4;
Read1:
if (cpu_ready == 1'b1)
begin
next_state <= Read2;
cpu_req_valid <=1'b0;
end
else
next_state <= Read1;
Read2:
if (cpu_ready == 1'b1)
begin
next_state <= Read3;
cpu_req_valid <= 1'b0;
end
else
next_state <= Read2;
Read3:
if (cpu_ready == 1'b1)
begin
next_state <= Read4;
cpu_req_valid <= 1'b0;
end
else
next_state <= Read3;
// Earlier form of the last step, kept for reference:
// Read4: next_state <= Read4;
Read4: begin // last step: still drop valid on ready, but stay here
if (cpu_ready == 1'b1)
cpu_req_valid <= 1'b0;
next_state <= Read4;
end
default:next_state <= Write1;
endcase
// Request issued for each step. The read steps leave cpu_data_write unchanged.
always @ (posedge clk or negedge rst)
if (rst == 1'b0)
begin
cpu_req_addr <= 12'd0;
cpu_req_rw <= 1'b0;
cpu_req_valid <= 1'b0;
cpu_data_write <= 32'd0;
end
else
case (curr_state)
Write1: // write 8 to address 0
begin
cpu_req_addr <= 12'd0; // 000000 | 0000 | 00
cpu_req_rw <= 1'b1;
cpu_data_write <= 32'd8;
cpu_req_valid <= 1'b1;
end
Write2: // write 9 to address 64 (same set 0, tag 1)
begin
cpu_req_addr <= 12'd64; // 000001 | 0000 | 00
cpu_req_rw <= 1'b1;
cpu_data_write <= 32'd9;
cpu_req_valid <= 1'b1;
end
Write3: // write 6 to address 128 (same set 0, tag 2)
begin
cpu_req_addr <= 12'd128; // 000010 | 0000 | 00
cpu_req_rw <= 1'b1;
cpu_data_write <= 32'd6;
cpu_req_valid <= 1'b1;
end
Write4: // write 7 to address 65 (set 0, tag 1, word offset 1)
begin
cpu_req_addr <= 12'd65; // 000001 | 0000 | 01
cpu_req_rw <= 1'b1;
cpu_data_write <= 32'd7;
cpu_req_valid <= 1'b1;
end
Read1: // read address 0
begin
cpu_req_addr <= 12'd0; // 000000 | 0000 | 00
cpu_req_rw <= 1'b0;
cpu_req_valid <= 1'b1;
end
Read2: // read address 4 (set 1, tag 0)
begin
cpu_req_addr <= 12'd4; // 000000 | 0001 | 00
cpu_req_rw <= 1'b0;
cpu_req_valid <= 1'b1;
end
Read3: // read address 68 (set 1, tag 1)
begin
cpu_req_addr <= 12'd68; // 000001 | 0001 | 00
cpu_req_rw <= 1'b0;
cpu_req_valid <= 1'b1;
end
Read4: // read address 65 (set 0, tag 1, word offset 1)
begin
cpu_req_addr <= 12'd65; // 000001 | 0000 | 01
cpu_req_rw <= 1'b0;
cpu_req_valid <= 1'b1;
end
default:
begin
cpu_req_valid <= 1'b0;
end
endcase
// Device under test.
two_way_set cache_inst (
.clk (clk ),
.rst (rst ),
// cpu <-> cache
.cpu_req_addr (cpu_req_addr ),
.cpu_req_rw (cpu_req_rw ),
.cpu_req_valid (cpu_req_valid ),
.cpu_data_write (cpu_data_write ),
.cpu_data_read (cpu_data_read ),
.cpu_ready (cpu_ready ),
// cache <-> mem
.mem_req_addr (mem_req_addr ),
.mem_req_rw (mem_req_rw ),
.mem_req_valid (mem_req_valid ),
.mem_data_write (mem_data_write ),
.mem_data_read (mem_data_read ),
.mem_ready (mem_ready )
);
// Backing memory model.
mem mem_inst (
.clk (clk ),
.rst (rst ),
.mem_req_addr (mem_req_addr ),
.mem_req_rw (mem_req_rw ),
.mem_req_valid (mem_req_valid ),
.mem_data_write (mem_data_write ),
.mem_data_read (mem_data_read ),
.mem_ready (mem_ready )
);
endmodule
代码逻辑:
注:
写cache的时候,根据offset确定要替换哪一个字
00:[ 31: 0]
01:[ 63: 32]
10:[ 95: 64]
11:[127: 96]