一 Bmips cache 概述
magnum/basemodules/mma/bmma.h
There are potentially three CPU caches you should be aware of:
1)
L1 cache
- this is a write-back cache
2)
L2 cache
- this is a write-back cache
3)
RAC (read ahead cache)
- this is a read-only cache that reads ahead up to a 4K boundary
BMMA_FlushCache_isrsafe will do a wback_invalidate of the L1 and L2 caches and an invalidate of the RAC.
The following cache flush rules will allow software to maintain cache coherency.
Rule 1: After hardware writes to RAM, you must flush the cache before the CPU reads from that memory. That is:
1. Hardware writes to RAM
2. Flush cache for that RAM
3. CPU reads from that RAM
Rule 2: After the CPU writes to RAM, you must flush the cache before Hardware does any access to that memory. That is:
1. CPU writes to RAM
2. Flush cache for that RAM
3. Hardware reads from that RAM
二 L1 Cache操作
cache的介绍见4KcProgMan.pdf中chapter 7 caches的介绍:
Both caches are virtually indexed and physically tagged, allowing cache access to occur in
parallel with virtual-to-physical address translation.
也就是说虚拟地址作为index来决定sets,一个sets里面有多条way,然后物理地址来比较way中的tag,一条way就是一个cacheline
/*
 * L1 cache geometry and tag-bit layout, selected at build time from BCHP_CHIP.
 * Three CPU families are distinguished (see the per-branch comments):
 * MIPS 5000, MIPS 4380, and everything else (MIPS 3300).
 */
#if (BCHP_CHIP == 7344 || BCHP_CHIP == 7346 || BCHP_CHIP == 7425 || BCHP_CHIP == 7429 || BCHP_CHIP == 7435)
/* DCACHE size 32K and line size is 32 for MIPS 5000. */
#define DCACHE_SIZE (32 * 1024)
#define DCACHE_LINE_SIZE 32
#elif (BCHP_CHIP == 7228 || BCHP_CHIP == 7231 || BCHP_CHIP == 7362 || BCHP_CHIP == 7563 || BCHP_CHIP == 7584)
/* DCACHE size 64K and line size is 64 for MIPS 4380. */
#define DCACHE_SIZE (64 * 1024)
#define DCACHE_LINE_SIZE 64
#else
/* DCACHE size 32K and line size is 16 for MIPS 3300. */
#define DCACHE_SIZE (32 * 1024)
#define DCACHE_LINE_SIZE 16
#endif
/*
 * I-cache geometry. Only the MIPS 4380 chip list gets ICACHE_* macros from
 * this block; the other families define nothing here.
 */
#if (BCHP_CHIP == 7228 || BCHP_CHIP == 7231 || BCHP_CHIP == 7362 || BCHP_CHIP == 7563 || BCHP_CHIP == 7584)
#define ICACHE_SIZE (32*1024)
#define ICACHE_LINE_SIZE 64
/* Reduces an address to a byte offset inside the I-cache (size is a power of two). */
#define ICACHE_INDEX_MASK (ICACHE_SIZE - 1)
#endif
/*
 * Cache tag (TagLo) bit definitions. The macro names differ per family:
 * D_TAG_ and I_D_TAG_ prefixes for MIPS 5000, plain TAG_ prefix for the others.
 * NOTE(review): D_TAG_EXCLUSIVE and D_TAG_PT both evaluate to 2 - confirm
 * against the MIPS 5000 tag layout whether that is intentional.
 */
#if (BCHP_CHIP == 7344 || BCHP_CHIP == 7346 || BCHP_CHIP == 7425 || BCHP_CHIP == 7429 || BCHP_CHIP == 7435)
#define I_D_TAG_PA_MASK 0xfffff000
#define D_TAG_LOCK (1 << 3)
#define D_TAG_EXCLUSIVE (0x2)
#define D_TAG_VALID (1)
#define D_TAG_PT (2)
#elif (BCHP_CHIP == 7228 || BCHP_CHIP == 7231 || BCHP_CHIP == 7362 || BCHP_CHIP == 7563 || BCHP_CHIP == 7584)
/* MIPS 4380: physical-address bits [31:12] plus lock/valid/dirty flags. */
#define TAG_PA_MASK 0xfffff000
#define TAG_LOCK (1 << 5)
#define TAG_VALID (1 << 6)
#define TAG_DIRTY (1 << 7)
#else
/* MIPS 3300: only the valid bit is defined here. */
#define TAG_VALID (1 << 6)
#endif
4-way cache组织方式如下:
set0    way0    tag  data
        way1    tag  data
        way2    tag  data
        way3    tag  data
set1    way0    tag  data
        way1    tag  data
        way2    tag  data
        way3    tag  data
......  .......
VA作为Index选择set, 4-way的方式PA为:
PA 22 2 8
tag way选择 offset
linesize 为4*4=16Byte, Valid占用4bit,每一个bit对应一个word是否有效, L为lock标志位,1表示lock状态, LRF为
least recently filled的缩写,用于替换策略
对cache的操作指令只有cache一个,具体的用法见
MIPS32® Architecture For Programmers
Volume II: The MIPS32® Instruction Set
有效地址为: base寄存器中的值+offset(16bit, signed), 根据当前操作码得到有效地址是虚拟地址,物理地址,还是索引值
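CS为总的cache大小(字节), A为way数(相联度), BPT为linesize(每个tag对应的字节数),则
CS/A 为每个way的大小(字节), CS/(A*BPT) 为sets数
OffsetBit = log2(BPT) 为offset的bit数
IndexBit = log2(CS/A) 为index字段的上边界bit位置, index占用 Addr[IndexBit-1 : OffsetBit], 共 log2(CS/(A*BPT)) 个bit
log2(A) 为way的bit数, way占用 Addr[WayBit-1 : IndexBit], 其中 WayBit = IndexBit + ceil(log2(A))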
For index operations (where the address is used to index the cache but need not match the cache tag) software should use unmapped addresses to avoid TLB exceptions.
Bits [20:18] of the instruction specify the operation to perform. To provide software with a consistent base of cache
operations, certain encodings must be supported on all processors. The remaining encodings are recommended
具体字段的含义不同平台不同:
/*
 * Cache tag (TagLo) bit definitions per CPU family, selected by BCHP_CHIP.
 * First branch: D_TAG_ and I_D_TAG_ names; second branch: TAG_ names with
 * physical-address, lock, valid and dirty bits; fallback: valid bit only.
 * NOTE(review): D_TAG_EXCLUSIVE and D_TAG_PT both evaluate to 2 - confirm.
 */
#if (BCHP_CHIP == 7344 || BCHP_CHIP == 7346 || BCHP_CHIP == 7425 || BCHP_CHIP == 7429 || BCHP_CHIP == 7435)
#define I_D_TAG_PA_MASK 0xfffff000
#define D_TAG_LOCK (1 << 3)
#define D_TAG_EXCLUSIVE (0x2)
#define D_TAG_VALID (1)
#define D_TAG_PT (2)
#elif (BCHP_CHIP == 7228 || BCHP_CHIP == 7231 || BCHP_CHIP == 7362 || BCHP_CHIP == 7563 || BCHP_CHIP == 7584)
#define TAG_PA_MASK 0xfffff000
#define TAG_LOCK (1 << 5)
#define TAG_VALID (1 << 6)
#define TAG_DIRTY (1 << 7)
#else
#define TAG_VALID (1 << 6)
#endif
CFE_15_3/bsp/bmips.h 全局定义
/* _MM_MAKEMASK1(n): single-bit mask with bit n set. */
#define _MM_MAKEMASK1(n) (1 << (n))
/* _MM_MAKEMASK(v,n): mask of v consecutive one-bits whose lowest bit is bit n. */
#define _MM_MAKEMASK(v,n) (((1<<(v))-1) << (n))
/************************************************************************
* Primary Cache operations
*
* Operand values for the first argument of the `cache` instruction.
* The two numbers in each trailing comment are (operation, cache selector);
* the macro value equals (operation << 2) | selector, e.g. 0x15 = (5 << 2) | 1.
* Judging by the name suffixes, selector 0 = I, 1 = D, 2 = SI/TC, 3 = SD.
************************************************************************/
#define Index_Invalidate_I 0x0 /* 0 0 */
#define Index_Writeback_Inv_D 0x1 /* 0 1 */
#define Index_Invalidate_SI 0x2 /* 0 2 */
#define Index_Invalidate_TC 0x2 /* 0 2 */
#define Index_Writeback_Inv_SD 0x3 /* 0 3 */
#define Index_Load_Tag_I 0x4 /* 1 0 */
#define Index_Load_Tag_D 0x5 /* 1 1 */
#define Index_Load_Tag_SI 0x6 /* 1 2 */
#define Index_Load_Tag_TC 0x6 /* 1 2 */
#define Index_Load_Tag_SD 0x7 /* 1 3 */
#define Index_Store_Tag_I 0x8 /* 2 0 */
#define Index_Store_Tag_D 0x9 /* 2 1 */
#define Index_Store_Tag_SI 0xA /* 2 2 */
#define Index_Store_Tag_TC 0xA /* 2 2 */
#define Index_Store_Tag_SD 0xB /* 2 3 */
#define Create_Dirty_Exc_D 0xD /* 3 1 */
#define Create_Dirty_Exc_SD 0xF /* 3 3 */
#define Hit_Invalidate_I 0x10 /* 4 0 */
#define Hit_Invalidate_D 0x11 /* 4 1 */
#define Hit_Invalidate_SI 0x12 /* 4 2 */
#define Hit_Invalidate_TC 0x12 /* 4 2 */
#define Hit_Invalidate_SD 0x13 /* 4 3 */
#define Fill_I 0x14 /* 5 0 */
#define Hit_Writeback_Inv_D 0x15 /* 5 1 */
#define Hit_Writeback_Inv_SD 0x17 /* 5 3 */
#define Hit_Writeback_I 0x18 /* 6 0 */
#define Hit_Writeback_D 0x19 /* 6 1 */
#define Hit_Writeback_SD 0x1B /* 6 3 */
#define Hit_Set_Virtual_SI 0x1E /* 7 2 */
#define Hit_Set_Virtual_TC 0x1E /* 7 2 */
#define Hit_Set_Virtual_SD 0x1F /* 7 3 */
/************************************************************************
* KSEG Mapping Definitions and Macro's
*
* K0 and K1 are two 512MB (0x20000000) windows at 0x80000000 and 0xa0000000.
* The conversion macros below show that both windows alias the same low
* 29 bits of physical address (mask 0x1fffffff).
************************************************************************/
#define K0_BASE 0x80000000
#define K0_SIZE 0x20000000
#define K1_BASE 0xa0000000
#define K1_SIZE 0x20000000
#define K2_BASE 0xc0000000
/* Address conversions; skipped if the including file already defined PHYS_TO_K0. */
#ifndef PHYS_TO_K0
#define PHYS_TO_K0(x) ((x) | 0x80000000)
#define PHYS_TO_K1(x) ((x) | 0xa0000000)
#define K0_TO_PHYS(x) ((x) & 0x1fffffff)
#define K1_TO_PHYS(x) (K0_TO_PHYS(x))
/* K0 <-> K1 differ only in bit 29 (0x20000000). */
#define K0_TO_K1(x) ((x) | 0x20000000)
#define K1_TO_K0(x) ((x) & 0xdfffffff)
#endif
/* Exception vector addresses. R_VEC evaluates to 0xbfc00000. */
#define T_VEC K0_BASE /* tlbmiss vector */
#define X_VEC (K0_BASE+0x80) /* xtlbmiss vector */
#define C_VEC (K1_BASE+0x100) /* cache exception vector */
#define E_VEC (K0_BASE+0x180) /* exception vector */
#define R_VEC (K1_BASE+0x1fc00000) /* reset vector */
/* Register offset X -> K1 address (BCHP_PHYSICAL_OFFSET is defined elsewhere). */
#define REG_ADDR(X) PHYS_TO_K1(BCHP_PHYSICAL_OFFSET+(X))
/* Values for the K0 field of CP0 Config (cache mode used for kseg0 accesses). */
#define K0_WRITE_THROUGH (0x0)
#define K0_WRITE_BACK (0x3)
#define K0_UNCACHED (0x2)
/************************************************************************
* flush API parameter definitions
*
* Bit flags meant to be OR-ed together: bit 2 selects data vs. code,
* bit 1 requests write-back, bit 0 requests invalidate. The *_NO_* and
* FLUSH_CODE names are the zero (flag absent) values.
* NOTE(review): the consumer of these flags is not visible in this file.
************************************************************************/
#define FLUSH_CODE 0x00000000
#define FLUSH_DATA 0x00000004
#define FLUSH_WRITEBACK 0x00000002
#define FLUSH_NO_WRITEBACK 0x00000000
#define FLUSH_INVALIDATE 0x00000001
#define FLUSH_NO_INVALIDATE 0x00000000
/*
 * cacheop(kva, size, linesize, op)
 *
 * Applies the `cache` operation `op` (cache selector + operation code, e.g.
 * Index_Store_Tag_D) to every cache line that overlaps [kva, kva + size).
 *
 *   kva      - register holding the start address
 *   size     - register holding the byte count (must be non-zero, see below)
 *   linesize - register holding the cache line size (a power of two)
 *   op       - one of the cache operation constants
 *
 * Working registers (all clobbered):
 *   t2 = ~(linesize - 1)            line-alignment mask
 *   t0 = kva & t2                   current line, starts at the first line
 *   t1 = (kva + size - 1) & t2      last line to process
 *
 * `cache op, 0(t0)` uses the value in t0 plus offset 0 as the effective
 * address. The loop runs under .set noreorder: `addu t0, linesize` sits in
 * the branch delay slot, so t0 is advanced on every iteration after the
 * compare against t1 has been made.
 *
 * The caller must not pass size == 0: t1 would then lie below t0 and the
 * loop would not terminate until t0 wraps around the address space.
 *
 * Every line of the body except the last ends in a backslash; a `//` comment
 * must not be placed inside the body because it would swallow the
 * continuation.
 */
#define cacheop(kva, size, linesize, op)	\
	.set	noreorder		;	\
	addu	t1, kva, size		;	\
	subu	t2, linesize, 1		;	\
	not	t2			;	\
	and	t0, kva, t2		;	\
	addiu	t1, t1, -1		;	\
	and	t1, t2			;	\
9:	cache	op, 0(t0)		;	\
	bne	t0, t1, 9b		;	\
	addu	t0, linesize		;	\
	.set	reorder			;
#endif
CFE_15_3/bsp/mips_libs_4380.s
/* init_mips (excerpt): only the call into cache_init is shown here. */
init_mips:
/* keep the caller's return address in t8, because bal overwrites ra */
move t8, ra
bal cache_init
/* branch delay slot */
nop
CFE_15_3/bsp/cache_lib_4380.s
/******************************************************************************
* Function: cache_init
* Arguments: None
* Returns: None
* Description: Sizes both L1 caches, enables them in CP0_BRCM_CONFIG0, selects
*              the kseg0 cache mode (K0_CACHE_MODE) in CP0_CONFIG, then writes
*              a zero tag into every I-cache and D-cache line with
*              Index_Store_Tag_I / Index_Store_Tag_D. Optionally selects the
*              weak-order consistency model and clears the BHTD bit.
* Trashes: t0, t1, t2, t4, t6, t7, a0, a1, a2, v0, v1, s4, s5, s6, s7
*          (t6 holds the return address; t7/v0/v1 come from size_*_cache;
*           t0-t2 are also used by cacheop)
******************************************************************************/
.global cache_init
	.ent	cache_init
cache_init:
	.set	noreorder
	move	t6, ra				/* bal below overwrites ra */
#if (ENABLE_WEAK_ORDER_CONSISTENCY_MODEL == 1)
	mfc0	t0, CP0_BRCM_CONFIG0		//#define CP0_BRCM_CONFIG0 $22
	nop
	nop
	li	t1, CP0_BRCM_CONFIG0_CM_MASK	/* set weak order in consistency model */ //#define CP0_BRCM_CONFIG0_CM_MASK _MM_MAKEMASK1(16)
	or	t0, t0, t1
	sync
	mtc0	t0, CP0_BRCM_CONFIG0
	sync
#endif
	/*
	 * set delay value for dcache to 0x18.
	 * (low 6 bits of CP0 $22 select 7 are replaced with 0x18)
	 */
	mfc0	t1, $22, 7
	nop
	li	t4, 0xffffffc0
	and	t1, t4
	li	t4, 0x18
	or	t1, t4
	mtc0	t1, $22, 7
	nop
	nop
	bal	size_i_cache
	nop
	/* move the result in s4,s5 */
	move	s4, v0
	move	s5, v1
	bal	size_d_cache
	nop
	/* move the result in s6,s7 */
	move	s6, v0
	move	s7, v1
	/* Now s4 contains the i-cache size, s5 contains the i-cache line size
	 * s6 contains the d-cache size and s7 contains the d-cache line size.
	 */
	/* initialize TAGLO */
	mtc0	zero, CP0_TAG_LO		//#define CP0_TAG_LO $28
	nop
	nop
	/* initialize DATALO */
	mtc0	zero, CP0_TAG_LO, 1		// $28 1
	nop
	nop
	/* Enable Caches before Clearing. If the caches are disabled
	 * then the cache operations to clear the cache will be ignored
	 */
	mfc0	t1, CP0_BRCM_CONFIG0		//#define CP0_BRCM_CONFIG0 $22
	nop
	nop
	//ICE: i cache enable , DCE: data cache enable
	li	t2, (CP0_BRCM_CONFIG0_ICE_MASK | CP0_BRCM_CONFIG0_DCE_MASK)	//#define CP0_BRCM_CONFIG0_ICE_MASK _MM_MAKEMASK1(31)
	or	t1, t1, t2			//#define CP0_BRCM_CONFIG0_DCE_MASK _MM_MAKEMASK1(30)
	mtc0	t1, CP0_BRCM_CONFIG0		//#define CP0_BRCM_CONFIG0 $22
	nop
	nop
	/* select the kseg0 cache mode: replace Config.K0 with K0_CACHE_MODE */
	mfc0	t1, CP0_CONFIG			//#define CP0_CONFIG $16
	nop
	nop
	and	t1, t1, ~CP0_CONFIG_K0_MASK	//#define CP0_CONFIG_K0_MASK _MM_MAKEMASK(3,0)
	or	t1, t1, K0_CACHE_MODE		//#define K0_CACHE_MODE (K0_WRITE_BACK)
	mtc0	t1, CP0_CONFIG
	nop					//#define K0_WRITE_THROUGH (0x0)
	nop					//#define K0_WRITE_BACK (0x3)
						//#define K0_UNCACHED (0x2)
	/* initialise instruction cache. */
	// Index_Store_Tag_I with effective addresses starting at 0x80000000 used
	// as the index; TAGLO and DATALO were zeroed above, so every I-cache line
	// gets a zero tag.
	li	a0, K0_BASE			//#define K0_BASE 0x80000000
	move	a1, s4				/* i-cache size */
	move	a2, s5				/* i-cache line size */
	cacheop(a0, a1, a2, Index_Store_Tag_I)
	/* initialize data cache */
	li	a0, K0_BASE
	move	a1, s6				/* d-cache size */
	move	a2, s7				/* d-cache line size */
	cacheop(a0, a1, a2, Index_Store_Tag_D)
#if (ENABLE_BRANCH_PREDICTION == 1)
	/* clear the BHTD bit in CP0_BRCM_CONFIG0 */
	mfc0	t0, CP0_BRCM_CONFIG0		//#define CP0_BRCM_CONFIG0 $22
	li	t1, ~CP0_BRCM_CONFIG0_BHTD_MASK	//#define CP0_BRCM_CONFIG0_BHTD_MASK _MM_MAKEMASK1(21)
	and	t0, t0, t1
	mtc0	t0, CP0_BRCM_CONFIG0
	nop
	nop
#endif
	jr	t6
	nop
	.set	reorder
	.end	cache_init
/******************************************************************************
* Function: size_i_cache
* Arguments: None
* Returns: v0, v1
* Description: v0 = i cache size, v1 = I cache line size
*              Both are 0 when the IL field of Config1 reports no I-cache.
* Trashes: v0,v1,a0,a1, t7
*
* pseudo code:
*   cfg  = Config1
*   if (cfg.IL == 0) return (0, 0)
*   line = 1 << (cfg.IL + 1)
*   size = (64 << cfg.IS) * line * (cfg.IA + 1)
*   return (size, line)
******************************************************************************/
LEAF(size_i_cache)
	.set	noreorder
	mfc0	a0, CP0_CONFIG1			//#define CP0_CONFIG1 $16,1
	nop
	nop
	move	t7, a0				/* keep an unmodified copy of Config1 */
	/* Determine sets per way: IS
	 *
	 * This field contains the number of sets (i.e., indices) per way of
	 * the instruction cache:
	 * i) 0x0: 64, ii) 0x1: 128, iii) 0x2: 256, iv) 0x3: 512, v) 0x4: 1k
	 * vi) 0x5 - 0x7: Reserved.
	 */
	li	a1, CP0_CONFIG1_IS_MASK		//#define CP0_CONFIG1_IS_MASK _MM_MAKEMASK(3,22)
	and	a0, a0, a1
	li	a1, CP0_CONFIG1_IS_SHIFT	//#define CP0_CONFIG1_IS_SHIFT (22)
	srl	a0, a0, a1
	/* sets per way = (64<<IS) */
	li	v0, 0x40
	sllv	v0, v0, a0
	/* determine line size
	 * This field contains the line size of the instruction cache:
	 * i) 0x0: No I-cache present, ii) 0x3: 16 bytes, iii) 0x4: 32 bytes,
	 * iv) 0x5: 64 bytes, v) the rest: Reserved.
	 */
	move	a0, t7
	li	a1, CP0_CONFIG1_IL_MASK		//#define CP0_CONFIG1_IL_MASK _MM_MAKEMASK(3,19)
	and	a0, a0, a1
	li	a1, CP0_CONFIG1_IL_SHIFT	//#define CP0_CONFIG1_IL_SHIFT (19)
	srl	a0, a0, a1
	beqz	a0, no_i_cache
	nop
	/* line size = 2 ^ (IL+1) */
	addi	a0, a0, 1
	li	v1, 1
	sll	v1, v1, a0
	/* v0 now have sets per way, multiply it by line size now
	 * that will give the set size
	 */
	sll	v0, v0, a0
	/* determine set associativity
	 * This field contains the set associativity of the instruction cache.
	 * i) 0x0: Direct mapped, ii) 0x1: 2-way, iii) 0x2: 3-way, iv) 0x3:
	 * 4-way, v) 0x4 - 0x7: Reserved.
	 */
	move	a0, t7
	li	a1, CP0_CONFIG1_IA_MASK		//#define CP0_CONFIG1_IA_MASK _MM_MAKEMASK(3,16)
	and	a0, a0, a1
	li	a1, CP0_CONFIG1_IA_SHIFT	//#define CP0_CONFIG1_IA_SHIFT (16)
	srl	a0, a0, a1
	addi	a0, a0, 0x1
	/* v0 has the set size, multiply it by
	 * set associativiy, to get the cache size
	 */
	multu	v0, a0		/*multu is interlocked, so no need to insert nops */
	mflo	v0
	b	1f
	nop
no_i_cache:
	move	v0, zero
	move	v1, zero
1:
	jr	ra
	nop
	.set	reorder
END(size_i_cache)
/******************************************************************************
* Function: size_d_cache
* Arguments: None
* Returns: v0, v1
* Description: v0 = d cache size, v1 = d cache line size
*              Both are 0 when the DL field of Config1 reports no D-cache.
* Trashes: v0,v1,a0,a1, t7
*
* pseudo code:
*   cfg  = Config1
*   if (cfg.DL == 0) return (0, 0)
*   line = 1 << (cfg.DL + 1)
*   size = (64 << cfg.DS) * line * (cfg.DA + 1)
*   return (size, line)
******************************************************************************/
LEAF(
size_d_cache
)
.set noreorder
mfc0 a0, $16, 1 // Config1 ($16, select 1)
nop
nop
/* keep an unmodified copy of Config1 in t7 */
move t7, a0
/* Determine sets per way: DS
*
* This field contains the number of sets (i.e., indices) per way of
* the data cache:
* i) 0x0: 64, ii) 0x1: 128, iii) 0x2: 256, iv) 0x3: 512, v) 0x4: 1k
* vi) 0x5 - 0x7: Reserved.
*/
li a1, CP0_CONFIG1_DS_MASK
and a0, a0, a1
li a1, CP0_CONFIG1_DS_SHIFT
srl a0, a0, a1
/* sets per way = (64<<DS) */
li v0, 0x40
sllv v0, v0, a0
/* determine line size
* This field contains the line size of the data cache:
* i) 0x0: No D-cache present, ii) 0x3: 16 bytes, iii) 0x4: 32 bytes,
* iv) 0x5: 64 bytes, v) the rest: Reserved.
*/
move a0, t7
li a1, CP0_CONFIG1_DL_MASK
and a0, a0, a1
li a1, CP0_CONFIG1_DL_SHIFT
srl a0, a0, a1
beqz a0, no_d_cache
nop
/* line size = 2 ^ (DL+1) */
addi a0, a0, 1
li v1, 1
sll v1, v1, a0
/* v0 now have sets per way, multiply it by line size now
* that will give the set size
*/
sll v0, v0, a0
/* determine set associativity
* This field contains the set associativity of the data cache.
* i) 0x0: Direct mapped, ii) 0x1: 2-way, iii) 0x2: 3-way, iv) 0x3:
* 4-way, v) 0x4 - 0x7: Reserved.
*/
move a0, t7
li a1, CP0_CONFIG1_DA_MASK
and a0, a0, a1
li a1, CP0_CONFIG1_DA_SHIFT
srl a0, a0, a1
addi a0, a0, 0x1
/* v0 has the set size, multiply it by
* set associativiy, to get the cache size
*/
multu v0, a0 /*multu is interlocked, so no need to insert nops */
mflo v0
b 1f
nop
no_d_cache:
move v0, zero
move v1, zero
1:
jr ra
nop
.set reorder
END(size_d_cache)
/******************************************************************************
* Function: init_i_cache (legacy)
* Arguments: None
* Returns: None
* Description: Writes zero tags (cache op 0x8 = Index_Store_Tag_I with
*              TagLo/DataLo cleared) over the instruction cache. The sizes
*              are derived from Config1; the example values in the comments
*              are for the 4380. The index range starts at the kseg0 alias of
*              CacheableICacheLoop rather than at K0_BASE.
* Trashes: the active code writes t0, t1, t3, t4, t5, t6, t7.
*          The #if 0 variant would additionally use a0, a1, a2, v0, v1, s0
*          plus the registers of size_i_cache and cacheop.
*          NOTE(review): the original header listed "a0, a1, a2,v0,v1,t0, t1,
*          t2, t7, s0, s4, s5, s6, s7", which does not match the active code.
*
* pseudo code:
*
******************************************************************************/
.global init_i_cache
.ent init_i_cache
init_i_cache:
.set noreorder
mfc0 t0, $16,1 /*.word 0x40088001 */
nop
nop
/* Read the config1 reg for cache size*/
and t1, t0, 0x380000 /* [21:19] IL: line size of i-cache.*/
/* t1=0x280000 (4380) */
srl t1, t1, 19 /* i-cache line-size constant is right-most 3 bits*/
/* t1=0x5 (4380)*/
addi t1, 1 /* add 1 to i-cache line-size constant */
/* (not actually line-size value, 4380=>t1=5+1)*/
/* t1=0x6 (4380)*/
li t4, 0x40 /* shift 0x40 by the line-size constant +1.*/
sllv t4, t4, t1 /* t4=0x1000 (for 64 bytes line-size) (4380)*/
li t7, 0x1
sllv t7, t7, t1 /* t7=0x40 (for 64 bytes line-size) (4380)*/
and t1, t0, 0x1C00000 /* [24:22] IS: sets per way*/
srl t1, t1, 22 /* IS is right-most 3 bits */
/* t1=0x2 (4380)*/
sllv t3, t4, t1 /* t3=0x4000 (4380:shift left 2 for 256 sets)*/
/* NOTE(review): only bit 17 is tested here, whereas size_i_cache reads the
* associativity from a 3-bit field at bit 16 - confirm this is intended. */
and t1, t0, 0x20000 /* [17] IA: set associativity */
srl t1, t1, 17 /* t1=0x0 (4380) */
sllv t3, t3, t1 /* t3=total num of tags for Icache*/
/* t3=0x4000 (4380)*/
la t0, CacheableICacheLoop
/* clear bit 29: same operation as K1_TO_K0 */
and t0, 0xDFFFFFFF
/* t4 = t0 (start address of the index range) */
or t4, t0, $0
/* t1 = t3 / 2, the stride between the pre-load groups below */
srl t1, t3, 1
add t5, t4, t1 /* t5=addr(CacheableICacheLoop)+2000, +8K*/
/* Initialise the TagLo and DataLo registers */
mtc0 $0, $28
nop
nop
mtc0 $0, $28, 1 /*.word 0x4080e001 */
nop
nop
/* pre-load the code */
/* Four groups of six Index_Store_Tag_I ops (offsets 0x00..0x50), issued at
* t4, t4+t1, t4+2*t1 and t4+3*t1. */
cache 0x8,0x0(t4)
cache 0x8,0x10(t4)
cache 0x8,0x20(t4)
cache 0x8,0x30(t4)
cache 0x8,0x40(t4)
cache 0x8,0x50(t4)
cache 0x8,0x0(t5)
cache 0x8,0x10(t5)
cache 0x8,0x20(t5)
cache 0x8,0x30(t5)
cache 0x8,0x40(t5)
cache 0x8,0x50(t5)
add t5, t5, t1 /* t5=t5+8K*/
cache 0x8,0x0(t5)
cache 0x8,0x10(t5)
cache 0x8,0x20(t5)
cache 0x8,0x30(t5)
cache 0x8,0x40(t5)
cache 0x8,0x50(t5)
add t5, t5, t1 /* t5=t5+8K*/
cache 0x8,0x0(t5)
cache 0x8,0x10(t5)
cache 0x8,0x20(t5)
cache 0x8,0x30(t5)
cache 0x8,0x40(t5)
cache 0x8,0x50(t5)
add t5, t4, t3 /* t5=addr()+4000, +16KB */
or t6, t7, t7 /* t6=line size */
/* t3 -> icache set size
* t4 -> beginning address
* t5 -> end address (for set 0)
* t6 -> line-size
*/
add t7, t5, t3 /* upto this address (for all the sets)*/
/* t7=addr()+4000+4000, +32KB*/
/* j t0*/
/* nop*/
/* Main loop: t4 walks [start, start+t3) while t5 walks [start+t3, start+2*t3);
* it ends when t5 reaches t7. The add of t4 sits in the branch delay slot. */
CacheableICacheLoop:
cache 0x8, 0x0(t4) /* invalidate set 0 (icache)*/
cache 0x8, 0x0(t5) /* invalidate set 1 (icache)*/
add t5, t5, t6 /* next I-cache line (set 1, I-cache)*/
bne t5, t7, CacheableICacheLoop
add t4, t4, t6 /* next I-cache line (set 0, I-cache)*/
/* Disabled alternative: size the cache with size_i_cache and clear it with cacheop. */
#if 0
move s0, ra
bal size_i_cache
nop
/* initialize TAGLO */
mtc0 zero, CP0_TAG_LO
nop
nop
/* initialize DATALO */
mtc0 zero, CP0_TAG_LO, 1
nop
nop
/* initialise instruction cache. */
li a0, K0_BASE
move a1, v0 /* i-cache size */
move a2, v1 /* i-cache line size */
cacheop(a0, a1, a2, Index_Store_Tag_I)
move ra, s0
#endif
jr ra
nop
.set reorder
.end init_i_cache
/******************************************************************************
* Function: invalidate_all_i_cache
* Arguments: None
* Returns: None
* Description: Invalidates(Index_Invalidate_I) complete i-cache. The size and
*              line size come from size_i_cache; the index range starts at
*              K0_BASE.
*              NOTE(review): if size_i_cache reports no I-cache (v0 = 0) the
*              cacheop loop is entered with size 0 - confirm that cannot
*              happen on supported chips.
* Trashes: t0, t1, t2 (cacheop), t6 (saved ra), t7, a0, a1, v0, v1
*
* pseudo code:
*
******************************************************************************/
.globl invalidate_all_i_cache
.ent invalidate_all_i_cache
invalidate_all_i_cache:
.set noreorder
/* bal overwrites ra, so keep the return address in t6 */
move t6, ra
bal size_i_cache
nop
/* v0 = cache size, v1 = line size */
li a0, K0_BASE
cacheop(a0, v0, v1, Index_Invalidate_I)
jr t6
nop
.set reorder
.end invalidate_all_i_cache
/******************************************************************************
* Function: clear_all_d_cache
* Arguments: None
* Returns: None
* Description: Clears(Index_Writeback_Inv_D) D-cache. The size and line size
*              come from size_d_cache; the index range starts at K0_BASE.
*              NOTE(review): if size_d_cache reports no D-cache (v0 = 0) the
*              cacheop loop is entered with size 0 - confirm that cannot
*              happen on supported chips.
* Trashes: t0, t1, t2 (cacheop), t6 (saved ra), t7, a0, a1, v0, v1
*
* pseudo code:
*
******************************************************************************/
.globl clear_all_d_cache
.ent clear_all_d_cache
clear_all_d_cache:
.set noreorder
/* bal overwrites ra, so keep the return address in t6 */
move t6, ra
bal size_d_cache
nop
/* v0 = cache size, v1 = line size */
li a0, K0_BASE
cacheop(a0, v0, v1,
Index_Writeback_Inv_D
)
jr t6
nop
.set reorder
.end clear_all_d_cache
/******************************************************************************
* Function: invalidate_d_cache
* Arguments: a0: start address, a1: size in bytes
* Returns: nothing
* Description: Hit Invalidates lines starting from a0: applies
*              Hit_Invalidate_D to every D-cache line overlapping
*              [a0, a0 + a1). Returns immediately when a1 is zero.
*              The line size is read at run time through size_d_cache.
*              NOTE(review): an invalidate presumably discards dirty data, so
*              a range that is not line-aligned may also drop neighbouring
*              data in the first/last line - confirm callers align ranges.
* Trashes: a0, a1, v0, v1, t0, t1, t2, t4, t5, t6, t7
*          (t4 = saved ra, t6/t5 = saved a0/a1, t7/v0/v1 via size_d_cache,
*           t0-t2 via cacheop)
*
* pseudo code:
*
******************************************************************************/
.globl invalidate_d_cache
.ent invalidate_d_cache
invalidate_d_cache:
.set noreorder
/* if the size is zero just return */
beqz a1, 3f
nop
/* size_d_cache clobbers ra, a0 and a1: park them in t4, t6, t5 */
move t4, ra
move t6, a0
move t5, a1
bal size_d_cache
nop
#if DEBUG_CACHE_LIB == 1
/* check for alignment */
addiu t0, v1, -1 /* get the line size mask in t0 */
and t1, t6, t0 /* and it with the address */
beqz t1, 2f
nop
/* unaligned start address: pass 'U' to _writeasm, then spin forever */
li a0, 'U'
bal _writeasm
nop
1:
b 1b
nop
#endif
2:
move a0, t6
move a1, t5
/* v1 = D-cache line size returned by size_d_cache */
cacheop(a0, a1, v1,
Hit_Invalidate_D
)
move ra, t4
3:
jr ra
nop
.set reorder
.end invalidate_d_cache
/******************************************************************************
* Function: clear_d_cache
* Arguments: a0: start address, a1: size in bytes
* Returns: nothing
* Description: Clears (Hit-Writeback-invalidate) an address range of the primary data cache:
*              applies Hit_Writeback_Inv_D to every D-cache line overlapping
*              [a0, a0 + a1). Returns immediately when a1 is zero.
*              The line size is read at run time through size_d_cache.
* Trashes: a0, a1, v0, v1, t0, t1, t2, t4, t5, t6, t7
*          (t4 = saved ra, t6/t5 = saved a0/a1, t7/v0/v1 via size_d_cache,
*           t0-t2 via cacheop)
*
* pseudo code:
*
******************************************************************************/
.globl clear_d_cache
.ent clear_d_cache
clear_d_cache:
.set noreorder
/* if the size is zero just return */
beqz a1, 3f
nop
/* size_d_cache clobbers ra, a0 and a1: park them in t4, t6, t5 */
move t4, ra
move t6, a0
move t5, a1
bal size_d_cache
nop
#if DEBUG_CACHE_LIB == 1
/* check for alignment */
addiu t0, v1, -1 /* get the line size mask in t0 */
and t1, t6, t0 /* and it with the address */
beqz t1, 2f
nop
/* unaligned start address: pass 'U' to _writeasm, then spin forever */
li a0, 'U'
bal _writeasm
nop
1:
b 1b
nop
#endif
2:
move a0, t6
move a1, t5
/* v1 = D-cache line size returned by size_d_cache */
cacheop(a0, a1, v1, Hit_Writeback_Inv_D)
move ra, t4
3:
jr ra
nop
.set reorder
.end clear_d_cache
/******************************************************************************
* Function: flush_d_cache
* Arguments: a0: address to flush, a1: number of bytes
* Returns: nothing
* Description: Flushes (Hit-Writeback) an address range of the primary data
*              cache: applies Hit_Writeback_D to every D-cache line
*              overlapping [a0, a0 + a1). Returns immediately when a1 is zero.
*              The line size is read at run time through size_d_cache.
* Trashes: a0, a1, v0, v1, t0, t1, t2, t4, t5, t6, t7
*          (t4 = saved ra, t6/t5 = saved a0/a1, t7/v0/v1 via size_d_cache,
*           t0-t2 via cacheop)
*
* pseudo code:
*
******************************************************************************/
.globl flush_d_cache
.ent flush_d_cache
flush_d_cache:
.set noreorder
/* if the size is zero just return */
beqz a1, 3f
nop
/* size_d_cache clobbers ra, a0 and a1: park them in t4, t6, t5 */
move t4, ra
move t6, a0
move t5, a1
bal size_d_cache
nop
#if DEBUG_CACHE_LIB == 1
/* check for alignment */
addiu t0, v1, -1 /* get the line size mask in t0 */
and t1, t6, t0 /* and it with the address */
beqz t1, 2f
nop
/* unaligned start address: pass 'U' to _writeasm, then spin forever */
li a0, 'U'
bal _writeasm
nop
1:
b 1b
nop
#endif
2:
move a0, t6
move a1, t5
/* v1 = D-cache line size returned by size_d_cache */
cacheop(a0, a1, v1, Hit_Writeback_D)
move ra, t4
3:
jr ra
nop
.set reorder
.end flush_d_cache
/******************************************************************************
* Function: disable_cache()
* Arguments: None
* Returns: None
* Description: Replaces the K0 field of CP0_CONFIG with K0_UNCACHED, so that
*              kseg0 accesses no longer go through the cache.
*              NOTE(review): nothing is written back or invalidated here;
*              presumably the caller flushes the D-cache first - confirm.
* Trashes: t1
*
* pseudo code:
*   Config.K0 = K0_UNCACHED
*
******************************************************************************/
.global disable_cache // accesses through kseg0 bypass the cache afterwards
.ent disable_cache
disable_cache:
.set noreorder
mfc0 t1, CP0_CONFIG //#define CP0_CONFIG $16
nop
nop
and t1, t1, ~CP0_CONFIG_K0_MASK //#define CP0_CONFIG_K0_MASK _MM_MAKEMASK(3,0)
or t1, t1, K0_UNCACHED //#define K0_UNCACHED (0x2)
mtc0 t1, CP0_CONFIG //#define CP0_CONFIG $16
nop
nop
jr ra
nop
.set reorder
.end disable_cache
三 L2 Cache操作
L2_CONFIG: Level 2 Cache, 表示二级cache
/******************************************************************************
* Function: set_L2
* Arguments: a0: zero = disable the L2 cache, non-zero = enable it
* Returns: None
* Description: Read-modify-write of the BMIPS4380 L2_CONFIG register.
*              Disable clears every bit of L2E_MASK (0xf0000000).
*              Enable sets the D-cache miss lookup bit and the I-cache miss
*              lookup bits for TP0 and TP1 (0x40000000 | 0x20000000 |
*              0x10000000); bit 31 is left as it was.
* Trashes: v0, v1, a1
******************************************************************************/
LEAF(set_L2)
	.set	noreorder
	li	v0, REG_ADDR(BCHP_BMIPS4380_L2_CONFIG)	//#define BCHP_BMIPS4380_L2_CONFIG 0x01f0000c /* L2 CONFIGURATION1 REGISTER */
	lw	v1, 0(v0)
	bnez	a0, enable_L2
	nop
	/* disable: clear the whole L2E field */
	li	a1, ~(BCHP_BMIPS4380_L2_CONFIG_L2E_MASK)	//#define BCHP_BMIPS4380_L2_CONFIG_L2E_MASK 0xf0000000
	and	v1, v1, a1
	b	1f
	nop
enable_L2:
	//#define CP0_L2_CONFIG_L2E_DCACHE_MISS_LOOKUP     0x40000000
	//#define CP0_L2_CONFIG_L2E_ICACHE_MISS_LOOKUP_TP0 0x20000000
	//#define CP0_L2_CONFIG_L2E_ICACHE_MISS_LOOKUP_TP1 0x10000000
	li	a1, (CP0_L2_CONFIG_L2E_DCACHE_MISS_LOOKUP | CP0_L2_CONFIG_L2E_ICACHE_MISS_LOOKUP_TP0 | CP0_L2_CONFIG_L2E_ICACHE_MISS_LOOKUP_TP1)
	or	v1, v1, a1
1:	sw	v1, 0(v0)
	jr	ra
	nop
	.set	reorder
END(set_L2)
四 RAC 操作
RAC:
read ahead cache, cache中的一种,首次访问RAM时,从RAM中读取,相应的一份拷贝放在RAC中,下次读取相同的地址单元时直接从RAC中读取而不是从RAM中,但是写RAM时,RAC中无效,直接写到RAM中
/******************************************************************************
* Function: init_rac
* Arguments: None
* Returns: None
* Description: Configures the BMIPS4380 read-ahead cache (RAC):
*              1. programs the address range register shared by TP0 and TP1
*                 (upper bound UPPER_BOUND_ADDRESS, lower bound
*                 LOWER_BOUND_ADDRESS),
*              2. enables the I and D RAC plus prefetching for TP0 and clears
*                 the NCH bit,
*              3. enables the I and D RAC plus prefetching for TP1,
*              4. sets the FLH bit to flush the RAC,
*              5. sets the C_INV bit.
*              All register updates are read-modify-write.
* Trashes: v0, v1, a0, a1
******************************************************************************/
LEAF(init_rac)
	.set	noreorder
	/* first set the address range, this applies to both TP0 and TP1 */
	li	v0, REG_ADDR(BCHP_BMIPS4380_RAC_ADDR_RANGE)	//#define BCHP_BMIPS4380_RAC_ADDR_RANGE 0x01f00004 /* ADDRESS RANGE REGISTER */
	lw	a0, 0(v0)
	li	a1, ~(BCHP_BMIPS4380_RAC_ADDR_RANGE_UPB_MASK)	//#define BCHP_BMIPS4380_RAC_ADDR_RANGE_UPB_MASK 0xffff0000
	and	a0, a0, a1
	li	a1, ~(BCHP_BMIPS4380_RAC_ADDR_RANGE_LWB_MASK)	//#define BCHP_BMIPS4380_RAC_ADDR_RANGE_LWB_MASK 0x0000ffff
	and	a0, a0, a1
	li	a1, UPPER_BOUND_ADDRESS				//#define UPPER_BOUND_ADDRESS (0x8FFF)
	li	v1, BCHP_BMIPS4380_RAC_ADDR_RANGE_UPB_SHIFT	//#define BCHP_BMIPS4380_RAC_ADDR_RANGE_UPB_SHIFT 16
	sll	a1, a1, v1
	or	a0, a0, a1
	li	a1, LOWER_BOUND_ADDRESS				//#define LOWER_BOUND_ADDRESS (0x0000)
	li	v1, BCHP_BMIPS4380_RAC_ADDR_RANGE_LWB_SHIFT	//#define BCHP_BMIPS4380_RAC_ADDR_RANGE_LWB_SHIFT 0
	sll	a1, a1, v1
	or	a0, a0, a1
	sw	a0, 0(v0)					// address range is now programmed
	/* Enable D and I RAC for TP0 */
	li	v0, REG_ADDR(BCHP_BMIPS4380_RAC_CONFIG)		//#define BCHP_BMIPS4380_RAC_CONFIG 0x01f00000 /* RAC CONFIGURATION REGISTER */
	lw	a0, 0(v0)
	//#define BCHP_BMIPS4380_RAC_CONFIG_RAC_I_MASK 0x00000001
	//#define BCHP_BMIPS4380_RAC_CONFIG_RAC_D_MASK 0x00000002
	li	a1, (BCHP_BMIPS4380_RAC_CONFIG_RAC_I_MASK | BCHP_BMIPS4380_RAC_CONFIG_RAC_D_MASK )
	or	a0, a0, a1
	/* enable prefetching for TP0 */
	//#define BCHP_BMIPS4380_RAC_CONFIG_PF_I_MASK 0x00000004
	//#define BCHP_BMIPS4380_RAC_CONFIG_PF_D_MASK 0x00000008
	li	a1, (BCHP_BMIPS4380_RAC_CONFIG_PF_I_MASK | BCHP_BMIPS4380_RAC_CONFIG_PF_D_MASK )
	or	a0, a0, a1
	/* clear NCH field
	 * if NCH = 1, RAC is not flushed on the execution of CACHE invalidate instructions
	 * else RAC is flushed
	 */
	li	a1, ~(BCHP_BMIPS4380_RAC_CONFIG_NCH_MASK)	//#define BCHP_BMIPS4380_RAC_CONFIG_NCH_MASK 0x00000020
	and	a0, a0, a1
	sw	a0, 0(v0)
	/* Enable D and I RAC for TP1 */
	li	v0, REG_ADDR(BCHP_BMIPS4380_RAC_CONFIG1)	//#define BCHP_BMIPS4380_RAC_CONFIG1 0x01f00008 /* RAC CONFIGURATION1 REGISTER */
	lw	a0, 0(v0)
	//#define BCHP_BMIPS4380_RAC_CONFIG1_RAC_I_MASK 0x00000001
	//#define BCHP_BMIPS4380_RAC_CONFIG1_RAC_D_MASK 0x00000002
	//#define BCHP_BMIPS4380_RAC_CONFIG1_PF_I_MASK 0x00000004
	li	a1, (BCHP_BMIPS4380_RAC_CONFIG1_RAC_I_MASK | BCHP_BMIPS4380_RAC_CONFIG1_RAC_D_MASK | BCHP_BMIPS4380_RAC_CONFIG1_PF_I_MASK | BCHP_BMIPS4380_RAC_CONFIG1_PF_D_MASK)
	or	a0, a0, a1
	sw	a0, 0(v0)
	/* flush RAC */
	li	v0, REG_ADDR(BCHP_BMIPS4380_RAC_CONFIG)		//#define BCHP_BMIPS4380_RAC_CONFIG 0x01f00000 /* RAC CONFIGURATION REGISTER */
	lw	a0, 0(v0)
	li	a1, BCHP_BMIPS4380_RAC_CONFIG_FLH_MASK		//#define BCHP_BMIPS4380_RAC_CONFIG_FLH_MASK 0x00000100
	or	a0, a0, a1
	sw	a0, 0(v0)
	/* set C_INV bit
	 * If C_INV=1, CPU I-CACHE invalidate instruction will only flush
	 * all I-blocks and CPU D-CACHE invalidate instruction will flush all the D-blocks.
	 * If C_INV='0'b, any CACHE invalidate instruction will flush the entire RAC.
	 */
	li	v0, REG_ADDR(BCHP_BMIPS4380_RAC_CONFIG)		//#define BCHP_BMIPS4380_RAC_CONFIG 0x01f00000 /* RAC CONFIGURATION REGISTER */
	lw	a0, 0(v0)
	li	a1, BCHP_BMIPS4380_RAC_CONFIG_C_INV_MASK	//#define BCHP_BMIPS4380_RAC_CONFIG_C_INV_MASK 0x00000010
	or	a0, a0, a1
	sw	a0, 0(v0)
	jr	ra
	nop
	.set	reorder
END(init_rac)
mips3300的初始化:
/* set the rac range */
li t0, BRCM_RAC_ADDRESS_RANGE
/* for 128MB of RAM set the RAC address range to 0x07ff0000 */
/* NOTE(review): the value actually written below is 0x4fff0000, which does
* not match the 0x07ff0000 named in the comment above - confirm which one is
* intended for this board. */
li t1, 0x4fff0000
sw t1, 0x0(t0)
BCHP_BMIPS4380_RAC_ADDR_RANGE:
配置RAC的地址范围,TP0,TP1共用
BCHP_BMIPS4380_RAC_CONFIG:
配置TP0 RAC
BCHP_BMIPS4380_RAC_CONFIG1:
配置TP1 RAC
RAC默认的linesize 为256 Byte
五 在DDR没有初始化之前,如何跑C程序呢
/*
 * Stack setup before DDR is initialised (excerpt): a 32KB region starting at
 * DCACHE_ADDR_START is handed to validate_dcache_data, then sp is pointed
 * near the top of that region.
 */
#define SHMOO_STACK_SIZE (32 * 1024)
#define SHMOO_STACK_START (DCACHE_ADDR_START) //#define DCACHE_ADDR_START 0x80000000
/* a0 = start address, a1 = size in bytes */
li a0, SHMOO_STACK_START
li a1, SHMOO_STACK_SIZE
bal validate_dcache_data
nop
/* now initialize sp to stack start */
/* sp = top of the region minus 24 bytes.
* NOTE(review): the reason for the 24-byte gap is not visible here - confirm. */
la sp, (SHMOO_STACK_START + SHMOO_STACK_SIZE - 24)
/******************************************************************************
* Function: validate_dcache_data
* Arguments: a0: start address (KSEG0), a1: size in bytes
* Returns: None
* Description: For every D-cache line overlapping [a0, a0 + a1) builds a tag
*              from the line's physical address (CP0_TAG_LO_PA_MASK bits),
*              with the dirty bit cleared and TAG_VALID set, and writes it
*              with Index_Store_Tag_D. The line size is the build-time
*              constant DCACHE_LINE_SIZE. Returns immediately when a1 is
*              zero, matching the other range routines in this library.
*              Only TAG_VALID is set; TAG_LOCK is not.
* Trashes: a0, a2, a3, v0, v1, t0, t1, t2
******************************************************************************/
.global validate_dcache_data
	.ent	validate_dcache_data
validate_dcache_data:
	.set	noreorder
	/* if the size is zero just return; otherwise the last-line address
	 * computed below would lie before the first line and the loop would
	 * not stop until the address wraps around
	 */
	beqz	a1, 3f
	nop
	/* convert KSEG0 address to physical address */
	li	t0, 0x1FFFFFFF
	and	a0, a0, t0
	li	a2, DCACHE_LINE_SIZE		//#define DCACHE_LINE_SIZE 64
	li	a3, CP0_TAG_LO_PA_MASK		//#define CP0_TAG_LO_PA_MASK _MM_MAKEMASK(20,12), the upper 20 bits
	li	v0, ~CP0_TAG_LO_Dirty_MASK	//#define CP0_TAG_LO_Dirty_MASK _MM_MAKEMASK1(7)
	/* t2 = line mask, t0 = first line, t1 = last line of the range */
	addu	t1, a0, a1
	subu	t2, a2, 1
	not	t2
	and	t0, a0, t2
	addiu	t1, t1, -1
	and	t1, t2
9:
	/* prepare the tag */
	and	v1, t0, a3			/* extract the phys address from t0 to v1*/
	and	v1, v1, v0			/* clear the dirty bit */
	ori	v1, v1, (TAG_VALID)		/* make the line valid */ //#define TAG_VALID (1 << 6)
	mtc0	v1, CP0_TAG_LO			/* store it in TAG_LO */ //#define CP0_TAG_LO $28
	nop
	nop
	cache	Index_Store_Tag_D, 0(t0)
	bne	t0, t1, 9b
	addu	t0, a2				/* delay slot: advance to the next line */
3:
	jr	ra
	nop
	.set	reorder
	.end	validate_dcache_data
通过cache,这样sp实际访问的就是cache了,而不会去访问DDR。当然这样做的前提是cache必须工作在write-back模式,且只访问有限范围的内存空间,保证cache line不会被替换
/******************************************************************************
* Function: fill_I_cache_lines
*
* Description: Fills the I cache lines with the code from flash, also fill the
* cache tag with the physical address of the code.
* The whole routine is only built when DEF_NEW_TRGT is defined.
*
* Arguments:
* a0: Address to fetch code from (flash address). It is not incremented:
*     every word is read from the same address (FIFO, see below).
* a1: Address to fill the cache tag (Must be kseg0 address)
* a2: Number of Bytes to copy.
*
* Trashes: a1, a2, v0, v1, t0, t1, t2, t3, t4
*
*****************************************************************************/
#ifdef DEF_NEW_TRGT
LEAF(fill_I_cache_lines)
.set noreorder
/* convert KSEG0 address to physical address */
li t0, 0x1FFFFFFF
and a1, a1, t0
/* extract the physical address and prepare the tag */
li t0, TAG_PA_MASK //#define TAG_PA_MASK 0xfffff000
li t2, ~TAG_DIRTY //#define TAG_DIRTY (1 << 7)
li t3, ICACHE_INDEX_MASK //#define ICACHE_INDEX_MASK (ICACHE_SIZE - 1)
/* outer loop: one iteration per cache line */
1:
and t1, a1, t0
/* now t1 has the physical address, which is needed
* to be stored in TAG
*/
/* Calculate index and store it in t4 */
and t4, a1, t3 // note: the index is masked so it never exceeds the cache size
/* set line to be valid (only TAG_VALID is OR-ed in; TAG_LOCK is not set) */
or t1 ,(TAG_VALID) //#define TAG_VALID (1 << 6)
/* clear the dirty bit */
and t1, t1, t2
/* store t1 to TAGLO */
mtc0 t1, CP0_TAG_LO
nop
nop
/* now the tag is ready, fill the line from flash */
/* v1 = bytes left in this line */
li v1, ICACHE_LINE_SIZE
/* inner loop: one iteration per 32-bit word */
2:
lw v0, 0(a0)
add v0, v0, zero
/* move v0 to DATALO */
.word 0x4082e001 # mtc0 v0,C0_TAGLO,1 => mtc0 v0,C0_DATALO
nop
nop # paranoia
/* use index store operation to store the TAGLO and DATALO to cache */
.set push
.set mips3
cache Index_Store_Tag_I, 0(t4)
.set pop
nop
nop
/* TAGLO is cleared and then read back from the cache with Index_Load_Tag_I */
mtc0 zero, CP0_TAG_LO
nop
nop
.set push
.set mips3
cache Index_Load_Tag_I, 0(t4) // the tag has to be loaded again
.set pop
nop
nop
/* don't increment src address in a0 since reading from FIFO */
//addiu a0, 4
/* increment the index by 4 to access the next word */
addiu t4, 4 // the index advances by 4 on every iteration
/* decrement word count */
addi v1, -4
bgtz v1, 2b
nop
/* increment dest addr in a1 */
addiu a1, ICACHE_LINE_SIZE
/* decrement the copy size in a2 */
addi a2, -ICACHE_LINE_SIZE
bgtz a2, 1b
nop
.set reorder
jr ra
nop
END(fill_I_cache_lines)
#endif
fill_I_cache_lines可以将代码拷贝到cache中,在DDR还没初始化之前,从cache中运行代码