linux cache管理---mips基础(一)

一  Bmips cache 概述
magnum/basemodules/mma/bmma.h

There are potentially three CPU caches you should be aware of:
        1)   L1 cache   - this is a write-back cache
        2)   L2 cache   - this is a write-back cache
        3)   RAC (read ahead cache)   - this is a read-only cache whose contents are "read ahead" up to a 4K boundary

    BMMA_FlushCache_isrsafe will do a wback_invalidate of the L1 and L2 caches and an invalidate of the RAC.

    The following cache flush rules will allow software to maintain cache coherency.

    Rule 1: After hardware writes to RAM, you must flush the cache before the CPU reads from that memory. That is:

        1. Hardware writes to RAM
        2. Flush cache for that RAM
        3. CPU reads from that RAM

    Rule 2: After the CPU writes to RAM, you must flush the cache before Hardware does any access to that memory. That is:

        1. CPU writes to RAM
        2. Flush cache for that RAM
        3. Hardware reads from that RAM

二   L1  Cache操作
cache的介绍见4KcProgMan.pdf中chapter 7 caches的介绍:
Both caches are virtually indexed and physically tagged, allowing cache access to occur in
parallel with virtual-to-physical address translation.

也就是说虚拟地址作为index来决定sets,一个sets里面有多条way,然后物理地址来比较way中的tag,一条way就是一个cacheline


/*
 * L1 data-cache geometry, chosen at build time from BCHP_CHIP.
 * Any chip that is not listed in one of the two groups below gets the
 * values of the final #else branch.
 */
#if BCHP_CHIP == 7344 || BCHP_CHIP == 7346 || BCHP_CHIP == 7425 || \
    BCHP_CHIP == 7429 || BCHP_CHIP == 7435
/* MIPS 5000: 32 KB D-cache with 32-byte lines. */
#  define DCACHE_LINE_SIZE      32
#  define DCACHE_SIZE           (32 * 1024)
#elif BCHP_CHIP == 7228 || BCHP_CHIP == 7231 || BCHP_CHIP == 7362 || \
      BCHP_CHIP == 7563 || BCHP_CHIP == 7584
/* MIPS 4380: 64 KB D-cache with 64-byte lines. */
#  define DCACHE_LINE_SIZE      64
#  define DCACHE_SIZE           (64 * 1024)
#else
/* MIPS 3300: 32 KB D-cache with 16-byte lines. */
#  define DCACHE_LINE_SIZE      16
#  define DCACHE_SIZE           (32 * 1024)
#endif

/*
 * L1 instruction-cache geometry. Only the chip group below (the one the
 * D-cache defines label "MIPS 4380") gets these macros; for every other
 * chip they stay undefined.
 */
#if BCHP_CHIP == 7228 || BCHP_CHIP == 7231 || BCHP_CHIP == 7362 || \
    BCHP_CHIP == 7563 || BCHP_CHIP == 7584
#  define ICACHE_LINE_SIZE        64
#  define ICACHE_SIZE             (32*1024)
#  define ICACHE_INDEX_MASK       (ICACHE_SIZE - 1)
#endif

/*
 * Cache-tag bit definitions, selected at build time from BCHP_CHIP.
 * NOTE(review): field meanings are read off the macro names only; confirm
 * the bit positions against the documentation of each core.
 */
#if (BCHP_CHIP == 7344 || BCHP_CHIP == 7346 || BCHP_CHIP == 7425 || BCHP_CHIP == 7429 || BCHP_CHIP == 7435)
    /* First chip group: separate D_TAG_* names plus a shared I/D address mask. */
    #define I_D_TAG_PA_MASK           0xfffff000    /* bits [31:12] */
    #define D_TAG_LOCK              (1 << 3)        /* bit 3 */
    #define D_TAG_EXCLUSIVE         (0x2)           /* bit 1; same value as D_TAG_PT -- NOTE(review): confirm this is intended */
    #define D_TAG_VALID             (1)             /* bit 0 */
    #define D_TAG_PT                (2)             /* bit 1 */
#elif (BCHP_CHIP == 7228 || BCHP_CHIP == 7231 || BCHP_CHIP == 7362 || BCHP_CHIP == 7563 || BCHP_CHIP == 7584)
    /* Second chip group: generic TAG_* names. */
    #define TAG_PA_MASK           0xfffff000        /* bits [31:12] */
    #define TAG_LOCK              (1 << 5)          /* bit 5 */
    #define TAG_VALID             (1 << 6)          /* bit 6 */
    #define TAG_DIRTY             (1 << 7)          /* bit 7 */
#else
    /* All remaining chips: only the valid bit is defined. */
    #define TAG_VALID             (1 << 6)          /* bit 6 */
#endif



4-way cache组织方式如下:

set0              way0             tag     data
                    way1               tag      data
                    way2              tag      data
                    way3              tag      data

set1              way0              tag      data
                    way1               tag      data
                    way2               tag      data
                    way3              tag      data

......               .......

VA作为Index选择set,   4-way的方式PA为:

PA             22         2                           8
                 tag       way选择               offset



linesize 为4*4=16Byte,  Valid占用4bit,每一个bit对应一个word是否有效, L为lock标志位,1表示lock状态,  LRF为 least recently filled的缩写,用于替换策略

对cache的操作指令只有cache一个,具体的用法见 MIPS32® Architecture For Programmers
Volume II: The MIPS32® Instruction Set




有效地址为: base寄存器中的值+offset(16bit, signed),  根据当前操作码得到有效地址是虚拟地址,物理地址,还是索引值
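CS为总的cache 大小(字节),  A为way数(相联度), BPT为linesize(每条cache line的字节数),则
CS/A       为每个way的字节数, sets数 = CS/(A*BPT)
log2(BPT)   为offset bit数
log2( CS/A )  为offset+index的总bit数(即way选择位的起始位置), index bit数 = log2(CS/A) - log2(BPT)
log2(A)       为way bit 数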

For index
operations ( where the address is used to index the cache but need not match the cache tag ) software should use
unmapped addresses to avoid TLB exceptions.



Bits [20:18] of the instruction specify the operation to perform. To provide software with a consistent base of cache
operations, certain encodings must be supported on all processors. The remaining encodings are recommended










具体字段的含义不同平台不同:
/*
 * Cache-tag bit definitions, selected at build time from BCHP_CHIP.
 * NOTE(review): field meanings are read off the macro names only; confirm
 * the bit positions against the documentation of each core.
 */
#if (BCHP_CHIP == 7344 || BCHP_CHIP == 7346 || BCHP_CHIP == 7425 || BCHP_CHIP == 7429 || BCHP_CHIP == 7435)
    /* First chip group: separate D_TAG_* names plus a shared I/D address mask. */
    #define I_D_TAG_PA_MASK           0xfffff000    /* bits [31:12] */
    #define D_TAG_LOCK              (1 << 3)        /* bit 3 */
    #define D_TAG_EXCLUSIVE         (0x2)           /* bit 1; same value as D_TAG_PT -- NOTE(review): confirm this is intended */
    #define D_TAG_VALID             (1)             /* bit 0 */
    #define D_TAG_PT                (2)             /* bit 1 */
#elif (BCHP_CHIP == 7228 || BCHP_CHIP == 7231 || BCHP_CHIP == 7362 || BCHP_CHIP == 7563 || BCHP_CHIP == 7584)
    /* Second chip group: generic TAG_* names. */
    #define TAG_PA_MASK           0xfffff000        /* bits [31:12] */
    #define TAG_LOCK              (1 << 5)          /* bit 5 */
    #define TAG_VALID             (1 << 6)          /* bit 6 */
    #define TAG_DIRTY             (1 << 7)          /* bit 7 */
#else
    /* All remaining chips: only the valid bit is defined. */
    #define TAG_VALID             (1 << 6)          /* bit 6 */
#endif



CFE_15_3/bsp/bmips.h   全局定义
/* Mask with only bit number `bit` set. */
#define _MM_MAKEMASK1(bit) (1 << (bit))
/* Mask of `width` consecutive one-bits whose least significant bit is `lsb`. */
#define _MM_MAKEMASK(width,lsb) (((1<<(width))-1) << (lsb))

/************************************************************************
* Primary Cache operations
************************************************************************/

/*
 * Operation codes passed as the first operand of the `cache` instruction.
 *
 * Every value is (operation << 2) | cache; the two numbers in each trailing
 * comment are that (operation, cache) pair, e.g. Index_Load_Tag_D = (1 << 2) | 1.
 * The cache selector matches the name suffix: 0 = _I, 1 = _D, 2 = _SI / _TC,
 * 3 = _SD.
 * NOTE(review): presumably primary instruction, primary data, tertiary and
 * secondary cache respectively, as in the MIPS32 CACHE instruction
 * definition -- confirm which encodings each core actually implements.
 */
#define Index_Invalidate_I               0x0         /* 0       0 */
#define Index_Writeback_Inv_D            0x1         /* 0       1 */
#define Index_Invalidate_SI              0x2         /* 0       2 */
#define Index_Invalidate_TC              0x2         /* 0       2 */
#define Index_Writeback_Inv_SD           0x3         /* 0       3 */
#define Index_Load_Tag_I                 0x4         /* 1       0 */
#define Index_Load_Tag_D                 0x5         /* 1       1 */
#define Index_Load_Tag_SI                0x6         /* 1       2 */
#define Index_Load_Tag_TC                0x6         /* 1       2 */
#define Index_Load_Tag_SD                0x7         /* 1       3 */
#define Index_Store_Tag_I                0x8         /* 2       0 */
#define Index_Store_Tag_D                0x9         /* 2       1 */
#define Index_Store_Tag_SI               0xA         /* 2       2 */
#define Index_Store_Tag_TC               0xA         /* 2       2 */
#define Index_Store_Tag_SD               0xB         /* 2       3 */
#define Create_Dirty_Exc_D               0xD         /* 3       1 */
#define Create_Dirty_Exc_SD              0xF         /* 3       3 */
#define Hit_Invalidate_I                 0x10        /* 4       0 */
#define Hit_Invalidate_D                 0x11        /* 4       1 */
#define Hit_Invalidate_SI                0x12        /* 4       2 */
#define Hit_Invalidate_TC                0x12        /* 4       2 */
#define Hit_Invalidate_SD                0x13        /* 4       3 */
#define Fill_I                           0x14        /* 5       0 */
#define Hit_Writeback_Inv_D              0x15        /* 5       1 */
#define Hit_Writeback_Inv_SD             0x17        /* 5       3 */
#define Hit_Writeback_I                  0x18        /* 6       0 */
#define Hit_Writeback_D                  0x19        /* 6       1 */
#define Hit_Writeback_SD                 0x1B        /* 6       3 */
#define Hit_Set_Virtual_SI               0x1E        /* 7       2 */
#define Hit_Set_Virtual_TC               0x1E        /* 7       2 */
#define Hit_Set_Virtual_SD               0x1F        /* 7       3 */

/************************************************************************
* KSEG Mapping Definitions and Macro's
************************************************************************/

/* Base address and size of the fixed segments (names follow kseg0/1/2). */
#define K0_BASE     0x80000000
#define K0_SIZE     0x20000000
#define K1_BASE     0xa0000000
#define K1_SIZE     0x20000000
#define K2_BASE     0xc0000000

/*
 * Address conversions between a physical address and its K0 / K1 alias.
 * Skipped entirely when PHYS_TO_K0 was already provided elsewhere.
 */
#ifndef PHYS_TO_K0
#define PHYS_TO_K0(pa)  ((pa) | K0_BASE)        /* physical -> K0 alias */
#define PHYS_TO_K1(pa)  ((pa) | K1_BASE)        /* physical -> K1 alias */
#define K0_TO_PHYS(va)  ((va) & 0x1fffffff)     /* keep the low 29 bits */
#define K1_TO_PHYS(va)  (((va) & 0x1fffffff))   /* same mask as K0_TO_PHYS */
#define K0_TO_K1(va)    ((va) | 0x20000000)     /* set bit 29 */
#define K1_TO_K0(va)    ((va) & 0xdfffffff)     /* clear bit 29 */
#endif

/* Exception and reset vector addresses. */
#define T_VEC       K0_BASE                     /* tlbmiss vector */
#define X_VEC       (K0_BASE+0x80)              /* xtlbmiss vector */
#define C_VEC       (K1_BASE+0x100)             /* cache exception vector */
#define E_VEC       (K0_BASE+0x180)             /* exception vector */
#define R_VEC       (K1_BASE+0x1fc00000)        /* reset vector */

/* K1 alias of the chip register at offset X from BCHP_PHYSICAL_OFFSET. */
#define REG_ADDR(X) PHYS_TO_K1(BCHP_PHYSICAL_OFFSET+(X))


/* Cache-mode values; the cache library ORs one of these into the CONFIG K0 field. */
#define K0_WRITE_THROUGH    (0x0)
#define K0_UNCACHED         (0x2)
#define K0_WRITE_BACK       (0x3)

/************************************************************************
* flush API parameter definitions
*
* Three independent flag groups, one value taken from each:
*   bit 2: which cache      (FLUSH_CODE / FLUSH_DATA)
*   bit 1: write back       (FLUSH_NO_WRITEBACK / FLUSH_WRITEBACK)
*   bit 0: invalidate       (FLUSH_NO_INVALIDATE / FLUSH_INVALIDATE)
************************************************************************/
#define FLUSH_CODE              0x00000000
#define FLUSH_DATA              0x00000004

#define FLUSH_NO_WRITEBACK      0x00000000
#define FLUSH_WRITEBACK         0x00000002

#define FLUSH_NO_INVALIDATE     0x00000000
#define FLUSH_INVALIDATE        0x00000001



/*
 * cacheop(kva, size, linesize, op)
 *
 * Issue `cache op` once for every cache line that overlaps the byte range
 * [kva, kva + size).
 *
 *   kva      - register holding the start address
 *   size     - register holding the byte count; must be non-zero, otherwise
 *              the end address lies below the start and the loop does not
 *              terminate at the intended line
 *   linesize - register holding the line size in bytes (a power of two)
 *   op       - constant cache operation code (cache type + operation)
 *
 * Clobbers t0 (current line address), t1 (address of the last line) and
 * t2 (line-alignment mask), so none of the register arguments may be
 * t0, t1 or t2.
 *
 * The loop runs under .set noreorder: the addu after bne sits in the branch
 * delay slot and therefore executes on every iteration.
 *
 * Every comment inside the macro must be a block comment placed BEFORE the
 * trailing backslash; anything after the backslash ends the macro body.
 */
#define cacheop(kva, size, linesize, op)     \
        .set noreorder                ;        \
        addu        t1, kva, size   ;        \
        subu        t2, linesize, 1 ;        \
        not            t2              ;        \
        and            t0, kva, t2     ;        \
        addiu        t1, t1, -1      ;        \
        and            t1, t2          ;        \
9:      cache        op, 0(t0)        ;  /* effective address = t0 + 0 */  \
        bne            t0, t1, 9b     ;        \
        addu        t0, linesize    ;        \
        .set reorder                ;
        
#endif

CFE_15_3/bsp/mips_libs_4380.s
/* Excerpt of init_mips: only the call into cache_init is shown here. */
init_mips:
    
        move    t8, ra          /* keep the caller's return address; bal overwrites ra */

    bal    cache_init           /* set up the L1 caches */
    nop                         /* fills the delay slot if this code is under .set noreorder (directive not visible in this excerpt) */

CFE_15_3/bsp/cache_lib_4380.s
/*
 * cache_init -- bring up the L1 instruction and data caches.
 *
 * Steps, in order:
 *   1. optionally set the consistency-model bit in CP0_BRCM_CONFIG0,
 *   2. set the low 6 bits of CP0 $22 select 7 to 0x18,
 *   3. size both caches with size_i_cache / size_d_cache,
 *   4. zero TagLo and DataLo, then set the ICE and DCE enable bits,
 *   5. set the CONFIG K0 field to K0_CACHE_MODE,
 *   6. store the zero tag into every I-cache and D-cache line,
 *   7. optionally clear the BHTD bit in CP0_BRCM_CONFIG0.
 *
 * Arguments: none.  Returns: nothing; returns through t6 because the bal
 * instructions overwrite ra.
 * Trashes: t0, t1, t2, t4, t6, a0-a2, s4-s7, and whatever the sizing
 *          routines trash (v0, v1, a0, a1, t7, HI/LO).
 */
.global    cache_init
    .ent    cache_init
cache_init:
    .set    noreorder

        move    t6, ra                  /* save return address across the bal calls below */
        
#if (ENABLE_WEAK_ORDER_CONSISTENCY_MODEL == 1)
        
    mfc0    t0, CP0_BRCM_CONFIG0             // CP0_BRCM_CONFIG0 is defined as $22
        nop
        nop
        
        li    t1, CP0_BRCM_CONFIG0_CM_MASK    /*set weak order in consistency model */    // CP0_BRCM_CONFIG0_CM_MASK is _MM_MAKEMASK1(16), i.e. bit 16
        or    t0, t0, t1
        sync
        mtc0    t0, CP0_BRCM_CONFIG0
        sync
#endif

    /*
     * set delay value for dcache to 0x18.
     * (clears bits [5:0] of CP0 $22 select 7, then ORs in 0x18)
     */
        mfc0    t1, $22, 7
        nop
        li      t4, 0xffffffc0
        and     t1, t4
        li      t4, 0x18
        or      t1, t4
        mtc0    t1, $22, 7
    nop
    nop

        bal    size_i_cache
        nop
        
        /* keep the I-cache results in s4 (size) and s5 (line size) */
        move    s4, v0
        move    s5, v1
        
        bal    size_d_cache
        nop
        
        /* keep the D-cache results in s6 (size) and s7 (line size) */
        move    s6, v0
        move    s7, v1
        
        /* Now s4 contains the i-cache size, s5 contains the i-cache line size
         * s6 contains the d-cache size and s7 contains the d-cache line size.
         */
        
        /* initialize TAGLO  */
        mtc0    zero, CP0_TAG_LO                // CP0_TAG_LO is defined as $28
        nop
        nop
        
        /* initialize DATALO */
        mtc0    zero, CP0_TAG_LO, 1          // CP0 register $28, select 1
        nop
        nop
        
        /* Enable Caches before Clearing. If the caches are disabled
         * then the cache operations to clear the cache will be ignored
         */
         
        mfc0    t1, CP0_BRCM_CONFIG0      // CP0_BRCM_CONFIG0 is defined as $22
    nop
    nop
        // ICE: instruction-cache enable, DCE: data-cache enable
        li    t2, (CP0_BRCM_CONFIG0_ICE_MASK | CP0_BRCM_CONFIG0_DCE_MASK)                    // CP0_BRCM_CONFIG0_ICE_MASK is _MM_MAKEMASK1(31)
        or    t1, t1, t2                                                                                                                                    // CP0_BRCM_CONFIG0_DCE_MASK is _MM_MAKEMASK1(30)
        mtc0    t1, CP0_BRCM_CONFIG0               // write back with both enable bits set
        nop
        nop
        
        mfc0    t1, CP0_CONFIG                            // CP0_CONFIG is defined as $16
    nop
    nop
        
        and     t1, t1, ~CP0_CONFIG_K0_MASK            // CP0_CONFIG_K0_MASK is _MM_MAKEMASK(3,0): clear bits [2:0]
        or      t1, t1, K0_CACHE_MODE                           // K0_CACHE_MODE is defined as (K0_WRITE_BACK)
        mtc0    t1, CP0_CONFIG
    nop                                                                                   // available modes: K0_WRITE_THROUGH (0x0),
    nop                                                                                  //                  K0_WRITE_BACK    (0x3),
                                                                                              //                  K0_UNCACHED      (0x2)
            
        
         /* initialise  instruction cache. */     // Index_Store_Tag_I with addresses starting at 0x80000000 used as the index; TagLo and DataLo were zeroed above, so every I-cache line gets a zero tag
        li      a0, K0_BASE                                                           // K0_BASE is defined as 0x80000000
        move    a1, s4        /* i-cache size */
        move    a2, s5        /* i-cache line size */
         cacheop (a0, a1, a2, Index_Store_Tag_I)
        
        /* initialize data cache */
        
        li      a0, K0_BASE
        move    a1, s6        /* d-cache size */
        move    a2, s7        /* d-cache line size */
        cacheop(a0, a1, a2, Index_Store_Tag_D)
        
        
        
#if (ENABLE_BRANCH_PREDICTION == 1)
    mfc0    t0, CP0_BRCM_CONFIG0     // CP0_BRCM_CONFIG0 is defined as $22
    li    t1, ~CP0_BRCM_CONFIG0_BHTD_MASK             // CP0_BRCM_CONFIG0_BHTD_MASK is _MM_MAKEMASK1(21); NOTE(review): presumably a "branch history table disable" bit -- confirm
        and     t0, t0, t1
        mtc0    t0, CP0_BRCM_CONFIG0
        nop
        nop
#endif
         
        jr    t6                /* return via the address saved on entry */
        nop
    .set    reorder
    .end cache_init

/******************************************************************************
* Function: size_i_cache
* Arguments:     None
* Returns:    v0, v1
* Description: v0 = i cache size in bytes, v1 = I cache line size in bytes.
*              Both are 0 when the IL field reads 0 (no I-cache present).
* Trashes:    v0,v1,a0,a1, t7, HI/LO (multu)
*
*    pseudo code:
*        cfg = CP0 Config1
*        if (IL(cfg) == 0) return (0, 0)
*        v1  = 1 << (IL(cfg) + 1)
*        v0  = (64 << IS(cfg)) * v1 * (IA(cfg) + 1)
*    
******************************************************************************/
LEAF( size_i_cache )

    .set    noreorder
    mfc0    a0, CP0_CONFIG1        // CP0_CONFIG1 is defined as $16,1
        nop
        nop
        
        move    t7, a0             /* t7 keeps the raw Config1 value for the later field extractions */

         /* Determine   sets per way : IS
         *
         * This field contains the number of sets (i.e., indices) per way of
     * the instruction cache:
     * i) 0x0: 64, ii) 0x1: 128, iii) 0x2: 256, iv) 0x3: 512, v) 0x4: 1k
     * vi) 0x5 - 0x7: Reserved.
     */
        
        li      a1, CP0_CONFIG1_IS_MASK                  // CP0_CONFIG1_IS_MASK is _MM_MAKEMASK(3,22): bits [24:22]

        and     a0, a0, a1
        li    a1, CP0_CONFIG1_IS_SHIFT                  // CP0_CONFIG1_IS_SHIFT is (22)
        srl     a0, a0, a1
        
        /* sets per way = (64<<IS) */
        
        li    v0, 0x40     
        sllv    v0, v0, a0                 
        
        /* determine line size
         * This field contains the line size of the instruction cache:
     * i) 0x0: No I-cache present, i) 0x3: 16 bytes, ii) 0x4: 32 bytes, iii)
     * 0x5: 64 bytes, iv) the rest: Reserved.
     */
        
        move    a0, t7
        
        li    a1, CP0_CONFIG1_IL_MASK        // CP0_CONFIG1_IL_MASK is _MM_MAKEMASK(3,19): bits [21:19]
        and    a0, a0, a1 
        li    a1, CP0_CONFIG1_IL_SHIFT         // CP0_CONFIG1_IL_SHIFT is (19)
        srl    a0, a0, a1    

        beqz    a0, no_i_cache   /* IL == 0: report size 0 / line size 0 */
        nop  

        /* line size = 2 ^ (IL+1) */
        
        addi    a0, a0, 1
        li    v1, 1
        sll    v1, v1, a0
         
        /* v0 holds the sets per way; shifting it left by (IL+1) multiplies
         * it by the line size, which gives the number of bytes in one way
         */
         
         sll    v0, v0, a0
        
        /* determine set associativity
         * This field contains the set associativity of the instruction cache.
     * i) 0x0: Direct mapped, ii) 0x1: 2-way, iii) 0x2: 3-way, iv) 0x3:
     * 4-way, v) 0x4 - 0x7: Reserved.
     */
        
    move    a0, t7
         
    li    a1, CP0_CONFIG1_IA_MASK                // CP0_CONFIG1_IA_MASK is _MM_MAKEMASK(3,16): bits [18:16]
    and    a0, a0, a1
    li    a1, CP0_CONFIG1_IA_SHIFT                // CP0_CONFIG1_IA_SHIFT is (16)
    srl    a0, a0, a1
    addi    a0, a0, 0x1          /* number of ways = IA + 1 */
       
        /* v0 holds the bytes per way; multiply it by the number of
         * ways to get the total cache size
         */
        
    multu    v0, a0    /*multu is interlocked, so no need to insert nops */                    
        mflo    v0    
         b    1f
         nop
        
no_i_cache:
        move    v0, zero                  
        move    v1, zero            
        
1:
        jr       ra
        nop
    .set    reorder
       
END(size_i_cache)    

/******************************************************************************
* Function: size_d_cache
* Arguments:     None
* Returns:    v0, v1
* Description: v0 = d cache size in bytes, v1 = d cache line size in bytes.
*              Both are 0 when the DL field reads 0 (no D-cache present).
* Trashes:    v0,v1,a0,a1, t7, HI/LO (multu)
*
*    pseudo code:
*        cfg = CP0 $16 select 1
*        if (DL(cfg) == 0) return (0, 0)
*        v1  = 1 << (DL(cfg) + 1)
*        v0  = (64 << DS(cfg)) * v1 * (DA(cfg) + 1)
*    
******************************************************************************/
LEAF( size_d_cache )

    .set    noreorder

    mfc0    a0, $16, 1         // CP0 register $16 select 1 (the register size_i_cache reads as CP0_CONFIG1)
        nop
        nop
        
        move    t7, a0         /* t7 keeps the raw register value for the later field extractions */

        /* Determine sets per way: DS
         *
         * This field contains the number of sets (i.e., indices) per way of
     * the data cache:
     * i) 0x0: 64, ii) 0x1: 128, iii) 0x2: 256, iv) 0x3: 512, v) 0x4: 1k
     * vi) 0x5 - 0x7: Reserved.
     */
        
        li      a1, CP0_CONFIG1_DS_MASK                  
        and     a0, a0, a1
        li    a1, CP0_CONFIG1_DS_SHIFT                  
        srl     a0, a0, a1
        
        /* sets per way = (64<<DS) */
        
        li    v0, 0x40     
        sllv    v0, v0, a0                 
        
        /* determine line size
         * This field contains the line size of the data cache:
     * i) 0x0: No D-cache present, i) 0x3: 16 bytes, ii) 0x4: 32 bytes, iii)
     * 0x5: 64 bytes, iv) the rest: Reserved.
     */
        
        move    a0, t7

        
        li    a1, CP0_CONFIG1_DL_MASK     
        and    a0, a0, a1
        li    a1, CP0_CONFIG1_DL_SHIFT
        srl    a0, a0, a1    

        beqz    a0, no_d_cache   /* DL == 0: report size 0 / line size 0 */
        nop  
                                     

        /* line size = 2 ^ (DL+1) */
        
        addi    a0, a0, 1
        li    v1, 1
        sll    v1, v1, a0
        
        
        /* v0 holds the sets per way; shifting it left by (DL+1) multiplies
         * it by the line size, which gives the number of bytes in one way
         */
         
         sll    v0, v0, a0
        
        /* determine set associativity
         * This field contains the set associativity of the data cache.
     * i) 0x0: Direct mapped, ii) 0x1: 2-way, iii) 0x2: 3-way, iv) 0x3:
     * 4-way, v) 0x4 - 0x7: Reserved.
     */
        
    move    a0, t7
         
    li    a1, CP0_CONFIG1_DA_MASK
    and    a0, a0, a1
    li    a1, CP0_CONFIG1_DA_SHIFT
    srl    a0, a0, a1
    addi    a0, a0, 0x1          /* number of ways = DA + 1 */
       
        /* v0 holds the bytes per way; multiply it by the number of
         * ways to get the total cache size
         */
        
    multu    v0, a0    /*multu is interlocked, so no need to insert nops */                    
        mflo    v0    
        
        b    1f
        nop

no_d_cache:
        move    v0, zero                  
        move    v1, zero    
        
1:        
        jr       ra
        nop
    .set    reorder
       
END(size_d_cache)        

/******************************************************************************
* Function: init_i_cache     (legacy)
* Arguments:     None
* Returns:    None
* Description: Stores a zero tag (cache op 0x8 = Index_Store_Tag_I) into
*              I-cache lines, using addresses derived from the K0 alias of
*              the CacheableICacheLoop label and from the Config1 geometry.
*              The alternative implementation built on size_i_cache and
*              cacheop is compiled out with #if 0.
* Trashes:    t0, t1, t3, t4, t5, t6, t7 (registers written by the active code)
*
*    pseudo code:
*        t3 = bytes per way, shifted left once more if Config1 bit 17 is set
*        t4 = K0 alias of CacheableICacheLoop
*        store zero tags for six 16-byte steps at t4 and at three further
*            offsets spaced t3/2 apart
*        walk t4 over [t4, t4 + t3) and t5 over [t4 + t3, t4 + 2*t3),
*            one line at a time, storing a zero tag at both
*    
******************************************************************************/
    .global init_i_cache
    .ent    init_i_cache
init_i_cache:

    .set    noreorder

    mfc0     t0, $16,1            /*.word    0x40088001  */
    nop
    nop

         /* Read the config1 reg for cache size*/
    
        and    t1, t0, 0x380000     /* [21:19] IL: line size of i-cache.*/
                         /*   t1=0x280000 (4380) */
        srl    t1, t1, 19          /* i-cache line-size constant is right-most 3 bits*/
                         /*   t1=0x5 (4380)*/
    addi    t1, 1               /* add 1 to i-cache line-size constant */
                         /*   (not actually line-size value, 4380=>t1=5+1)*/
                         /*   t1=0x6 (4380)*/
    li    t4, 0x40          /* shift 0x40 by the line-size constant +1.*/
    sllv    t4, t4, t1        /*   t4=0x1000 (for 64 bytes line-size) (4380)*/
        li    t7, 0x1
    sllv    t7, t7, t1          /*   t7=0x40 (for 64 bytes line-size) (4380)*/
        and    t1, t0, 0x1C00000       /* [24:22] IS: sets per way*/
        srl    t1, t1, 22         /* IS is right-most 3 bits */
                         /*   t1=0x2 (4380)*/
        sllv    t3, t4, t1          /*   t3=0x4000 (4380:shift left 2 for 256 sets)*/
        and    t1, t0, 0x20000      /* bit [17] only, i.e. the middle bit of the IA (set associativity) field */
        srl    t1, t1, 17          /*   t1=0x0 (4380) */
        sllv    t3, t3, t1          /* t3 doubled when bit 17 was set */
                         /*   t3=0x4000 (4380)*/
        la      t0, CacheableICacheLoop
        and     t0, 0xDFFFFFFF      /* clear bit 29 (the same mask K1_TO_K0 uses) */
        or      t4, t0, $0          /* t4 = t0 */
        srl    t1, t3, 1            /* t1 = t3 / 2 */
        add     t5, t4, t1          /* t5=addr(CacheableICacheLoop)+0x2000, +8K (4380)*/
        
    /* Initialise the TagLo and DataLo registers */
    mtc0    $0, $28
    nop
    nop
    mtc0     $0, $28, 1        /*.word 0x4080e001  */
    nop
    nop
    /* pre-load the code: store zero tags for the 0x60 bytes starting at the
     * loop address and at the three addresses t3/2, 2*t3/2 and 3*t3/2 above it
     */
        cache   0x8,0x0(t4)
        cache   0x8,0x10(t4)
        cache   0x8,0x20(t4)
        cache   0x8,0x30(t4)
        cache   0x8,0x40(t4)
        cache   0x8,0x50(t4)
        cache   0x8,0x0(t5)
        cache   0x8,0x10(t5)
        cache   0x8,0x20(t5)
        cache   0x8,0x30(t5)
        cache   0x8,0x40(t5)
        cache   0x8,0x50(t5)
        add     t5, t5, t1              /* t5=t5+8K (4380)*/
        cache   0x8,0x0(t5)
        cache   0x8,0x10(t5)
        cache   0x8,0x20(t5)
        cache   0x8,0x30(t5)
        cache   0x8,0x40(t5)
        cache   0x8,0x50(t5)
        add     t5, t5, t1              /* t5=t5+8K (4380)*/
        cache   0x8,0x0(t5)
        cache   0x8,0x10(t5)
        cache   0x8,0x20(t5)
        cache   0x8,0x30(t5)
        cache   0x8,0x40(t5)
        cache   0x8,0x50(t5)
        add     t5, t4, t3          /* t5=addr()+0x4000, +16KB (4380) */
    or     t6, t7, t7          /* t6=line size */
    /* t3 -> icache set size
     * t4 -> beginning address
     * t5 -> end address (for set 0)
     * t6 -> line-size
     */
        add t7, t5, t3          /* upto this address (for all the sets)*/
                         /* t7=addr()+0x4000+0x4000, +32KB (4380)*/
                    /*        j  t0*/
                    /*        nop*/
CacheableICacheLoop:
        cache   0x8, 0x0(t4)            /* invalidate set 0 (icache)*/
        cache   0x8, 0x0(t5)            /* invalidate set 1 (icache)*/
        add     t5, t5, t6          /* next I-cache line  (set 1, I-cache)*/
        bne     t5, t7, CacheableICacheLoop
        add     t4, t4, t6          /* delay slot: next I-cache line  (set 0, I-cache)*/

#if 0
    /* Disabled alternative: size the cache at run time and use cacheop. */
    move    s0, ra
        
    bal    size_i_cache
    nop

     /* initialize TAGLO  */
        mtc0    zero, CP0_TAG_LO
        nop
        nop
        
        /* initialize DATALO */
        mtc0    zero, CP0_TAG_LO, 1
        nop
        nop
    
    
    /* initialise  instruction cache. */
        li      a0, K0_BASE
        move    a1, v0        /* i-cache size */
        move    a2, v1        /* i-cache line size */
        cacheop(a0, a1, a2, Index_Store_Tag_I)

    move    ra, s0    
#endif
    jr    ra
    nop
    .set reorder
    
    .end init_i_cache                

/******************************************************************************
* Function: invalidate_all_i_cache
* Arguments:     None
* Returns:    None
* Description: Invalidates(Index_Invalidate_I) complete i-cache: sizes the
*              cache with size_i_cache, then issues the index operation for
*              every line in [K0_BASE, K0_BASE + cache size).
* Trashes:    t0, t1, t2 (cacheop), t6 (saved ra), a0,
*             plus v0, v1, a1, t7, HI/LO (size_i_cache)
*
*    pseudo code:
*        (size, line) = size_i_cache()
*        cacheop(K0_BASE, size, line, Index_Invalidate_I)
*    
******************************************************************************/
    .globl    invalidate_all_i_cache
    .ent    invalidate_all_i_cache
invalidate_all_i_cache:
    .set    noreorder

    move    t6, ra          /* bal below overwrites ra; return goes through t6 */
        
    bal    size_i_cache     /* v0 = cache size, v1 = line size */
    nop
        
    li    a0, K0_BASE
           cacheop(a0, v0, v1, Index_Invalidate_I)

        jr       t6
        nop
    .set    reorder        
        
    .end    invalidate_all_i_cache

/******************************************************************************
* Function: clear_all_d_cache
* Arguments:     None
* Returns:    None
* Description: Clears(Index_Writeback_Inv_D) D-cache: sizes the cache with
*              size_d_cache, then issues the index operation for every line
*              in [K0_BASE, K0_BASE + cache size).
* Trashes:    t0, t1, t2 (cacheop), t6 (saved ra), a0,
*             plus v0, v1, a1, t7, HI/LO (size_d_cache)
*
*    pseudo code:
*        (size, line) = size_d_cache()
*        cacheop(K0_BASE, size, line, Index_Writeback_Inv_D)
*    
******************************************************************************/
    .globl    clear_all_d_cache
    .ent    clear_all_d_cache
clear_all_d_cache:

    .set    noreorder
    move    t6, ra          /* bal below overwrites ra; return goes through t6 */
        
    bal    size_d_cache     /* v0 = cache size, v1 = line size */
    nop
        
        li      a0, K0_BASE
        cacheop(a0, v0, v1,   Index_Writeback_Inv_D )

        jr      t6
        nop
    .set    reorder
        
.end    clear_all_d_cache

/******************************************************************************
* Function: invalidate_d_cache
* Arguments:     a0: start address, a1: size in bytes
* Returns:        None (returns immediately when a1 is 0)
* Description: Hit Invalidates (Hit_Invalidate_D) every D-cache line that
*              overlaps [a0, a0 + a1).
*              NOTE(review): cacheop rounds the range outwards to whole
*              lines, so data sharing the first or last line is affected
*              too -- confirm callers pass line-aligned buffers.
*              With DEBUG_CACHE_LIB == 1 an unaligned start address is
*              reported by writing 'U' through _writeasm, after which the
*              routine spins forever.
* Trashes:        t0, t1, t2 (cacheop / debug check), t4 (saved ra),
*                 t5, t6 (saved arguments),
*                 plus v0, v1, t7, HI/LO (size_d_cache)
*
*    pseudo code:
*        if (a1 == 0) return
*        (size, line) = size_d_cache()
*        cacheop(a0, a1, line, Hit_Invalidate_D)
*    
******************************************************************************/
    .globl    invalidate_d_cache
    .ent    invalidate_d_cache
invalidate_d_cache:
    .set    noreorder

    /* if the size is zero just return */
    beqz    a1, 3f
    nop

    move    t4, ra          /* bal below overwrites ra */

    move    t6, a0          /* size_d_cache trashes a0/a1: park the arguments */
    move    t5, a1

    bal    size_d_cache     /* only v1 (line size) is used afterwards */
    nop        

#if DEBUG_CACHE_LIB == 1
    /* check for alignment */
    addiu    t0, v1, -1        /* get the line size mask in t0 */
    and    t1, t6, t0        /* and it with the address */
    beqz    t1, 2f
    nop
        
    li    a0, 'U'
    bal    _writeasm
    nop
1:        
    b    1b                  /* unaligned start address: halt here */
    nop
#endif                
        
2:        
    move    a0, t6          /* restore the arguments for cacheop */
    move    a1, t5
        
        cacheop(a0, a1, v1,   Hit_Invalidate_D )
        
    move    ra, t4        
3:
    jr      ra
        nop
    .set    reorder         
        
    .end    invalidate_d_cache

/******************************************************************************
* Function: clear_d_cache
* Arguments:     a0: start address, a1: size in bytes
* Returns:        None (returns immediately when a1 is 0)
* Description: Clears (Hit-Writeback-invalidate) an address range of the primary data cache:
*              issues Hit_Writeback_Inv_D for every line that overlaps
*              [a0, a0 + a1).
*              With DEBUG_CACHE_LIB == 1 an unaligned start address is
*              reported by writing 'U' through _writeasm, after which the
*              routine spins forever.
* Trashes:        t0, t1, t2 (cacheop / debug check), t4 (saved ra),
*                 t5, t6 (saved arguments),
*                 plus v0, v1, t7, HI/LO (size_d_cache)
*
*    pseudo code:
*        if (a1 == 0) return
*        (size, line) = size_d_cache()
*        cacheop(a0, a1, line, Hit_Writeback_Inv_D)
*    
******************************************************************************/
    .globl    clear_d_cache
    .ent    clear_d_cache
clear_d_cache:  
    .set    noreorder

    /* if the size is zero just return */
    beqz    a1, 3f
    nop
       
    move    t4, ra          /* bal below overwrites ra */

    move    t6, a0          /* size_d_cache trashes a0/a1: park the arguments */
    move    t5, a1

    bal    size_d_cache     /* only v1 (line size) is used afterwards */
    nop        

#if DEBUG_CACHE_LIB == 1
    /* check for alignment */
    addiu    t0, v1, -1        /* get the line size mask in t0 */
    and    t1, t6, t0        /* and it with the address */
    beqz    t1, 2f
    nop
        
    li    a0, 'U'
    bal    _writeasm
    nop
1:        
    b    1b                  /* unaligned start address: halt here */
    nop
#endif                
2:        
    move    a0, t6          /* restore the arguments for cacheop */
    move    a1, t5
        
        cacheop(a0, a1, v1, Hit_Writeback_Inv_D)
        
    move    ra, t4        
3:        
    jr      ra
        nop
    .set    reorder
        
    .end    clear_d_cache

/******************************************************************************
* Function:   flush_d_cache :Flushes (Hit-Writeback) an address range of the primary data cache
* Arguments:     a0: address to flush, a1: number of bytes
* Returns:        None (returns immediately when a1 is 0)
* Description: Issues Hit_Writeback_D for every D-cache line that overlaps
*              [a0, a0 + a1).
*              With DEBUG_CACHE_LIB == 1 an unaligned start address is
*              reported by writing 'U' through _writeasm, after which the
*              routine spins forever.
* Trashes:        t0, t1, t2 (cacheop / debug check), t4 (saved ra),
*                 t5, t6 (saved arguments),
*                 plus v0, v1, t7, HI/LO (size_d_cache)
*
*    pseudo code:
*        if (a1 == 0) return
*        (size, line) = size_d_cache()
*        cacheop(a0, a1, line, Hit_Writeback_D)
*    
******************************************************************************/
    .globl    flush_d_cache
    .ent    flush_d_cache
flush_d_cache:  
    .set    noreorder

    /* if the size is zero just return */
    beqz    a1, 3f
    nop


    move    t4, ra          /* bal below overwrites ra */

    move    t6, a0          /* size_d_cache trashes a0/a1: park the arguments */
    move    t5, a1

    bal    size_d_cache     /* only v1 (line size) is used afterwards */
    nop        

#if DEBUG_CACHE_LIB == 1
    /* check for alignment */
    addiu    t0, v1, -1        /* get the line size mask in t0 */
    and    t1, t6, t0        /* and it with the address */
    beqz    t1, 2f
    nop
        
    li    a0, 'U'
    bal    _writeasm
    nop
1:        
    b    1b                  /* unaligned start address: halt here */
    nop
#endif                
2:                
    move    a0, t6          /* restore the arguments for cacheop */
    move    a1, t5
        
        cacheop(a0, a1, v1, Hit_Writeback_D)

    move    ra, t4
3:        
    jr      ra
        nop
    .set    reorder         
        
    .end    flush_d_cache

/******************************************************************************
* Function: disable_cache()
* Arguments:     None
* Returns:        None
* Description: Rewrites the K0 field (bits [2:0]) of CP0_CONFIG with
*              K0_UNCACHED, so that kseg0 accesses no longer go through
*              the cache.
* Trashes:        t1
*
*    pseudo code:
*        CONFIG = (CONFIG & ~CP0_CONFIG_K0_MASK) | K0_UNCACHED
*    
******************************************************************************/
.global    disable_cache                // after this routine kseg0 accesses bypass the cache
    .ent    disable_cache
disable_cache:
    .set    noreorder

        mfc0    t1, CP0_CONFIG          // CP0_CONFIG is defined as $16
    nop
    nop
        
        and     t1, t1, ~CP0_CONFIG_K0_MASK    // CP0_CONFIG_K0_MASK is _MM_MAKEMASK(3,0): clear bits [2:0]
        or      t1, t1, K0_UNCACHED                  // K0_UNCACHED is (0x2)
        mtc0    t1, CP0_CONFIG                          // write the new mode back to $16
    nop
    nop        
         
        jr       ra
        nop
    .set    reorder
     
    .end    disable_cache


三 L2   Cache操作

L2_CONFIG: Level 2 Cache, 表示二级cache

/*
 * set_L2 -- enable or disable L2 cache lookups through the L2_CONFIG register.
 *
 * Arguments: a0 != 0 -> OR in the three lookup-enable bits
 *                       (D-cache miss, I-cache miss TP0, I-cache miss TP1);
 *            a0 == 0 -> clear every bit covered by the L2E mask.
 * Returns:   None
 * Trashes:   v0 (register address), v1 (register value), a1 (mask)
 *
 * The register is accessed read-modify-write through its K1 alias (REG_ADDR).
 */
LEAF( set_L2 )

    .set    noreorder
    li    v0, REG_ADDR(BCHP_BMIPS4380_L2_CONFIG)   // BCHP_BMIPS4380_L2_CONFIG is 0x01f0000c (L2 CONFIGURATION1 REGISTER)
    lw    v1, 0(v0)
    
    bnez    a0, enable_L2
    nop
    
    /* disable: clear the whole L2E field */
    li    a1, ~(BCHP_BMIPS4380_L2_CONFIG_L2E_MASK)  // BCHP_BMIPS4380_L2_CONFIG_L2E_MASK is 0xf0000000
    and    v1, v1, a1
    
    b    1f
    nop
    
enable_L2:
    
    li    a1, ( CP0_L2_CONFIG_L2E_DCACHE_MISS_LOOKUP   |   CP0_L2_CONFIG_L2E_ICACHE_MISS_LOOKUP_TP0   |   CP0_L2_CONFIG_L2E_ICACHE_MISS_LOOKUP_TP1 )    // CP0_L2_CONFIG_L2E_DCACHE_MISS_LOOKUP is 0x40000000
// CP0_L2_CONFIG_L2E_ICACHE_MISS_LOOKUP_TP0 is 0x20000000
// CP0_L2_CONFIG_L2E_ICACHE_MISS_LOOKUP_TP1 is 0x10000000
    or    v1, v1, a1
    
1:    sw    v1, 0(v0)          /* write the updated value back to L2_CONFIG */
        
    jr    ra
    nop
    .set    reorder
    
END(set_L2)     

四  RAC 操作
RAC(Read Ahead Cache,预读缓存)是cache的一种:首次访问RAM时,数据从RAM中读取,同时在RAC中保留一份拷贝;下次读取相同的地址单元时,直接从RAC中读取而不再访问RAM。写RAM时RAC不起作用,数据直接写到RAM中。

/******************************************************************************
* Function: init_rac
* Arguments:   None
* Returns:     None
* Description:
*    Programs the RAC address range register, enables the I/D RAC and
*    prefetch bits in RAC_CONFIG (TP0) and RAC_CONFIG1 (TP1), clears NCH,
*    then sets the FLH bit and finally the C_INV bit in RAC_CONFIG.
* Trashes:     v0, v1, a0, a1
******************************************************************************/
LEAF( init_rac )
    .set    noreorder

     /* first set the address range, this applies to both TP0 and TP1 */     
    
    li    v0, REG_ADDR( BCHP_BMIPS4380_RAC_ADDR_RANGE )   //#define BCHP_BMIPS4380_RAC_ADDR_RANGE                 0x01f00004 /* ADDRESS RANGE REGISTER */
    lw    a0, 0(v0)
    
    /* clear the upper-bound (UPB) field */
    li    a1, ~(BCHP_BMIPS4380_RAC_ADDR_RANGE_UPB_MASK)  //#define BCHP_BMIPS4380_RAC_ADDR_RANGE_UPB_MASK                          0xffff0000
    and    a0, a0, a1
    
    /* clear the lower-bound (LWB) field */
    li    a1, ~(BCHP_BMIPS4380_RAC_ADDR_RANGE_LWB_MASK)  //#define BCHP_BMIPS4380_RAC_ADDR_RANGE_LWB_MASK                          0x0000ffff
    and    a0, a0, a1
    
    /* insert UPPER_BOUND_ADDRESS at the UPB field position */
    li    a1, UPPER_BOUND_ADDRESS   //#define    UPPER_BOUND_ADDRESS        (0x8FFF)
    li    v1, BCHP_BMIPS4380_RAC_ADDR_RANGE_UPB_SHIFT   //#define BCHP_BMIPS4380_RAC_ADDR_RANGE_UPB_SHIFT                         16
    sll    a1, a1, v1
    or    a0, a0, a1
    
    
    /* insert LOWER_BOUND_ADDRESS at the LWB field position */
    li    a1, LOWER_BOUND_ADDRESS   //#define    LOWER_BOUND_ADDRESS        (0x0000)
    li    v1, BCHP_BMIPS4380_RAC_ADDR_RANGE_LWB_SHIFT  //#define BCHP_BMIPS4380_RAC_ADDR_RANGE_LWB_SHIFT                         0
    sll    a1, a1, v1
    or    a0, a0, a1
    
    sw    a0, 0(v0)     // program the RAM address range

     /* Enable D and I RAC for TP0 */
    
    li    v0, REG_ADDR( BCHP_BMIPS4380_RAC_CONFIG )  //#define BCHP_BMIPS4380_RAC_CONFIG                     0x01f00000 /* RAC CONFIGURATION REGISTER */
    lw    a0, 0(v0)
    
    li    a1, (BCHP_BMIPS4380_RAC_CONFIG_RAC_I_MASK | BCHP_BMIPS4380_RAC_CONFIG_RAC_D_MASK )  //   #define BCHP_BMIPS4380_RAC_CONFIG_RAC_I_MASK                            0x00000001
// #define BCHP_BMIPS4380_RAC_CONFIG_RAC_D_MASK                            0x00000002
    or    a0, a0, a1
    
     /* enable prefetching for TP0 */
    
    li    a1, (BCHP_BMIPS4380_RAC_CONFIG_PF_I_MASK | BCHP_BMIPS4380_RAC_CONFIG_PF_D_MASK )    //#define BCHP_BMIPS4380_RAC_CONFIG_PF_I_MASK                             0x00000004
//#define BCHP_BMIPS4380_RAC_CONFIG_PF_D_MASK                             0x00000008
    or    a0, a0, a1
    
    /* clear NCH field
     * if NCH = 1, RAC is not flushed on the execution of CACHE invalidate instructions
     * else RAC is flushed
    */
    
    li    a1, ~(BCHP_BMIPS4380_RAC_CONFIG_NCH_MASK)  //#define BCHP_BMIPS4380_RAC_CONFIG_NCH_MASK                              0x00000020
    and    a0, a0, a1
    
    /* commit the RAC enable, prefetch and NCH settings for TP0 */
    sw    a0, 0(v0)

     /* Enable D and I RAC for TP1 */
    
    li    v0, REG_ADDR( BCHP_BMIPS4380_RAC_CONFIG1 )   //#define BCHP_BMIPS4380_RAC_CONFIG1                    0x01f00008 /* RAC CONFIGURATION1 REGISTER */
    lw    a0, 0(v0)
    
    /* RAC enable and prefetch bits are set in one step for TP1 */
    li    a1, (BCHP_BMIPS4380_RAC_CONFIG1_RAC_I_MASK | BCHP_BMIPS4380_RAC_CONFIG1_RAC_D_MASK | BCHP_BMIPS4380_RAC_CONFIG1_PF_I_MASK | BCHP_BMIPS4380_RAC_CONFIG1_PF_D_MASK)
//#define BCHP_BMIPS4380_RAC_CONFIG1_RAC_I_MASK                           0x00000001
//#define BCHP_BMIPS4380_RAC_CONFIG1_RAC_D_MASK                           0x00000002
//#define BCHP_BMIPS4380_RAC_CONFIG1_PF_I_MASK                            0x00000004
    or    a0, a0, a1
    
    sw    a0, 0(v0)
    

     /* flush RAC */
    li    v0, REG_ADDR( BCHP_BMIPS4380_RAC_CONFIG )   //#define BCHP_BMIPS4380_RAC_CONFIG                     0x01f00000 /* RAC CONFIGURATION REGISTER */
    lw    a0, 0(v0)
    
    li    a1, BCHP_BMIPS4380_RAC_CONFIG_FLH_MASK   //   #define BCHP_BMIPS4380_RAC_CONFIG_FLH_MASK                              0x00000100
    or    a0, a0, a1
    
    sw    a0, 0(v0)
    
    
     /* set C_INV bit
     * If C_INV=1, CPU I-CACHE invalidate instruction will only flush
     * all I-blocks and CPU D-CACHE invalidate instruction will flush all the D-blocks.
     * If C_INV='0'b, any CACHE invalidate instruction will flush the entire RAC.
     */
    
    li    v0, REG_ADDR(BCHP_BMIPS4380_RAC_CONFIG)   //#define BCHP_BMIPS4380_RAC_CONFIG                     0x01f00000 /* RAC CONFIGURATION REGISTER */
    lw    a0, 0(v0)
    
    li    a1, BCHP_BMIPS4380_RAC_CONFIG_C_INV_MASK  //#define BCHP_BMIPS4380_RAC_CONFIG_C_INV_MASK                            0x00000010
    or    a0, a0, a1
    
    sw    a0, 0(v0)
        
    jr       ra
        nop                             /* branch delay slot */
    .set    reorder
        
END(init_rac)


mips3300的初始化:
    /* set the rac range */
    li    t0, BRCM_RAC_ADDRESS_RANGE
     /*    for 128MB of RAM set the RAC address range to 0x07ff0000 */
     /*    NOTE(review): the value actually stored below is 0x4fff0000, not the
      *    0x07ff0000 quoted above -- confirm which one is intended. */
    li    t1, 0x4fff0000
    sw    t1, 0x0(t0)

BCHP_BMIPS4380_RAC_ADDR_RANGE: 配置RAC的地址范围,TP0,TP1共用
BCHP_BMIPS4380_RAC_CONFIG:   配置TP0 RAC
BCHP_BMIPS4380_RAC_CONFIG1:   配置TP1 RAC

RAC默认的linesize 为256 Byte


五 在DDR没有初始化之前,如何跑C程序呢
/* Size in bytes of the temporary stack region handed to validate_dcache_data. */
#define SHMOO_STACK_SIZE                (32 * 1024)
/* The temporary stack region begins at DCACHE_ADDR_START. */
#define SHMOO_STACK_START               (DCACHE_ADDR_START)          //#define DCACHE_ADDR_START      0x80000000
         /* a0 = region start, a1 = region size in bytes */
         li    a0, SHMOO_STACK_START
         li    a1, SHMOO_STACK_SIZE
         bal   validate_dcache_data
         nop                            /* branch delay slot */

    /* now initialize sp: 24 bytes below the end of the stack region */
         la    sp, (SHMOO_STACK_START + SHMOO_STACK_SIZE - 24)

/******************************************************************************
* Function: validate_dcache_data
* Arguments:
*    a0: start address of the region (KSEG0 address)
*    a1: size of the region in bytes
* Returns:     None
* Description:
*    For every D-cache line touched by [a0, a0 + a1) a tag is built from the
*    physical address (masked with CP0_TAG_LO_PA_MASK), with the dirty bit
*    cleared and TAG_VALID set, and written with Index_Store_Tag_D.
*    A zero-length region returns immediately without touching the cache.
* Trashes:     a0, a2, a3, v0, v1, t0, t1, t2
******************************************************************************/
.global validate_dcache_data
.ent     validate_dcache_data
validate_dcache_data:
.set noreorder

    /* Nothing to do for an empty region.  Without this check the last-line
     * address computed below would lie before the first-line address and the
     * loop would only stop after the address wrapped around. */
    beqz        a1, 8f
    nop                             /* branch delay slot */

    /* convert KSEG0 address to physical address */
    li          t0, 0x1FFFFFFF
    and         a0, a0, t0

    /* loop constants */
    li          a2, DCACHE_LINE_SIZE            /* line size, e.g. 64 */
    li          a3, CP0_TAG_LO_PA_MASK          /* _MM_MAKEMASK(20,12): the upper 20 bits */
    li          v0, ~CP0_TAG_LO_Dirty_MASK      /* dirty mask is _MM_MAKEMASK1(7) */

    addu        t1, a0, a1          /* t1 = one past the last byte of the region */
    subu        t2, a2, 1
    not         t2                  /* t2 = ~(line size - 1), the line alignment mask */
    and         t0, a0, t2          /* t0 = address of the first line */
    addiu       t1, t1, -1          /* t1 = last byte of the region */
    and         t1, t2              /* t1 = address of the last line */
9:
    /* prepare the tag */
    and         v1, t0, a3          /* extract the phys address from t0 to v1 */
    and         v1, v1, v0          /* clear the dirty bit */
    ori         v1, v1, (TAG_VALID) /* mark the line valid; TAG_VALID is (1 << 6) */
    mtc0        v1, CP0_TAG_LO      /* store it in TAG_LO; CP0_TAG_LO is $28 */
    nop
    nop
    cache       Index_Store_Tag_D, 0(t0)
    bne         t0, t1, 9b          /* compares the line just written with the last line */
    addu        t0, a2              /* branch delay slot: advance to the next line */

8:
.set reorder
    jr    ra
    nop

.end validate_dcache_data

通过cache,这样sp实际访问的就是cache了,而不会去访问DDR。当然了,这样做的前提是cache必须工作在write-back模式,且只访问有限范围的内存空间,保证cache行不会被替换

/******************************************************************************
* Function: fill_I_cache_lines
*
* Description: Fills the I cache lines with the code from flash, also fill the
* cache tag with the physical address of the code.
*
* Arguments:
*    a0: Address to fetch code from (flash address)
*    a1: Address to fill the cache tag (Must be kseg0 address)
*    a2: Number of Bytes to copy.
*
* Trashes: a1, a2, t0, t1, t2, t3, t4, v0, v1
*
* Only assembled when DEF_NEW_TRGT is defined.
*****************************************************************************/
#ifdef DEF_NEW_TRGT

LEAF(fill_I_cache_lines)

.set noreorder

    /* convert KSEG0 address to physical address */

    li        t0, 0x1FFFFFFF
    and        a1, a1, t0
    
    /* extract the physical address and prepare the tag */
    
    li        t0, TAG_PA_MASK             //#define TAG_PA_MASK           0xfffff000
    li        t2, ~TAG_DIRTY                 //#define TAG_DIRTY             (1 << 7)
    li        t3, ICACHE_INDEX_MASK   //#define ICACHE_INDEX_MASK         (ICACHE_SIZE - 1)
    
    
    /* outer loop: one iteration per cache line */
1:    
    and        t1, a1, t0

    /* now t1 has the physical address, which is needed
     * to be stored in TAG
     */
         
    /* Calculate index and store it in t4 */
    
    and        t4, a1, t3          // note how the index is computed: it never exceeds the cache size


    /* set line to be valid and locked */
    /* NOTE(review): only TAG_VALID is ORed in here; whether that also locks
     * the line cannot be told from this code -- confirm. */
    
    or        t1 ,(TAG_VALID)  //#define TAG_VALID             (1 << 6)
    
    /* clear the dirty bit */    
    
    and        t1, t1, t2
    
    /* store t1 to TAGLO */
        
    mtc0    t1, CP0_TAG_LO
    nop
    nop

    /* now the tag is ready, fill the line  from flash */
    
    li        v1, ICACHE_LINE_SIZE
    
    /* inner loop: one iteration per 32-bit word, v1 = bytes left in this line */
2:
    lw        v0, 0(a0)
    add        v0, v0, zero
    
    /* move v0 to DATALO */
    
    .word 0x4082e001    # mtc0 v0,C0_TAGLO,1 => mtc0 v0,C0_DATALO
    nop
    nop                    # paranoia
    
    /* use index store operation to store the TAGLO and DATALO to cache */
.set push
.set mips3
    cache Index_Store_Tag_I, 0(t4)
.set pop
    nop
    nop
    
    /* TAG_LO is zeroed here and then loaded again by the cache op below */
    mtc0    zero, CP0_TAG_LO
    nop
    nop

.set push
.set mips3    
    cache Index_Load_Tag_I, 0(t4)      // the tag has to be loaded again
.set pop
    nop
    nop
    
    
    /* don't increment src address in a0 since reading from FIFO */
    //addiu        a0, 4
    
    /* increment the index by 4 to access the next word */
    addiu    t4, 4     // the index advances by 4 each time
    
    /* decrement word count */
    addi    v1, -4
    bgtz    v1, 2b
    nop
    
    /* increment dest addr in a1 */
    addiu    a1, ICACHE_LINE_SIZE
    
    /* decrement the copy size in a2 */
    addi    a2, -ICACHE_LINE_SIZE
    bgtz    a2, 1b
    nop
    
.set reorder

    jr        ra
    nop

END(fill_I_cache_lines)
#endif

fill_I_cache_lines可以将代码拷贝到cache中,在DDR还没初始化之前,从cache中运行代码
  • 0
    点赞
  • 7
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值