本文主要说一下OCTEON的启动过程。Cavium提供的SDK中有可用的u-boot,只需要针对自己的板子做简单的改写;不过这次不只是做了移植,还看了下启动的原理,研究了MIPS架构上的一些东西。
/*
 * Boot entry point and exception vector table (excerpt).
 *
 * The first slot transfers control to `reset`; the following slots route
 * the listed exception vectors to romExcHandle / debugHandler.
 * RVECENT and XVECENT are macros defined outside this excerpt.
 * NOTE(review): the `......` lines are not assembly; presumably they mark
 * source lines that were omitted from this excerpt.
 */
.globl _start
.text
_start:
RVECENT(reset,0) /* U-boot entry point */
/* The above jump instruction/nop are considered part of the
* bootloader_header_t structure but are not changed when the header is
* updated.
*/
/* Leave room for bootloader_header_t header at start of binary. This
* header is used to identify the board the bootloader is for, what
* address it is linked at, failsafe/normal, etc. It also contains a
* CRC of the entire image.
*/
/* Vectors start at offset 0x200, after the space left for the header. */
.org 0x200
XVECENT(romExcHandle,0x200) /* bfc00200: R4000 tlbmiss vector */
......
XVECENT(romExcHandle,0x280) /* bfc00280: R4000 xtlbmiss vector */
......
XVECENT(romExcHandle,0x300) /* bfc00300: R4000 cache vector */
......
XVECENT(romExcHandle,0x380) /* bfc00380: R4000 general vector */
......
XVECENT(romExcHandle,0x400) /* bfc00400: */
......
RVECENT(debugHandler,0x480) /* bfc00480: Debug vector*/
......
RVECENT(romReserved,159)
/* Reserve extra space so that when we use the boot bus local memory
* segment to remap the debug exception vector we don't overwrite
* anything useful
*/
/* NOTE(review): on MIPS gas `.align 8` is presumably a power-of-two
 * alignment (2^8 = 256 bytes) -- confirm for the toolchain in use.
 */
.align 8
/*
 * reset / asm_reset: first code run after the reset vector.
 * Sets Status bits 0xE0, then reads the core number from EBASE and
 * sends core 0 on to InitTLBStart_local.
 * Clobbers: k0, a0.
 */
.globl asm_reset
asm_reset:
reset:
nop
mfc0 k0, COP0_STATUS_REG
/* 0xE0 = Status bits 7..5 (KX/SX/UX in the MIPS64 Status layout). */
ori k0, 0x00E0 /* enable 64 bit mode for CSR access */
mtc0 k0, COP0_STATUS_REG
/* Check what core we are - if core 0, branch to init tlb
* loop in flash. Otherwise, look up address of init tlb
* loop that was saved in the boot vector block.
*/
mfc0 a0, COP0_EBASE_REG
andi a0, 0xFF /* get core */
beqz a0, InitTLBStart_local
nop /* branch delay slot */
/* Only reached when the core number is non-zero.
 * NOTE(review): the boot-vector lookup described above is not present in
 * this excerpt; the visible code simply traps with `break`.
 */
break
如果是core0的话,跳转到InitTLBStart_local:先配置scratch内存和栈指针,然后再进行TLB的设置。
/*
 * InitTLBStart / InitTLBStart_local: configure scratch memory through
 * CVMMEMCTL, point sp at the top of it and zero the whole region, so
 * that C code can run before DRAM is set up.
 * Clobbers: v0, t0, sp.
 */
.globl InitTLBStart
InitTLBStart:
InitTLBStart_local:
/* If we don't have working memory yet configure a bunch of
* scratch memory, and set the stack pointer to the top
* of it. This allows us to go to C code without having
* memory set up
*
* Warning: do not change SCRATCH_STACK_LINES as this can impact the
* transition from start.S to crti.asm. crti requires 590 bytes of
* stack space.
*/
#define SCRATCH_STACK_LINES 0x36 /* MAX is 0x36 */
dmfc0 v0, COP0_CVMMEMCTL_REG
/* Shift right then left by 9: zeroes the low 9 bits, keeps the rest. */
dsrl v0, 9
dsll v0, 9
/* setup SCRATCH_STACK_LINES scratch lines of scratch */
/* The line count goes into the low bits; bit 8 (0x100) is set as well. */
ori v0, 0x100 | SCRATCH_STACK_LINES
dmtc0 v0, COP0_CVMMEMCTL_REG
/* set stack to top of scratch memory */
/* Scratch starts at 0xffff8000; the `* 128` implies 128 bytes per line. */
li sp, 0xffff8000 + (SCRATCH_STACK_LINES * 128)
/* Clear scratch for CN63XX pass 2.0 errata Core-15169*/
li t0, 0xffff8000
/* Store zero 8 bytes at a time from 0xffff8000 until t0 reaches sp. */
clear_scratch:
sd zero, 0(t0)
addi t0, 8
bne t0, sp, clear_scratch
nop /* branch delay slot */
针对core0的TLB设置,方便core0从flash里面取bootloader的代码以启动其他的core。
/*
 * Generic per-core CP0 initialisation: clears the watch registers, CAUSE
 * and the timer, masks ST0_IE out of STATUS, sets Status bits 0xE0 and
 * programs PageGrain. Execution falls through to the TLB setup below.
 * Clobbers: k0, k1, a5, v0.
 */
/* This code run on all cores - core 0 from flash,
* the rest from DRAM. When booting from PCI, non-zero cores
* come directly here from the boot vector - no earlier code in this
* file is executed.
*/
/* Some generic initialization is done here as well, as we need this
* done on all cores even when booting from PCI
*/
/* Clear watch registers. */
mtc0 zero, CP0_WATCHLO
mtc0 zero, CP0_WATCHHI
/* STATUS register */
/* Clear the ST0_IE bit(s); ST0_IE is defined outside this excerpt
 * (presumably the interrupt-enable bit -- confirm).
 */
mfc0 k0, CP0_STATUS
li k1, ~ST0_IE
and k0, k1
mtc0 k0, CP0_STATUS
/* CAUSE register */
mtc0 zero, CP0_CAUSE
/* Init Timer */
dmtc0 zero, CP0_COUNT
dmtc0 zero, CP0_COMPARE
/* OR 0xE0 into STATUS again: per the comment above, cores booted from
 * PCI never ran the same step in the reset code.
 */
mfc0 a5, COP0_STATUS_REG
li v0,0xE0 /* enable 64 bit mode for CSR access */
or v0, v0, a5
mtc0 v0, COP0_STATUS_REG
/* PageGrain is written with only bit 29 set; every other bit becomes 0. */
dli v0, 1 << 29 /* Enable large physical address support in TLB */
mtc0 v0, COP0_PAGEGRAIN_REG
/*
 * InitTLB: prepare registers for the TLB clearing loop that follows.
 * On exit: v0 = first EntryHi value to try (0xFFFFFFFF90000000),
 *          a0 = highest TLB entry index,
 *          EntryLo0/1, PageMask, Context, XContext and Wired are zero.
 * Clobbers: v0, a0, a1.
 */
InitTLB:
dmtc0 zero, COP0_ENTRYLO0_REG
dmtc0 zero, COP0_ENTRYLO1_REG
mtc0 zero, COP0_PAGEMASK_REG
dmtc0 zero, COP0_CONTEXT_REG
/* Use an offset into kseg0 so we won't conflict with Mips1 legacy
* TLB clearing */
dli v0, 0xFFFFFFFF90000000
/* Config1 bits 30..25 are moved down to bits 5..0; the 0x3F mask is
 * applied in the delay slot of the branch below.
 */
mfc0 a0, COP0_CONFIG1_REG
srl a0, a0, 25
/* Check if config4 reg present */
mfc0 a1, COP0_CONFIG3_REG
bbit0 a1, 31, 2f /* Config3 bit 31 clear: skip the Config4 extension */
/* Branch delay slot: executed whether or not the branch is taken. */
and a0, a0, 0x3F /* a0 now has the max mmu entry index */
mfc0 a1, COP0_CONFIG4_REG
bbit0 a1, 14, 2f /* check config4[MMUExtDef] */
nop /* branch delay slot */
/* append config4[MMUSizeExt] to most significant bit of
* config1[MMUSize-1] */
/* ins: copy the low 8 bits of a1 into a0 bits 13..6. */
ins a0, a1, 6, 8
and a0, a0, 0x3fff /* a0 now includes max entries for cn6xxx */
2:
dmtc0 zero, COP0_XCONTEXT_REG
mtc0 zero, COP0_WIRED_REG
其他的core都在loop中
/*
 * InitTLBloop: overwrite every TLB entry, from index a0 down to 0.
 * In:  v0 = candidate EntryHi value, a0 = highest TLB index;
 *      EntryLo0/1 and PageMask were zeroed before entry.
 * Each pass probes the TLB for v0. If a matching entry already exists
 * the probe is repeated with the next v0 (v0 is advanced by 1<<13 on
 * every pass); otherwise the current EntryHi is written at index a0.
 * The nop runs separate the CP0 writes, tlbp/tlbwi and the following
 * reads (presumably CP0 hazard padding -- do not remove or reorder).
 * Clobbers: v0, v1, a0.
 */
InitTLBloop:
/* Work around TLBx parity error on some CN6XXX devices. */
/* Align to 64 bytes, zero fill, only if at most 48 pad bytes are needed. */
.balign 64,0,48
dmtc0 v0, COP0_ENTRYHI_REG
nop
nop
nop
tlbp
nop
nop
nop
nop
nop
nop
mfc0 v1, COP0_INDEX_REG
/* Advance v0 by 1<<13 for the next pass; done before the branch. */
daddiu v0, v0, 1<<13
/* Index >= 0 means the probe matched an entry: retry with the new v0.
 * (A failed probe sets Index bit 31, making v1 negative.)
 */
bgez v1, InitTLBloop
nop /* branch delay slot */
/* Work around TLBx parity error on some CN6XXX devices. */
.balign 64,0,48
nop
mtc0 a0, COP0_INDEX_REG
nop
nop
tlbwi
nop
nop
nop
nop
nop
nop
nop
nop
/* Loop until index 0 has been written; a0 is tested before the
 * decrement in the delay slot takes effect.
 */
bne a0, zero, InitTLBloop
addiu a0, -1 /* branch delay slot: next (lower) TLB index */
/*
 * Post-TLB core setup: optional debug GPIO write, HI/LO clear, STATUS
 * register programming, HWREna, CvmCtl and CvmMemCtl errata handling,
 * and clearing of the timer / performance counters.
 * Clobbers: v0, a4, a5, a6, HI, LO.
 */
#ifdef ENABLE_BOARD_DEBUG
/* Set GPIO output bits */
/* Stores 0x7e << 8 (0x7e00) to the address OCTEON_GPIO_TX_SET. */
dli a4, OCTEON_GPIO_TX_SET
li a5, 0x7e
sll a5, 8
sd a5, 0(a4)
#endif
mthi zero
mtlo zero
/* Set up status register */
mfc0 v0, COP0_STATUS_REG
li a4, 1 << 28 /* enable cop0 access */
or v0, a4
li a4, 1 << 30 /* enable cop2 access */
or v0, a4
/* Must leave BEV set here, as DRAM is not configured for core 0.
* Also, BEV must be 1 later on when the exception base address is set.*/
li a4, ~0xff00 /* mask all interrupts */
and v0, a4
/* Clear Status bits 7..0 before the final low-byte value is OR-ed in. */
li a4, ~0xff
and v0, a4
/* Clear NMI (used to start cores other than core 0) */
li a4, ~(1 << 19)
and v0, a4
/* 0xE5 = bits 7..5 plus bits 2 and 0.
 * NOTE(review): in the MIPS64 Status layout bit 2 is ERL and bit 0 is IE;
 * the interrupt mask bits (15..8) were cleared above -- confirm intent.
 */
ori v0, 0xE5 /* enable 64 bit, disable interrupts */
mtc0 v0, COP0_STATUS_REG
dli v0,0xC000000F /* enable all readhw locations */
mtc0 v0, COP0_HWRENA_REG
/* CvmCtl: read-modify-write; bit 13 is set only on the pass1 chip id. */
dmfc0 v0, COP0_CVMCTL_REG
mfc0 a4, COP0_PROC_ID_REG
li a5, 0x000d0000 /* Octeon pass1 chip id */
bne a4, a5, skip_icachetch_disable
nop /* branch delay slot */
/* disable icache prefetch - errata core 8 (pass1 only) */
ori v0, 1<<13
skip_icachetch_disable:
ori v0, 1<<14 /* enable fixup of unaligned mem access */
dmtc0 v0, COP0_CVMCTL_REG
/* Setup scratch memory. This is also done in
* cvmx_user_app_init, and this code will be removed
* from the bootloader in the near future.
*/
/* The WBTHRESH change below is applied only when
 * 0x000d9000 <= chip id < (0x000d9000 | 8); all other ids jump to 71
 * and write CvmMemCtl back unmodified.
 */
dmfc0 v0, COP0_CVMMEMCTL_REG
mfc0 a4, COP0_PROC_ID_REG
li a5, 0x000d9000 /* presumably Octeon cn63xx pass1 chip id (cf. pass2 below) -- confirm */
bgt a5, a4, 71f
nop /* branch delay slot */
ori a6, a5, 8 /* Octeon cn63xx pass2 chip id */
bge a4, a6, 71f
nop /* branch delay slot */
li a6, 4
/* ins: write the low 4 bits of a6 (value 4) into v0 bits 14..11. */
ins v0, a6, 11, 4 /* Set WBTHRESH=4 as per Core-14752 errata */
71:
dmtc0 v0, COP0_CVMMEMCTL_REG
/* clear these to avoid immediate interrupt in noperf mode */
dmtc0 zero, COP0_COMPARE_REG /* clear timer interrupt */
dmtc0 zero, COP0_COUNT_REG /* clear timer interrupt */
dmtc0 zero, COP0_PERF_CNT0_REG/* clear perfCnt0 */
dmtc0 zero, COP0_PERF_CNT1_REG/* clear perfCnt1 */
/* Set up TLB mappings for u-boot code in flash. */
里面有一个设置:
ins v0, a6, 11, 4 /* Set WBTHRESH=4 as per Core-14752 errata */
这是针对63XX的一个BUG workaround。看来这个问题在6系列的OCTEON II CPU上存在已久,WBTHRESH有设置成10的,但以前一般都是设置成4。这个值确实能够影响到write buffer,但由于无法了解Cavium write buffer的实现细节,它具体是怎么影响系统的就不得而知了。大概看了下文档上的介绍,也只能知其然而不知其所以然。
/* Start of GP hack. This needs to be done once properly for all code.
* old relocation hacks need to be removed.
*/
/*
 * Computes the runtime address of the GOT into gp, independent of where
 * the image is actually running:
 *   gp = runtime_addr(word0) + (link_addr(GOT) - link_addr(word0))
 * where word0 is the first .word placed after the bal's delay slot.
 * Clobbers: ra, gp, a5, a6.
 */
/* Branch and link to get current PC in ra */
bal 2f
nop /* branch delay slot */
/* This contains the linked address of the GOT */
.word _GLOBAL_OFFSET_TABLE_
/* The ra register now contains the runtime address of the above
* memory location */
/* This contains the link time address of the previous word, */
.word . - 4
2:
move gp, ra /* Move current PC into gp register */
lw a5, 0(ra) /* Load linked address of the GOT into a5 */
lw a6, 4(ra) /* Load the link time address of the GOT
* storage location into a6
*/
sub a5, a6 /* Subtract a6 from a5. */
/* a5 now contains the difference between the link-time GOT table
* address and the link time expected PC
*/
/* Add this difference to the current PC (copied into gp above) so
* that gp now has the current runtime GOT table address
*/
daddu gp, a5 # calculate current location of offset table
/* End of GP hack. */
relocation在bootloader里面有点难度
本文地址:http://www.tech4cloud.com/2012/09/01/octeon-startup