下面的代码从EL3开始运行,在ARM DS-5 Development Studio上跑;
EL3部分需要提前做一些异常向量、打开EL2权限以及GICv3的设置;因为要做从EL2出发的小内核,同时还没有搞好GIC的处理,所以先不给出,后续再做详述。
此段启动代码主要来自于Xen的head.S文件,做了一些裁剪,添加了大量注解(用的编辑器输入中文很别扭,所以用很蹩脚的英文做的注解),主要完成了:
1. 使能了uart
2. 使能MMU前对各寄存器的准备
3. 构建起启动阶段的页表,包括uart地址
4. 使能了MMU,并建起小栈,切换到C代码
在EL3简要设置之后,设置SPSR_EL3,eret便使PE跳入到EL2
/*
 * drop_to_el2 - leave EL3 and resume execution at el2_entry_aarch64.
 *
 * Programs ELR_EL3 with the target address and SPSR_EL3 with the target
 * state, then performs the exception return.
 * The AARCH64_SPSR_* constants are defined outside this section; going by
 * their names they select EL2h with FIQ, IRQ and SError masked.
 * Clobbers: x1
 */
        .global drop_to_el2
drop_to_el2:
        /* Where the eret below lands. */
        adr     x1, el2_entry_aarch64
        msr     ELR_EL3, x1

        /* Processor state to be installed by the eret. */
        mov     x1, #(AARCH64_SPSR_EL2h | AARCH64_SPSR_F | AARCH64_SPSR_I | AARCH64_SPSR_A)
        msr     SPSR_EL3, x1

        isb
        eret
/*
 * PL011 UART initialisation (adapted from Xen).
 * xb: register which contains the UART base address
 * c:  scratch register number (x\c is clobbered)
 *
 * The baud divisor values are hard-coded: fraction 0x9, integer 0x27.
 */
.macro early_uart_init xb, c
        mov     w\c, #0x9
        strh    w\c, [\xb, #0x28]       /* -> UARTFBRD (Baud divisor fraction) */
        mov     w\c, #0x27
        strh    w\c, [\xb, #0x24]       /* -> UARTIBRD (Baud divisor integer) */
        mov     w\c, #0x60              /* 8n1 */
        str     w\c, [\xb, #0x2C]       /* -> UARTLCR_H (Line control) */
        mov     w\c, #0x301             /* RXE | TXE | UARTEN */
        str     w\c, [\xb, #0x30]       /* -> UARTCR (Control Register) */
.endm
/*
 * PL011 UART: spin until the UART is ready to transmit.
 * xb: register which contains the UART base address
 * c:  scratch register number (w\c is clobbered, condition flags change)
 *
 * Defines the numeric local label 1, so a "1b" written after this macro
 * resolves to the poll loop below.
 */
.macro early_uart_ready xb, c
1:
        ldrh    w\c, [\xb, #0x18]       /* w\c = UARTFR (Flag register) */
        tst     w\c, #0x8               /* BUSY flag is bit 3 */
        b.ne    1b                      /* still busy: poll again */
.endm
/*
 * PL011 UART: send one character.
 * xb: register which contains the UART base address
 * wt: 32-bit register whose low byte is the character to transmit
 */
.macro early_uart_transmit xb, wt
        strb    \wt, [\xb]              /* -> UARTDR (Data Register, offset 0) */
.endm
/*
 * init_uart - program the PL011 and print a banner.
 * In:       x23 = early UART base address
 * Clobbers: x0-x1
 *
 * Ends with a plain branch to puts (a tail call), so the "ret" inside puts
 * returns directly to whoever called init_uart.
 */
init_uart:
        early_uart_init x23, 0
        adr     x0, 1f                  /* x0 = address of the banner below */
        b       puts
1:      .asciz  "- UART enabled -\r\n"
        .align  4
/*
 * puts - print a NUL-terminated string on the early UART.
 * In:       x0  = address of the string
 *           x23 = early UART base address
 * Clobbers: x0-x1, condition flags
 *
 * One loop iteration per character: wait for the UART, fetch the next
 * byte (post-incrementing x0), stop on NUL, otherwise send it.
 */
puts:
        early_uart_ready x23, 1
        ldrb    w1, [x0], #1            /* w1 = *x0++ */
        cbz     w1, 1f                  /* NUL terminator: done */
        early_uart_transmit x23, w1
        b       puts
1:
        ret
/*
 * putn - print the low 32 bits of x0 as eight lower-case hex digits.
 * Specific to the PL011 UART.
 * In:       x0  = number to print
 *           x23 = early UART base address
 * Clobbers: x0-x3, condition flags
 *
 * Each pass emits the nybble in bits [31:28] and then shifts x0 left by
 * four, so digits come out most-significant first.
 */
putn:
        adr     x1, hex                 /* x1 = digit lookup table */
        mov     x3, #8                  /* x3 = digits left to print */
1:
        early_uart_ready x23, 2
        ubfx    x2, x0, #28, #4         /* x2 = bits [31:28] of x0 */
        ldrb    w2, [x1, x2]            /* nybble -> ASCII digit */
        early_uart_transmit x23, w2
        lsl     x0, x0, #4              /* bring the next nybble to the top */
        subs    x3, x3, #1
        b.ne    1b                      /* binds to the poll label inside early_uart_ready */
        ret

hex:    .ascii  "0123456789abcdef"
        .align  2
/*
 * PRINT(_s) - print the string literal _s on the early UART via puts.
 *
 * The string is emitted inline in the instruction stream (local label 98)
 * and execution branches over it to local label 99.
 * Because it calls puts with "bl", it clobbers x0, x1 (see puts) and x30.
 * Requires x23 to hold the UART base address, as puts does.
 */
#define PRINT(_s) \
adr x0, 98f ; \
bl puts ; \
b 99f ; \
98: .asciz _s ; \
.align 2 ; \
99:
/*
 * load_paddr xb, sym - xb = physical address of sym.
 *
 * Loads the link-time address of sym from the literal pool and adds x20,
 * which the entry code sets to (run-time address of start - link-time
 * address of start).
 */
.macro load_paddr xb, sym
        ldr     \xb, =\sym              /* link-time address */
        add     \xb, \xb, x20           /* + phys-offset */
.endm
/*
 * el2_entry_aarch64 - boot CPU entry point (target of drop_to_el2).
 *
 * In:   x0  = value handed over by the previous stage; it is later passed
 *             to start_xen as its second argument (labelled FDT there).
 * Sets: x19 = run-time address of start
 *       x20 = phys-offset (run-time minus link-time address of start)
 *       x21 = saved x0
 *       x22 = 0 (this is the boot CPU)
 *       x26 = 0 (do not skip zeroing BSS)
 */
        .global el2_entry_aarch64
        .type   el2_entry_aarch64, "function"
el2_entry_aarch64:
        mov     x26, xzr
        msr     DAIFSet, 0xf            /* mask D, A, I and F */

        mov     x21, x0                 /* keep the boot argument safe */

        ldr     x0, =start              /* link-time address */
        adr     x19, start              /* PC-relative: where we really are */
        sub     x20, x19, x0

        mov     x22, xzr
        b       common_start
/*
 * init_secondary - entry point for secondary CPUs.
 *
 * Sets: x19 = run-time address of start
 *       x20 = phys-offset (run-time minus link-time address of start)
 *       x22 = 1 (secondary CPU)
 *       x26 = 1 (skip zeroing BSS)
 * There is no branch at the end: execution falls straight through into
 * common_start, which must therefore directly follow this code.
 */
        .global init_secondary
        .type   init_secondary, "function"
init_secondary:
        msr     DAIFSet, 0xf            /* mask D, A, I and F */

        ldr     x0, =start              /* link-time address */
        adr     x19, start              /* PC-relative: where we really are */
        sub     x20, x19, x0

        mov     x22, #1
        mov     x26, #1
/*
 * common_start - boot path shared by the boot CPU and secondary CPUs.
 *
 * In:   x19 = run-time address of start, x20 = phys-offset,
 *       x22 = 0 on the boot CPU / 1 on a secondary CPU,
 *       x26 = non-zero to skip zeroing BSS.
 * Sets: x24 = CPU id, x23 = UART base address, x4 = CurrentEL.
 */
common_start:
        /*
         * CPU id: 0 when MPIDR_EL1.U (bit 30, uniprocessor) is set, otherwise
         * the affinity fields of MPIDR_EL1 (Aff0 bits 7:0, Aff1 bits 15:8,
         * Aff2 bits 23:16, Aff3 bits 39:32), i.e. MPIDR_EL1 & 0xff00ffffff.
         */
        mov     x24, xzr
        mrs     x0, mpidr_el1
        tbnz    x0, 30, 1f
        ldr     x13, =(~0xff00ffffff)
        bic     x24, x0, x13            /* clear everything outside the affinity fields */
1:
        /* The boot CPU (x22 == 0) does not wait. */
        cbz     x22, 1f

        /*
         * Secondary CPU: sleep until the variable smp_up_cpu (defined
         * outside this section, accessed through its physical address)
         * holds this CPU's id.
         */
        load_paddr x0, smp_up_cpu
        dsb     sy
2:
        ldr     x1, [x0]
        cmp     x1, x24
        beq     1f
        wfe
        b       2b
1:
        ldr     x23, =0x1c090000        /* early UART base address */
        cbnz    x22, 1f
        bl      init_uart               /* only the boot CPU programs the UART */
1:
        PRINT("- CPU ")
        mov     x0, x24
        bl      putn
        PRINT(" booting -\r\n")

        PRINT("- Current EL ")
        mrs     x4, CurrentEL           /* x4 survives putn, which clobbers x0-x3 only */
        mov     x0, x4
        bl      putn
        PRINT(" -\r\n")

        /*
         * Accept x4 == 0x8 or x4 == 0x9.
         * If the cmp says "not equal", ccmp performs the second comparison;
         * otherwise ccmp loads NZCV with #0x4 (Z set). Either way b.eq is
         * taken when one of the two values matched.
         * CurrentEL reports the exception level in bits [3:2], so EL2 is 0x8.
         */
        cmp     x4, #0x8
        ccmp    x4, #0x9, #0x4, ne
        b.eq    el2                     /* yes, running at EL2 */

        /* Not at EL2: report and stop. */
        PRINT("- Xen must be entered in NS EL2 mode -\r\n")
        PRINT("- Please update the bootloader -\r\n")
        b       fail
/*
 * el2 - reached once the exception-level check has passed.
 *
 * Zeroes the region [__bss_start__, __bss_end__) through its physical
 * address, eight bytes per store, unless x26 is non-zero.
 * The loop stores before it compares, so at least one doubleword is
 * always written.
 * Clobbers: x0, x1, x30, condition flags
 */
el2:
        PRINT("- Xen starting at EL2 -\r\n")

        cbnz    x26, skip_bss           /* zero BSS only when requested */

        PRINT("- Zero BSS -\r\n")
        load_paddr x0, __bss_start__    /* x0 = cursor */
        load_paddr x1, __bss_end__      /* x1 = end (exclusive) */
1:
        str     xzr, [x0], #8
        cmp     x0, x1
        b.lo    1b
/*
 * skip_bss - program the EL2 translation control registers.
 *
 * Writes MAIR_EL2, TCR_EL2 and SCTLR_EL2 (MMU still off) and selects
 * SP_ELx as the stack pointer.
 * Clobbers: x0, x1, x30
 */
skip_bss:
        PRINT("- Setting up control registers -\r\n")

        /*
         * MAIR_EL2 holds eight 8-bit memory attribute encodings. The
         * AttrIndx field (bits 4:2) of a block/page descriptor selects one.
         *
         *   index  bits    value  meaning
         *     0     7:0    0x00   Device-nGnRnE (no gathering, no
         *                         re-ordering, no early write acknowledge)
         *     1    15:8    0x44   Normal, non-cacheable
         *     2    23:16   0xaa   Normal, write-through
         *     3    31:24   0x04   Device-nGnRE (early write acknowledge
         *                         allowed)
         *     4    39:32   0x00   Device-nGnRnE
         *     5    47:40   0x00   Device-nGnRnE (unused)
         *     6    55:48   0x00   Device-nGnRnE (unused)
         *     7    63:56   0xff   Normal, write-back, read/write-allocate
         */
#define MAIRVAL 0xff00000004aa4400
        ldr     x0, =MAIRVAL
        msr     mair_el2, x0

        /*
         * TCR_EL2:
         *   bit 31, bit 23  RES1
         *   SH0   (13:12) = 0b11  table walks are Inner Shareable
         *                         (0b00 non-shareable, 0b10 outer shareable)
         *   ORGN0 (11:10) = 0b01  walks are Outer write-back,
         *                         read/write-allocate cacheable
         *   IRGN0 (9:8)   = 0b01  same for the Inner cacheability
         *   T0SZ  (5:0)   = 16    the TTBR0_EL2 region is 2^(64-16) bytes,
         *                         i.e. a 48-bit virtual address space,
         *                         which is what the 4-level tables built
         *                         below describe.
         *
         * T0SZ used to be 12 here. That would request 64-12 = 52 bits,
         * which is below the architectural minimum of 16 for the 4KB
         * granule; the result is IMPLEMENTATION DEFINED (treated as 16 or a
         * level 0 translation fault).
         */
        ldr     x0, =((1<<31)|(1<<23)|(3<<12)|(1<<10)|(1<<8)|(64-48))

        /*
         * PS (18:16) = ID_AA64MMFR0_EL1.PARange[2:0], the physical address
         * size supported by this CPU:
         *   0b000 32 bits (4GB)     0b011 42 bits (4TB)
         *   0b001 36 bits (64GB)    0b100 44 bits (16TB)
         *   0b010 40 bits (1TB)     0b101 48 bits (256TB)
         */
        mrs     x1, ID_AA64MMFR0_EL1
        bfi     x0, x1, 16, 3
        msr     tcr_el2, x0

        /*
         * SCTLR_EL2 = 0x30c51878:
         *   bit 25 EE  = 0  little-endian exceptions and table walks
         *   bit 19 WXN = 0  write-implies-execute-never disabled (for now)
         *   bit 12 I   = 1  instruction cache enabled
         *   bit  2 C   = 0  data cache disabled (for now)
         *   bit  1 A   = 0  alignment checking disabled
         *   bit  0 M   = 0  MMU disabled (for now)
         */
        ldr     x0, =(0x30c51878)
        msr     SCTLR_EL2, x0

        /*
         * SPSel = 1: use SP_ELx at exception level x
         * (0 would select SP_EL0 at every exception level).
         */
        msr     spsel, #1
/*
 * Boot page tables, stage 1: level 0 (boot_pgtable).
 *
 * Virtual memory layout borrowed from Xen (not fully realised here):
 *   0  - 2M    unmapped
 *   2M - 4M    text, data, bss
 *   4M - 6M    fixmap: special-purpose 4K mapping slots
 *   6M - 10M   early boot mapping of the FDT
 *   10M - 12M  early relocation address
 *   1G - 2G    VMAP: ioremap and early_ioremap
 *   32G - 64G  frametable
 *   L0 slots 256..265: 1:1 mapping of RAM
 *
 * 4KB-granule address split, 9 index bits (512 entries) per level:
 *   L0 = VA bits 47:39, L1 = 38:30, L2 = 29:21, L3 = 20:12, offset = 11:0
 *
 * Descriptor attribute bits used by the constants below:
 *   bit 11   nG   not-global
 *   bit 10   AF   access flag (set, so first use does not fault)
 *   bits 9:8 SH   shareability (0b11 inner, 0b10 outer)
 *   bits 7:6 AP   access permissions (0b01 = read/write)
 *   bit 5    NS   non-secure
 *   bits 4:2 AttrIndx, selects one of the eight MAIR_EL2 attributes
 *   bit 1    1 = table (page at level 3), 0 = block
 *   bit 0    valid
 *   0xf7f = nG AF SH=11 AP=01 NS AttrIndx=7 table valid
 *   0xf7d = same, but a block mapping
 *
 * x25 is the "identity map exists" flag. It starts as 1 only when the
 * image already runs at its link address 0x200000 (2M).
 */
        cmp     x19, #0x200000
        cset    x25, eq

        /* TTBR0_EL2 = physical address of the level 0 table. */
        load_paddr x4, boot_pgtable
        msr     TTBR0_EL2, x4

        /* boot_pgtable[0] -> boot_first (table descriptor). The table is
         * 4KB aligned, so its low 12 bits are free for the attributes. */
        load_paddr x1, boot_first
        mov     x3, #0xf7f
        orr     x2, x1, x3
        str     x2, [x4, #0]

        /* x1 = level 0 slot of the load address. Slot 0 is handled by the
         * later stages through boot_first / boot_second. */
        lsr     x1, x19, #(12+9+9+9)
        cbz     x1, 1f

        /*
         * Level 0 cannot hold block mappings, so hang a second level 1
         * table (boot_first_id) off that slot ...
         */
        load_paddr x2, boot_first_id
        mov     x3, #0xf7f
        orr     x2, x2, x3
        lsl     x1, x1, #3              /* slot -> byte offset (8-byte entries) */
        str     x2, [x4, x1]

        /* ... and identity-map the 1GB block containing the load address
         * inside boot_first_id. */
        load_paddr x4, boot_first_id
        lsr     x1, x19, #(12+9+9)
        lsl     x2, x1, #(12+9+9)       /* x2 = 1GB-aligned base address */
        mov     x3, 0xf7d
        orr     x2, x2, x3
        and     x1, x1, #((1<<9)-1)     /* level 1 slot */
        lsl     x1, x1, #3              /* slot -> byte offset */
        str     x2, [x4, x1]
        mov     x25, #1                 /* identity map is now in place */
1:
        /*
         * Boot page tables, stage 2: level 1 (boot_first).
         * boot_first[0] -> boot_second (table descriptor, 0xf7f).
         */
        load_paddr x4, boot_first
        load_paddr x1, boot_second
        mov     x3, 0xf7f
        orr     x2, x1, x3
        str     x2, [x4, #0]

        /* Nothing more to do here if the identity map already exists. */
        cbnz    x25, 1f

        /* x1 = level 1 slot of the load address. Slot 0 is occupied by
         * boot_second, so that case is left to the next stage. */
        lsr     x2, x19, #(12+9+9)
        and     x1, x2, #((1<<9)-1)
        cbz     x1, 1f

        /* Identity-map the 1GB block containing the load address
         * (block descriptor, 0xf7d). */
        lsl     x2, x2, #(12+9+9)       /* x2 = 1GB-aligned base address */
        mov     x3, #0xf7d
        orr     x2, x2, x3
        lsl     x1, x1, #3              /* slot -> byte offset */
        str     x2, [x4, x1]
        mov     x25, #1                 /* identity map is now in place */
1:
        /*
         * Boot page tables, stage 3: level 2 (boot_second).
         *
         * Resulting chain for the image's virtual address range 2M-4M:
         *   boot_pgtable[0] -> boot_first[0] -> boot_second[1] -> boot_third
         */
        load_paddr x4, boot_second
        load_paddr x1, boot_third
        mov     x3, #0xf7f              /* table descriptor */
        orr     x2, x1, x3
        str     x2, [x4, #8]            /* slot 1 = VA 2M-4M */

        /* Nothing more to do here if the identity map already exists. */
        cbnz    x25, 1f

        /*
         * The load address lies in the first 1GB. x1 = its level 2 slot.
         * Slot 1 is already taken by boot_third, so a load address inside
         * 2M-4M (other than exactly 2M, handled via x25) cannot be
         * identity-mapped.
         */
        lsr     x2, x19, #(12+9)
        and     x1, x2, #((1<<9)-1)
        cmp     x1, #1
        b.eq    virtphys_clash

        /* Identity-map the 2MB block containing the load address
         * (block descriptor, 0xf7d). */
        lsl     x2, x2, #(12+9)         /* x2 = 2MB-aligned base address */
        mov     x3, #0xf7d
        orr     x2, x2, x3
        /*
         * Scale the slot number in x1 into a byte offset. The destination
         * must be x1: writing the result to x2 would replace the descriptor
         * with the offset and store it at an unscaled address.
         */
        lsl     x1, x1, #3
        str     x2, [x4, x1]
        mov     x25, #1                 /* identity map is now in place */
1:
        /*
         * Boot page tables, stage 4: level 3 (boot_third).
         *
         * Fills all 512 entries with consecutive 4KB pages starting at the
         * page-aligned load address, so VA 2M-4M maps the loaded image.
         * 0xf7f at level 3 is a valid page descriptor.
         */
        load_paddr x4, boot_third
        lsr     x2, x19, #12
        lsl     x2, x2, #12             /* x2 = page-aligned load address */
        mov     x3, #0xf7f
        orr     x2, x2, x3

        mov     x1, xzr                 /* x1 = byte offset of the current entry;
                                         * it must start at 0 for the loop below */
1:      str     x2, [x4, x1]
        add     x2, x2, #(1<<12)        /* next page */
        add     x1, x1, #8              /* next entry */
        cmp     x1, #((1<<9)<<3)        /* 512 entries of 8 bytes */
        b.lt    1b

        /* All tables are built. Without an identity map the MMU cannot be
         * turned on safely. */
        cbnz    x25, 1f
        PRINT("unable to build boot page tables, fail on indentify map this bin. \r\n")
        b       fail

virtphys_clash:
        /* The identity map would collide with boot_third. This is fatal, so
         * do not fall through into enabling the MMU. */
        PRINT("unable to build boot page tables, virt and phys addresses clash. \r\n")
        b       fail
1:
PRINT("Turnning on paging \r\n");
tlbi alle2 /* flush all el2 tbls */
dsb nsh /* dsb: data synchronization barrier,
* nsh: non-shareable, any load/store-load/store,
* for compare, nshld is non-shareable load-load, load-store
*/
ldr x1,=paging /* load paging label's link address */
mrs x2, SCTLR_EL2
orr x0,x0,#1 /* bit0 M in SCTLR_EL2 is enable(1) or disable(0) MMU */
orr x0,x0,#2 /* bit2 C in SCTLR_EL2 is enable(1) or disable(0) data cache */
dsb sy /* sy means dsb range is full system load/store-load/store */
msr SCTLR_EL2, x0 /* for now, paging is enabled */
isb /* flush all icache */
br x1 /* jump to address in x1(vaddr switch) */
/*
 * paging - first code that runs at its link-time (virtual) address.
 *
 * Installs the fixmap used for the early console, switches x23 to the
 * UART's virtual address and, on secondary CPUs, switches to the page
 * tables whose base is stored in init_ttbr.
 * Clobbers: x0-x4, x23, x30
 */
paging:
        dsb     sy

        /* Only the boot CPU fills in the fixmap table itself. */
        cbnz    x22, 1f

        /*
         * xen_fixmap[0] = UART page.
         * 0xe73 = nG AF SH=10 (outer shareable) AP=01 NS AttrIndx=4
         *         (Device-nGnRnE in MAIRVAL) page valid.
         * Link-time addresses are usable now that the MMU is on.
         */
        ldr     x1, =xen_fixmap
        lsr     x2, x23, #12
        lsl     x2, x2, #12             /* x2 = 4KB-aligned paddr of the UART */
        mov     x3, #0xe73
        orr     x2, x2, x3
        str     x2, [x1, #0]
1:
        /*
         * Hook xen_fixmap into boot_second at the slot covering VA 4M-6M.
         * The descriptor needs the physical address of xen_fixmap, so the
         * attribute bits go into x3 and leave x2 intact.
         */
        ldr     x4, =boot_second
        load_paddr x2, xen_fixmap
        mov     x3, #0xf7f              /* table descriptor */
        orr     x2, x2, x3
        ldr     x1, =(0x400000)
        lsr     x1, x1, #(12+9-3)       /* (VA >> 21) * 8 = byte offset of the slot */
        str     x2, [x4, x1]

        /*
         * From now on reach the UART through its fixmap virtual address:
         * 0x400000 (fixmap slot 0) + page offset of 0x1c090000 (0).
         * The physical address 0x1c090000 is not a mapped virtual address
         * in the tables built above.
         */
        ldr     x23, =0x400000

        /*
         * Flush the TLB in case the identity map clashes with the new
         * mappings, and wait for the flush to complete before the UART is
         * used through the new mapping.
         */
        dsb     sy                      /* page-table writes are complete */
        isb
        tlbi    alle2
        dsb     sy                      /* TLB invalidation is complete */
        isb

        PRINT("- Ready -\r\n")

        cbz     x22, launch             /* the boot CPU goes straight to C */

        /*
         * Secondary CPUs: TTBR0_EL2 = the value stored in init_ttbr
         * (the variable's contents, not its address).
         */
        ldr     x4, =init_ttbr
        ldr     x4, [x4]
        dsb     sy
        msr     TTBR0_EL2, x4
        dsb     sy
        isb
        tlbi    alle2
        dsb     sy                      /* TLB invalidation is complete */
        isb
/* Offset of the stack pointer field in struct init_info { stack; cpuid }. */
#define INITINFO_stack  0
/* Size of a boot stack: 16 pages of 4KB (64KB). */
#define STACK_SIZE      ((1<<12)<<4)
/*
 * sizeof(struct cpu_info), the per-CPU state kept at the top of the stack.
 * According to the original notes:
 *   struct cpu_info {
 *       struct cpu_user_regs guest_cpu_user_regs;
 *       ulong elr;
 *       uint flags;
 *   }
 *   struct cpu_user_regs {
 *       the general registers (AArch64);
 *       sp; pc; cpsr; hsr;
 *       spsr_el1 / spsr_svc (AArch64 / AArch32);
 *       pad;
 *       spsr_fiq/irq/und/abt (AArch32);
 *       sp_el0, sp_el1, elr_el1 (AArch64);
 *   }
 */
#define CPUINFO_sizeof  336

/*
 * launch - set up the stack and enter C.
 *
 * sp = init_data.stack + STACK_SIZE - CPUINFO_sizeof, i.e. the stack grows
 * down from just below the cpu_info record at the top of the stack area.
 * Arguments handed to C: x0 = phys-offset, x1 = saved boot argument (FDT),
 * x2 = CPU id. Does not return.
 */
launch:
        ldr     x0, =init_data
        ldr     x0, [x0, #INITINFO_stack]       /* x0 = base of the boot stack */
        add     x0, x0, #STACK_SIZE             /* top of the stack area */
        sub     x0, x0, #CPUINFO_sizeof         /* leave room for cpu_info */
        mov     sp, x0

        mov     x0, x20                 /* phys-offset */
        mov     x1, x21                 /* FDT */
        mov     x2, x24                 /* CPU id */

        cbnz    x22, 1f
        b       start_xen               /* boot CPU */
1:
        b       start_secondary         /* secondary CPU */
/*
 * fail - fatal boot error: print a message, then park this CPU forever.
 * Does not return.
 */
fail:
        PRINT("- Boot failed -\r\n")
1:
        wfe
        b       1b