kernel版本:5.10.129
进入内核的时候,arm64处理器的异常级别可能是1或者2,函数el2_setup的主要工作如下:
1、如果异常级别是1,那么在异常级别1执行内核
2、如果异常级别是2, 那么根据处理器是否支持虚拟化宿主扩展(Virtuallization Host Extensions,VHE),决定是否需要降级到异常级别
具体实现代码如下:
/*
* If we're fortunate enough to boot at EL2, ensure that the world is
* sane before dropping to EL1.
*
* Returns either BOOT_CPU_MODE_EL1 or BOOT_CPU_MODE_EL2 in w0 if
* booted in EL1 or EL2 respectively.
*/
SYM_FUNC_START(el2_setup)
msr SPsel, #1 // We want to use SP_EL{1,2}
mrs x0, CurrentEL
cmp x0, #CurrentEL_EL2
b.eq 1f
mov_q x0, (SCTLR_EL1_RES1 | ENDIAN_SET_EL1)
msr sctlr_el1, x0
mov w0, #BOOT_CPU_MODE_EL1 // This cpu booted in EL1
isb
ret
1: mov_q x0, (SCTLR_EL2_RES1 | ENDIAN_SET_EL2)
msr sctlr_el2, x0
#ifdef CONFIG_ARM64_VHE
/*
* Check for VHE being present. For the rest of the EL2 setup,
* x2 being non-zero indicates that we do have VHE, and that the
* kernel is intended to run at EL2.
*/
mrs x2, id_aa64mmfr1_el1
ubfx x2, x2, #ID_AA64MMFR1_VHE_SHIFT, #4
#else
mov x2, xzr
#endif
/* Hyp configuration. */
mov_q x0, HCR_HOST_NVHE_FLAGS
cbz x2, set_hcr
mov_q x0, HCR_HOST_VHE_FLAGS
set_hcr:
msr hcr_el2, x0
isb
/*
* Allow Non-secure EL1 and EL0 to access physical timer and counter.
* This is not necessary for VHE, since the host kernel runs in EL2,
* and EL0 accesses are configured in the later stage of boot process.
* Note that when HCR_EL2.E2H == 1, CNTHCTL_EL2 has the same bit layout
* as CNTKCTL_EL1, and CNTKCTL_EL1 accessing instructions are redefined
* to access CNTHCTL_EL2. This allows the kernel designed to run at EL1
* to transparently mess with the EL0 bits via CNTKCTL_EL1 access in
* EL2.
*/
cbnz x2, 1f
mrs x0, cnthctl_el2
orr x0, x0, #3 // Enable EL1 physical timers
msr cnthctl_el2, x0
1:
msr cntvoff_el2, xzr // Clear virtual offset
#ifdef CONFIG_ARM_GIC_V3
/* GICv3 system register access */
mrs x0, id_aa64pfr0_el1
ubfx x0, x0, #ID_AA64PFR0_GIC_SHIFT, #4
cbz x0, 3f
mrs_s x0, SYS_ICC_SRE_EL2
orr x0, x0, #ICC_SRE_EL2_SRE // Set ICC_SRE_EL2.SRE==1
orr x0, x0, #ICC_SRE_EL2_ENABLE // Set ICC_SRE_EL2.Enable==1
msr_s SYS_ICC_SRE_EL2, x0
isb // Make sure SRE is now set
mrs_s x0, SYS_ICC_SRE_EL2 // Read SRE back,
tbz x0, #0, 3f // and check that it sticks
msr_s SYS_ICH_HCR_EL2, xzr // Reset ICC_HCR_EL2 to defaults
3:
#endif
/* Populate ID registers. */
mrs x0, midr_el1
mrs x1, mpidr_el1
msr vpidr_el2, x0
msr vmpidr_el2, x1
#ifdef CONFIG_COMPAT
msr hstr_el2, xzr // Disable CP15 traps to EL2
#endif
/* EL2 debug */
mrs x1, id_aa64dfr0_el1
sbfx x0, x1, #ID_AA64DFR0_PMUVER_SHIFT, #4
cmp x0, #1
b.lt 4f // Skip if no PMU present
mrs x0, pmcr_el0 // Disable debug access traps
ubfx x0, x0, #11, #5 // to EL2 and allow access to
4:
csel x3, xzr, x0, lt // all PMU counters from EL1
/* Statistical profiling */
ubfx x0, x1, #ID_AA64DFR0_PMSVER_SHIFT, #4
cbz x0, 7f // Skip if SPE not present
cbnz x2, 6f // VHE?
mrs_s x4, SYS_PMBIDR_EL1 // If SPE available at EL2,
and x4, x4, #(1 << SYS_PMBIDR_EL1_P_SHIFT)
cbnz x4, 5f // then permit sampling of physical
mov x4, #(1 << SYS_PMSCR_EL2_PCT_SHIFT | \
1 << SYS_PMSCR_EL2_PA_SHIFT)
msr_s SYS_PMSCR_EL2, x4 // addresses and physical counter
5:
mov x1, #(MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT)
orr x3, x3, x1 // If we don't have VHE, then
b 7f // use EL1&0 translation.
6: // For VHE, use EL2 translation
orr x3, x3, #MDCR_EL2_TPMS // and disable access from EL1
7:
msr mdcr_el2, x3 // Configure debug traps
/* LORegions */
mrs x1, id_aa64mmfr1_el1
ubfx x0, x1, #ID_AA64MMFR1_LOR_SHIFT, 4
cbz x0, 1f
msr_s SYS_LORC_EL1, xzr
1:
/* Stage-2 translation */
msr vttbr_el2, xzr
cbz x2, install_el2_stub
mov w0, #BOOT_CPU_MODE_EL2 // This CPU booted in EL2
isb
ret
SYM_INNER_LABEL(install_el2_stub, SYM_L_LOCAL)
/*
* When VHE is not in use, early init of EL2 and EL1 needs to be
* done here.
* When VHE _is_ in use, EL1 will not be used in the host and
* requires no configuration, and all non-hyp-specific EL2 setup
* will be done via the _EL1 system register aliases in __cpu_setup.
*/
mov_q x0, (SCTLR_EL1_RES1 | ENDIAN_SET_EL1)
msr sctlr_el1, x0
/* Coprocessor traps. */
mov x0, #0x33ff
msr cptr_el2, x0 // Disable copro. traps to EL2
/* SVE register access */
mrs x1, id_aa64pfr0_el1
ubfx x1, x1, #ID_AA64PFR0_SVE_SHIFT, #4
cbz x1, 7f
bic x0, x0, #CPTR_EL2_TZ // Also disable SVE traps
msr cptr_el2, x0 // Disable copro. traps to EL2
isb
mov x1, #ZCR_ELx_LEN_MASK // SVE: Enable full vector
msr_s SYS_ZCR_EL2, x1 // length for EL1.
/* Hypervisor stub */
7: adr_l x0, __hyp_stub_vectors
msr vbar_el2, x0
/* spsr */
mov x0, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\
PSR_MODE_EL1h)
msr spsr_el2, x0
msr elr_el2, lr
mov w0, #BOOT_CPU_MODE_EL2 // This CPU booted in EL2
eret
SYM_FUNC_END(el2_setup)
该汇编函数,用w0寄存器保存返回值,返回值有两种:
BOOT_CPU_MODE_EL1:表示当前CPU跳入内核时处于权限级EL1
BOOT_CPU_MODE_EL2:表示当前CPU跳入内核时处于权限级EL2
该汇编函数中的状态寄存器访问指令:
MRS: 状态寄存器到通用寄存器的传送指令。
MRS:({R0-R12}⬅CPSR,SPSR)
MSR: 通用寄存器到状态寄存器的传送指令。
MSR:(CPSR,SPSR⬅{R0-R12})
选择栈
msr SPsel, #1
由于当前异常级别可能是EL1也可能是EL2,因此msr SPsel, #1切换到当前异常级别的SP,如果当前为EL1,则切换到SPEL1,如果当前为EL2,则切换到SPEL2
判断当前的异常级别
mrs x0, CurrentEL
cmp x0, #CurrentEL_EL2
b.ne 1f
CurrentEL就是获取PSTATE中current exception level域的特殊寄存器
#define CurrentEL_EL2 (2 << 2)
判断当前是否处于EL2 ,当cpu不处于EL2时跳转到标号1
设置EL2端模式
1: mov_q x0, (SCTLR_EL2_RES1 | ENDIAN_SET_EL2)
msr sctlr_el2, x0
设置EL2下为大端存储
设置EL0/EL1的端模式
mov_q x0, (SCTLR_EL1_RES1 | ENDIAN_SET_EL1)
msr sctlr_el1, x0
mov w0, #BOOT_CPU_MODE_EL1 // This cpu booted in EL1
isb
ret
如果处于EL1级别,设置EL0和EL1下为大端存储,将启动时的异常级别EL1保存到w0
VHE支持
#ifdef CONFIG_ARM64_VHE
/*
* Check for VHE being present. For the rest of the EL2 setup,
* x2 being non-zero indicates that we do have VHE, and that the
* kernel is intended to run at EL2.
*/
mrs x2, id_aa64mmfr1_el1
ubfx x2, x2, #ID_AA64MMFR1_VHE_SHIFT, #4
#else
mov x2, xzr
#endif
通过读取id_aa64mmfr1_el1,存入X2, 判断CPU是支持虚拟主机扩展VHE模式,还是传统的分离Hyp模式。
vhe的全称是Virtualization Host Extension support。是armv8.1的新特性,其最终要就是支持type-2的hypervisors 这种扩展让kernel直接跑在el2上,这样可以减少host和guest之间share的寄存器,并减少overhead of virtualization 具体实现如下面的patch:https://lwn.net/Articles/650524/
Hyp configuration
/* Hyp configuration. */
mov_q x0, HCR_HOST_NVHE_FLAGS
cbz x2, set_hcr
mov_q x0, HCR_HOST_VHE_FLAGS
set_hcr:
msr hcr_el2, x0
isb
当寄存器x2,0表示Hyp模式;非0表示VHE模式
HCR_HOST_NVHE_FLAGS宏定义如下:
#define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK | HCR_ATA)
HCR_HOST_NVHE_FLAGS宏定义如下:
#define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK | HCR_ATA)
access physical timer and counter
/*
* Allow Non-secure EL1 and EL0 to access physical timer and counter.
* This is not necessary for VHE, since the host kernel runs in EL2,
* and EL0 accesses are configured in the later stage of boot process.
* Note that when HCR_EL2.E2H == 1, CNTHCTL_EL2 has the same bit layout
* as CNTKCTL_EL1, and CNTKCTL_EL1 accessing instructions are redefined
* to access CNTHCTL_EL2. This allows the kernel designed to run at EL1
* to transparently mess with the EL0 bits via CNTKCTL_EL1 access in
* EL2.
*/
cbnz x2, 1f
mrs x0, cnthctl_el2
orr x0, x0, #3 // Enable EL1 physical timers
msr cnthctl_el2, x0
1:
msr cntvoff_el2, xzr // Clear virtual offset
Hyp模式, 启动计数器 ,设定Non-secure EL1 and EL0可访问物理定时器和计数器,虚拟偏移量清零; VHE模式只需要清零偏移量
GIC设置
/* GICv3 system register access */
mrs x0, id_aa64pfr0_el1
ubfx x0, x0, #ID_AA64PFR0_GIC_SHIFT, #4
cbz x0, 3f
ID_AA64PFR0_EL1的bit24~27如果为0表示不支持GIC V3, 为1表示支持GICV3
mrs_s x0, SYS_ICC_SRE_EL2
orr x0, x0, #ICC_SRE_EL2_SRE // Set ICC_SRE_EL2.SRE==1
orr x0, x0, #ICC_SRE_EL2_ENABLE // Set ICC_SRE_EL2.Enable==1
msr_s SYS_ICC_SRE_EL2, x0
isb // Make sure SRE is now set
mrs_s x0, SYS_ICC_SRE_EL2 // Read SRE back,
tbz x0, #0, 3f // and check that it sticks
msr_s SYS_ICH_HCR_EL2, xzr // Reset ICC_HCR_EL2 to
ICC_SRE_EL2_ENABLE表示Non-secure EL1 accesses to ICC_SRE_EL1 are permitted if EL3 is not present or ICC_SRE_EL3.Enable is 1, otherwise Non-secure EL1 accesses to ICC_SRE_EL1 trap to EL3
如果是GICv3,设置SYS_ICC_SRE_EL2、SYS_ICH_HCR_EL2寄存器,这两个寄存器是GICv3的CPU接口寄存器。设置完毕后需要重新读取来确认,如果设置不成功则跳转到3f
Populate ID registers
mrs x0, midr_el1
mrs x1, mpidr_el1
msr vpidr_el2, x0
msr vmpidr_el2, x1
根据物理CPU的ID寄存器和亲合属性寄存器,来设置虚拟CPU对应的寄存器
hstr_el2清零
msr hstr_el2, xzr
将Hypervisor系统陷入寄存器HSTR_EL2清零。一般情况下,当客户虚拟机是AArch32位,会有Thumb和协处理器方式,不希望在访问相关寄存器陷入到EL2中
EL2 debug
mrs x1, id_aa64dfr0_el1
sbfx x0, x1, #ID_AA64DFR0_PMUVER_SHIFT, #4
cmp x0, #1
b.lt 4f // Skip if no PMU present
mrs x0, pmcr_el0 // Disable debug access traps
ubfx x0, x0, #11, #5 // to EL2 and allow access to
4:
csel x3, xzr, x0, lt // all PMU counters from EL1
通过id_aa64dfr0_el1寄存器的pmu version来判断是否支持pmu
如果pmu version小于1则不支持PMU,直接将x3清零;
否则表示支持pmu,则从pmcr_el0获取事件的数量赋值给x3,也就是说x3用于保存事件数量
Statistical profiling
/* Statistical profiling */
ubfx x0, x1, #ID_AA64DFR0_PMSVER_SHIFT, #4
cbz x0, 7f // Skip if SPE not present
cbnz x2, 6f // VHE?
mrs_s x4, SYS_PMBIDR_EL1 // If SPE available at EL2,
and x4, x4, #(1 << SYS_PMBIDR_EL1_P_SHIFT)
cbnz x4, 5f // then permit sampling of physical
mov x4, #(1 << SYS_PMSCR_EL2_PCT_SHIFT | \
1 << SYS_PMSCR_EL2_PA_SHIFT)
msr_s SYS_PMSCR_EL2, x4 // addresses and physical counter
5:
mov x1, #(MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT)
orr x3, x3, x1 // If we don't have VHE, then
b 7f // use EL1&0 translation.
6: // For VHE, use EL2 translation
orr x3, x3, #MDCR_EL2_TPMS // and disable access from EL1
7:
msr mdcr_el2, x3 // Configure debug traps
判断PMS是否支持
LORegions
mrs x1, id_aa64mmfr1_el1
ubfx x0, x1, #ID_AA64MMFR1_LOR_SHIFT, 4
cbz x0, 1f
msr_s SYS_LORC_EL1, xzr
Stage-2 translation
msr vttbr_el2, xzr
cbz x2, install_el2_stub
mov w0, #BOOT_CPU_MODE_EL2 // This CPU booted in EL2
isb
ret
如上VHE和HYP模式将分别走不同的分支
如果是VHE模式,直接返回w0为BOOT_CPU_MODE_EL2。由于进入内核时,CPU处于EL2?,所以直接调用ret指令返回,CPU仍然是EL2 install_el2_stub是Hyp模式相关设置和返回代码
install_el2_stub
SYM_INNER_LABEL(install_el2_stub, SYM_L_LOCAL)
/*
* When VHE is not in use, early init of EL2 and EL1 needs to be
* done here.
* When VHE _is_ in use, EL1 will not be used in the host and
* requires no configuration, and all non-hyp-specific EL2 setup
* will be done via the _EL1 system register aliases in __cpu_setup.
*/
mov_q x0, (SCTLR_EL1_RES1 | ENDIAN_SET_EL1)
msr sctlr_el1, x0
/* Coprocessor traps. */
mov x0, #0x33ff
msr cptr_el2, x0 // Disable copro. traps to EL2
/* SVE register access */
mrs x1, id_aa64pfr0_el1
ubfx x1, x1, #ID_AA64PFR0_SVE_SHIFT, #4
cbz x1, 7f
bic x0, x0, #CPTR_EL2_TZ // Also disable SVE traps
msr cptr_el2, x0 // Disable copro. traps to EL2
isb
mov x1, #ZCR_ELx_LEN_MASK // SVE: Enable full vector
msr_s SYS_ZCR_EL2, x1 // length for EL1.
/* Hypervisor stub */
7: adr_l x0, __hyp_stub_vectors
msr vbar_el2, x0
/* spsr */
mov x0, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\
PSR_MODE_EL1h)
msr spsr_el2, x0
msr elr_el2, lr
mov w0, #BOOT_CPU_MODE_EL2 // This CPU booted in EL2
eret
当非VHE模式下,用于EL1和EL2的早期初始化
总结
el2_setup根据当前CPU处于EL1还是EL2主要做了如下的工作:
1、设置当前异常级别的栈
2、判断当前的异常级别,并据此设置端模式;
3、如果支持VHE,设置VHE,否则设置传统HYP模式
4、启动定时器
5、GIC设置
6、根据物理CPU的ID寄存器和亲合属性寄存器,来设置虚拟CPU对应的寄存器
7、hstr_el2清零
8、EL2 debug相关寄存器设置