linux ARM64 中断底层处理代码分析
1 Exception vectors
1.1 Vector table offsets from vector table base address
1.2 linux Exception vectors
/*
* Exception vectors.
*/
.pushsection ".entry.text", "ax"
.align 11 /* 异常向量表要按照2K的对齐方式去处理,这和VBAR_ELx想对应的 */
ENTRY(vectors)
kernel_ventry 1, sync_invalid // Synchronous EL1t
kernel_ventry 1, irq_invalid // IRQ EL1t
kernel_ventry 1, fiq_invalid // FIQ EL1t
kernel_ventry 1, error_invalid // Error EL1t
kernel_ventry 1, sync // Synchronous EL1h
kernel_ventry 1, irq // IRQ EL1h
kernel_ventry 1, fiq_invalid // FIQ EL1h
kernel_ventry 1, error // Error EL1h
kernel_ventry 0, sync // Synchronous 64-bit EL0
kernel_ventry 0, irq // IRQ 64-bit EL0
kernel_ventry 0, fiq_invalid // FIQ 64-bit EL0
kernel_ventry 0, error // Error 64-bit EL0
#ifdef CONFIG_COMPAT
kernel_ventry 0, sync_compat, 32 // Synchronous 32-bit EL0
kernel_ventry 0, irq_compat, 32 // IRQ 32-bit EL0
kernel_ventry 0, fiq_invalid_compat, 32 // FIQ 32-bit EL0
kernel_ventry 0, error_compat, 32 // Error 32-bit EL0
#else
kernel_ventry 0, sync_invalid, 32 // Synchronous 32-bit EL0
kernel_ventry 0, irq_invalid, 32 // IRQ 32-bit EL0
kernel_ventry 0, fiq_invalid, 32 // FIQ 32-bit EL0
kernel_ventry 0, error_invalid, 32 // Error 32-bit EL0
#endif
END(vectors)
1.2.1 当前异常等级使用SP_EL0
kernel_ventry 1, sync_invalid // Synchronous EL1t
kernel_ventry 1, irq_invalid // IRQ EL1t
kernel_ventry 1, fiq_invalid // FIQ EL1t
kernel_ventry 1, error_invalid // Error EL1t
1.2.2 当前异常等级使用SP_ELx
kernel_ventry 1, sync // Synchronous EL1h
kernel_ventry 1, irq // IRQ EL1h
kernel_ventry 1, fiq_invalid // FIQ EL1h
kernel_ventry 1, error // Error EL1h
1.2.3 在Aarch64的低等级发生异常
kernel_ventry 0, sync // Synchronous 64-bit EL0
kernel_ventry 0, irq // IRQ 64-bit EL0
kernel_ventry 0, fiq_invalid // FIQ 64-bit EL0
kernel_ventry 0, error // Error 64-bit EL0
1.2.4 在Aarch32的低等级发生异常
#ifdef CONFIG_COMPAT
kernel_ventry 0, sync_compat, 32 // Synchronous 32-bit EL0
kernel_ventry 0, irq_compat, 32 // IRQ 32-bit EL0
kernel_ventry 0, fiq_invalid_compat, 32 // FIQ 32-bit EL0
kernel_ventry 0, error_compat, 32 // Error 32-bit EL0
#else
kernel_ventry 0, sync_invalid, 32 // Synchronous 32-bit EL0
kernel_ventry 0, irq_invalid, 32 // IRQ 32-bit EL0
kernel_ventry 0, fiq_invalid, 32 // FIQ 32-bit EL0
kernel_ventry 0, error_invalid, 32 // Error 32-bit EL0
#endif
1.3 kernel_ventry
.macro kernel_ventry, el, label, regsize = 64
.align 7 /* 对于arm64 每个异常入口大小为128Byte,需要按照128Byte对齐的方式去访问*/
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
alternative_if ARM64_UNMAP_KERNEL_AT_EL0
.if \el == 0 /* if (el == 0)*/
.if \regsize == 64
mrs x30, tpidrro_el0
msr tpidrro_el0, xzr
.else
mov x30, xzr
.endif
.endif
alternative_else_nop_endif
#endif
sub sp, sp, #S_FRAME_SIZE /* 将寄存器栈框设置为S_FRAME_SIZE大小 */
#ifdef CONFIG_VMAP_STACK
/*
* Test whether the SP has overflowed, without corrupting a GPR.
* Task and IRQ stacks are aligned to (1 << THREAD_SHIFT).
*/
add sp, sp, x0 // sp' = sp + x0
sub x0, sp, x0 // x0' = sp' - x0 = (sp + x0) - x0 = sp
tbnz x0, #THREAD_SHIFT, 0f
sub x0, sp, x0 // x0'' = sp' - x0' = (sp + x0) - sp = x0
sub sp, sp, x0 // sp'' = sp' - x0 = (sp + x0) - x0 = sp
b el\()\el\()_\label
0:
/*
* Either we've just detected an overflow, or we've taken an exception
* while on the overflow stack. Either way, we won't return to
* userspace, and can clobber EL0 registers to free up GPRs.
*/
/* Stash the original SP (minus S_FRAME_SIZE) in tpidr_el0. */
msr tpidr_el0, x0
/* Recover the original x0 value and stash it in tpidrro_el0 */
sub x0, sp, x0
msr tpidrro_el0, x0
/* Switch to the overflow stack */
adr_this_cpu sp, overflow_stack + OVERFLOW_STACK_SIZE, x0
/*
* Check whether we were already on the overflow stack. This may happen
* after panic() re-enables interrupts.
*/
mrs x0, tpidr_el0 // sp of interrupted context
sub x0, sp, x0 // delta with top of overflow stack
tst x0, #~(OVERFLOW_STACK_SIZE - 1) // within range?
b.ne __bad_stack // no? -> bad stack pointer
/* We were already on the overflow stack. Restore sp/x0 and carry on. */
sub sp, sp, x0
mrs x0, tpidrro_el0
#endif
b el\()\el\()_\label /* 假如是在el1下发生的中断,该条指令和 b el1_irq 一致*/
.endm
1.3.1 预留struct pt_regs大小的栈框
sub sp, sp, #S_FRAME_SIZE /* 将寄存器栈框设置为S_FRAME_SIZE大小 */
1.3.2 跳转到特定的异常处理接口
b el\()\el\()_\label /* 假如是在el1下发生的中断,该条指令和 b el1_irq 一致*/
1.4 el1_irq
.align 6
el1_irq:
kernel_entry 1
enable_da_f
#ifdef CONFIG_TRACE_IRQFLAGS
bl trace_hardirqs_off
#endif
irq_handler
#ifdef CONFIG_PREEMPT
ldr x24, [tsk, #TSK_TI_PREEMPT] // get preempt count
cbnz x24, 1f // preempt count != 0
bl el1_preempt
1:
#endif
#ifdef CONFIG_TRACE_IRQFLAGS
bl trace_hardirqs_on
#endif
kernel_exit 1
ENDPROC(el1_irq)
1.5 struct pt_regs
kernel_entry
和kernel_exit
这两个函数均是对pt_regs结构体的处理,通过对pt_regs的处理来切换上下文。
/*
* This struct defines the way the registers are stored on the stack during an
* exception. Note that sizeof(struct pt_regs) has to be a multiple of 16 (for
* stack alignment). struct user_pt_regs must form a prefix of struct pt_regs.
*/
struct pt_regs {
union {
struct user_pt_regs user_regs;
struct {
u64 regs[31]; /* 31个通用寄存器 x0 ~ x31 */
u64 sp; /* 栈寄存器 sp */
u64 pc; /* 程序计数寄存器PC */
u64 pstate; /* 当前处理器状态寄存器 */
};
};
u64 orig_x0;
#ifdef __AARCH64EB__
u32 unused2;
s32 syscallno;
#else
s32 syscallno;
u32 unused2;
#endif
u64 orig_addr_limit;
u64 unused; // maintain 16 byte alignment
u64 stackframe[2];
};
和pt_regs对应的宏定义是编译之后生成的include/generated/asm-offsets.h
文件所定义的下面的宏,
#define S_X0 0 /* offsetof(struct pt_regs, regs[0]) */
#define S_X1 8 /* offsetof(struct pt_regs, regs[1]) */
#define S_X2 16 /* offsetof(struct pt_regs, regs[2]) */
#define S_X3 24 /* offsetof(struct pt_regs, regs[3]) */
#define S_X4 32 /* offsetof(struct pt_regs, regs[4]) */
#define S_X5 40 /* offsetof(struct pt_regs, regs[5]) */
#define S_X6 48 /* offsetof(struct pt_regs, regs[6]) */
#define S_X7 56 /* offsetof(struct pt_regs, regs[7]) */
#define S_X8 64 /* offsetof(struct pt_regs, regs[8]) */
#define S_X10 80 /* offsetof(struct pt_regs, regs[10]) */
#define S_X12 96 /* offsetof(struct pt_regs, regs[12]) */
#define S_X14 112 /* offsetof(struct pt_regs, regs[14]) */
#define S_X16 128 /* offsetof(struct pt_regs, regs[16]) */
#define S_X18 144 /* offsetof(struct pt_regs, regs[18]) */
#define S_X20 160 /* offsetof(struct pt_regs, regs[20]) */
#define S_X22 176 /* offsetof(struct pt_regs, regs[22]) */
#define S_X24 192 /* offsetof(struct pt_regs, regs[24]) */
#define S_X26 208 /* offsetof(struct pt_regs, regs[26]) */
#define S_X28 224 /* offsetof(struct pt_regs, regs[28]) */
#define S_LR 240 /* offsetof(struct pt_regs, regs[30]) */
#define S_SP 248 /* offsetof(struct pt_regs, sp) */
#define S_COMPAT_SP 104 /* offsetof(struct pt_regs, compat_sp) */
#define S_PSTATE 264 /* offsetof(struct pt_regs, pstate) */
#define S_PC 256 /* offsetof(struct pt_regs, pc) */
#define S_ORIG_X0 272 /* offsetof(struct pt_regs, orig_x0) */
#define S_SYSCALLNO 280 /* offsetof(struct pt_regs, syscallno) */
#define S_ORIG_ADDR_LIMIT 288 /* offsetof(struct pt_regs, orig_addr_limit) */
#define S_STACKFRAME 304 /* offsetof(struct pt_regs, stackframe) */
#define S_FRAME_SIZE 320 /* sizeof(struct pt_regs) */
1.6 kernel_entry
kernel_entry
的处理和linux内核所定义的struct pt_regs
所对应
.macro kernel_entry, el, regsize = 64
.if \regsize == 32
mov w0, w0 // zero upper 32 bits of x0
.endif
stp x0, x1, [sp, #16 * 0]
stp x2, x3, [sp, #16 * 1]
stp x4, x5, [sp, #16 * 2]
stp x6, x7, [sp, #16 * 3]
stp x8, x9, [sp, #16 * 4]
stp x10, x11, [sp, #16 * 5]
stp x12, x13, [sp, #16 * 6]
stp x14, x15, [sp, #16 * 7]
stp x16, x17, [sp, #16 * 8]
stp x18, x19, [sp, #16 * 9]
stp x20, x21, [sp, #16 * 10]
stp x22, x23, [sp, #16 * 11]
stp x24, x25, [sp, #16 * 12]
stp x26, x27, [sp, #16 * 13]
stp x28, x29, [sp, #16 * 14]
.if \el == 0 /* if (el == 0)*/
clear_gp_regs
mrs x21, sp_el0
ldr_this_cpu tsk, __entry_task, x20 // Ensure MDSCR_EL1.SS is clear,
ldr x19, [tsk, #TSK_TI_FLAGS] // since we can unmask debug
disable_step_tsk x19, x20 // exceptions when scheduling.
apply_ssbd 1, x22, x23
.else /* el != 0*/
add x21, sp, #S_FRAME_SIZE
get_thread_info tsk
/* Save the task's original addr_limit and set USER_DS */
ldr x20, [tsk, #TSK_TI_ADDR_LIMIT]
str x20, [sp, #S_ORIG_ADDR_LIMIT] /* S_ORIG_ADDR_LIMIT 288 offsetof(struct pt_regs, orig_addr_limit) */
mov x20, #USER_DS
str x20, [tsk, #TSK_TI_ADDR_LIMIT] /* TSK_TI_ADDR_LIMIT 8 offsetof(struct task_struct, thread_info.addr_limit) */
/* No need to reset PSTATE.UAO, hardware's already set it to 0 for us */
.endif /* \el == 0 */
mrs x22, elr_el1 /* 把elr_el1寄存器保存到x22寄存器中*/
mrs x23, spsr_el1 /* 把spsr_el1寄存器的值保存到x23中 */
stp lr, x21, [sp, #S_LR] /* lr(x30) and sp + #S_FRAME_SIZE*/
/*
* In order to be able to dump the contents of struct pt_regs at the
* time the exception was taken (in case we attempt to walk the call
* stack later), chain it together with the stack frames.
*/
.if \el == 0
stp xzr, xzr, [sp, #S_STACKFRAME]
.else
stp x29, x22, [sp, #S_STACKFRAME] /* 把FP 和elr_el1寄存器的值保存到sp + S_STACKFRAME处 */
.endif
add x29, sp, #S_STACKFRAME /* fp = sp + S_STACKFRAME */
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
/*
* Set the TTBR0 PAN bit in SPSR. When the exception is taken from
* EL0, there is no need to check the state of TTBR0_EL1 since
* accesses are always enabled.
* Note that the meaning of this bit differs from the ARMv8.1 PAN
* feature as all TTBR0_EL1 accesses are disabled, not just those to
* user mappings.
*/
alternative_if ARM64_HAS_PAN
b 1f // skip TTBR0 PAN
alternative_else_nop_endif
.if \el != 0 /* 如果是在el0发生的异常 */
mrs x21, ttbr0_el1 /* 保存ttbr0_el1的值到x21寄存器中 */
tst x21, #TTBR_ASID_MASK // Check for the reserved ASID
orr x23, x23, #PSR_PAN_BIT // Set the emulated PAN in the saved SPSR
b.eq 1f // TTBR0 access already disabled
and x23, x23, #~PSR_PAN_BIT // Clear the emulated PAN in the saved SPSR
.endif
__uaccess_ttbr0_disable x21
1:
#endif
stp x22, x23, [sp, #S_PC]
/* Not in a syscall by default (el0_svc overwrites for real syscall) */
.if \el == 0
mov w21, #NO_SYSCALL
str w21, [sp, #S_SYSCALLNO]
.endif
/*
* Set sp_el0 to current thread_info.
*/
.if \el == 0
msr sp_el0, tsk
.endif
/*
* Registers that may be useful after this macro is invoked:
*
* x21 - aborted SP
* x22 - aborted PC
* x23 - aborted PSTATE
*/
.endm
1.6.1 struct user_pt_regs user_regs对应的汇编代码
union {
struct user_pt_regs user_regs;
struct {
u64 regs[31];
u64 sp;
u64 pc;
u64 pstate;
};
};
stp x0, x1, [sp, #16 * 0]
stp x2, x3, [sp, #16 * 1]
stp x4, x5, [sp, #16 * 2]
stp x6, x7, [sp, #16 * 3]
stp x8, x9, [sp, #16 * 4]
stp x10, x11, [sp, #16 * 5]
stp x12, x13, [sp, #16 * 6]
stp x14, x15, [sp, #16 * 7]
stp x16, x17, [sp, #16 * 8]
stp x18, x19, [sp, #16 * 9]
stp x20, x21, [sp, #16 * 10]
stp x22, x23, [sp, #16 * 11]
stp x24, x25, [sp, #16 * 12]
stp x26, x27, [sp, #16 * 13]
stp x28, x29, [sp, #16 * 14]
.if \el == 0 /* if (el == 0)*/
clear_gp_regs
mrs x21, sp_el0
ldr_this_cpu tsk, __entry_task, x20 // Ensure MDSCR_EL1.SS is clear,
ldr x19, [tsk, #TSK_TI_FLAGS] // since we can unmask debug
disable_step_tsk x19, x20 // exceptions when scheduling.
apply_ssbd 1, x22, x23
.else /* el != 0*/
add x21, sp, #S_FRAME_SIZE
get_thread_info tsk
/* Save the task's original addr_limit and set USER_DS */
ldr x20, [tsk, #TSK_TI_ADDR_LIMIT]
str x20, [sp, #S_ORIG_ADDR_LIMIT]
mov x20, #USER_DS
str x20, [tsk, #TSK_TI_ADDR_LIMIT]
/* No need to reset PSTATE.UAO, hardware's already set it to 0 for us */
.endif /* \el == 0 */
mrs x22, elr_el1
mrs x23, spsr_el1
stp lr, x21, [sp, #S_LR] /* lr and sp + #S_FRAME_SIZE*/
/*
* In order to be able to dump the contents of struct pt_regs at the
* time the exception was taken (in case we attempt to walk the call
* stack later), chain it together with the stack frames.
*/
.if \el == 0
stp xzr, xzr, [sp, #S_STACKFRAME]
.else
stp x29, x22, [sp, #S_STACKFRAME]
1.7 kernel_exit
.macro kernel_exit, el
.if \el != 0
disable_daif
/* Restore the task's original addr_limit. */
ldr x20, [sp, #S_ORIG_ADDR_LIMIT]
str x20, [tsk, #TSK_TI_ADDR_LIMIT]
/* No need to restore UAO, it will be restored from SPSR_EL1 */
.endif
ldp x21, x22, [sp, #S_PC] // load ELR, SPSR
.if \el == 0
ct_user_enter
.endif
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
/*
* Restore access to TTBR0_EL1. If returning to EL0, no need for SPSR
* PAN bit checking.
*/
alternative_if ARM64_HAS_PAN
b 2f // skip TTBR0 PAN
alternative_else_nop_endif
.if \el != 0
tbnz x22, #22, 1f // Skip re-enabling TTBR0 access if the PSR_PAN_BIT is set
.endif
__uaccess_ttbr0_enable x0, x1
.if \el == 0
/*
* Enable errata workarounds only if returning to user. The only
* workaround currently required for TTBR0_EL1 changes are for the
* Cavium erratum 27456 (broadcast TLBI instructions may cause I-cache
* corruption).
*/
bl post_ttbr_update_workaround
.endif
1:
.if \el != 0
and x22, x22, #~PSR_PAN_BIT // ARMv8.0 CPUs do not understand this bit
.endif
2:
#endif
.if \el == 0
ldr x23, [sp, #S_SP] // load return stack pointer
msr sp_el0, x23
tst x22, #PSR_MODE32_BIT // native task?
b.eq 3f
#ifdef CONFIG_ARM64_ERRATUM_845719
alternative_if ARM64_WORKAROUND_845719
#ifdef CONFIG_PID_IN_CONTEXTIDR
mrs x29, contextidr_el1
msr contextidr_el1, x29
#else
msr contextidr_el1, xzr
#endif
alternative_else_nop_endif
#endif
3:
apply_ssbd 0, x0, x1
.endif
msr elr_el1, x21 // set up the return data
msr spsr_el1, x22
ldp x0, x1, [sp, #16 * 0]
ldp x2, x3, [sp, #16 * 1]
ldp x4, x5, [sp, #16 * 2]
ldp x6, x7, [sp, #16 * 3]
ldp x8, x9, [sp, #16 * 4]
ldp x10, x11, [sp, #16 * 5]
ldp x12, x13, [sp, #16 * 6]
ldp x14, x15, [sp, #16 * 7]
ldp x16, x17, [sp, #16 * 8]
ldp x18, x19, [sp, #16 * 9]
ldp x20, x21, [sp, #16 * 10]
ldp x22, x23, [sp, #16 * 11]
ldp x24, x25, [sp, #16 * 12]
ldp x26, x27, [sp, #16 * 13]
ldp x28, x29, [sp, #16 * 14]
ldr lr, [sp, #S_LR]
add sp, sp, #S_FRAME_SIZE // restore sp
.if \el == 0
alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
bne 4f
msr far_el1, x30
tramp_alias x30, tramp_exit_native
br x30
4:
tramp_alias x30, tramp_exit_compat
br x30
#endif
.else
eret /* 从硬中断上下文返回 */
.endif
sb
.endm