linux 4.9 aarch32
如下图中断、异常和系统调用的flow,中断一般是异步的硬件请求,异常通常是应用程序的错误触发,系统调用是应用程序对内核的功能请求
发生swi后进入系统中断向量然后执行vector_swi,源代码分析如下(CONFIG_AEABI):
/*=============================================================================
* SWI handler
*-----------------------------------------------------------------------------
*/
/*只保留一部分代码*/
.align 5
ENTRY(vector_swi)
sub sp, sp, #PT_REGS_SIZE
stmia sp, {r0 - r12} @ Calling r0 - r12
ARM( add r8, sp, #S_PC )
ARM( stmdb r8, {sp, lr}^ ) @ Calling sp, lr
THUMB( mov r8, sp )
THUMB( store_user_sp_lr r8, r10, S_SP ) @ calling sp, lr
mrs r8, spsr @ called from non-FIQ mode, so ok.
str lr, [sp, #S_PC] @ Save calling PC
str r8, [sp, #S_PSR] @ Save CPSR
str r0, [sp, #S_OLD_R0] @ Save OLD_R0
zero_fp
alignment_trap r10, ip, __cr_alignment /*对齐检查*/
enable_irq /*使能中断*/
ct_user_exit
get_thread_info tsk /*进程的内核栈与thread_info放在相邻的两页*/
/*
* Get the system call number.
*/
uaccess_disable tbl
adr tbl, sys_call_table @ load syscall table pointer
local_restart:
ldr r10, [tsk, #TI_FLAGS] @ check for syscall tracing
stmdb sp!, {r4, r5} @ push fifth and sixth args
tst r10, #_TIF_SYSCALL_WORK @ are we tracing syscalls?
bne __sys_trace
cmp scno, #NR_syscalls @ check upper syscall limit
badr lr, ret_fast_syscall @ return address /*手动
设置sys_*返回到ret_fast_syscall */
ldrcc pc, [tbl, scno, lsl #2] @ call sys_* routine
/*Pure EABI user space always put syscall number into scno (r7). table是4字节对齐所以要用lsl把scno左移两位作table偏移,跳到对应的syscall 对应表项*/
ENDPROC(vector_swi)
执行sys_*函数后返回到ret_fast_syscall函数
/*
* This is the fast syscall return path. We do as little as possible here,
* such as avoiding writing r0 to the stack. We only use this path if we
* have tracing and context tracking disabled - the overheads from those
* features make this path too inefficient.
*/
ret_fast_syscall:
UNWIND(.fnstart )
UNWIND(.cantunwind )
disable_irq_notrace @ disable interrupts
ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing
tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK
bne fast_work_pending /*检查是否有事情要做,比如signal处理 schedule处理等*/
/* perform architecture specific actions before user return */
arch_ret_to_user r1, lr /*arch 相关返回用户空间要做的事*/
restore_user_regs fast = 1, offset = S_OFF /*恢复用户空间现场,执行MOVS pc, lr时,CPSR会被SPSR覆盖,这条命令就cpu就恢复了user模式*/
UNWIND(.fnend )
ENDPROC(ret_fast_syscall)
/* Ok, we need to do extra processing, enter the slow path. */
fast_work_pending:
str r0, [sp, #S_R0+S_OFF]! @ returned r0
/* fall through to work_pending */
tst r1, #_TIF_SYSCALL_WORK
bne __sys_trace_return_nosave
slow_work_pending:
mov r0, sp @ 'regs'
mov r2, why @ 'syscall'
bl do_work_pending
cmp r0, #0
beq no_work_pending
movlt scno, #(__NR_restart_syscall - __NR_SYSCALL_BASE)
ldmia sp, {r0 - r6} @ have to reload r0 - r6
b local_restart @ ... and off we go
ENDPROC(ret_fast_syscall)
system call 返回用户空间要做的事情_TIF_SYSCALL_WORK | _TIF_WORK_MASK 定义在thread_info.h中
/*
* thread information flags:
* TIF_USEDFPU - FPU was used by this task this quantum (SMP)
* TIF_POLLING_NRFLAG - true if poll_idle() is polling TIF_NEED_RESCHED
*/
#define TIF_SIGPENDING 0 /* signal pending */
#define TIF_NEED_RESCHED 1 /* rescheduling necessary */
#define TIF_NOTIFY_RESUME 2 /* callback before returning to user */
#define TIF_UPROBE 3 /* breakpointed or singlestepping */
#define TIF_SYSCALL_TRACE 4 /* syscall trace active */
#define TIF_SYSCALL_AUDIT 5 /* syscall auditing active */
#define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */
#define TIF_SECCOMP 7 /* seccomp syscall filtering active */
#define TIF_NOHZ 12 /* in adaptive nohz mode */
#define TIF_USING_IWMMXT 17
#define TIF_MEMDIE 18 /* is terminating due to OOM killer */
#define TIF_RESTORE_SIGMASK 20
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
#define _TIF_UPROBE (1 << TIF_UPROBE)
#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
#define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT)
#define _TIF_SECCOMP (1 << TIF_SECCOMP)
#define _TIF_USING_IWMMXT (1 << TIF_USING_IWMMXT)
/* Checks for any syscall work in entry-common.S */
#define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
_TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP)
/*
* Change these and you break ASM code in entry-common.S
*/
#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
_TIF_NOTIFY_RESUME | _TIF_UPROBE)
以signal处理为例
do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall)
{
/*
* The assembly code enters us with IRQs off, but it hasn't
* informed the tracing code of that for efficiency reasons.
* Update the trace code with the current status.
*/
trace_hardirqs_off();
do {
if (likely(thread_flags & _TIF_NEED_RESCHED)) {
schedule(); /*schedule 当前进程出去*/
} else {
if (unlikely(!user_mode(regs)))
return 0;
local_irq_enable();
if (thread_flags & _TIF_SIGPENDING) {
int restart = do_signal(regs, syscall);/*处理当前进程挂起的信号*/
if (unlikely(restart)) {
/*
* Restart without handlers.
* Deal with it without leaving
* the kernel space.
*/
return restart;
}
syscall = 0;
} else if (thread_flags & _TIF_UPROBE) {
uprobe_notify_resume(regs);
} else {
clear_thread_flag(TIF_NOTIFY_RESUME);
tracehook_notify_resume(regs);
}
}
local_irq_disable();
thread_flags = current_thread_info()->flags;
} while (thread_flags & _TIF_WORK_MASK); /*事情做完才会继续返回用户空间*/
return 0;
}