通常我们申请的内存,只是建立了进程的地址空间,为我们分配了对应的虚拟地址。但是这些虚拟地址并未和实际的物理内存建立对应的映射。因此当访问这些未建立映射关系的虚拟地址时,处理器会自动触发一个缺页异常。
armv7为例,当在数据访问周期里,进行存储访问发生异常时,会跳转到异常向量表中的Data abort向量中,处理流程为__vectors_start-->vector_dabt-->__dabt_usr/__dabt_svc-->dabt_helper-->v7_early_abort
arm的mmu中有两个与存储访问失效相关的寄存器
失效状态寄存器(date fault status register,FSR)和失效地址寄存器(data fault address register,FAR)。当发生存储访问失效时,FSR会反映所发生的存储失效相关信息,包含存储访问所属域和存储类型等。FAR会记录访问失效的虚拟地址。
/*
* Function: v7_early_abort
*
* Params : r2 = pt_regs
* : r4 = aborted context pc
* : r5 = aborted context psr
*
* Returns : r4 - r11, r13 preserved
*
* Purpose : obtain information about current aborted instruction.
*/
.align 5
ENTRY(v7_early_abort)
/*
* The effect of data aborts on on the exclusive access monitor are
* UNPREDICTABLE. Do a CLREX to clear the state
*/
clrex
mrc p15, 0, r1, c5, c0, 0 @ get FSR
mrc p15, 0, r0, c6, c0, 0 @ get FAR
/*
* V6 code adjusts the returned DFSR.
* New designs should not need to patch up faults.
*/
#if defined(CONFIG_VERIFY_PERMISSION_FAULT)
/*
* Detect erroneous permission failures and fix
*/
ldr r3, =0x40d @ On permission fault
and r3, r1, r3
cmp r3, #0x0d
bne do_DataAbort
mcr p15, 0, r0, c7, c8, 0 @ Retranslate FAR
isb
mrc p15, 0, ip, c7, c4, 0 @ Read the PAR
and r3, ip, #0x7b @ On translation fault
cmp r3, #0x0b
bne do_DataAbort
bic r1, r1, #0xf @ Fix up FSR FS[5:0]
and ip, ip, #0x7e
orr r1, r1, ip, LSR #1
#endif
b do_DataAbort
ENDPROC(v7_early_abort)
const struct fsr_info用于描述一条失效状态对应的处理方案
static struct fsr_info fsr_info[] = {
/*
* The following are the standard ARMv3 and ARMv4 aborts. ARMv5
* defines these to be "precise" aborts.
*/
{ do_bad, SIGSEGV, 0, "vector exception" },
{ do_bad, SIGBUS, BUS_ADRALN, "alignment exception" },
{ do_bad, SIGKILL, 0, "terminal exception" },
{ do_bad, SIGBUS, BUS_ADRALN, "alignment exception" },
{ do_bad, SIGBUS, 0, "external abort on linefetch" },
{ do_translation_fault, SIGSEGV, SEGV_MAPERR, "section translation fault" },
{ do_bad, SIGBUS, 0, "external abort on linefetch" },
{ do_page_fault, SIGSEGV, SEGV_MAPERR, "page translation fault" },
{ do_bad, SIGBUS, 0, "external abort on non-linefetch" },
{ do_bad, SIGSEGV, SEGV_ACCERR, "section domain fault" },
{ do_bad, SIGBUS, 0, "external abort on non-linefetch" },
{ do_bad, SIGSEGV, SEGV_ACCERR, "page domain fault" },
{ do_bad, SIGBUS, 0, "external abort on translation" },
{ do_sect_fault, SIGSEGV, SEGV_ACCERR, "section permission fault" },
{ do_bad, SIGBUS, 0, "external abort on translation" },
{ do_page_fault, SIGSEGV, SEGV_ACCERR, "page permission fault" },
..........................................
};
asmlinkage void __exception
do_DataAbort(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
const struct fsr_info *inf = fsr_info + fsr_fs(fsr);
struct siginfo info;
if (!inf->fn(addr, fsr & ~FSR_LNX_PF, regs))
return;
/* 到这里应该是处理不了这次异常 */
printk(KERN_ALERT "Unhandled fault: %s (0x%03x) at 0x%08lx\n",
inf->name, fsr, addr);
info.si_signo = inf->sig;
info.si_errno = 0;
info.si_code = inf->code;
info.si_addr = (void __user *)addr;
arm_notify_die("", regs, &info, fsr, 0);
}
inf->fn = do_page_fault
static int __kprobes
do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
struct task_struct *tsk;
struct mm_struct *mm;
int fault, sig, code;
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
if (notify_page_fault(regs, fsr))
return 0;
tsk = current;
mm = tsk->mm;
/* Enable interrupts if they were enabled in the parent context. */
if (interrupts_enabled(regs))
local_irq_enable();
/*
* If we're in an interrupt or have no user
* context, we must not take the fault..
*/
/* 是否是中断上下文或者禁止抢占状态 mm为空代表是内核线程 */
if (in_atomic() || !mm)
goto no_context;
if (user_mode(regs))/* 是否是用户态 */
flags |= FAULT_FLAG_USER;
if (fsr & FSR_WRITE)
flags |= FAULT_FLAG_WRITE;
/*
* As per x86, we may deadlock here. However, since the kernel only
* validly references user space from well defined areas of the code,
* we can bug out early if this is from code which shouldn't.
*/
if (!down_read_trylock(&mm->mmap_sem)) {
if (!user_mode(regs) && !search_exception_tables(regs->ARM_pc))
goto no_context;
retry:
down_read(&mm->mmap_sem);
} else {
/*
* The above down_read_trylock() might have succeeded in
* which case, we'll have missed the might_sleep() from
* down_read()
*/
might_sleep();
#ifdef CONFIG_DEBUG_VM
if (!user_mode(regs) &&
!search_exception_tables(regs->ARM_pc))
goto no_context;
#endif
}
fault = __do_page_fault(mm, addr, fsr, flags, tsk);
/* If we need to retry but a fatal signal is pending, handle the
* signal first. We do not need to release the mmap_sem because
* it would already be released in __lock_page_or_retry in
* mm/filemap.c. */
if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
return 0;
/*
* Major/minor page fault accounting is only done on the
* initial attempt. If we go through a retry, it is extremely
* likely that the page will be found in page cache at that point.
*/
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
if (!(fault & VM_FAULT_ERROR) && flags & FAULT_FLAG_ALLOW_RETRY) {
if (fault & VM_FAULT_MAJOR) {
tsk->maj_flt++;
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
regs, addr);
} else {
tsk->min_flt++;
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
regs, addr);
}
if (fault & VM_FAULT_RETRY) {
/* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
* of starvation. */
flags &= ~FAULT_FLAG_ALLOW_RETRY;
flags |= FAULT_FLAG_TRIED;
goto retry;
}
}
up_read(&mm->mmap_sem);
/*
* Handle the "normal" case first - VM_FAULT_MAJOR / VM_FAULT_MINOR
*/
/* 如果没有ERROR,BADMAP,BADACCES则认为缺页中断处理完成 */
if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP | VM_FAULT_BADACCESS))))
return 0;
/*
* If we are in kernel mode at this point, we
* have no context to handle this fault with.
*/
if (!user_mode(regs))
goto no_context;
/* 说明当前没有足够的内存,触发OOM机制 */
if (fault & VM_FAULT_OOM) {
/*
* We ran out of memory, call the OOM killer, and return to
* userspace (which will retry the fault, or kill us if we
* got oom-killed)
*/
pagefault_out_of_memory();
return 0;
}
if (fault & VM_FAULT_SIGBUS) {
/*
* We had some memory, but were unable to
* successfully fix up this page fault.
*/
sig = SIGBUS;
code = BUS_ADRERR;
} else {
/*
* Something tried to access memory that
* isn't in our memory map..
*/
sig = SIGSEGV;
code = fault == VM_FAULT_BADACCESS ?
SEGV_ACCERR : SEGV_MAPERR;
}
/* 内核无法处理,给用户进程发信号 */
__do_user_fault(tsk, addr, fsr, sig, code, regs);
return 0;
no_context:
/* 发生在内核态,内核无法处理,则发送oops错误 */
__do_kernel_fault(mm, addr, fsr, regs);
return 0;
}
如果缺页中断发生在用户态,当内核无法处理时,则会使用 __do_user_fault给用户进程发送信号
static void
__do_user_fault(struct task_struct *tsk, unsigned long addr,
unsigned int fsr, unsigned int sig, int code,
struct pt_regs *regs)
{
struct siginfo si;
#ifdef CONFIG_DEBUG_USER
if (((user_debug & UDBG_SEGV) && (sig == SIGSEGV)) ||
((user_debug & UDBG_BUS) && (sig == SIGBUS))) {
printk(KERN_DEBUG "%s: unhandled page fault (%d) at 0x%08lx, code 0x%03x\n",
tsk->comm, sig, addr, fsr);
show_pte(tsk->mm, addr);
show_regs(regs);
}
#endif
tsk->thread.address = addr;
tsk->thread.error_code = fsr;
tsk->thread.trap_no = 14;
si.si_signo = sig;
si.si_errno = 0;
si.si_code = code;
si.si_addr = (void __user *)addr;
force_sig_info(sig, &si, tsk);
}
如果发生在内核态,内核无法处理,则使用__do_kernel_fault发送oops错误
static void
__do_kernel_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr,
struct pt_regs *regs)
{
/*
* Are we prepared to handle this kernel fault?
*/
if (fixup_exception(regs))
return;
/*
* No handler, we'll have to terminate things with extreme prejudice.
*/
bust_spinlocks(1);
printk(KERN_ALERT
"Unable to handle kernel %s at virtual address %08lx\n",
(addr < PAGE_SIZE) ? "NULL pointer dereference" :
"paging request", addr);
show_pte(mm, addr);
die("Oops", regs, fsr);
bust_spinlocks(0);
do_exit(SIGKILL);
}
缺页中断核心处理函数
static int __kprobes
__do_page_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr,
unsigned int flags, struct task_struct *tsk)
{
struct vm_area_struct *vma;
int fault;
/*
通过失效的地址addr来寻找vma, 如果找不到说明地址还没有在进程地址空间中
返回VM_FAULT_BADMAP
*/
vma = find_vma(mm, addr);
fault = VM_FAULT_BADMAP;
if (unlikely(!vma))
goto out;
if (unlikely(vma->vm_start > addr))
goto check_stack;
/*
* Ok, we have a good vm_area for this
* memory access, so we can handle it.
*/
good_area:
/*
判断vma是否具有可写或者可执行的权限
如果发生一个写错误的缺页中断,则需要判断vma是否具有可写属性
如果没有则返回VM_FAULT_BADACCESS
*/
if (access_error(fsr, vma)) {
fault = VM_FAULT_BADACCESS;
goto out;
}
return handle_mm_fault(mm, vma, addr & PAGE_MASK, flags);
check_stack:
/* Don't allow expansion below FIRST_USER_ADDRESS */
if (vma->vm_flags & VM_GROWSDOWN &&
addr >= FIRST_USER_ADDRESS && !expand_stack(vma, addr))
goto good_area;
out:
return fault;
}