前面博文提到了,如果address不属于进程的地址空间,那么do_page_fault()函数继续执行bad_area标记处的语句。如果错误发生在用户态,则发送一个SIGSEGV信号给current进程并结束函数:
/*
* Something tried to access memory that isn't in our memory map..
* Fix it, but check if it's kernel or user first..
*/
bad_area:
up_read(&mm->mmap_sem);
bad_area_nosemaphore:
/* User mode accesses just cause a SIGSEGV */
if (error_code & 4) {
/*
* Valid to do another page fault here because this one came
* from user space.
*/
if (is_prefetch(regs, address, error_code))
return;
tsk->thread.cr2 = address;
/* Kernel addresses are always protection faults */
tsk->thread.error_code = error_code | (address >= TASK_SIZE);
tsk->thread.trap_no = 14;
force_sig_info_fault(SIGSEGV, si_code, address, tsk);
return;
}
#ifdef CONFIG_X86_F00F_BUG
/*
* Pentium F0 0F C7 C8 bug workaround.
*/
if (boot_cpu_data.f00f_bug) {
unsigned long nr;
nr = (address - idt_descr.address) >> 3;
if (nr == 6) {
do_invalid_op(regs, 0);
return;
}
}
#endif
force_sig_info_fault()函数确信进程不忽略或阻塞SIGSEGV信号,并通过si_code局部变量传递附加信息的同时把该信号发送给用户态进程。si_code变量已被置为SEGV_MAPERR(如果异常是由于一个不存在的页框引起,上篇博文刚开始的时候),或置为SEGV_ACCERR(如果异常是由于对现有页框的无效访问引起)。
如果异常发生在内核态(error_code的第2位被清0,即error_code & 4为0),仍然有两种可选的情况:
(1)异常的引起是由于把某个线性地址作为系统调用的参数传递给内核。
(2)异常是因一个真正的内核缺陷所引起的。
函数这样区分这两种可选的情况:
no_context:
/* Are we prepared to handle this kernel fault? */
if (fixup_exception(regs))
return;
/*
* Valid to do another page fault here, because if this fault
* had been triggered by is_prefetch fixup_exception would have
* handled it.
*/
if (is_prefetch(regs, address, error_code))
return;
在第一种情况中,代码跳到一段“修正代码”处,这段代码的典型操作就是向当前进程发送SIGSEGV信号,或用一个适当的出错码终止系统调用处理程序:
int fixup_exception(struct pt_regs *regs)
{
const struct exception_table_entry *fixup;
#ifdef CONFIG_PNPBIOS
if (unlikely((regs->xcs & ~15) == (GDT_ENTRY_PNPBIOS_BASE << 3)))
{
extern u32 pnp_bios_fault_eip, pnp_bios_fault_esp;
extern u32 pnp_bios_is_utter_crap;
pnp_bios_is_utter_crap = 1;
printk(KERN_CRIT "PNPBIOS fault.. attempting recovery./n");
__asm__ volatile(
"movl %0, %%esp/n/t"
"jmp *%1/n/t"
: : "g" (pnp_bios_fault_esp), "g" (pnp_bios_fault_eip));
panic("do_trap: can't hit this");
}
#endif
fixup = search_exception_tables(regs->eip);
if (fixup) {
regs->eip = fixup->fixup;
return 1;
}
return 0;
}
const struct exception_table_entry *search_exception_tables(unsigned long addr)
{
const struct exception_table_entry *e;
e = search_extable(__start___ex_table, __stop___ex_table-1, addr);
if (!e)
e = search_module_extables(addr);
return e;
}
在第二种情况中,函数把CPU寄存器和内核态堆栈的全部转储打印到控制台,并输出到一个系统消息缓冲区,然后调用函数do_exit()杀死当前进程。这就是所谓按所显示的消息命名的“内核漏洞(Kernel oops)”错误。这片代码我就不多讲了,如果你是内核高手,这些输出值可由内核编程高手用于推测引发此错误的条件,进而发现并纠正错误:
/*
* Oops. The kernel tried to access some bad page. We'll have to
* terminate things with extreme prejudice.
*/
bust_spinlocks(1);
if (oops_may_print()) {
#ifdef CONFIG_X86_PAE
if (error_code & 16) {
pte_t *pte = lookup_address(address);
if (pte && pte_present(*pte) && !pte_exec_kernel(*pte))
printk(KERN_CRIT "kernel tried to execute "
"NX-protected page - exploit attempt? "
"(uid: %d)/n", current->uid);
}
#endif
if (address < PAGE_SIZE)
printk(KERN_ALERT "BUG: unable to handle kernel NULL "
"pointer dereference");
else
printk(KERN_ALERT "BUG: unable to handle kernel paging"
" request");
printk(" at virtual address %08lx/n",address);
printk(KERN_ALERT " printing eip:/n");
printk("%08lx/n", regs->eip);
}
page = read_cr3();
page = ((unsigned long *) __va(page))[address >> 22];
if (oops_may_print())
printk(KERN_ALERT "*pde = %08lx/n", page);
/*
* We must not directly access the pte in the highpte
* case, the page table might be allocated in highmem.
* And lets rather not kmap-atomic the pte, just in case
* it's allocated already.
*/
#ifndef CONFIG_HIGHPTE
if ((page & 1) && oops_may_print()) {
page &= PAGE_MASK;
address &= 0x003ff000;
page = ((unsigned long *) __va(page))[address >> PAGE_SHIFT];
printk(KERN_ALERT "*pte = %08lx/n", page);
}
#endif
tsk->thread.cr2 = address;
tsk->thread.trap_no = 14;
tsk->thread.error_code = error_code;
die("Oops", regs, error_code);
bust_spinlocks(0);
do_exit(SIGKILL);