Part B: Copy-on-Write Fork
传统的fork()执行时,子进程会复制父进程地址空间的内容,但一般来说在子进程中exec()会在fork()之后马上就调用,而exec()将替换掉子进程复制来的内存。在这种情况下,fork()复制父进程的内存就显得很没有必要了(因为在调用exec()之前,这一部分复制来的内存基本没有用上),因此后面的unix系统中引入了写时复制技术(copy-on-write)。
内核在fork()上将地址空间映射从父进程复制到子进程,而不是将实际页面的内容复制过去,同时将当前共享的页面标记为只读。当两个进程之一尝试写入这些共享页面之一时,该进程将导致页面错误。此时,Unix内核意识到该页面实际上是“virtual”副本或“copy-on-write”副本,因此它为导致错误的进程创建了新的,私有的,可写的副本。这样,各个页面的内容实际上不会被复制,直到它们被实际写入为止。这种优化使fork()之后调用exec()的子进程不必要的开销变小了许多:子进程在调用exec()之前可能只需要复制一页(其堆栈的当前页)。
User-level page fault handling
Setting the Page Fault Handler
exercise 8 :实现sys_env_set_pgfault_upcall系统调用。 查找目标进程的进程ID时,请确保启用权限检查,因为这是“危险的”系统调用。
// Register 'func' as the user-level page fault entry point of environment
// 'envid' by storing it in that environment's env_pgfault_upcall field.
//
// Returns 0 on success, or -E_BAD_ENV when envid2env() cannot resolve
// 'envid' (the lookup is made with its permission-check flag set to 1).
static int
sys_env_set_pgfault_upcall(envid_t envid, void *func)
{
	// LAB 4: Your code here.
	struct Env *target;
	int rc;

	// Resolve the target environment with permission checking enabled.
	rc = envid2env(envid, &target, 1);
	if (rc < 0)
		return -E_BAD_ENV;

	target->env_pgfault_upcall = func;
	return 0;
}
Invoking the User Page Fault Handler
exercise 9:在kern/trap.c中的page_fault_handler中实现将页面错误分派到用户态处理程序所需的代码。 写入异常堆栈时,请确保采取适当的预防措施。 (如果用户进程在异常堆栈上空间不足,会发生什么?)
// LAB 4: Your code here.
//
// Dispatch a page fault taken by the current environment.  If the
// environment registered a page fault upcall, build a UTrapframe on its
// exception stack and resume the environment at the upcall; otherwise
// report the fault and destroy the environment.
struct UTrapframe *utf;
uint32_t fault_va;

// Read processor's CR2 register to find the faulting address.
fault_va = rcr2();

// Handle kernel-mode page faults.  The low two bits of the saved CS hold
// the privilege level the fault was taken at; 0 means kernel mode.
if ((tf->tf_cs & 3) == 0)
	panic("page_fault in kernel mode, fault address: %08x\n", fault_va);

if (curenv->env_pgfault_upcall) {
	if (tf->tf_esp >= UXSTACKTOP - PGSIZE && tf->tf_esp < UXSTACKTOP) {
		// Nested fault: the environment was already running on its
		// exception stack.  Push the new frame below the trap-time
		// esp, leaving one empty 32-bit word in between that the
		// upcall uses to store the return eip.
		utf = (struct UTrapframe *) (tf->tf_esp - sizeof(struct UTrapframe) - 4);
	} else {
		// First fault: start at the top of the exception stack.
		utf = (struct UTrapframe *) (UXSTACKTOP - sizeof(struct UTrapframe));
	}

	// Verify the environment may write the whole frame before touching
	// it.  NOTE(review): this relies on user_mem_assert() not returning
	// when the check fails (missing or overflowed exception stack).
	user_mem_assert(curenv, (const void *) utf, sizeof(struct UTrapframe), PTE_P | PTE_W);

	// Save the trap-time state for the user-level handler.
	utf->utf_fault_va = fault_va;
	// The page fault error code lives in tf_err; tf_trapno is only the
	// trap number and must not be handed to the handler as the error code.
	utf->utf_err = tf->tf_err;
	utf->utf_regs = tf->tf_regs;
	utf->utf_eflags = tf->tf_eflags;
	utf->utf_eip = tf->tf_eip;
	utf->utf_esp = tf->tf_esp;

	// Resume the environment in its upcall, running on the new frame.
	tf->tf_eip = (uintptr_t) curenv->env_pgfault_upcall;
	tf->tf_esp = (uintptr_t) utf;
	env_run(curenv);
}
// If there's no page fault upcall, the environment didn't allocate a
// page for its exception stack or can't write to it, or the exception
// stack overflows, then destroy the environment that caused the fault.
else {
	// Destroy the environment that caused the fault.
	cprintf("[%08x] user fault va %08x ip %08x\n",
		curenv->env_id, fault_va, tf->tf_eip);
	print_trapframe(tf);
	env_destroy(curenv);
}
User-mode Page Fault Entrypoint
接下来,我们需要实现汇编例程,该例程将调用C页面错误处理程序并在引发错误的指令处恢复执行。 该汇编例程是使用sys_env_set_pgfault_upcall()向内核注册的处理程序。
exercise10 :在lib/pfentry.S中实现_pgfault_upcall例程。 有趣的部分是返回到导致页面错误的用户代码处。 你将直接返回那里,而无需再经过内核。 困难的部分是同时切换堆栈并重新加载EIP。
// Tail of _pgfault_upcall: return straight to the code that faulted,
// without going back through the kernel.
// Assumes %esp points at the start of the UTrapframe here (the earlier part
// of the routine, which calls the C handler, is not shown in this excerpt).
// The trick: write the trap-time eip onto the trap-time stack, restore all
// registers and eflags, switch to that stack, and let ret pop the eip.
addl $8, %esp // skip utf_fault_va and utf_err; %esp now points at the saved PushRegs
movl 0x20(%esp), %eax // %eax = utf_eip, stored right after the 0x20 bytes of PushRegs
subl $4, 0x28(%esp) // utf_esp -= 4 (utf_esp sits 48 bytes from the frame start, 0x28 from %esp now): reserve a 32-bit slot for the return eip
movl 0x28(%esp), %edx // %edx = adjusted utf_esp (trap-time esp - 4)
movl %eax, (%edx) // store the trap-time eip in that slot so the final ret can read it
popal // restore the general-purpose registers; %esp now points at utf_eip
addl $4, %esp // skip utf_eip; %esp now points at utf_eflags
popfl // restore eflags; nothing that modifies flags may run after this
popl %esp // load the adjusted utf_esp, switching back to the trap-time stack
ret // pop the eip stored above and resume at the faulting instruction
最后,我们需要实现用户页面错误处理机制的C用户库。
exercise 11:完成lib/pgfault.c的set_pgfault_handler()。
void set_pgfault_handler(void (*handler)(struct UTrapframe *utf))
{
int r;
if (_pgfault_handler == 0) {
// First time through!
// LAB 4: Your code here.
//panic("set_pgfault_handler not implemented");
if ((r = sys_page_alloc(thisenv->env_id, (void *)(UXSTACKTOP - PGSIZE), PTE_SYSCALL)) < 0)
panic("set_pgfault_handler: page alloc failed ,num is %e", r);
if ((r = sys_env_set_pgfault_upcall(thisenv->env_id, _pgfault_upcall)) < 0)
panic("set_pgfault_handler: pgfault upcall failed ,num is %e", r);
}
// Save handler pointer for assembly to call.
_pgfault_handler = handler;
}
Implementing Copy-on-Write Fork
前面做了这么多的准备工作,现在终于可以真正实现一个完全在用户空间中的copy-on-write fork()了。
课程中以及在lib/fork.c中为你要实现的fork()搭建了基本框架,就像dumbfork()一样,fork()应该创建一个新的进程,然后扫描父进程的地址空间并在子进程中设置相应的映射关系,关键的不同点在于,dumbfork()复制页面内容,而fork()仅仅复制页面映射关系,fork()只有在一个进程尝试对copy-on-write副本进行写入时才会复制页面内容。
exercise 12:实现lib/fork.c中的fork,duppage和pgfault,使用forktree程序测试你的代码,应当打印如下信息和一些new env,free env,exiting gracefully信息,这些信息和进程ID可能并非按下图顺序所打印:
1000: I am ''
1001: I am '0'
2000: I am '00'
2001: I am '000'
1002: I am '1'
3000: I am '11'
3001: I am '10'
4000: I am '100'
1003: I am '01'
5000: I am '010'
4001: I am '011'
2002: I am '110'
1004: I am '001'
1005: I am '111'
1006: I am '101'
1.pgfault:
// Custom page fault handler: if the faulting access was a write to a
// copy-on-write page, give this environment its own private writable copy
// of the page.  Panics on any other kind of fault or if a system call fails.
static void
pgfault(struct UTrapframe *utf)
{
	void *addr = (void *) utf->utf_fault_va;
	uint32_t err = utf->utf_err;

	// Check that the faulting access was (1) a write, and (2) to a
	// copy-on-write page.  If not, panic.
	// Hint:
	//   Use the read-only page table mappings at uvpt
	//   (see <inc/memlayout.h>).

	// LAB 4: Your code here.
	// Only PTE_COW qualifies.  A write fault on a page that is merely
	// marked PTE_W is not a copy-on-write fault and must not be silently
	// "repaired" by copying the page.
	pte_t pte = uvpt[PGNUM(addr)];
	if (!(err & FEC_WR) || !(pte & PTE_COW))
		panic("pgfault:err is not FEC_WR or page is not a COW page\n");

	// Allocate a new page, map it at a temporary location (PFTEMP),
	// copy the data from the old page to the new page, then move the new
	// page to the old page's address.
	// Hint:
	//   You should make three system calls.

	// LAB 4: Your code here.
	// Environment id 0 designates the calling environment, so no extra
	// sys_getenvid() round trips are needed.
	addr = ROUNDDOWN(addr, PGSIZE);

	// Allocate a fresh page at the scratch address.
	if (sys_page_alloc(0, (void *) PFTEMP, PTE_P | PTE_U | PTE_W) < 0)
		panic("pgfault:page alloc failed!");

	// Copy the contents of the shared page into the fresh page.
	memcpy((void *) PFTEMP, (const void *) addr, PGSIZE);

	// Map the fresh page writable at the faulting page's address,
	// replacing the copy-on-write mapping.
	if (sys_page_map(0, (void *) PFTEMP, 0, addr, PTE_P | PTE_U | PTE_W) < 0)
		panic("pgfault:page map failed!");

	// Drop the scratch mapping.
	if (sys_page_unmap(0, (void *) PFTEMP) < 0)
		panic("pgfault:page unmap failed!");
}
2.duppage
// Map virtual page 'pn' (address pn*PGSIZE) of the calling environment into
// environment 'envid' at the same virtual address.
//
// A page whose entry has PTE_W or PTE_COW set is mapped copy-on-write in the
// target and then remapped copy-on-write in the caller as well; any other
// page is mapped with PTE_P | PTE_U only.
//
// Returns 0 on success, or the negative error code of the failing
// sys_page_map() call.
static int
duppage(envid_t envid, unsigned pn)
{
	// LAB 4: Your code here.
	void *va = (void *) (pn * PGSIZE);
	int needs_cow = (uvpt[pn] & (PTE_W | PTE_COW)) != 0;
	int perm = PTE_P | PTE_U;
	int r;

	if (needs_cow)
		perm |= PTE_COW;

	// Install the mapping in the target environment first.
	r = sys_page_map(sys_getenvid(), va, envid, va, perm);
	if (r < 0)
		return r;

	// Pages that are not writable or copy-on-write need nothing further.
	if (!needs_cow)
		return 0;

	// Then downgrade our own mapping to copy-on-write too.
	r = sys_page_map(sys_getenvid(), va, sys_getenvid(), va, perm);
	if (r < 0)
		return r;

	return 0;
}
3.fork
// User-level fork with copy-on-write.
//
// Installs pgfault() as the page fault handler, creates a child with
// sys_exofork(), duplicates every present page below USTACKTOP into the
// child with duppage(), gives the child its own exception stack and page
// fault upcall, and finally marks the child runnable.
//
// Returns the child's envid to the parent, 0 to the child, and a negative
// error code on failure.
// NOTE(review): on a failure after sys_exofork() the partially built child
// is left behind; consider destroying it before returning the error.
envid_t fork(void)
{
	// LAB 4: Your code here.
	extern void _pgfault_upcall(void);
	envid_t sonId;
	int err;

	// Set up our page fault handler appropriately.
	set_pgfault_handler(pgfault);

	sonId = sys_exofork();
	if (sonId < 0)
		return sonId;

	if (sonId == 0) {
		// We are the child: fix "thisenv", which still refers to
		// the parent.
		thisenv = &envs[ENVX(sys_getenvid())];
		return 0;
	}

	// We are the parent.  The page counter is unsigned so that
	// pn * PGSIZE is computed in unsigned arithmetic; with a signed int
	// the product overflows before it reaches USTACKTOP.
	for (unsigned pn = 0; pn * PGSIZE < USTACKTOP; pn++) {
		// uvpt[pn] may only be read when the page table covering it
		// is itself present (one page directory entry per 1024 pages).
		if ((uvpd[pn >> 10] & PTE_P) && (uvpt[pn] & PTE_P)) {
			err = duppage(sonId, pn);
			if (err < 0)
				return err;
		}
	}

	// The exception stack is never shared: the child gets a fresh page.
	// PTE_P is included explicitly, as required for page permissions.
	err = sys_page_alloc(sonId, (void *) (UXSTACKTOP - PGSIZE), PTE_P | PTE_U | PTE_W);
	if (err < 0)
		return err;

	err = sys_env_set_pgfault_upcall(sonId, _pgfault_upcall);
	if (err < 0)
		return err;

	// Then mark the child as runnable and return.
	err = sys_env_set_status(sonId, ENV_RUNNABLE);
	if (err < 0)
		return err;

	return sonId;
}
测试结果:
make grade:
make grade通过,Lab4 Part B结束。