这个是2021版本。
2021版本和2020版本的这部分略有不同。
COW想必科班的同学应该不陌生,因此本文注重操作过程,没有很多的分析,详细内容可参考https://pdos.csail.mit.edu/6.828/2021/labs/cow.html
下面开始做实验。
切换到COW分支
git fetch
git checkout cow
make clean
首先我们看一下fork的函数在干什么(该函数在kernel/proc.c中):
// Create a new process as a copy of the calling process.
// Returns the new process's pid to the caller, or -1 if allocproc()
// or uvmcopy() fails. The new process's saved a0 is set to 0, so the
// same call returns 0 there.
int
fork(void)
{
int i, pid;
struct proc *np;
struct proc *p = myproc();
// Allocate process.
if((np = allocproc()) == 0){
return -1;
}
// Copy user memory from parent to child.
if(uvmcopy(p->pagetable, np->pagetable, p->sz) < 0){
// Undo the allocation. np->lock is released here, so it is held at
// this point (presumably acquired inside allocproc -- confirm).
freeproc(np);
release(&np->lock);
return -1;
}
np->sz = p->sz;
// copy saved user registers.
*(np->trapframe) = *(p->trapframe);
// Cause fork to return 0 in the child.
np->trapframe->a0 = 0;
// increment reference counts on open file descriptors.
for(i = 0; i < NOFILE; i++)
if(p->ofile[i])
np->ofile[i] = filedup(p->ofile[i]);
np->cwd = idup(p->cwd);
safestrcpy(np->name, p->name, sizeof(p->name));
// Save the pid while np->lock is still held; it is the return value.
pid = np->pid;
// np->lock is dropped before wait_lock is taken, then re-acquired
// below only to publish the RUNNABLE state.
release(&np->lock);
acquire(&wait_lock);
np->parent = p;
release(&wait_lock);
acquire(&np->lock);
np->state = RUNNABLE;
release(&np->lock);
return pid;
}
可以发现,写时复制主要改的是uvmcopy这个函数,将本来的直接复制改成引用计数的模式。
因此,整个实验步骤如下:
先在kernel/riscv.h中加入
#define PTE_C (1L << 8) // COW
不熟悉PTE的话可以回顾一下前面做过的实验。
然后在kernel/kalloc.c的最开始的部分加入以下内容:
note: 为了简单,这里没有使用锁,测试点里也没有专门针对这一点的检查。但要注意:引用计数是一个被所有CPU共享的变量,只要多个CPU可能同时执行fork/exit,对它的修改就应该用锁或原子操作保护。
// Per-page reference counts, one slot for every physical page in
// [KERNBASE, PHYSTOP). KERNBASE == 0x80000000.
static int reference_count[(PHYSTOP - KERNBASE) / PGSIZE];

// Map a physical address to its slot in reference_count[].
// No range check is done: pa must lie in [KERNBASE, PHYSTOP).
static int idx_rc(uint64 pa){
  return (pa - KERNBASE) / PGSIZE;
}

// Increment the reference count of the page containing pa.
// The counter is shared by every CPU, so the update is done as one
// atomic read-modify-write instead of a plain "++", which could lose
// an increment when two CPUs touch the same page at the same time.
void add_rc(uint64 pa){
  __sync_fetch_and_add(&reference_count[idx_rc(pa)], 1);
}

// Decrement the reference count of the page containing pa (atomic,
// for the same reason as add_rc). Does not free the page.
void sub_rc(uint64 pa){
  __sync_fetch_and_sub(&reference_count[idx_rc(pa)], 1);
}
然后在kernel/vm.c中修改uvmcopy函数。
函数原来是:
// Original (pre-COW) version.
// For every page of virtual addresses [0, sz) mapped in `old`, allocate a
// fresh physical page with kalloc(), copy the contents into it and map it
// at the same virtual address in `new` with the same flags.
// Returns 0 on success. On failure, unmaps what was already mapped in
// `new` and returns -1. Panics if a PTE is missing or not valid.
int
uvmcopy(pagetable_t old, pagetable_t new, uint64 sz)
{
pte_t *pte;
uint64 pa, i;
uint flags;
char *mem;
for(i = 0; i < sz; i += PGSIZE){
if((pte = walk(old, i, 0)) == 0)
panic("uvmcopy: pte should exist");
if((*pte & PTE_V) == 0)
panic("uvmcopy: page not present");
pa = PTE2PA(*pte);
flags = PTE_FLAGS(*pte);
if((mem = kalloc()) == 0)
goto err;
// Eager copy of the whole page.
memmove(mem, (char*)pa, PGSIZE);
if(mappages(new, i, PGSIZE, (uint64)mem, flags) != 0){
// The page was allocated but never mapped, so free it by hand.
kfree(mem);
goto err;
}
}
return 0;
err:
// Undo the i / PGSIZE pages mapped so far; the final argument 1
// presumably asks uvmunmap to also free the physical pages -- confirm.
uvmunmap(new, 0, i / PGSIZE, 1);
return -1;
}
现在不应该直接拷贝页,而是增加引用计数,改成如下:
// Copy-on-write version of uvmcopy.
// Gives `new` mappings for virtual addresses [0, sz) of `old` without
// copying any data: both page tables end up pointing at the same physical
// pages, and each shared page gets one more reference via add_rc().
//
// Only pages that are currently writable are turned into COW pages
// (PTE_W cleared, PTE_C set, in both page tables). Pages that were
// already read-only are shared as they are and never get PTE_C, so a
// later write fault cannot make them writable. Pages that are already
// COW from an earlier fork keep their PTE_C bit.
//
// Returns 0 on success. On failure, unmaps what was already mapped in
// `new` and returns -1. Panics if a PTE is missing or not valid.
int
uvmcopy(pagetable_t old, pagetable_t new, uint64 sz)
{
  pte_t *pte;
  uint64 pa, i;
  uint flags;

  for(i = 0; i < sz; i += PGSIZE){
    if((pte = walk(old, i, 0)) == 0)
      panic("uvmcopy: pte should exist");
    if((*pte & PTE_V) == 0)
      panic("uvmcopy: page not present");
    pa = PTE2PA(*pte);
    // Downgrade writable pages to copy-on-write; leave the rest untouched.
    if(*pte & PTE_W)
      *pte = (*pte & ~PTE_W) | PTE_C;
    flags = PTE_FLAGS(*pte);
    if(mappages(new, i, PGSIZE, pa, flags) != 0){
      goto err;
    }
    // No new page is allocated; the existing one gains a reference.
    add_rc(pa);
  }
  return 0;

 err:
  // Pages [0, i) were mapped and counted; unmapping them with the final
  // argument 1 goes through kfree(), which drops those references again.
  uvmunmap(new, 0, i / PGSIZE, 1);
  return -1;
}
那么在解除映射的时候,需要把引用计数减一。需要注意的是,引用计数在逻辑上是跟物理页(物理地址)绑定的,所以只需要改动uvmunmap中与do_free有关的部分即可,也就是修改kfree函数(在kernel/kalloc.c中)。
原来代码:
// Original (pre-COW) version.
// Return the physical page at pa to the free list.
// Panics if pa is not page-aligned, lies below `end`, or is at or
// above PHYSTOP.
void
kfree(void *pa)
{
struct run *r;
if(((uint64)pa % PGSIZE) != 0 || (char*)pa < end || (uint64)pa >= PHYSTOP)
panic("kfree");
// Fill with junk to catch dangling refs.
memset(pa, 1, PGSIZE);
// The free-list node is stored inside the freed page itself.
r = (struct run*)pa;
// Push onto the head of kmem.freelist under kmem.lock.
acquire(&kmem.lock);
r->next = kmem.freelist;
kmem.freelist = r;
release(&kmem.lock);
}
改成如下:只有当引用计数不大于1时才真正释放页面,否则只把引用计数减一:
// Drop one reference to the physical page at pa.
// If other references remain (count > 1) the count is decremented and
// the page stays allocated. Otherwise the count is reset to 0 and the
// page is returned to the free list.
// Panics if pa is not page-aligned, lies below `end`, or is at or
// above PHYSTOP.
void
kfree(void *pa)
{
  struct run *r;
  int idx;

  if(((uint64)pa % PGSIZE) != 0 || (char*)pa < end || (uint64)pa >= PHYSTOP)
    panic("kfree");

  idx = idx_rc((uint64)pa);

  // Test and update the count under kmem.lock so that two CPUs dropping
  // references to the same page cannot both see "count > 1" (leaking the
  // page) or both see "count <= 1" (freeing it twice).
  acquire(&kmem.lock);
  if(reference_count[idx] > 1){
    sub_rc((uint64)pa);
    release(&kmem.lock);
    return;
  }
  // Last reference. Reset the count *before* the page becomes visible on
  // the free list; resetting it afterwards could overwrite the 1 stored
  // by a kalloc() on another CPU that has already handed the page out.
  reference_count[idx] = 0;
  release(&kmem.lock);

  // Fill with junk to catch dangling refs.
  memset(pa, 1, PGSIZE);

  r = (struct run*)pa;

  acquire(&kmem.lock);
  r->next = kmem.freelist;
  kmem.freelist = r;
  release(&kmem.lock);
}
下面修改usertrap函数(在kernel/trap.c中)
原来代码:
// Original (pre-COW) version.
// Handle a trap taken from user mode. Three cases are distinguished:
// scause == 8 (system call), a device interrupt recognised by devintr(),
// and everything else, which is reported and marks the process killed.
// Finishes by calling usertrapret(), unless the process exits first.
void
usertrap(void)
{
int which_dev = 0;
if((r_sstatus() & SSTATUS_SPP) != 0)
panic("usertrap: not from user mode");
// send interrupts and exceptions to kerneltrap(),
// since we're now in the kernel.
w_stvec((uint64)kernelvec);
struct proc *p = myproc();
// save user program counter.
p->trapframe->epc = r_sepc();
if(r_scause() == 8){
// system call
if(p->killed)
exit(-1);
// sepc points to the ecall instruction,
// but we want to return to the next instruction.
p->trapframe->epc += 4;
// an interrupt will change sstatus &c registers,
// so don't enable until done with those registers.
intr_on();
syscall();
} else if((which_dev = devintr()) != 0){
// ok
} else {
// Neither a system call nor a known device interrupt: print the
// cause and the faulting pc/value, then mark the process killed.
printf("usertrap(): unexpected scause %p pid=%d\n", r_scause(), p->pid);
printf(" sepc=%p stval=%p\n", r_sepc(), r_stval());
p->killed = 1;
}
if(p->killed)
exit(-1);
// give up the CPU if this is a timer interrupt.
if(which_dev == 2)
yield();
usertrapret();
}
现在需要增加当scause为13(load page fault,读缺页)或15(store/AMO page fault,写缺页)时的处理
改完后的代码为:
void
usertrap(void)
{
int which_dev = 0;
if((r_sstatus() & SSTATUS_SPP) != 0)
panic("usertrap: not from user mode");
// send interrupts and exceptions to kerneltrap(),
// since we're now in the kernel.
w_stvec((uint64)kernelvec);
struct proc *p = myproc();
// save user program counter.
p->trapframe->epc = r_sepc();
if(r_scause() == 8){
// system call
if(p->killed)
exit(-1);
// sepc points to the ecall instruction,
// but we want to return to the next instruction.
p->trapframe->epc += 4;
// an interrupt will change sstatus &c registers,
// so don't enable until done with those registers.
intr_on();
syscall();
} else if((which_dev = devintr()) != 0){
// ok
} else {
uint64 va = PGROUNDDOWN(r_stval());
if(va >= MAXVA){
p->killed = 1;
exit(-1);
}
pte_t* pte = walk(p->pagetable,va,0);
if(pte == 0){
p->killed = 1;
exit(-1);
}
uint64 pa = PTE2PA(*pte);
uint64 flag = PTE_FLAGS(*pte);
if((r_scause() == 13 || r_scause() == 15) && (flag & PTE_C) ){
char* mem = kalloc();
if(mem == 0){
printf("here\n");
p->killed = 1;
exit(-1);
}
memmove(mem,(char*)pa,PGSIZE);
kfree((char*)pa);
*pte = PA2PTE(mem) | (flag & ~PTE_C) | PTE_W;
}
else{
printf("usertrap(): unexpected scause %p pid=%d\n", r_scause(), p->pid);
printf(" sepc=%p stval=%p\n", r_sepc(), r_stval());
p->killed = 1;
}
}
if(p->killed)
exit(-1);
// give up the CPU if this is a timer interrupt.
if(which_dev == 2)
yield();
usertrapret();
}
这部分内容就是 分配页面,并且修改标志位,同时调用kfree进行处理,减少一次引用计数。
然后修改kernel/vm.c的copyout函数。
原来代码为:
// Original (pre-COW) version.
// Copy len bytes from kernel buffer src to virtual address dstva in the
// given page table, one page at a time.
// Returns 0 on success, -1 if walkaddr() returns 0 for any page touched.
int
copyout(pagetable_t pagetable, uint64 dstva, char *src, uint64 len)
{
uint64 n, va0, pa0;
while(len > 0){
// Translate the page that contains dstva.
va0 = PGROUNDDOWN(dstva);
pa0 = walkaddr(pagetable, va0);
if(pa0 == 0)
return -1;
// n = bytes from dstva to the end of this page, capped at len.
n = PGSIZE - (dstva - va0);
if(n > len)
n = len;
memmove((void *)(pa0 + (dstva - va0)), src, n);
len -= n;
src += n;
// Continue at the start of the next page.
dstva = va0 + PGSIZE;
}
return 0;
}
现在改为:
// Copy len bytes from kernel buffer src to virtual address dstva in the
// given page table, one page at a time.
// If a destination page is marked copy-on-write (PTE_C), it is first
// replaced by a private writable copy, exactly as a user-mode write
// fault would do, and one reference to the shared page is dropped.
// Returns 0 on success, -1 if an address is out of range, a page is not
// mapped as a valid user page, or memory for a COW copy cannot be
// allocated.
int
copyout(pagetable_t pagetable, uint64 dstva, char *src, uint64 len)
{
  uint64 n, va0, pa0, flag;
  pte_t *pte;

  while(len > 0){
    va0 = PGROUNDDOWN(dstva);
    if(va0 >= MAXVA)
      return -1;
    pte = walk(pagetable, va0, 0);
    // walk() is used here to obtain the PTE itself, so the validity and
    // user-access checks must be made explicitly; without them the kernel
    // would write through mappings that are not user-accessible.
    if(pte == 0 || (*pte & PTE_V) == 0 || (*pte & PTE_U) == 0)
      return -1;
    pa0 = PTE2PA(*pte);
    flag = PTE_FLAGS(*pte);
    if(flag & PTE_C){
      char *mem = kalloc();
      if(mem == 0){
        return -1;
      }
      memmove(mem, (char*)pa0, PGSIZE);
      // Switch the mapping to the private copy first, then drop this
      // page table's reference to the shared page.
      *pte = PA2PTE((uint64)mem) | (flag & ~PTE_C) | PTE_W;
      kfree((char*)pa0);
      pa0 = (uint64)mem;
    }
    if(pa0 == 0)
      return -1;
    // n = bytes from dstva to the end of this page, capped at len.
    n = PGSIZE - (dstva - va0);
    if(n > len)
      n = len;
    memmove((void *)(pa0 + (dstva - va0)), src, n);

    len -= n;
    src += n;
    dstva = va0 + PGSIZE;
  }
  return 0;
}
别忘了更改kalloc函数,位于(kernel/kalloc.c中)
原来为:
// Original (pre-COW) version.
// Take one page off the head of kmem.freelist.
// Returns a pointer to the page, or 0 if the free list is empty.
void *
kalloc(void)
{
struct run *r;
// Pop the head of the free list under kmem.lock.
acquire(&kmem.lock);
r = kmem.freelist;
if(r)
kmem.freelist = r->next;
release(&kmem.lock);
if(r)
memset((char*)r, 5, PGSIZE); // fill with junk
return (void*)r;
}
现在需要在分配页的时候初始化引用计数(设置为1)
也就是:
void *
kalloc(void)
{
struct run *r;
acquire(&kmem.lock);
r = kmem.freelist;
if(r)
kmem.freelist = r->next;
release(&kmem.lock);
if(r)
memset((char*)r, 5, PGSIZE); // fill with junk
if(r)
reference_count[idx_rc((uint64)r)] = 1;
return (void*)r;
}
最后在kernel/defs.h中添加声明:
// kalloc.c
void add_rc(uint64);
void sub_rc(uint64);
// vm.c
pte_t* walk(pagetable_t, uint64,int);
这是最后的实验结果:
参考文献:
https://github.com/jlu-xiurui/MIT6.S081-2021-FALL/tree/master/lab6-cow