2021 MIT6.S081 Copy-on-Write Fork for xv6

文章详细介绍了如何在操作系统内核中实现写时复制(COW)机制,通过修改页表项,增加引用计数,并在内存分配和释放时进行相应的操作。实验涉及修改`uvmcopy`、`kalloc`、`kfree`等关键函数,以及处理异常情况,以提高内存效率。
摘要由CSDN通过智能技术生成

这个是2021版本。

2021版本和2020版本的这部分略有不同。

COW想必科班的同学应该不陌生,因此本文注重操作过程,没有很多的分析,详细内容可参考https://pdos.csail.mit.edu/6.828/2021/labs/cow.html

下面开始做实验。

切换到COW分支

# Update the local copy of the remote branches.
git fetch
# Switch the working tree to the cow branch used by this lab.
git checkout cow
# Presumably removes build output left over from the previous branch.
make clean

首先我们看一下fork的函数在干什么(该函数在kernel/proc.c中):

// Create a child process that duplicates the calling process.
// Returns the child's pid to the caller, or -1 if a process slot or the
// user-memory copy could not be obtained. The child itself sees a return
// value of 0 because a0 in its trapframe is overwritten below.
int
fork(void)
{
  int i, pid;
  struct proc *np;
  struct proc *p = myproc();

  // Allocate process.
  // NOTE(review): np->lock appears to be held on return from allocproc(),
  // since both the error path and the success path below release it
  // without acquiring it first — confirm in allocproc().
  if((np = allocproc()) == 0){
    return -1;
  }

  // Copy user memory from parent to child.
  // This uvmcopy() call is the step the copy-on-write lab changes.
  if(uvmcopy(p->pagetable, np->pagetable, p->sz) < 0){
    freeproc(np);
    release(&np->lock);
    return -1;
  }
  np->sz = p->sz;

  // copy saved user registers.
  *(np->trapframe) = *(p->trapframe);

  // Cause fork to return 0 in the child.
  np->trapframe->a0 = 0;

  // increment reference counts on open file descriptors.
  for(i = 0; i < NOFILE; i++)
    if(p->ofile[i])
      np->ofile[i] = filedup(p->ofile[i]);
  np->cwd = idup(p->cwd);

  safestrcpy(np->name, p->name, sizeof(p->name));

  // Read the pid while np->lock is still held; it is the return value.
  pid = np->pid;

  release(&np->lock);

  // The parent pointer is written under wait_lock, not np->lock.
  acquire(&wait_lock);
  np->parent = p;
  release(&wait_lock);

  // Mark the child RUNNABLE as the last step, under its own lock.
  acquire(&np->lock);
  np->state = RUNNABLE;
  release(&np->lock);

  return pid;
}

可以发现,写时复制主要改的是uvmcopy这个函数,将本来的直接复制改成引用计数的模式。

因此,整个实验步骤如下:

先在kernel/riscv.h中加入

// Software-defined PTE flag marking a copy-on-write mapping: uvmcopy sets it
// when it shares a page, and the write paths clear it after copying the page.
// NOTE(review): bit 8 is assumed to be one of the PTE bits the RISC-V
// privileged spec reserves for software use — confirm against the spec.
#define PTE_C (1L << 8) // COW

不熟悉PTE的话可以回顾一下前面做过的实验。

然后在kernel/kalloc.c的最开始的部分加入以下内容:

note: 引用计数是所有CPU共享的变量。xv6默认以多核运行,fork、缺页处理和kfree可能在不同CPU上同时访问同一个物理页的计数,因此严格来说对它的修改应该加锁(或使用原子操作)保护。本文最初的实现为了简单省略了这一部分,测试点在多数情况下也能通过,但并发时可能丢失更新。

// Per-page reference counts used for copy-on-write sharing.
// One slot per page frame in [KERNBASE, PHYSTOP).
//KERNBASE == 0x80000000
static int reference_count[(PHYSTOP - KERNBASE) / PGSIZE];

// Map a physical address to its slot in reference_count.
// pa must lie inside [KERNBASE, PHYSTOP); no range check is done here.
static int idx_rc(uint64 pa){ //get reference cnt
    return (pa - KERNBASE) / PGSIZE;
}

// Record one more mapping of the page that contains pa.
// The update is atomic: the array is shared by every CPU, and a plain
// read-modify-write could lose an update when two CPUs touch the same slot.
void add_rc(uint64 pa){ // add
    __sync_fetch_and_add(&reference_count[idx_rc(pa)], 1);
}

// Record one fewer mapping of the page that contains pa (atomic, see add_rc).
// This only adjusts the counter; it never frees the page.
void sub_rc(uint64 pa){ // sub
    __sync_fetch_and_sub(&reference_count[idx_rc(pa)], 1);
}

然后在kernel/vm.c中修改uvmcopy函数。

函数原来是:

// Duplicate the first sz bytes of the address space described by old into
// new: every page gets a freshly allocated physical page with the same
// contents and the same PTE flags.
// Returns 0 on success. On failure returns -1 after unmapping and freeing
// whatever had already been installed in new.
int
uvmcopy(pagetable_t old, pagetable_t new, uint64 sz)
{
  uint64 va;

  for(va = 0; va < sz; va += PGSIZE){
    pte_t *entry = walk(old, va, 0);
    if(entry == 0)
      panic("uvmcopy: pte should exist");
    if(!(*entry & PTE_V))
      panic("uvmcopy: page not present");

    uint64 src = PTE2PA(*entry);
    uint perm = PTE_FLAGS(*entry);

    char *copy = kalloc();
    if(copy == 0)
      goto err;
    memmove(copy, (char*)src, PGSIZE);

    if(mappages(new, va, PGSIZE, (uint64)copy, perm) != 0){
      // The page was never mapped, so uvmunmap below will not free it.
      kfree(copy);
      goto err;
    }
  }
  return 0;

 err:
  // Pages [0, va) were mapped successfully; undo them and free their memory.
  uvmunmap(new, 0, va / PGSIZE, 1);
  return -1;
}

现在不应该直接拷贝页,而是增加引用计数,改成如下:

// Copy-on-write version of uvmcopy: map the first sz bytes of old into new
// without allocating memory. Parent and child end up pointing at the same
// physical pages, and each shared page gets one more reference.
// Returns 0 on success. On failure returns -1 after unmapping what was
// installed in new; uvmunmap's do_free path drops the references taken here.
int
uvmcopy(pagetable_t old, pagetable_t new, uint64 sz)
{
  pte_t *pte;
  uint64 pa, i;
  uint flags;

  for(i = 0; i < sz; i += PGSIZE){
    if((pte = walk(old, i, 0)) == 0)
      panic("uvmcopy: pte should exist");
    if((*pte & PTE_V) == 0)
      panic("uvmcopy: page not present");
    pa = PTE2PA(*pte);
    // Only pages that are currently writable are turned into COW pages.
    // A page without PTE_W is either already COW (PTE_C is kept as is) or
    // genuinely read-only; tagging the latter with PTE_C would let the
    // fault handler hand out write permission the mapping never had.
    if(*pte & PTE_W)
      *pte = (*pte & ~PTE_W) | PTE_C;
    flags = PTE_FLAGS(*pte);
    if(mappages(new, i, PGSIZE, pa, flags) != 0){
      goto err;
    }
    // Count the child's mapping only after it really exists, so the error
    // path never releases a reference that was not taken.
    add_rc(pa);
  }
  return 0;

 err:
  uvmunmap(new, 0, i / PGSIZE, 1);
  return -1;
}

相应地,在解除映射时需要把引用计数减一。需要注意的是,引用计数是按物理页(物理地址)记录的,而uvmunmap在do_free为真时会对物理页调用kfree,所以不需要改uvmunmap本身,只要修改kfree函数即可(在kernel/kalloc.c中)

原来代码:

void
kfree(void *pa)
{
  struct run *r;

  if(((uint64)pa % PGSIZE) != 0 || (char*)pa < end || (uint64)pa >= PHYSTOP)
    panic("kfree");

  // Fill with junk to catch dangling refs.
  memset(pa, 1, PGSIZE);

  r = (struct run*)pa;

  acquire(&kmem.lock);
  r->next = kmem.freelist;
  kmem.freelist = r;
  release(&kmem.lock);
}

改成如下:只有当引用计数不大于1(即没有其他映射还在使用该页)时才真正释放页面,否则只把引用计数减一:

// Drop one reference to the physical page at pa and put the page back on
// the free list once no reference is left.
// Panics if pa is not page-aligned or lies outside [end, PHYSTOP).
void
kfree(void *pa)
{
  struct run *r;
  int *rc;

  if(((uint64)pa % PGSIZE) != 0 || (char*)pa < end || (uint64)pa >= PHYSTOP)
    panic("kfree");

  rc = &reference_count[idx_rc((uint64)pa)];

  // Decrement and test in one atomic step. A separate "read, then subtract"
  // lets two CPUs that release the same shared page both see a count above
  // one, so neither of them would ever free it.
  if(__sync_sub_and_fetch(rc, 1) > 0)
    return;

  // We held the last reference, or the count was already 0 (a page that was
  // never handed out by kalloc — presumably the allocator's initial fill).
  // Reset the counter BEFORE the page is published on the free list:
  // afterwards another CPU may kalloc() it and set the count to 1, which a
  // late write of 0 here would wipe out.
  *rc = 0;

  // Fill with junk to catch dangling refs.
  memset(pa, 1, PGSIZE);

  r = (struct run*)pa;

  acquire(&kmem.lock);
  r->next = kmem.freelist;
  kmem.freelist = r;
  release(&kmem.lock);
}

下面修改usertrap函数(在kernel/trap.c中)

原来代码:

// Handle a trap (system call, device interrupt or exception) that arrived
// while executing user code. Dispatches on scause, kills the process on
// anything it does not recognise, and finishes by calling usertrapret().
void
usertrap(void)
{
  int which_dev = 0;

  // SPP set means the trap did not come from user mode.
  if((r_sstatus() & SSTATUS_SPP) != 0)
    panic("usertrap: not from user mode");

  // send interrupts and exceptions to kerneltrap(),
  // since we're now in the kernel.
  w_stvec((uint64)kernelvec);

  struct proc *p = myproc();

  // save user program counter.
  p->trapframe->epc = r_sepc();

  if(r_scause() == 8){
    // system call

    if(p->killed)
      exit(-1);

    // sepc points to the ecall instruction,
    // but we want to return to the next instruction.
    p->trapframe->epc += 4;

    // an interrupt will change sstatus &c registers,
    // so don't enable until done with those registers.
    intr_on();

    syscall();
  } else if((which_dev = devintr()) != 0){
    // ok
  } else {
    // Neither a system call nor a recognised device interrupt:
    // report the trap registers and mark the process to be killed.
    printf("usertrap(): unexpected scause %p pid=%d\n", r_scause(), p->pid);
    printf("            sepc=%p stval=%p\n", r_sepc(), r_stval());
    p->killed = 1;
  }

  if(p->killed)
    exit(-1);

  // give up the CPU if this is a timer interrupt.
  if(which_dev == 2)
    yield();

  usertrapret();
}

现在需要增加当scause为13或15时的处理

改完后的代码为:

void
usertrap(void)
{
  int which_dev = 0;

  if((r_sstatus() & SSTATUS_SPP) != 0)
    panic("usertrap: not from user mode");

  // send interrupts and exceptions to kerneltrap(),
  // since we're now in the kernel.
  w_stvec((uint64)kernelvec);

  struct proc *p = myproc();

  // save user program counter.
  p->trapframe->epc = r_sepc();

  if(r_scause() == 8){
    // system call

    if(p->killed)
      exit(-1);

    // sepc points to the ecall instruction,
    // but we want to return to the next instruction.
    p->trapframe->epc += 4;

    // an interrupt will change sstatus &c registers,
    // so don't enable until done with those registers.
    intr_on();

    syscall();
  } else if((which_dev = devintr()) != 0){
// ok
} else {
      uint64 va = PGROUNDDOWN(r_stval());
      if(va >= MAXVA){
          p->killed = 1;
          exit(-1);
      }
      pte_t* pte = walk(p->pagetable,va,0);
      if(pte == 0){
          p->killed = 1;
          exit(-1);
      }
      uint64 pa = PTE2PA(*pte);
      uint64 flag = PTE_FLAGS(*pte);
      if((r_scause() == 13 || r_scause() == 15) && (flag & PTE_C) ){
          char* mem = kalloc();
          if(mem == 0){
              printf("here\n");
              p->killed = 1;
              exit(-1);
          }
          memmove(mem,(char*)pa,PGSIZE);
          kfree((char*)pa);
          *pte = PA2PTE(mem) | (flag & ~PTE_C) | PTE_W;
      }
      else{
          printf("usertrap(): unexpected scause %p pid=%d\n", r_scause(), p->pid);
          printf("            sepc=%p stval=%p\n", r_sepc(), r_stval());
          p->killed = 1;
      }
  }

  if(p->killed)
    exit(-1);

  // give up the CPU if this is a timer interrupt.
  if(which_dev == 2)
    yield();

  usertrapret();
}

这部分内容就是 分配页面,并且修改标志位,同时调用kfree进行处理,减少一次引用计数。

然后修改kernel/vm.c的copyout函数。

原来代码为:

// Copy len bytes from kernel buffer src to virtual address dstva in the
// given page table, one destination page at a time.
// Returns 0 on success, -1 if walkaddr() yields no physical address for a page.
int
copyout(pagetable_t pagetable, uint64 dstva, char *src, uint64 len)
{
  while(len > 0){
    uint64 page_va = PGROUNDDOWN(dstva);
    uint64 page_pa = walkaddr(pagetable, page_va);
    if(page_pa == 0)
      return -1;

    // Bytes available from dstva to the end of this page, capped at len.
    uint64 offset = dstva - page_va;
    uint64 chunk = PGSIZE - offset;
    if(chunk > len)
      chunk = len;

    memmove((void *)(page_pa + offset), src, chunk);

    src += chunk;
    len -= chunk;
    // Every page after the first is written from its start.
    dstva = page_va + PGSIZE;
  }
  return 0;
}

现在改为:

// Copy len bytes from kernel buffer src to virtual address dstva in the
// given page table. A destination page tagged PTE_C is first replaced by a
// private writable copy, as the page-fault path would do for a user store.
// Returns 0 on success. Returns -1 if a destination page is out of range,
// unmapped, not user-accessible, not writable, or if no memory is available
// for a private copy.
int
copyout(pagetable_t pagetable, uint64 dstva, char *src, uint64 len)
{
  uint64 n, va0, pa0, flag;
  pte_t* pte;
  while(len > 0){
    va0 = PGROUNDDOWN(dstva);
    // walk() must not be called with an address at or above MAXVA.
    if(va0 >= MAXVA)
        return -1;
    pte = walk(pagetable,va0,0);
    // walk() alone does not check permissions. Without the PTE_V / PTE_U
    // tests a user-supplied address could make the kernel write into a
    // kernel-only page that is mapped in the user page table.
    if(pte == 0 || (*pte & PTE_V) == 0 || (*pte & PTE_U) == 0)
        return -1;
    flag = PTE_FLAGS(*pte);
    // Neither writable nor COW means a read-only mapping: refuse to
    // write through it.
    if((flag & (PTE_W | PTE_C)) == 0)
        return -1;
    pa0 = PTE2PA(*pte);
    if(flag & PTE_C){
        char* mem = kalloc();
        if(mem == 0){
            return -1;
        }
        memmove(mem,(char*)pa0,PGSIZE);
        // Install the private copy before dropping the reference to the
        // shared page.
        *pte = PA2PTE((uint64)mem) | (flag & ~PTE_C) | PTE_W;
        kfree((char*)pa0);
        pa0 = (uint64)mem;
    }
    n = PGSIZE - (dstva - va0);
    if(n > len)
      n = len;
    memmove((void *)(pa0 + (dstva - va0)), src, n);

    len -= n;
    src += n;
    dstva = va0 + PGSIZE;
  }
  return 0;
}

别忘了更改kalloc函数,位于(kernel/kalloc.c中)

原来为:

// Take one page off the allocator's free list.
// Returns a pointer to the page, filled with junk bytes, or 0 when the
// free list is empty.
void *
kalloc(void)
{
  struct run *page;

  // Pop the head of the free list, if there is one.
  acquire(&kmem.lock);
  page = kmem.freelist;
  if(page != 0)
    kmem.freelist = page->next;
  release(&kmem.lock);

  if(page == 0)
    return (void*)page;

  memset((char*)page, 5, PGSIZE); // fill with junk
  return (void*)page;
}

现在需要在分配页的时候初始化引用计数(设置为1)

也就是:

void *
kalloc(void)
{
  struct run *r;

  acquire(&kmem.lock);
  r = kmem.freelist;
  if(r)
    kmem.freelist = r->next;
  release(&kmem.lock);

  if(r)
    memset((char*)r, 5, PGSIZE); // fill with junk

  if(r)
    reference_count[idx_rc((uint64)r)] = 1;
  return (void*)r;
}

最后在kernel/defs.h中添加声明:

// kalloc.c
// Reference-count helpers for shared physical pages; the argument is a
// physical address.
void            add_rc(uint64);   // one more reference to the page
void            sub_rc(uint64);   // one fewer reference to the page

// vm.c
// walk is declared here so code outside vm.c (the page-fault path in
// usertrap) can look up a PTE.
pte_t*          walk(pagetable_t, uint64,int);

这是最后的实验结果:

参考文献:

https://github.com/jlu-xiurui/MIT6.S081-2021-FALL/tree/master/lab6-cow

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值