6.s081 lab3 xv6页表

动机:xv6中的每个进程都有一个用户页表,所有进程共享一个内核页表,为了实现更高的隔离性,我们要为每个进程分配一个自己的内核页表,避免所有进程因为共享内核代码和数据引发的系统错误。

 在内核空间,很多地方都是直接映射成物理地址

具体操作:

proc结构体中增加kpagetable作为进程的内核页表

pagetable_t kpagetable;      // per-process kernel page table (private to this process)

仿照 kvminit() 写一个用于初始化进程内核页表的函数 ukvminit

pagetable_t
ukvminit()
{
  pagetable_t kpagetable = (pagetable_t)kalloc();
  if(kpagetable==0){
    return kpagetable;
  }
  memset(kpagetable, 0, PGSIZE);

  // 照着kvinit设置
  // uart registers
  ukvmmap(kpagetable, UART0, UART0, PGSIZE, PTE_R | PTE_W);
  // virtio mmio disk interface
  ukvmmap(kpagetable, VIRTIO0, VIRTIO0, PGSIZE, PTE_R | PTE_W);
  // CLINT
  ukvmmap(kpagetable, CLINT, CLINT, 0x10000, PTE_R | PTE_W);
  // PLIC
  ukvmmap(kpagetable, PLIC, PLIC, 0x400000, PTE_R | PTE_W);
   // map kernel text executable and read-only.
  ukvmmap(kpagetable, KERNBASE, KERNBASE, (uint64)etext-KERNBASE, PTE_R | PTE_X);
  // map kernel data and the physical RAM we'll make use of.
  ukvmmap(kpagetable, (uint64)etext, (uint64)etext, PHYSTOP-(uint64)etext, PTE_R | PTE_W);
  // map the trampoline for trap entry/exit to
  // the highest virtual address in the kernel.
  ukvmmap(kpagetable, TRAMPOLINE, (uint64)trampoline, PGSIZE, PTE_R | PTE_X);

  return kpagetable;
}

在为进程分配空间时就执行内核页表初始化

static struct proc*
allocproc(void)
{
  ...

  // rolle do
  // 为这个进程分配并初始化一个新的专属内核页
  p->kpagetable = ukvminit();
  if(p->kpagetable==0){
    freeproc(p);
    release(&p->lock);
    return 0;
  }

  // rolle do
  // 确保内核页表中有内核堆栈的映射
  uint64 va = KSTACK((int)(p - proc));
  pte_t pa = kvmpa(va);
  printf("va: %s",va);
  printf("pa: %s",pa);
  memset((void *)pa, 0, PGSIZE);
  ukvmmap(p->kpagetable, va, (uint64)pa, PGSIZE, PTE_R | PTE_W);
  p->kstack = va;

  ...
}

 在 scheduler 切换进程的时候, 需要把 satp 寄存器(页表基址)切换为该进程的内核页表, 并刷新 TLB. 注意在进程运行完、返回 scheduler 之后, 要重新切换回全局的内核页表 (kernel page table).

# kernel/proc.c
// Scheduler loop (excerpt; parts of the function are elided with "...").
// Scans the process table for a RUNNABLE process, installs that
// process's private kernel page table, switches to it with swtch(),
// and restores the global kernel page table once the process
// switches back.
void
scheduler(void)
{
  struct proc *p;
  struct cpu *c = mycpu();
  
  c->proc = 0;
  for(;;){
    // Avoid deadlock by ensuring that devices can interrupt.
    intr_on();
    
    int found = 0;
    for(p = proc; p < &proc[NPROC]; p++) {
      acquire(&p->lock);
      if(p->state == RUNNABLE) {
        // Switch to chosen process.  It is the process's job
        // to release its lock and then reacquire it
        // before jumping back to us.
        p->state = RUNNING;
        c->proc = p;
        // Install the kernel page table of the process that is about
        // to run, then flush the TLB so stale translations are dropped.
        w_satp(MAKE_SATP(p->kpagetable));
        sfence_vma();
        swtch(&c->context, &p->context);

        // Process is done running for now.
        // It should have changed its p->state before coming back.
        // Switch back to the global kernel page table.
        kvminithart();
        c->proc = 0;

        found = 1;
      }
      release(&p->lock);
    }
    ...
}

在销毁一个进程时, 回收它的内核页表. 这里需要注意的是, 我们并不需要去回收内核页表所映射到的物理地址. 因为那些物理地址, 例如device mapping, 是全局共享的. 进程专属内核表只是全局内核表的一个复制. 但是间接映射所消耗分配的物理内存是需要回收的. 

pagetable可能有这样一个三级映射:

0x 810 (第一级) -> 0x 910 (第二级) -> 0x 1100(第三级) -> 0x 10000000L UART0

我们需要把 0x 810、0x 910、0x 1100 这三个页表页回收, 但是 UART0 不需要回收, 因为它是全局共享的.

都是仿造全局页表的回收函数来写的

# kernel/vm.c

// Clear the leaf mappings for npages pages starting at va in a
// per-process kernel page table, so that the page-table pages can be
// released afterwards. The physical pages the leaves point to are NOT
// freed. va must be page-aligned; pages that have no PTE, or whose PTE
// is not valid, are tolerated.
// Panics if va is unaligned or a valid PTE in the range is not a leaf.
void
ukvmunmap(pagetable_t pagetable, uint64 va, uint64 npages)
{
  uint64 a;
  pte_t *pte;

  if((va % PGSIZE) != 0)
    panic("ukvmunmap: not aligned");

  for(a = va; a < va + npages*PGSIZE; a += PGSIZE){
    // No PTE exists for this address: nothing to clear. Writing
    // through the null pointer here would crash the kernel.
    if((pte = walk(pagetable, a, 0)) == 0)
      continue;
    // A valid entry carrying only PTE_V points at a lower-level
    // page table and must not be treated as a leaf.
    if((*pte & PTE_V) && PTE_FLAGS(*pte) == PTE_V)
      panic("ukvmunmap: not a leaf");
    *pte = 0;
  }
}

// Recursively free the page-table pages of a per-process kernel page
// table. Modelled on freewalk, except that a leaf entry that is still
// present does not cause a panic: every entry is simply cleared.
// The physical pages referenced by leaf entries are not freed here;
// per the original note there is only one copy of them and the code
// that frees the user page table is responsible for releasing them.
// This function only tears down the indirection pages of kpagetable.
void
ufreewalk(pagetable_t pagetable)
{
  // A page table holds 2^9 = 512 PTEs.
  for(int idx = 0; idx < 512; idx++){
    pte_t entry = pagetable[idx];
    int is_interior = (entry & PTE_V) && (entry & (PTE_R|PTE_W|PTE_X)) == 0;

    // Valid with no R/W/X bits: the entry points to a lower-level table.
    if(is_interior)
      ufreewalk((pagetable_t)PTE2PA(entry));

    // Interior or leaf, the slot ends up cleared.
    pagetable[idx] = 0;
  }
  kfree((void*)pagetable);
}

// helper function to first free all leaf mapping
// of a per-process kernel table but do not free the physical address
// and then remove all 3-levels indirection and the physical address
// for this kernel page itself
void freeprockvm(struct proc* p) {
  pagetable_t kpagetable = p->kpagetable;
  // reverse order of allocation
  // 按分配顺序的逆序来销毁映射, 但不回收物理地址
  ukvmunmap(kpagetable, p->kstack, PGSIZE/PGSIZE);
  ukvmunmap(kpagetable, TRAMPOLINE, PGSIZE/PGSIZE);
  ukvmunmap(kpagetable, (uint64)etext, (PHYSTOP-(uint64)etext)/PGSIZE);
  ukvmunmap(kpagetable, KERNBASE, ((uint64)etext-KERNBASE)/PGSIZE);
  ukvmunmap(kpagetable, PLIC, 0x400000/PGSIZE);
  ukvmunmap(kpagetable, CLINT, 0x10000/PGSIZE);
  ukvmunmap(kpagetable, VIRTIO0, PGSIZE/PGSIZE);
  ukvmunmap(kpagetable, UART0, PGSIZE/PGSIZE);
  ufreewalk(kpagetable);
}

======================================================================
# kernel/proc.c
// Release a proc structure together with everything attached to it:
// the trapframe, the user page table (and user pages), and the
// per-process kernel page table.
// The caller must hold p->lock.
static void
freeproc(struct proc *p)
{
  // Trapframe page.
  if(p->trapframe != 0)
    kfree((void*)p->trapframe);
  p->trapframe = 0;

  // User page table and the memory it maps.
  if(p->pagetable != 0)
    proc_freepagetable(p->pagetable, p->sz);
  p->pagetable = 0;

  // Private kernel page table; freeprockvm() reads p->kstack, so the
  // stack address is cleared only afterwards.
  if(p->kpagetable != 0){
    freeprockvm(p);
    p->kpagetable = 0;
  }
  p->kstack = 0;

  // Reset the remaining bookkeeping and mark the slot reusable.
  p->sz = 0;
  p->pid = 0;
  p->parent = 0;
  p->name[0] = 0;
  p->chan = 0;
  p->killed = 0;
  p->xstate = 0;
  p->state = UNUSED;
}

现在已经为每个进程完成了自己的内核页表的分配和回收了,现在将进一步优化:为了方便让进入内核的进程获取用户态的地址空间时,不需要再切换到用户态读取,直接在内核中实现查询和地址翻译。设计了一种将用户态页表塞到内核页表中的机制。(空间换时间)

首先需要了解: xv6 内核在 0 ~ 0xC000000 (PLIC 寄存器的首地址) 这段虚拟地址范围内没有任何映射, 这段空间正好可以用来存放即将塞入内核页表的用户态映射。所以我们需要把用户页表的映射塞到 0 ~ 0xC000000 中去。

Xv6 uses virtual addresses that start at zero for user address spaces, and luckily the kernel's memory starts at higher addresses. However, this scheme does limit the maximum size of a user process to be less than the kernel's lowest virtual address. After the kernel has booted, that address is 0xC000000 in xv6, the address of the PLIC registers;

 实际上是一个浅拷贝,只拷贝指针(页表项),不拷贝实际指向的东西

# kernel/vm.c

// Install PTEs mapping [va, va + size) to physical memory starting at
// pa with permissions perm. Described by the original note as the same
// as mappages, except that an already-present mapping is silently
// overwritten instead of causing a panic.
// Returns 0 on success, -1 if walk() cannot provide a PTE.
int umappages(pagetable_t pagetable, uint64 va, uint64 size, uint64 pa, int perm) {
  uint64 cur = PGROUNDDOWN(va);
  uint64 final = PGROUNDDOWN(va + size - 1);

  for(;; cur += PGSIZE, pa += PGSIZE){
    pte_t *slot = walk(pagetable, cur, 1);
    if(slot == 0)
      return -1;

    // Overwrite whatever was there before.
    *slot = PA2PTE(pa) | perm | PTE_V;

    if(cur == final)
      break;
  }
  return 0;
}

// Copy the mappings for virtual addresses [begin, end) from oldpage
// into newpage, clearing the PTE_U bit on each copied entry.
// Only the PTEs are copied; both tables end up referring to the same
// physical pages. begin is rounded up to a page boundary first.
// Returns 0 on success. On failure, the mappings this call added to
// newpage are removed again (the physical pages are left alone) and
// -1 is returned.
// Panics if oldpage has no valid mapping for an address in the range.
int
pagecopy(pagetable_t oldpage, pagetable_t newpage, uint64 begin, uint64 end) {
  pte_t *pte;
  uint64 pa, i;
  uint flags;
  begin = PGROUNDUP(begin);

  for (i = begin; i < end; i += PGSIZE) {
    if ((pte = walk(oldpage, i, 0)) == 0)
      panic("pagecopy walk oldpage nullptr");
    if ((*pte & PTE_V) == 0)
      panic("pagecopy oldpage pte not valid");
    pa = PTE2PA(*pte);
    flags = PTE_FLAGS(*pte) & (~PTE_U); // strip the user-access bit
    if (umappages(newpage, i, PGSIZE, pa, flags) != 0) {
      goto err;
    }
  }
  return 0;

err:
  // Undo only what this call mapped, i.e. [begin, i). Mappings below
  // begin were installed by earlier calls and must stay. do_free is 0
  // because the physical pages are still referenced by oldpage.
  uvmunmap(newpage, begin, (i - begin) / PGSIZE, 0);
  return -1;
}

紧接着, 我们在 fork()、exec()、sbrk() 和 userinit() 的相应位置对 pagetable 与 kpagetable 进行同步.

# kernel/proc.c
// Create a new process, copying the parent.
// Sets up child kernel stack to return as if from fork() system call.
// (Excerpt: parts of the function are elided with "...".)
int
fork(void)
{
  ...

  // Copy user memory from parent to child.
  if(uvmcopy(p->pagetable, np->pagetable, p->sz) < 0){
    freeproc(np);
    release(&np->lock);
    return -1;
  }
  np->sz = p->sz;

  // Mirror the child's user mappings [0, np->sz) into the child's
  // private kernel page table. On failure the child is released the
  // same way as in the uvmcopy() failure path.
  if (pagecopy(np->pagetable, np->kpagetable, 0, np->sz) != 0) {
    freeproc(np);
    release(&np->lock);
    return -1;
  }
  np->parent = p;

  ...
  return pid;
}
# kernel/proc.c
// Create a new process, copying the parent.
// Sets up child kernel stack to return as if from fork() system call.
// NOTE(review): this listing repeats the fork() excerpt shown just
// before it; the surrounding text mentions exec(), so an exec()
// listing may have been intended here -- confirm with the author.
int
fork(void)
{
  ...

  // Copy user memory from parent to child.
  if(uvmcopy(p->pagetable, np->pagetable, p->sz) < 0){
    freeproc(np);
    release(&np->lock);
    return -1;
  }
  np->sz = p->sz;

  // Mirror the child's user mappings [0, np->sz) into the child's
  // private kernel page table; release the child on failure.
  if (pagecopy(np->pagetable, np->kpagetable, 0, np->sz) != 0) {
    freeproc(np);
    release(&np->lock);
    return -1;
  }
  np->parent = p;

  ...
  return pid;
}
# kernel/proc.c
// Grow or shrink user memory by n bytes, keeping the process's
// private kernel page table in sync with its user page table.
// Return 0 on success, -1 on failure. On failure the process size
// and its user memory are left as they were before the call.
int
growproc(int n)
{
  uint sz;
  struct proc *p = myproc();

  sz = p->sz;
  if(n > 0){
    // User addresses are mirrored into the kernel page table, so the
    // user address space must not grow past PLIC.
    if (sz + n > PLIC || (sz = uvmalloc(p->pagetable, sz, sz + n)) == 0) {
      return -1;
    }
    // Incremental sync of the newly added range [old size, new size).
    if (pagecopy(p->pagetable, p->kpagetable, p->sz, sz) != 0) {
      // Roll back the growth of the user page table; otherwise the
      // freshly allocated pages stay mapped beyond p->sz and leak.
      uvmdealloc(p->pagetable, sz, p->sz);
      return -1;
    }
  } else if(n < 0){
    sz = uvmdealloc(p->pagetable, sz, sz + n);
    if (sz != p->sz) {
      // Shrinking sync: drop the mirrored mappings for the removed
      // range [new size, old size) without freeing physical pages
      // (uvmdealloc has already handled the user side).
      uvmunmap(p->kpagetable, PGROUNDUP(sz), (PGROUNDUP(p->sz) - PGROUNDUP(sz)) / PGSIZE, 0);
    }
  }
  // NOTE(review): ukvminithard() is not shown here; presumably it
  // reloads satp with this page table and flushes the TLB -- confirm.
  ukvminithard(p->kpagetable);
  p->sz = sz;
  return 0;
}
# kernel/proc.c
// Set up first user process.
// (Excerpt: parts of the function are elided with "...".)
void
userinit(void)
{
  ...
  uvminit(p->pagetable, initcode, sizeof(initcode));
  p->sz = PGSIZE;

  // Mirror the initial user page into the process's private kernel
  // page table. A failure here would leave the first process without
  // the mirrored mapping, so stop instead of silently ignoring it.
  if(pagecopy(p->pagetable, p->kpagetable, 0, p->sz) != 0)
    panic("userinit: pagecopy");

  // prepare for the very first "return" from kernel to user.
  p->trapframe->epc = 0;      // user program counter
  p->trapframe->sp = PGSIZE;  // user stack pointer

  ...
}

所以我们的工作总结起来就是: 给每个进程加一个专属的内核页表, 并在 fork()、exec()、sbrk()、userinit() 等用户地址空间发生变化的地方, 把用户页表的映射(页表项)同步拷贝到内核页表的低地址部分, 从而让内核可以直接对用户虚拟地址进行查询和地址翻译

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值