xv6 页表

内核空间

[[book-riscv.pdf#page=35&selection=0,86,60,83|book-riscv, page 35]]

image.png

kvmmake

QEMU模拟含有RAM的计算机,RAM从物理地址0x80000000开始,至少延伸到0x86400000;xv6把自己实际使用的RAM上界称为PHYSTOP(宏定义是`#define PHYSTOP (KERNBASE + 128*1024*1024)`,即KERNBASE之上128MB)

下面这段代码都是在对uart,IO设备等进行恒等映射并设置了相应权限

// Kernel page table under construction: one page obtained from kalloc()
// and cleared to zero before any mapping is installed.
pagetable_t kpgtbl;

  // NOTE(review): in this excerpt the kalloc() result is passed to
  // memset() without a null check.
  kpgtbl = (pagetable_t) kalloc();
  memset(kpgtbl, 0, PGSIZE);

  // Each kvmmap() call below passes the same value as its 2nd and 3rd
  // argument (virtual address, then physical address -- the order used
  // by proc_mapstacks), so every region here is identity-mapped.

  // uart registers
  kvmmap(kpgtbl, UART0, UART0, PGSIZE, PTE_R | PTE_W);

  // virtio mmio disk interface
  kvmmap(kpgtbl, VIRTIO0, VIRTIO0, PGSIZE, PTE_R | PTE_W);

  // PLIC
  kvmmap(kpgtbl, PLIC, PLIC, 0x400000, PTE_R | PTE_W);

  // map kernel text executable and read-only.
  // Covers [KERNBASE, etext) with read + execute permission, no write.
  kvmmap(kpgtbl, KERNBASE, KERNBASE, (uint64)etext-KERNBASE, PTE_R | PTE_X);

  // map kernel data and the physical RAM we'll make use of.
  // Covers [etext, PHYSTOP) with read + write permission, no execute.
  kvmmap(kpgtbl, (uint64)etext, (uint64)etext, PHYSTOP-(uint64)etext, PTE_R | PTE_W);

下面这段代码将trampoline(具体实现在trampoline.S里面)映射到虚拟内存的最高处,并在下面用proc_mapstacks()映射了内核栈

    // map the trampoline for trap entry/exit to
    // the highest virtual address in the kernel.
    // Here the virtual address (TRAMPOLINE) and the physical address
    // (the address of the trampoline symbol) are passed separately,
    // so this mapping need not be an identity mapping.
    kvmmap(kpgtbl, TRAMPOLINE, (uint64)trampoline, PGSIZE, PTE_R | PTE_X);

    // map kernel stacks
    // (proc_mapstacks adds its mappings to the same kpgtbl).
    proc_mapstacks(kpgtbl);

Proc_mapstacks

Proc_mapstacks(kernel/proc.c:33)allocates a kernel stack for each process. It calls kvmmap to map each stack at the virtual address generated by KSTACK, which leaves room for the invalid stack-guard pages

// Give every slot of the process table its own kernel stack.
// For slot index i, one freshly allocated physical page is mapped
// read/write at KSTACK(i) in the kernel page table kpgtbl. Only
// PGSIZE bytes are mapped per slot, so the remainder of each slot
// stays invalid and serves as a guard page.
// Panics with "kalloc" if a physical page cannot be allocated.
void
proc_mapstacks(pagetable_t kpgtbl)
{
  for(int idx = 0; idx < NPROC; idx++){
    char *stack_pa = kalloc();
    if(stack_pa == 0)
      panic("kalloc");
    kvmmap(kpgtbl, KSTACK(idx), (uint64)stack_pa, PGSIZE, PTE_R | PTE_W);
  }
}

KSTACK给每个进程留出2*PGSIZE的虚拟地址空间,而proc_mapstacks只映射其中一页作为内核栈,剩下未映射的那一页就充当guard page;(p)+1则让第0个进程的栈也落在TRAMPOLINE下方。guard page没有物理内存与其相对应,它就只是个虚拟内存

// Virtual address of the kernel stack for process index p: each index
// gets a slot of 2*PGSIZE, counted downward from TRAMPOLINE.
#define KSTACK(p) (TRAMPOLINE - ((p)+1)* 2*PGSIZE)

mappages

关于PGROUNDDOWN:https://stackoverflow.com/questions/43289022/what-do-pgroundup-and-pgrounddown-in-xv6-mean

$\left\lfloor \frac{x+(y-1)}{y} \right\rfloor \cdot y$

PGROUNDUP就是这个式子在y为2的幂的情况下的位运算优化版,y只能是2的幂
被GPT坑了一把:它说对所有正整数都适用,实际上宏对任意正整数都能展开运行,但只有y是2的幂时结果才保证正确

 // Round sz up / a down to a multiple of PGSIZE by clearing the low
 // bits with the mask ~(PGSIZE-1). The mask trick only gives a correct
 // result when PGSIZE is a power of two.
 #define PGROUNDUP(sz)  (((sz)+PGSIZE-1) & ~(PGSIZE-1))
 #define PGROUNDDOWN(a) (((a)) & ~(PGSIZE-1))
// Install page-table entries in pagetable so that the virtual range
// [va, va + size) refers to physical memory beginning at pa, with the
// permission bits in perm plus PTE_V. Neither va nor size has to be
// page-aligned: the range is widened to whole pages.
// Returns 0 on success, or -1 if walk() could not allocate a
// page-table page it needed.
// Panics on size == 0 and on an attempt to map an already valid PTE.
int
mappages(pagetable_t pagetable, uint64 va, uint64 size, uint64 pa, int perm)
{
  if(size == 0)
    panic("mappages: size");

  // First and last page touched by the range; "size - 1" addresses the
  // final byte, so an exact multiple of PGSIZE does not spill over.
  uint64 a = PGROUNDDOWN(va);
  uint64 last = PGROUNDDOWN(va + size - 1);

  // Advance the virtual and the physical address together, one page
  // per iteration, stopping after the page at "last" has been mapped.
  for(;; a += PGSIZE, pa += PGSIZE){
    pte_t *pte = walk(pagetable, a, 1);
    if(pte == 0)
      return -1;
    if(*pte & PTE_V)
      panic("mappages: remap");
    *pte = PA2PTE(pa) | perm | PTE_V;
    if(a == last)
      break;
  }
  return 0;
}

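这段取得按页对齐的上下界:a是区间第一个字节所在页的起始地址,last是区间最后一个字节(va + size - 1)所在页的起始地址。这里的-1只是为了在size恰好是整页倍数时不多映射一页,并不是给guard page预留空间;guard page是由KSTACK宏留出来的。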

 // a: start of the page that holds the first byte of the range.
 a = PGROUNDDOWN(va);
 // last: start of the page that holds the final byte (va + size - 1).
 last = PGROUNDDOWN(va + size - 1);

walk

// Look up virtual address va in page table pagetable and return a
// pointer to its lowest-level (level-0) PTE. When alloc is non-zero,
// missing intermediate page-table pages are allocated and zeroed on
// the way down; otherwise (or if allocation fails) 0 is returned.
// Panics if va >= MAXVA.
//
// Sv39 uses three levels of page-table pages, each holding 512
// 64-bit PTEs. A 64-bit virtual address breaks down as:
//   39..63 -- must be zero.
//   30..38 -- 9 bits of level-2 index.
//   21..29 -- 9 bits of level-1 index.
//   12..20 -- 9 bits of level-0 index.
//    0..11 -- 12 bits of byte offset within the page.
pte_t *
walk(pagetable_t pagetable, uint64 va, int alloc)
{
  if(va >= MAXVA)
    panic("walk");

  for(int level = 2; level > 0; level--){
    pte_t *entry = &pagetable[PX(level, va)];

    // Valid entry: follow it to the next-level table.
    if(*entry & PTE_V){
      pagetable = (pagetable_t)PTE2PA(*entry);
      continue;
    }

    // Missing entry: give up unless the caller asked for allocation.
    if(!alloc)
      return 0;
    pagetable = (pde_t*)kalloc();
    if(pagetable == 0)
      return 0;

    // Start from an empty table and link it into the parent entry.
    memset(pagetable, 0, PGSIZE);
    *entry = PA2PTE(pagetable) | PTE_V;
  }

  return &pagetable[PX(0, va)];
}
// Convert a physical address into PTE form: drop the low 12 bits, then
// shift left by 10 so that the low 10 bits of the result are zero.
// The argument is parenthesized so that an expression argument such as
// PA2PTE(p + 1) is cast to uint64 as a whole, not just its first operand.
#define PA2PTE(pa) ((((uint64)(pa)) >> 12) << 10)

先右移12位去掉物理地址的页内偏移量,再左移10位,把低10位留给PTE的flag

// Extract the page-table index for the given level from virtual
// address va: shift right by PXSHIFT(level), then mask with PXMASK.
#define PX(level,va) ((((uint64) (va)) >> PXSHIFT(level)) & PXMASK)

kvminithart()

//vm.c

// Switch h/w page table register to the kernel's page table,
// and enable paging.
// Takes no arguments; declared with (void) so the definition is a
// real prototype rather than an old-style empty parameter list.
void
kvminithart(void)
{
  // Load satp with the value MAKE_SATP builds from kernel_pagetable.
  w_satp(MAKE_SATP(kernel_pagetable));
  // Flush the TLB after the satp write.
  sfence_vma();
}


//riscv.h

// supervisor address translation and protection;
// holds the address of the page table.
// Writes x into the satp CSR using a single csrw instruction.
static inline void 
w_satp(uint64 x)
{
  // x is passed in a general-purpose register ("r" constraint).
  asm volatile("csrw satp, %0" : : "r" (x));
}

// flush the TLB.
// Takes no arguments; declared with (void) so the definition is a
// real prototype rather than an old-style empty parameter list.
static inline void
sfence_vma(void)
{
  // the zero, zero means flush all TLB entries.
  asm volatile("sfence.vma zero, zero");
}
// Build a satp value from a page-table address: the address shifted
// right by 12, OR-ed with SATP_SV39.
// The argument is parenthesized so that an expression argument is cast
// to uint64 as a whole, not just its first operand.
#define MAKE_SATP(pagetable) (SATP_SV39 | (((uint64)(pagetable)) >> 12))

pagetable地址转为SATP标准格式

kvminithart()用w_satp()把kernel_pagetable写入satp开启虚拟内存,再用sfence_vma()刷新TLB

trampoline.S

跳板代码,从用户空间切入内核

主要是保存各种寄存器现场,这方面原理与之前写过的协程库[[实验M2 实现协程库libco | libco]]相同,但之前是基于x86汇编,此处实现为riscv汇编

物理内存分配

初始化

extern char end[]; // first address after kernel.
                   // defined by kernel.ld.

// Initialize the physical page allocator: set up the lock that guards
// kmem, then hand the range from end up to PHYSTOP to freerange().
// Declared with (void) so the definition is a real prototype rather
// than an old-style empty parameter list.
void
kinit(void)
{
  initlock(&kmem.lock, "kmem");
  freerange(end, (void*)PHYSTOP);
}

kinit()初始化锁,然后将end到PHYSTOP的内存都通过freerange()放进freelist,是先有了这片空间,然后free进了freelist

freerange

// Pass every whole page inside [pa_start, pa_end) to kfree().
// The start is first rounded up to a page boundary; a trailing
// partial page that would extend past pa_end is not freed.
void
freerange(void *pa_start, void *pa_end)
{
  char *limit = (char*)pa_end;
  char *page = (char*)PGROUNDUP((uint64)pa_start);

  while(page + PGSIZE <= limit){
    kfree(page);
    page += PGSIZE;
  }
}

freerange()使用了PGROUNDUP把起始地址向上对齐到PGSIZE

kfree

// Return the page of physical memory starting at pa to the free list.
// pa must be page-aligned and lie in [end, PHYSTOP); otherwise the
// function panics with "kfree". Nothing is returned, so the return
// type is spelled out as void (implicit int is not valid C).
void
kfree(void *pa)
{
  struct run *r;

  if(((uint64)pa % PGSIZE) != 0 || (char*)pa < end || (uint64)pa >= PHYSTOP)
    panic("kfree");

  // Fill with junk to catch dangling refs.
  memset(pa, 1, PGSIZE);

  // The freed page itself stores the list node.
  r = (struct run*)pa;

  // Push the page onto the head of the free list under kmem.lock.
  acquire(&kmem.lock);
  r->next = kmem.freelist;
  kmem.freelist = r;
  release(&kmem.lock);
}

kfree()只管按PAGESIZE填充空间后加到freelist,所以调用kfree()的调用者要自行进行对齐

释放时使用链表头插法,将被释放空间插在原本的空闲空间前面,然后将freelist指向被释放的空间成为新的起始结点

进程地址空间

When a process asks xv6 for more user memory, xv6 first uses kalloc to allocate physical pages. It then adds PTEs to the process’s page table that point to the new physical pages. Xv6 sets the PTE_W, PTE_X, PTE_R, PTE_U, and PTE_V flags in these PTEs. Most processes do not use the entire user address space; xv6 leaves PTE_V clear in unused PTEs.

[[book-riscv.pdf#page=38&selection=47,2,75,21|book-riscv, page 38]]

当进程申请用户内存

  1. kalloc()分配物理内存
  2. 添加PTE到进程页表,指向新的物理内存
  3. 设置PTE的flag,PTE_W,PTE_X,PTE_R,PTE_U,PTE_V
  4. 当内存没有被真正使用时,PTE_V标记为clear

image.png

Figure 3.4 shows the layout of the user memory of an executing process in xv6 in more detail. The stack is a single page, and is shown with the initial contents as created by exec. Strings containing the command-line arguments, as well as an array of pointers to them, are at the very top of the stack. Just under that are values that allow a program to start at main as if the function main(argc, argv) had just been called.

[[book-riscv.pdf#page=38&selection=82,0,96,21|book-riscv, page 38]]

图3.4更详细地展示了xv6中一个正在执行的进程的用户内存布局。栈只有一页,图中画出了由exec()创建的初始内容。栈的最顶部是包含命令行参数的字符串,以及指向它们的指针数组。再往下是一些值,使程序可以从main开始执行,就像main(argc, argv)刚刚被调用一样

举个栗子

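| 栈结构 | 实际内容 | 注解 |
| --- | --- | --- |
| argument 2 | "." | |
| argument 1 | "-a" | |
| argument 0 | "ls" | 程序的名字 |
| 0 | 0 | 参数列表的结束 |
| addr of argument 2 | "."的首地址 | argv[2] |
| addr of argument 1 | "-a"的首地址 | argv[1] |
| addr of argument 0 | "ls"的首地址 | argv[0] |
| argc | 3 | int argc |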

假设有个ls.c的main函数声明:

//ls.c
// Minimal entry point used to illustrate the initial stack layout;
// argc and argv are what the initial stack contents provide.
int main(int argc, char *argv[]) { return 0; }
ls -a .

它的main函数初始栈结构应该就是上面的表格所展示的样子

sbrk

当参数为正值,返回新分配的内存的起始点(其实就是调用前的brk地址,也就是调用前proc的size)
当参数为负值,将通过growproc()里的uvmdealloc()释放相应大小的内存,返回值同样是调用前proc的size

// sbrk system call: change the calling process's memory size by n
// bytes, where n is system call argument 0 and may be negative.
// Returns the size before the change, or -1 (converted to uint64) if
// the argument cannot be fetched or growproc() fails.
uint64
sys_sbrk(void)
{
  // Held as uint64 because it is returned as uint64; an int would
  // truncate or sign-extend a size that does not fit in 31 bits.
  uint64 addr;
  int n;

  if(argint(0, &n) < 0)
    return -1;
  // Record the old size first: growproc() updates p->sz.
  addr = myproc()->sz;
  if(growproc(n) < 0)
    return -1;
  return addr;
}
// Grow or shrink user memory by n bytes.
// Return 0 on success, -1 on failure.
int
growproc(int n)
{
  uint sz;
  struct proc *p = myproc();

  sz = p->sz;
  if(n > 0){
    if((sz = uvmalloc(p->pagetable, sz, sz + n)) == 0) {
      return -1;
    }
  } else if(n < 0){
    sz = uvmdealloc(p->pagetable, sz, sz + n);
  }
  p->sz = sz;
  return 0;
}

疑问

  1. 为什么有时物理地址作为参数传入时用的是void *pa,而有时地址以整型传入uint64 kstack

据我观察,在xv6的实现中,不明确的地址有很多类型,uint64,void*,char*,貌似在不行使寻址能力的内核实现中会用uint64将其视为普通的64位整型进行运算,char*应该是想按字节

//morecore(uint)
char *p;
p = sbrk(nu * sizeof(Header));

//sys_sbrk系统调用实现中,地址被当做uint64
uint64 sys_sbrk(void);

uvmalloc中更是精神分裂,一会char*,一会kalloc()返回值是void*,而kalloc()中的地址还是struct run*强转过来的,然后传入mappages()时转成uint64

kalloc()是为了返回的地址可以被直接转成其它类型地址,所以用了void*
kalloc()里的struct run是链表结点,链起了所有的空闲页:每个空闲页的首个64位(8字节)空间存放next指针,指向下一个空闲页的首地址
mappages()需要作为整型的地址是因为它内部需要对于目标地址进行PGROUNDDOWN这种数值操作,而没有寻址操作。但是freerange也类似,除了内部需要调用kfree()

// Sketch only: illustrates that kalloc() hands its result back as
// void*, although inside the function it is held as a struct run*.
void* kalloc() {
	struct run *r;
	// (allocation logic omitted in these notes)
	return (void*)r;
}

// Grow a process from oldsz to newsz bytes by allocating zeroed
// physical pages and mapping them into pagetable with
// PTE_W|PTE_X|PTE_R|PTE_U. Neither size needs to be page-aligned.
// Returns newsz on success and 0 on error; if newsz < oldsz nothing
// is done and oldsz is returned.
uint64
uvmalloc(pagetable_t pagetable, uint64 oldsz, uint64 newsz)
{
  if(newsz < oldsz)
    return oldsz;

  // New pages begin at the first page boundary at or above oldsz.
  oldsz = PGROUNDUP(oldsz);

  uint64 va = oldsz;
  while(va < newsz){
    char *page = kalloc();
    if(page == 0)
      goto fail;
    memset(page, 0, PGSIZE);
    if(mappages(pagetable, va, PGSIZE, (uint64)page, PTE_W|PTE_X|PTE_R|PTE_U) != 0){
      // The page was never mapped, so it must be freed directly.
      kfree(page);
      goto fail;
    }
    va += PGSIZE;
  }
  return newsz;

fail:
  // Undo whatever this call mapped so far, back down to the rounded oldsz.
  uvmdealloc(pagetable, va, oldsz);
  return 0;
}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值