文章目录
Print a page table (easy)
第一个实验很简单,按照freewalk()的方式遍历页表并输出即可。
在vm.c中实现vmprint
函数
// Recursively print every valid PTE reachable from `pagetable`.
// `level` is the depth of this table (0 for the table passed in by
// vmprint) and selects the ".." prefix printed before each entry.
static void
vmprintwalk(pagetable_t pagetable, int level)
{
  // A page table holds 2^9 = 512 entries.
  for (int idx = 0; idx < 512; idx++) {
    pte_t entry = pagetable[idx];
    if ((entry & PTE_V) == 0)
      continue;                 // skip invalid entries

    // One ".." group per level of depth; levels past 1 share a prefix.
    switch (level) {
    case 0:
      printf("..");
      break;
    case 1:
      printf(".. ..");
      break;
    default:
      printf(".. .. ..");
      break;
    }

    uint64 pa = PTE2PA(entry);
    printf("%d: pte %p pa %p\n", idx, entry, pa);

    // An entry with none of R/W/X set points to a lower-level page
    // table rather than a data page, so descend into it.
    if ((entry & (PTE_R | PTE_W | PTE_X)) == 0)
      vmprintwalk((pagetable_t)pa, level + 1);
  }
}
// Print the address of the given page table, then every valid PTE
// reachable from it (via vmprintwalk, starting at level 0).
void
vmprint(pagetable_t pagetable)
{
printf("page table %p\n", pagetable);
vmprintwalk(pagetable, 0);
}
不要忘记在defs.h中加上声明,以便在其它文件中使用。
然后在exec函数返回argc之前加上下面这段语句。
// exec.c
// Dump the user page table of the process with pid 1 just before
// exec returns.
if(p->pid==1)
vmprint(p->pagetable);
return argc; // this ends up in a0, the first argument to main(argc, argv)
A kernel page table per process (hard)
在这个实验中需要为每个进程提供一个内核页表。
根据提示,第一件事就是要在struct proc增加一个变量kpagetable,表示进程自己的内核页表。
// proc.h
// Per-process state
struct proc {
struct spinlock lock;
// p->lock must be held when using these:
enum procstate state; // Process state
struct proc *parent; // Parent process
void *chan; // If non-zero, sleeping on chan
int killed; // If non-zero, have been killed
int xstate; // Exit status to be returned to parent's wait
int pid; // Process ID
// these are private to the process, so p->lock need not be held.
uint64 kstack; // Virtual address of kernel stack
uint64 sz; // Size of process memory (bytes)
pagetable_t pagetable; // User page table
pagetable_t kpagetable; // Per-process kernel page table; scheduler() loads it into satp before running the process
struct trapframe *trapframe; // data page for trampoline.S
struct context context; // swtch() here to run process
struct file *ofile[NOFILE]; // Open files
struct inode *cwd; // Current directory
char name[16]; // Process name (debugging)
};
和proc.c中用proc_pagetable
函数创建进程用户页表类似,实现一个proc_kpagetable
函数用于创建进程内核页表。并将原来procinit
负责的创建进程内核栈的工作转在此函数中。
//proc.c
//
pagetable_t
proc_kpagetable(struct proc *p)
{
pagetable_t pagetable = kvmmake();
// Allocate a page for the process's kernel stack.
// Map it high in memory, followed by an invalid
// guard page.
char *pa = kalloc();
if (pa == 0)
panic("kalloc");
// kstack的虚拟地址可以在同一处。
uint64 va = KSTACK(0);
kvmmap(pagetable, va, (uint64)pa, PGSIZE, PTE_R | PTE_W);
p->kstack = va;
return pagetable;
}
由于此处创建的不是全局内核页表,而是每个进程自己私有的进程内核页表,所以kstack的虚拟地址可以用同一个。
kvmmake
函数是基于原来的kvminit
修改的。顺带也修改了kvmmap
函数,给它增加了一个参数,指定其操作的页表。
// vm.c
// Allocate a fresh page table and install the kernel mappings in it:
// identity mappings for UART0, VIRTIO0, CLINT, PLIC, kernel text and
// kernel data/RAM up to PHYSTOP, plus the trampoline page at
// TRAMPOLINE.
// Returns the new page table. Panics if the root page cannot be
// allocated; kvmmap() panics if any individual mapping fails.
pagetable_t
kvmmake()
{
  pagetable_t kpagetable = (pagetable_t)kalloc();
  // kalloc() returns 0 when out of memory; fail loudly here instead
  // of handing a null pointer to memset().
  if (kpagetable == 0)
    panic("kvmmake: kalloc");
  memset(kpagetable, 0, PGSIZE);

  // uart registers
  kvmmap(kpagetable, UART0, UART0, PGSIZE, PTE_R | PTE_W);

  // virtio mmio disk interface
  kvmmap(kpagetable, VIRTIO0, VIRTIO0, PGSIZE, PTE_R | PTE_W);

  // CLINT
  kvmmap(kpagetable, CLINT, CLINT, 0x10000, PTE_R | PTE_W);

  // PLIC
  kvmmap(kpagetable, PLIC, PLIC, 0x400000, PTE_R | PTE_W);

  // map kernel text executable and read-only.
  kvmmap(kpagetable, KERNBASE, KERNBASE, (uint64)etext - KERNBASE,
         PTE_R | PTE_X);

  // map kernel data and the physical RAM we'll make use of.
  kvmmap(kpagetable, (uint64)etext, (uint64)etext, PHYSTOP - (uint64)etext,
         PTE_R | PTE_W);

  // map the trampoline for trap entry/exit to
  // the highest virtual address in the kernel.
  kvmmap(kpagetable, TRAMPOLINE, (uint64)trampoline, PGSIZE, PTE_R | PTE_X);

  return kpagetable;
}
// Create the global kernel page table with kvmmake() and store it in
// kernel_pagetable.
void
kvminit()
{
kernel_pagetable = kvmmake();
}
// add a mapping of sz bytes from virtual address va to physical
// address pa, with permissions perm, to the given page table
// (the global kernel page table or a per-process one).
// panics with "kvmmap" if mappages() reports failure.
// does not flush TLB or enable paging.
void
kvmmap(pagetable_t pagetable, uint64 va, uint64 pa, uint64 sz, int perm)
{
if(mappages(pagetable, va, sz, pa, perm) != 0)
panic("kvmmap");
}
实现完进程内核页表的创建,还需要实现一个释放进程内核页表的函数。
// Release the page-table pages of a per-process kernel page table.
// Every valid entry is cleared; entries that point to a lower-level
// table are followed recursively so that table's page is freed too.
// Physical pages referenced by leaf entries are NOT freed here.
void
proc_freekpagetable(pagetable_t pagetable)
{
  // A page table holds 512 entries.
  for (int slot = 0; slot < 512; slot++) {
    pte_t entry = pagetable[slot];
    if (!(entry & PTE_V))
      continue;

    pagetable[slot] = 0;

    // Any of R/W/X set marks a leaf mapping: drop the entry only.
    if (entry & (PTE_R | PTE_W | PTE_X))
      continue;

    // Otherwise the entry names a child page table; free it as well.
    proc_freekpagetable((pagetable_t)PTE2PA(entry));
  }

  kfree((void *)pagetable);
}
proc_freekpagetable
只会递归地释放页表页,而不会释放页表所映射的内核物理页。但每个进程的kstack是进程独有的,所以回收进程时,其内核栈的物理页也需要释放。
// free a proc structure and the data hanging from it,
// including user pages, the kernel stack page and the
// per-process kernel page table.
// p->lock must be held.
static void
freeproc(struct proc *p)
{
  if(p->trapframe)
    kfree((void*)p->trapframe);
  p->trapframe = 0;

  if(p->pagetable)
    proc_freepagetable(p->pagetable, p->sz);
  p->pagetable = 0;

  if(p->kpagetable){
    // The kernel stack page belongs to this process alone, so unmap
    // it and free its physical page (do_free = 1) before the table
    // itself goes away; proc_freekpagetable() frees only the
    // page-table pages.
    if(p->kstack)
      uvmunmap(p->kpagetable, p->kstack, 1, 1);
    proc_freekpagetable(p->kpagetable);
  }
  // Clear both fields so a later freeproc() on this slot cannot unmap
  // through, or free, an already-released page table.
  p->kstack = 0;
  p->kpagetable = 0;

  p->sz = 0;
  p->pid = 0;
  p->parent = 0;
  p->name[0] = 0;
  p->chan = 0;
  p->killed = 0;
  p->xstate = 0;
  p->state = UNUSED;
}
修改scheduler
函数,让进程被调度前切换成自己的内核页表,其他时候则使用全局内核页表。
// proc.c
// Per-CPU scheduler loop; never returns.
// Repeatedly scans the process table for a RUNNABLE process, installs
// that process's kernel page table in satp, and switches to it with
// swtch(). As soon as the process switches back, the global
// kernel_pagetable is reinstalled, so this CPU never keeps a
// per-process table in satp while no process is running on it.
void
scheduler(void)
{
  struct proc *p;
  struct cpu *c = mycpu();

  c->proc = 0;
  for(;;){
    // Avoid deadlock by ensuring that devices can interrupt.
    intr_on();

    int found = 0;
    for(p = proc; p < &proc[NPROC]; p++) {
      acquire(&p->lock);
      if(p->state == RUNNABLE) {
        // Switch to chosen process. It is the process's job
        // to release its lock and then reacquire it
        // before jumping back to us.
        p->state = RUNNING;
        c->proc = p;

        // Run the process on its own kernel page table.
        w_satp(MAKE_SATP(p->kpagetable));
        sfence_vma();

        swtch(&c->context, &p->context);

        // Back in the scheduler: return to the global kernel page
        // table right away, while p->lock is still held. Once the lock
        // is released the process may be freed elsewhere, and its page
        // table must not still be installed here when that happens.
        w_satp(MAKE_SATP(kernel_pagetable));
        sfence_vma();

        // Process is done running for now.
        // It should have changed its p->state before coming back.
        c->proc = 0;

        found = 1;
      }
      release(&p->lock);
    }
#if !defined (LAB_FS)
    if(found == 0) {
      // Nothing to run; satp already holds kernel_pagetable here.
      intr_on();
      asm volatile("wfi");
    }
#else
    ;
#endif
  }
}
到这里这个实验还没有结束,如果此时运行make qemu的话,会发生如下错误
(gdb) bt
#0 panic (s=s@entry=0x800080f0 "kvmpa") at kernel/printf.c:125
#1 0x0000000080001272 in kvmpa (va=va@entry=274877898512) at kernel/vm.c:149
#2 0x0000000080006396 in virtio_disk_rw (b=b@entry=0x80017998 <bcache+24>, write=write@entry=0) at kernel/virtio_disk.c:206
#3 0x0000000080003186 in bread (dev=dev@entry=1, blockno=blockno@entry=1) at kernel/bio.c:99
#4 0x000000008000363a in readsb (sb=0x80020040 <sb>, dev=1) at kernel/fs.c:43
#5 fsinit (dev=dev@entry=1) at kernel/fs.c:43
#6 0x0000000080001ba8 in forkret () at kernel/proc.c:599
#7 0x0000000080001b64 in myproc () at kernel/proc.c:65
Backtrace stopped: frame did not save the PC
猜测原因是procinit
函数中原本会在全局内核页表上创建每个进程的内核栈,但这部分内容被移到了allocproc
中,xv6在执行到virtio_disk_rw
时会调用kvmpa
获取虚拟地址对应的物理地址。但kvmpa
是根据全局内核页表转换的,由于全局内核页表中不存在进程的内核栈,所以就会报错。
解决方法是修改kvmpa
,增加一个pagetable参数,让它根据传入的页表做地址转换。并在virtio_disk_rw
调用它时传入myproc()->kpagetable
。
// kernel/vm.c
// Translate virtual address va to a physical address by looking it up
// in the given page table.
// Panics with "kvmpa" if walk() finds no PTE or the PTE is not valid.
uint64
kvmpa(pagetable_t pagetable, uint64 va)
{
  pte_t *entry = walk(pagetable, va, 0);

  if(entry == 0 || (*entry & PTE_V) == 0)
    panic("kvmpa");

  // Physical page base plus the offset of va within its page.
  return PTE2PA(*entry) + (va % PGSIZE);
}
// kernel/virtio_disk.c
// Translate &buf0 through the calling process's kernel page table
// instead of the global one.
// NOTE(review): presumably buf0 lives on the process's kernel stack,
// which is mapped only in that per-process table — confirm.
disk.desc[idx[0]].addr = (uint64) kvmpa(myproc()->kpagetable, (uint64) &buf0);
此时第二个实验完成。
Simplify copyin/copyinstr (hard)
第三个实验要求copyin和copyinstr能够直接使用进程在用户态传进来的指针,也就是说,需要在进程的内核页表中和进程的用户页表做相同的映射。用户空间从0开始,为了不覆盖内核物理内存,映射的用户空间不能超过PLIC。也就是要将映射用户空间控制在[0,PLIC)范围内。
xv6 book中可以看到内核的PLIC下面还有一段叫做CLINT的空间,但实验提示似乎让我们不用在意这段内存是否被覆盖,这是因为这段空间仅在内核启动时会用到(xv6的第五章会说),进程在内核态中不需要用到这段内存,所以可以放心覆盖。
首先第一步就是修改copyin
和copyinstr
的函数体,让他们分别直接调用copyin_new
和copyinstr_new
// Thin wrapper: forwards all arguments to copyin_new() and returns
// its result unchanged.
// NOTE(review): copyin_new() is not shown here; presumably it reads
// srcva directly through the current (per-process kernel) page
// table — confirm.
int
copyin(pagetable_t pagetable, char *dst, uint64 srcva, uint64 len)
{
return copyin_new(pagetable, dst, srcva, len);
}
// Thin wrapper: forwards all arguments to copyinstr_new() and returns
// its result unchanged.
// NOTE(review): copyinstr_new() is not shown here; presumably it
// reads srcva directly through the current (per-process kernel) page
// table — confirm.
int
copyinstr(pagetable_t pagetable, char *dst, uint64 srcva, uint64 max)
{
return copyinstr_new(pagetable, dst, srcva, max);
}
接下来的任务就是让copyin_new
和copyinstr_new
中可以直接引用用户传来的指针。为此我们需要一个函数,用于将进程用户页表拷贝到进程内核页表。
// vm.c
// Copy the page mappings covering [vm, vm+sz) from page table src into
// page table dst. Only the PTEs are copied (with PTE_U cleared); no
// physical memory is allocated or copied.
//
// The range must lie below PLIC and must not wrap around.
// Copying starts at PGROUNDUP(vm), the same rounding the caller in
// growproc relies on when it grows from an unaligned size: a first
// page that is only partially covered is expected to be present in
// dst already.
//
// Returns 0 on success. Returns -1 if the range is rejected or a
// mapping cannot be installed; in the latter case the mappings added
// by this call are removed from dst again. Panics if a page inside
// the range is not mapped in src.
int
copymapping(pagetable_t src, pagetable_t dst, uint64 vm, uint64 sz)
{
  pte_t *pte;
  uint64 pa, i, start;
  uint flags;

  if (vm >= PLIC || vm + sz >= PLIC || vm + sz < vm) {
    return -1;
  }

  // Walk page-aligned addresses only. With an unaligned address, a
  // PGSIZE-sized mappagesv2() call would span two pages and give the
  // second one a wrong physical address.
  start = PGROUNDUP(vm);
  for (i = start; i < vm + sz; i += PGSIZE) {
    if ((pte = walk(src, i, 0)) == 0)
      panic("copymapping: pte should exist");
    if ((*pte & PTE_V) == 0)
      panic("copymapping: page not present");
    pa = PTE2PA(*pte);
    // clear PTE_U, so it can be accessed in kernel mode
    flags = PTE_FLAGS(*pte) & ~PTE_U;
    if (mappagesv2(dst, i, PGSIZE, pa, flags) != 0) {
      goto err;
    }
  }
  return 0;

err:
  // Undo exactly the pages mapped by this call, [start, i), without
  // freeing the physical pages: they still belong to src.
  if (i > start)
    uvmunmap(dst, start, (i - start) / PGSIZE, 0);
  return -1;
}
copymapping
函数整体跟uvmcopy
函数相似,但这里调用的是mappagesv2
函数,相比mappages
,它仅仅是少了对PTE_V的检查,以防止出现panic remap错误。当然直接用mappages
也是可以的,但是需要在使用copymapping
之前将dst页表在[vm,vm+sz)这段空间先前存在的映射解除,否则原来的mappages
检测到PTE_V标志时会引起panic remap。所以在这里选择用mappagesv2
减少工作量。
有了copymapping
,只需要在fork、growproc、exec、userinit等用户页表会发生变化的位置调用它即可。
在fork中:
// proc.c
// Excerpt of fork(): only the part that copies the parent's memory
// into the child and mirrors it into the child's kernel page table is
// shown; the rest is elided with "// ...".
int
fork(void)
{
// ...
// Copy user memory from parent to child.
if(uvmcopy(p->pagetable, np->pagetable, p->sz) < 0){
freeproc(np);
release(&np->lock);
return -1;
}
// Copy child's page mapping to kernel page mapping
// (np->sz is assigned only below, so the parent's size p->sz is used
// as the length here). On failure the child is torn down the same way
// as when uvmcopy fails.
if (copymapping(np->pagetable, np->kpagetable, 0, p->sz) < 0) {
freeproc(np);
release(&np->lock);
return -1;
}
np->sz = p->sz;
np->parent = p;
// ...
return pid;
}
在growproc中:
// proc.c
// Grow or shrink the current process's user memory by n bytes and
// keep the process's kernel page table in step with the user page
// table.
// Returns 0 on success, -1 on failure. On failure the user page table
// is left at its previous size.
int
growproc(int n)
{
  uint sz;
  struct proc *p = myproc();

  sz = p->sz;
  if(n > 0){
    if((sz = uvmalloc(p->pagetable, sz, sz + n)) == 0) {
      return -1;
    }
    // Mirror the newly added range into the kernel page table.
    if (copymapping(p->pagetable, p->kpagetable, p->sz, n) < 0) {
      // Roll back the user-side growth; otherwise the new pages would
      // stay mapped beyond p->sz.
      uvmdealloc(p->pagetable, sz, p->sz);
      return -1;
    }
  } else if(n < 0){
    sz = uvmdealloc(p->pagetable, sz, sz + n);
    // Drop the matching mappings from the kernel page table without
    // freeing physical memory (the last argument to uvmunmap is 0).
    if (PGROUNDUP(sz) < PGROUNDUP(p->sz)) {
      uvmunmap(p->kpagetable, PGROUNDUP(sz),
               (PGROUNDUP(p->sz) - PGROUNDUP(sz)) / PGSIZE, 0);
    }
  }
  p->sz = sz;
  return 0;
}
在exec中:
// Excerpt of exec(): only the tail is shown, from freeing the old
// user image through the shared error path; the rest is elided with
// "// ...".
int
exec(char *path, char **argv)
{
// ...
proc_freepagetable(oldpagetable, oldsz);
// Mirror the new user image, [0, p->sz), into the process's kernel
// page table.
// NOTE(review): oldpagetable has already been freed above. If
// `pagetable` is the table now installed as p->pagetable (the commit
// step is elided here — confirm), then `goto bad` frees the live
// image and returns -1 to a process that no longer has a valid
// address space. Consider doing this copy before the old image is
// released.
// NOTE(review): this adds the new mappings only; it looks as if
// kernel-page-table entries left over from a larger old image are not
// removed here — verify.
if (copymapping(p->pagetable, p->kpagetable, 0, p->sz) < 0) {
goto bad;
}
// Dump the user page table of the process with pid 1.
if(p->pid==1)
vmprint(p->pagetable);
return argc; // this ends up in a0, the first argument to main(argc, argv)
bad:
// Shared error path: free the page table under construction, if any,
// and release the inode, if one is held.
if(pagetable)
proc_freepagetable(pagetable, sz);
if(ip){
iunlockput(ip);
end_op();
}
return -1;
}
在userinit中:
// proc.c
// Set up the first user process: allocate a proc, load initcode into
// one user page, mirror that page into the process's kernel page
// table, and mark the process RUNNABLE.
// Panics if the mapping cannot be mirrored.
void
userinit(void)
{
  struct proc *p = allocproc();

  initproc = p;

  // Give the process a single user page holding init's instructions
  // and data.
  uvminit(p->pagetable, initcode, sizeof(initcode));
  p->sz = PGSIZE;

  // Make the same page reachable through the kernel page table.
  if (copymapping(p->pagetable, p->kpagetable, 0, p->sz) < 0)
    panic("copymapping: failed");

  // Prepare for the very first "return" from kernel to user.
  p->trapframe->epc = 0;      // user program counter
  p->trapframe->sp = PGSIZE;  // user stack pointer

  safestrcpy(p->name, "initcode", sizeof(p->name));
  p->cwd = namei("/");

  p->state = RUNNABLE;

  release(&p->lock);
}
第三个实验到这里就结束了。