chapter 3
Print a page table
这个exercise的内容为打印page table,需要实现一个打印函数vmprint,参数为pagetable_t,传入根页表地址,打印对应的pagetable。在这个exercise中只需要打印init进程的pagetable,所以需要在exec函数(exec.c)中添加对进程的pid的判断,如果pid为1,打印该进程的pagetable
打印的例子:
page table 0x0000000087f6e000
..0: pte 0x0000000021fda801 pa 0x0000000087f6a000
.. ..0: pte 0x0000000021fda401 pa 0x0000000087f69000
.. .. ..0: pte 0x0000000021fdac1f pa 0x0000000087f6b000
.. .. ..1: pte 0x0000000021fda00f pa 0x0000000087f68000
.. .. ..2: pte 0x0000000021fd9c1f pa 0x0000000087f67000
..255: pte 0x0000000021fdb401 pa 0x0000000087f6d000
.. ..511: pte 0x0000000021fdb001 pa 0x0000000087f6c000
.. .. ..510: pte 0x0000000021fdd807 pa 0x0000000087f76000
.. .. ..511: pte 0x0000000020001c0b pa 0x0000000080007000
每行开头".."的数量与该pte所在页表的level相同,之后依次是pte在pagetable中的index、pte的内容以及对应的pa
- 对于pagetable中pte只打印有效的pte(PTE_V位为1),打印三级页表的内容
- pte到pa的转换使用PTE2PA进行转换,pte和pa的差别实际上就是pa中的offset的位数(12 bits)和pte中flag的位数(10 bits)不同,以及每个页表的offset部分都为0
// Recursively print every valid entry of `pagetable`.
// `depth` is the level of this table within the three-level tree
// (1 for the root, 3 for the last level). Each printed line starts with
// `depth` ".." markers separated by single spaces, followed by the entry's
// index, the raw PTE and the physical address extracted with PTE2PA.
void vmprint_walk(pagetable_t pagetable, int depth) {
  for (int idx = 0; idx < 512; idx++) {
    pte_t entry = (pte_t)pagetable[idx];
    if (entry & PTE_V) {
      // First marker is bare, every further one is preceded by a space.
      for (int lvl = 0; lvl < depth; lvl++)
        printf(lvl == 0 ? ".." : " ..");
      printf("%d: pte %p pa %p\n", idx, entry, PTE2PA(entry));
      // Only the first two levels point at further page-table pages.
      if (depth < 3)
        vmprint_walk((pagetable_t)PTE2PA(entry), depth + 1);
    }
  }
}
// Print the whole page table rooted at `pagetable`: a header line with the
// root address, then every valid entry via vmprint_walk().
void vmprint(pagetable_t pagetable) {
  const int root_level = 1;  // the root table is level 1 for vmprint_walk

  printf("page table %p\n", pagetable);
  vmprint_walk(pagetable, root_level);
}
A kernel page table per process
在这个exercise中所需要完成的任务为每个process添加一个独立的kernel page table。
在目前的实现中,在kernel space所使用的pagetable都是在vm.c中声明的kernel_pagetable,在这个exercise中需要为每个process添加一个kernel_pagetable的拷贝,并且在进入到kernel space后使用process中的kernel pagetable。
因此在proc结构体中添加一个变量pagetable_t kpagetable,记录kernel pagetable。每个process的kpagetable中的映射除了kernel stack外与global kernel pagetable一致,具体映射查看kvminit函数。除了kvminit中建立的映射外,还需要在kpagetable中为对应process的kernel stack建立映射。在procinit函数中,会为每个process分配一个kernel stack,每个kernel stack的大小为4096 bytes,并且在global kernel pagetable中建立对应的映射。因此在创建process时(allocproc函数),需要为kpagetable分配内存,建立kvminit中的映射关系,并且为kstack增加映射(不需要重新分配kernel stack的内存)。
在进行进程调度时,需要将原来的kvminithart(将satp设为global page table)替换为将satp设置为process中的kpagetable
当process运行结束释放proc时需要删除对应的kpagetable,但不删除kpagetable最终指向的物理内存(叶子节点)
1.在struct proc中添加kpagetable
proc.h
struct proc {
....
pagetable_t kpagetable; // Per-process kernel page table, used while this process runs in the kernel
....
}
2.创建一个kernel pagetable
vm.c
// Create a kernel page table for a single process.
//
// Allocates one zeroed root page and installs the direct mappings listed
// below (devices, kernel text, kernel data + RAM, trampoline). The caller is
// responsible for adding the process's kernel-stack mapping.
//
// Returns the root page-table page, or 0 if that page cannot be allocated.
// A failure while installing a mapping panics inside new_kvmmap().
//
// NOTE(review): if user mappings are later installed below PLIC in this
// table, confirm they cannot collide with the CLINT range mapped here.
pagetable_t kvmcreate(void) {
  pagetable_t kpagetable = (pagetable_t) kalloc();
  if (kpagetable == 0)
    return 0;  // out of memory: do not memset a null page
  memset(kpagetable, 0, PGSIZE);

  // uart registers
  new_kvmmap(kpagetable, UART0, UART0, PGSIZE, PTE_R | PTE_W);

  // virtio mmio disk interface
  new_kvmmap(kpagetable, VIRTIO0, VIRTIO0, PGSIZE, PTE_R | PTE_W);

  // CLINT
  new_kvmmap(kpagetable, CLINT, CLINT, 0x10000, PTE_R | PTE_W);

  // PLIC
  new_kvmmap(kpagetable, PLIC, PLIC, 0x400000, PTE_R | PTE_W);

  // map kernel text executable and read-only.
  new_kvmmap(kpagetable, KERNBASE, KERNBASE, (uint64)etext-KERNBASE, PTE_R | PTE_X);

  // map kernel data and the physical RAM we'll make use of.
  new_kvmmap(kpagetable, (uint64)etext, (uint64)etext, PHYSTOP-(uint64)etext, PTE_R | PTE_W);

  // map the trampoline for trap entry/exit to
  // the highest virtual address in the kernel.
  new_kvmmap(kpagetable, TRAMPOLINE, (uint64)trampoline, PGSIZE, PTE_R | PTE_X);

  return kpagetable;
}
// Add a mapping of `sz` bytes from virtual address `va` to physical address
// `pa`, with permissions `perm`, to the kernel page table `kpagetable`.
// Panics if mappages() reports a failure.
void new_kvmmap(pagetable_t kpagetable, uint64 va, uint64 pa, uint64 sz, int perm) {
  int rc = mappages(kpagetable, va, sz, pa, perm);

  if (rc == 0)
    return;
  panic("new_kvmmap");
}
3.分配per process kernel pagetable
proc.c
// Find an UNUSED slot in the process table and initialise the state needed
// to run in the kernel: pid, trapframe page, empty user page table, a
// per-process kernel page table with the kernel stack mapped, and a context
// that starts executing at forkret.
//
// Returns the process with p->lock held, or 0 if no slot is free or an
// allocation fails (in which case the lock has been released).
static struct proc*
allocproc(void)
{
  struct proc *p;

  // Look for an UNUSED entry in the process array.
  for(p = proc; p < &proc[NPROC]; p++) {
    acquire(&p->lock);
    if(p->state == UNUSED) {
      goto found;
    } else {
      release(&p->lock);
    }
  }
  return 0;

found:
  p->pid = allocpid();

  // Allocate a trapframe page.
  if((p->trapframe = (struct trapframe *)kalloc()) == 0){
    release(&p->lock);
    return 0;
  }

  // An empty user page table.
  p->pagetable = proc_pagetable(p);
  if(p->pagetable == 0){
    freeproc(p);
    release(&p->lock);
    return 0;
  }

  // Set up new context to start executing at forkret,
  // which returns to user space.
  memset(&p->context, 0, sizeof(p->context));
  p->context.ra = (uint64)forkret;
  p->context.sp = p->kstack + PGSIZE;

  // Every process gets its own kernel page table. Treat a failed
  // allocation like the user page-table failure above instead of
  // handing a null table to new_kvmmap().
  p->kpagetable = kvmcreate();
  if(p->kpagetable == 0){
    freeproc(p);
    release(&p->lock);
    return 0;
  }

  // Map the already-allocated kernel stack into the per-process kernel
  // page table; kvmpa() supplies the physical address behind p->kstack.
  new_kvmmap(p->kpagetable, p->kstack, (uint64)kvmpa(p->kstack), PGSIZE, PTE_R | PTE_W);

  return p;
}
4.在进程调度过程中启用kpagetable
需要注意的是:pageon()必须在swtch()之前调用,swtch()返回之后需要调用kvminithart()切换回global kernel pagetable,否则会发生kerneltrap panic
proc.c
// Per-CPU scheduler loop; it never returns.
// Repeatedly scans the process table for a RUNNABLE process, marks it
// RUNNING, installs that process's kernel page table and switches to it
// with swtch(). When the process switches back, the CPU is put back on the
// page table installed by kvminithart() before the scan continues.
void
scheduler(void)
{
struct proc *p;
struct cpu *c = mycpu();
c->proc = 0;
for(;;){
// Avoid deadlock by ensuring that devices can interrupt.
intr_on();
int found = 0;
for(p = proc; p < &proc[NPROC]; p++) {
acquire(&p->lock);
if(p->state == RUNNABLE) {
// Switch to chosen process. It is the process's job
// to release its lock and then reacquire it
// before jumping back to us.
p->state = RUNNING;
c->proc = p;
// Install the process's own kernel page table. This has to happen
// before swtch(): the statements after swtch() only run once the
// process has given the CPU back.
pageon(p->kpagetable);
swtch(&c->context, &p->context);
// Process is done running for now.
// It should have changed its p->state before coming back.
// Leave the process's kernel page table again.
// NOTE(review): presumably required because freeproc() may free
// p->kpagetable while this CPU would otherwise still have it in satp --
// the notes report a kerneltrap panic without this call.
kvminithart();
c->proc = 0;
found = 1;
}
release(&p->lock);
}
#if !defined (LAB_FS)
if(found == 0) {
// Nothing runnable: enable interrupts and wait for one.
intr_on();
asm volatile("wfi");
// No page-table switch here; each return from swtch() above is already
// followed by kvminithart().
}
#else
;
#endif
}
}
vm.c
// Make `kpagetable` the active page table on this CPU: write it to satp
// (encoded with MAKE_SATP) and then issue sfence_vma().
void pageon(pagetable_t kpagetable) {
  uint64 satp_value = MAKE_SATP(kpagetable);

  w_satp(satp_value);
  sfence_vma();
}
5.在进程结束时释放kpagetable
proc.c
// Release everything hanging off a proc entry and mark the slot UNUSED.
// Frees the trapframe page, the user page table (including user memory via
// proc_freepagetable) and the page-table pages of the per-process kernel
// page table. new_freewalk() does not free the pages the kernel table's
// last-level entries point at.
static void
freeproc(struct proc *p)
{
  // Step 1: give back the resources that are present.
  if(p->trapframe != 0)
    kfree((void*)p->trapframe);
  if(p->pagetable != 0)
    proc_freepagetable(p->pagetable, p->sz);
  if(p->kpagetable != 0)
    new_freewalk(p->kpagetable, 1);

  // Step 2: reset every field so the slot can be reused.
  p->trapframe = 0;
  p->pagetable = 0;
  p->kpagetable = 0;
  p->sz = 0;
  p->pid = 0;
  p->parent = 0;
  p->name[0] = 0;
  p->chan = 0;
  p->killed = 0;
  p->xstate = 0;
  p->state = UNUSED;
}
vm.c
// Free the page-table pages of the tree rooted at `pagetable` without
// freeing the pages its last-level entries point at.
// `depth` is the level of `pagetable` (1 = root); tables at depth 3 are
// freed without following their entries.
void new_freewalk(pagetable_t pagetable, int depth)
{
  if (depth < 3) {
    // there are 2^9 = 512 PTEs in a page table.
    for (int idx = 0; idx < 512; idx++) {
      pte_t entry = pagetable[idx];
      if ((entry & PTE_V) == 0)
        continue;
      // A valid entry above the last level refers to a lower-level table.
      new_freewalk((pagetable_t)PTE2PA(entry), depth + 1);
    }
  }
  kfree((void*)pagetable);
}
exercise 2的一些疑问
- pageon为什么需要在swtch之前?
- swtch之后为什么需要kvminithart?
Simplify copyin/copyinstr
这个exercise的工作是将process的kernel page table的0-PLIC这个区间的vm变为user page table的映射,因此process的vm有效范围不能超过PLIC。建立映射后copyin和copyinstr就可以不用在软件层面模拟MMU的行为访问user page table获取user space的内存,而是直接通过MMU来直接访问user space的内存。
1.将copyin替换为copyin_new,将copyinstr替换为copyinstr_new
vm.c
// Thin wrapper kept so existing callers are unchanged: forwards all
// arguments to copyin_new() and returns its result as-is.
int
copyin(pagetable_t pagetable, char *dst, uint64 srcva, uint64 len)
{
  int status = copyin_new(pagetable, dst, srcva, len);

  return status;
}
// Thin wrapper kept so existing callers are unchanged: forwards all
// arguments to copyinstr_new() and returns its result as-is.
int
copyinstr(pagetable_t pagetable, char *dst, uint64 srcva, uint64 max)
{
  int status = copyinstr_new(pagetable, dst, srcva, max);

  return status;
}
2.当uvm映射发生改变时,kernel page table也要修改对应的页表项,在xv6中在fork(), exec()和sbrk()都会发生修改uvm映射的现象
实现对某个范围vm的页表进行拷贝的函数
// Synchronise a process's kernel page table with its user page table after
// the user size changed from `start_sz` to `end_sz`.
//
// Growth (start_sz <= end_sz): for every page from PGROUNDUP(start_sz) up to
// end_sz, copy the user PTE's physical address and flags into `kpagetable`
// with PTE_U cleared. Missing or invalid user PTEs, or a failure to obtain
// the kernel PTE, panic.
//
// Shrink (start_sz > end_sz): clear the kernel PTEs for the pages between
// PGROUNDUP(end_sz) and PGROUNDUP(start_sz), so the kernel table does not
// keep entries for user pages that have been released.
//
// Panics if end_sz exceeds PLIC. Always returns 0.
int copyuvm2kvm(pagetable_t upagetable, pagetable_t kpagetable, uint64 start_sz, uint64 end_sz) {
  pte_t *u_pte, *k_pte;
  uint64 pa;
  uint flags;

  if (end_sz > PLIC) {
    panic("copyuvm2kvm: sz is too large");
  }

  if (end_sz < start_sz) {
    // Shrink: never touch anything at or above PLIC, whatever the caller
    // passed as the old size.
    uint64 top = PGROUNDUP(start_sz);
    if (top > PLIC)
      top = PLIC;
    for (uint64 va = PGROUNDUP(end_sz); va < top; va += PGSIZE) {
      // alloc = 0: a missing intermediate table just means nothing is mapped.
      if ((k_pte = walk(kpagetable, va, 0)) != 0)
        *k_pte = 0;
    }
    return 0;
  }

  // Growth: pages below PGROUNDUP(start_sz) are already mirrored.
  for (uint64 va = PGROUNDUP(start_sz); va < end_sz; va += PGSIZE) {
    if ((u_pte = walk(upagetable, va, 0)) == 0) {
      panic("copyuvm2kvm: pte should exist");
    }
    if ((*u_pte & PTE_V) == 0) {
      panic("copyuvm2kvm: page not present");
    }
    if ((k_pte = walk(kpagetable, va, 1)) == 0) {
      panic("copyuvm2kvm: get k_pte fail");
    }
    pa = PTE2PA(*u_pte);
    // Same permissions as the user mapping, minus the user bit.
    flags = PTE_FLAGS(*u_pte) & (~PTE_U);
    *k_pte = PA2PTE(pa) | flags;
  }
  return 0;
}
在fork中增加映射
// Create a new process that is a copy of the caller.
// Copies user memory, mirrors the child's user mappings into the child's
// kernel page table, copies the saved user registers (with a0 forced to 0 so
// fork returns 0 in the child), and duplicates open files and the current
// directory.
// Returns the child's pid in the parent, or -1 on failure.
int
fork(void)
{
  int i, pid;
  struct proc *np;
  struct proc *p = myproc();

  // Allocate process.
  if((np = allocproc()) == 0){
    return -1;
  }

  // Copy user memory from parent to child.
  if(uvmcopy(p->pagetable, np->pagetable, p->sz) < 0){
    freeproc(np);
    release(&np->lock);
    return -1;
  }
  np->sz = p->sz;

  // Mirror the child's user mappings into its kernel page table.
  // This is done before any file or inode reference is taken, so the
  // failure path below has nothing to release beyond what freeproc() frees.
  if(copyuvm2kvm(np->pagetable, np->kpagetable, 0, np->sz) < 0){
    freeproc(np);
    release(&np->lock);
    return -1;
  }

  np->parent = p;

  // copy saved user registers.
  *(np->trapframe) = *(p->trapframe);

  // Cause fork to return 0 in the child.
  np->trapframe->a0 = 0;

  // increment reference counts on open file descriptors.
  for(i = 0; i < NOFILE; i++)
    if(p->ofile[i])
      np->ofile[i] = filedup(p->ofile[i]);
  np->cwd = idup(p->cwd);

  safestrcpy(np->name, p->name, sizeof(p->name));

  pid = np->pid;

  np->state = RUNNABLE;

  release(&np->lock);

  return pid;
}
在exec()中添加映射
// Replace the calling process's user image with the ELF program at `path`,
// passing it the null-terminated argument vector `argv`.
//
// Builds the new image in a fresh user page table, mirrors it into the
// process's kernel page table, and only then commits: installs the new page
// table, size, entry point and stack pointer, and frees the old image.
// For pid 1 the new user page table is printed with vmprint().
//
// Returns argc on success (it ends up in a0) or -1 on failure; on failure
// the old image is left installed.
int
exec(char *path, char **argv)
{
  char *s, *last;
  int i, off;
  uint64 argc, sz = 0, sp, ustack[MAXARG+1], stackbase;
  struct elfhdr elf;
  struct inode *ip;
  struct proghdr ph;
  pagetable_t pagetable = 0, oldpagetable;
  struct proc *p = myproc();

  begin_op();

  if((ip = namei(path)) == 0){
    end_op();
    return -1;
  }
  ilock(ip);

  // Check ELF header
  if(readi(ip, 0, (uint64)&elf, 0, sizeof(elf)) != sizeof(elf))
    goto bad;
  if(elf.magic != ELF_MAGIC)
    goto bad;

  if((pagetable = proc_pagetable(p)) == 0)
    goto bad;

  // Load program into memory.
  for(i=0, off=elf.phoff; i<elf.phnum; i++, off+=sizeof(ph)){
    if(readi(ip, 0, (uint64)&ph, off, sizeof(ph)) != sizeof(ph))
      goto bad;
    if(ph.type != ELF_PROG_LOAD)
      continue;
    if(ph.memsz < ph.filesz)
      goto bad;
    if(ph.vaddr + ph.memsz < ph.vaddr)
      goto bad;
    uint64 sz1;
    if((sz1 = uvmalloc(pagetable, sz, ph.vaddr + ph.memsz)) == 0)
      goto bad;
    sz = sz1;
    if(ph.vaddr % PGSIZE != 0)
      goto bad;
    if(loadseg(pagetable, ph.vaddr, ip, ph.off, ph.filesz) < 0)
      goto bad;
  }
  iunlockput(ip);
  end_op();
  ip = 0;

  uint64 oldsz = p->sz;

  // Allocate two pages at the next page boundary.
  // Use the second as the user stack.
  sz = PGROUNDUP(sz);
  uint64 sz1;
  if((sz1 = uvmalloc(pagetable, sz, sz + 2*PGSIZE)) == 0)
    goto bad;
  sz = sz1;
  uvmclear(pagetable, sz-2*PGSIZE);
  sp = sz;
  stackbase = sp - PGSIZE;

  // Push argument strings, prepare rest of stack in ustack.
  for(argc = 0; argv[argc]; argc++) {
    if(argc >= MAXARG)
      goto bad;
    sp -= strlen(argv[argc]) + 1;
    sp -= sp % 16; // riscv sp must be 16-byte aligned
    if(sp < stackbase)
      goto bad;
    if(copyout(pagetable, sp, argv[argc], strlen(argv[argc]) + 1) < 0)
      goto bad;
    ustack[argc] = sp;
  }
  ustack[argc] = 0;

  // push the array of argv[] pointers.
  sp -= (argc+1) * sizeof(uint64);
  sp -= sp % 16;
  if(sp < stackbase)
    goto bad;
  if(copyout(pagetable, sp, (char *)ustack, (argc+1)*sizeof(uint64)) < 0)
    goto bad;

  // Mirror the new image into the per-process kernel page table.
  // This is the last step that can fail and it runs before the commit
  // below, so `bad` only ever frees the not-yet-installed page table.
  // NOTE(review): kernel entries for the old image above the new size are
  // not removed here -- confirm nothing dereferences them.
  if(copyuvm2kvm(pagetable, p->kpagetable, 0, sz) < 0)
    goto bad;

  // arguments to user main(argc, argv)
  // argc is returned via the system call return
  // value, which goes in a0.
  p->trapframe->a1 = sp;

  // Save program name for debugging.
  for(last=s=path; *s; s++)
    if(*s == '/')
      last = s+1;
  safestrcpy(p->name, last, sizeof(p->name));

  // Commit to the user image.
  oldpagetable = p->pagetable;
  p->pagetable = pagetable;
  p->sz = sz;
  p->trapframe->epc = elf.entry;  // initial program counter = main
  p->trapframe->sp = sp; // initial stack pointer
  proc_freepagetable(oldpagetable, oldsz);

  // Lab requirement: print the page table of the first process.
  if(p->pid == 1)
    vmprint(p->pagetable);

  return argc; // this ends up in a0, the first argument to main(argc, argv)

 bad:
  if(pagetable)
    proc_freepagetable(pagetable, sz);
  if(ip){
    iunlockput(ip);
    end_op();
  }
  return -1;
}
sbrk的具体实现在sys_sbrk中,修改uvm的部分在growproc()中;在growproc中只需要对发生变化的那部分vm更新kernel page table中的映射
// Grow (n > 0) or shrink (n < 0) the calling process's user memory by n
// bytes and keep its kernel page table in step via copyuvm2kvm().
// Returns 0 on success, -1 on failure; on failure p->sz is unchanged.
int
growproc(int n)
{
  uint sz;
  struct proc *p = myproc();

  sz = p->sz;
  if(n > 0){
    if((sz = uvmalloc(p->pagetable, sz, sz + n)) == 0) {
      return -1;
    }
    // Mirror the newly added range into the kernel page table. If that
    // fails, give the fresh user pages back so both tables stay consistent.
    if(copyuvm2kvm(p->pagetable, p->kpagetable, p->sz, sz) < 0){
      uvmdealloc(p->pagetable, sz, p->sz);
      return -1;
    }
  } else if(n < 0){
    sz = uvmdealloc(p->pagetable, sz, sz + n);
    // Report the shrink as well (old size first, new size second),
    // exactly as before.
    copyuvm2kvm(p->pagetable, p->kpagetable, p->sz, sz);
  }
  p->sz = sz;
  return 0;
}
3.在userinit中增加对initcode部分的映射
// Set up the first user process.
// Allocates a proc, loads `initcode` into one user page, points the saved
// user pc and sp at 0 and PGSIZE, names the process "initcode", sets its
// working directory to "/", marks it RUNNABLE and mirrors the user page into
// the process's kernel page table before releasing the lock taken by
// allocproc().
void
userinit(void)
{
  struct proc *first = allocproc();

  initproc = first;

  // allocate one user page and copy init's instructions
  // and data into it.
  uvminit(first->pagetable, initcode, sizeof(initcode));
  first->sz = PGSIZE;

  // prepare for the very first "return" from kernel to user.
  first->trapframe->epc = 0;      // user program counter
  first->trapframe->sp = PGSIZE;  // user stack pointer

  safestrcpy(first->name, "initcode", sizeof(first->name));
  first->cwd = namei("/");

  first->state = RUNNABLE;

  // Make the initcode page reachable through the kernel page table too.
  copyuvm2kvm(first->pagetable, first->kpagetable, 0, first->sz);

  release(&first->lock);
}
exercise 3的一些疑问
- initcode的作用?