File system preliminaries
On-Disk File System Structure
inode regions, data regions
JOS file system will not use inodes at all and instead will simply store all of a file’s (or sub-directory’s) meta-data within the (one and only) directory entry describing that file.
Sectors and Blocks
Sector size is a property of the disk hardware, whereas block size is an aspect of the operating system using the disk. A file system’s block size must be a multiple of the sector size of the underlying disk.
Superblocks
block 1 on the disk
File Meta-data
name, size, type, pointers to the blocks comprising the file
direct blocks, indirect blocks
最大支持1034 blocks大小的文件
Directories versus Regular Files
The File System
Disk Access
Exercise1
// If this is the file server (type == ENV_TYPE_FS) give it I/O privileges.
// The privilege is granted by OR-ing FL_IOPL_MASK into the EFLAGS image
// stored in the environment's saved trap frame (env->env_tf); no other
// environment type is touched by this snippet.
if (type == ENV_TYPE_FS) {
env->env_tf.tf_eflags |= FL_IOPL_MASK;
}
Question1
不会,因为 I/O privilege 保存在 %eflags 寄存器中,%eflags 寄存器内容在进程切换时随 Trapframe 自动保存和恢复。
The Block Cache
JOS reserves a large, fixed 3GB region of the file system environment’s address space, from 0x10000000 (DISKMAP) up to 0xD0000000 (DISKMAP+DISKMAX), as a “memory mapped” version of the disk.
Exercise2
// Fragment of the block-cache page-fault handler; `addr`, `blockno` and `r`
// are declared earlier in the enclosing function (not shown here).
//
// Align the faulting address down to the start of its page.
addr = ROUNDDOWN(addr, PGSIZE);
// Back that page with fresh user-writable memory. The envid argument is 0;
// presumably this designates the calling environment -- confirm against
// the sys_page_alloc contract.
if ((r = sys_page_alloc(0, addr, PTE_W | PTE_U | PTE_P)) != 0) {
panic("bc_pgfault: %e", r);
}
// Fill the page from disk: BLKSECTS sectors starting at sector
// blockno * BLKSECTS. Any failure is fatal.
if ((r = ide_read(blockno * BLKSECTS, addr, BLKSECTS)) != 0) {
panic("bc_pgfault: %e", r);
}
// Write the block containing 'addr' back to disk if, and only if, the page
// is both mapped and dirty. After a successful write the page is re-mapped
// onto itself with its current permission bits masked by PTE_SYSCALL.
//
// Panics if 'addr' lies outside [DISKMAP, DISKMAP + DISKSIZE) or if the
// disk write or the re-mapping fails.
void
flush_block(void *addr)
{
	uint32_t blockno = ((uint32_t)addr - DISKMAP) / BLKSIZE;
	void *page;
	int err;

	if (addr < (void*)DISKMAP || addr >= (void*)(DISKMAP + DISKSIZE))
		panic("flush_block of bad va %08x", addr);

	// Nothing to write back unless the page is present and has been modified.
	if (!(va_is_mapped(addr) && va_is_dirty(addr)))
		return;

	// Disk I/O and the re-mapping both operate on the page-aligned address.
	page = ROUNDDOWN(addr, PGSIZE);

	err = ide_write(blockno * BLKSECTS, page, BLKSECTS);
	if (err != 0)
		panic("flush_block: %e", err);

	err = sys_page_map(0, page, 0, page, uvpt[PGNUM(page)] & PTE_SYSCALL);
	if (err != 0)
		panic("flush_block: %e", err);
}
The Block Bitmap
Exercise3
一个block为4096字节,一共有8*4096位,所以BLKBITSIZE=BLKSIZE * 8
// Search the free-block bitmap for a free block and allocate it.
//
// The bitmap consists of one or more blocks. A single bitmap block
// contains the in-use bits for BLKBITSIZE blocks. There are
// super->s_nblocks blocks in the disk altogether.
//
// On success the block's bit is cleared in the bitmap (the same bit that
// block_is_free() reported as free), the bitmap word that changed is
// flushed to disk, and the block number is returned.
// Returns -E_NO_DISK when no block is free.
int
alloc_block(void)
{
	uint32_t blockno;

	// Unsigned index: matches the unsigned block count and keeps the
	// bit arithmetic below free of signed operands.
	for (blockno = 0; blockno < super->s_nblocks; blockno++) {
		if (!block_is_free(blockno))
			continue;

		// 1U, not 1: shifting a signed 1 left by 31 is undefined behaviour.
		bitmap[blockno / 32] &= ~(1U << (blockno % 32));

		// Persist the bitmap change before handing the block out.
		flush_block(&bitmap[blockno / 32]);
		return (int)blockno;
	}
	return -E_NO_DISK;
}
File Operations
Exercise4
// Find the slot that holds the disk block number for the 'filebno'-th block
// of file 'f', and store a pointer to that slot in *ppdiskbno.
//
// Blocks below NDIRECT live in f->f_direct; the rest live in the indirect
// block referenced by f->f_indirect. If the indirect block is needed but
// absent, it is allocated and zero-filled when 'alloc' is set.
//
// Returns:
//	0 on success (the slot itself may still contain 0),
//	-E_INVAL if filebno >= NDIRECT + NINDIRECT,
//	-E_NOT_FOUND if an indirect block is needed, absent, and !alloc,
//	-E_NO_DISK if the indirect block could not be allocated.
static int
file_block_walk(struct File *f, uint32_t filebno, uint32_t **ppdiskbno, bool alloc)
{
	if (filebno >= NDIRECT + NINDIRECT)
		return -E_INVAL;

	if (filebno < NDIRECT) {
		*ppdiskbno = &f->f_direct[filebno];
		return 0;
	}

	if (!f->f_indirect) {
		// Must be signed: alloc_block() reports failure with a negative
		// value, which an unsigned variable could never compare below 0.
		int newbno;

		if (!alloc)
			return -E_NOT_FOUND;
		if ((newbno = alloc_block()) < 0)
			return -E_NO_DISK;
		f->f_indirect = newbno;
		// A fresh indirect block must start out with every slot empty.
		memset(diskaddr(newbno), 0, BLKSIZE);
	}

	*ppdiskbno = &((uint32_t *)diskaddr(f->f_indirect))[filebno - NDIRECT];
	return 0;
}
// Set *blk to the in-memory address of the 'filebno'-th block of file 'f',
// allocating and zero-filling a new disk block if the file has none there.
//
// Returns 0 on success, any error propagated from file_block_walk(), or
// -E_NO_DISK if a data block is needed and none is free.
int
file_get_block(struct File *f, uint32_t filebno, char **blk)
{
	uint32_t *pdiskbno;
	int r;

	// alloc = 1: create the indirect block on demand.
	if ((r = file_block_walk(f, filebno, &pdiskbno, 1)) != 0)
		return r;

	if (!*pdiskbno) {
		// Must be signed: alloc_block() reports failure with a negative
		// value, which an unsigned variable could never compare below 0.
		int newbno;

		if ((newbno = alloc_block()) < 0)
			return -E_NO_DISK;
		*pdiskbno = newbno;
		memset(diskaddr(newbno), 0, BLKSIZE);
	}

	*blk = diskaddr(*pdiskbno);
	return 0;
}
The file system interface
Exercise5
// Serve a read request from environment 'envid'.
//
// Reads up to req->req_n bytes from the open file identified by
// req->req_fileid, starting at the file descriptor's current offset, into
// ret->ret_buf, and advances the offset by the number of bytes read.
// The request and the reply are both members of the same union *ipc.
//
// Returns the number of bytes read, or a negative error code from
// openfile_lookup() or file_read().
int
serve_read(envid_t envid, union Fsipc *ipc)
{
	struct Fsreq_read *req = &ipc->read;
	struct Fsret_read *ret = &ipc->readRet;
	struct OpenFile *o;
	size_t count;
	int r;

	if (debug)
		cprintf("serve_read %08x %08x %08x\n", envid, req->req_fileid, req->req_n);

	if ((r = openfile_lookup(envid, req->req_fileid, &o)) != 0)
		return r;

	// req_n comes from the client and is untrusted: never read more than
	// the reply buffer can hold. A short read is a valid result.
	count = req->req_n;
	if (count > sizeof(ret->ret_buf))
		count = sizeof(ret->ret_buf);

	if ((r = file_read(o->o_file, ret->ret_buf, count, o->o_fd->fd_offset)) > 0)
		o->o_fd->fd_offset += r;
	return r;
}
Exercise6
// Serve a write request from environment 'envid'.
//
// Writes up to req->req_n bytes from req->req_buf to the open file
// identified by req->req_fileid, starting at the file descriptor's current
// offset, and advances the offset by the number of bytes written.
//
// Returns the number of bytes written, or a negative error code from
// openfile_lookup() or file_write().
int
serve_write(envid_t envid, struct Fsreq_write *req)
{
	struct OpenFile *o;
	size_t count;
	int r;

	if (debug)
		cprintf("serve_write %08x %08x %08x\n", envid, req->req_fileid, req->req_n);

	if ((r = openfile_lookup(envid, req->req_fileid, &o)) != 0)
		return r;

	// req_n comes from the client and is untrusted: never consume more
	// bytes than the request buffer actually contains. A short write is a
	// valid result.
	count = req->req_n;
	if (count > sizeof(req->req_buf))
		count = sizeof(req->req_buf);

	if ((r = file_write(o->o_file, req->req_buf, count, o->o_fd->fd_offset)) > 0)
		o->o_fd->fd_offset += r;
	return r;
}
// Write at most 'n' bytes from 'buf' to the file behind 'fd' by sending an
// FSREQ_WRITE request through fsipcbuf.
//
// The request buffer holds only PGSIZE - (sizeof(int) + sizeof(size_t))
// bytes, so larger writes are truncated to that size instead of failing;
// the caller sees a short write.
//
// Returns the number of bytes written, or a negative error code from
// fsipc().
static ssize_t
devfile_write(struct Fd *fd, const void *buf, size_t n)
{
	const size_t max_payload = PGSIZE - (sizeof(int) + sizeof(size_t));
	int r;

	// Clamp rather than assert: an oversized write is legal user input.
	if (n > max_payload)
		n = max_payload;

	fsipcbuf.write.req_fileid = fd->fd_file.id;
	fsipcbuf.write.req_n = n;
	memmove(fsipcbuf.write.req_buf, buf, n);

	if ((r = fsipc(FSREQ_WRITE, NULL)) < 0)
		return r;

	// r is non-negative here; the reply must not claim more than was sent.
	assert((size_t)r <= n);
	return r;
}
Spawning Processes
Exercise7
// Set the saved trap frame of environment 'envid' to a sanitised copy of *tf.
//
// The copy is forced to have the low two bits of tf_cs and tf_ss set,
// FL_IF set and the FL_IOPL_3 bits cleared in tf_eflags, so the target
// cannot be handed a more privileged register state than that. The
// caller's *tf is only read, never modified.
//
// Returns 0 on success, or the error from envid2env() if 'envid' cannot be
// resolved with permission checking enabled.
static int
sys_env_set_trapframe(envid_t envid, struct Trapframe *tf)
{
	struct Env *target;
	struct Env *caller;
	struct Trapframe sanitized;
	int r;

	if ((r = envid2env(envid, &target, 1)) != 0)
		return r;

	// 'tf' is a pointer supplied by the calling environment, so it must be
	// validated against the caller's mappings, not the target's.
	// NOTE(review): relies on envid 0 resolving to the calling environment,
	// the same convention the user-level code uses when it passes 0 to
	// sys_page_alloc/sys_page_map -- confirm against envid2env.
	if ((r = envid2env(0, &caller, 0)) != 0)
		return r;
	user_mem_assert(caller, tf, sizeof(struct Trapframe), PTE_U);

	// Sanitise a kernel-side copy so user memory is never written and the
	// values installed are exactly the values that were checked.
	sanitized = *tf;
	sanitized.tf_cs |= 3;
	sanitized.tf_ss |= 3;
	sanitized.tf_eflags |= FL_IF;
	sanitized.tf_eflags &= ~FL_IOPL_3;

	target->env_tf = sanitized;
	return 0;
}
// Dispatch arm for SYS_env_set_trapframe: the first syscall argument is
// passed as the environment id and the second is reinterpreted as a
// pointer to the struct Trapframe to install.
case SYS_env_set_trapframe:
return sys_env_set_trapframe(a1, (struct Trapframe *)a2);
Sharing library state across fork and spawn
Exercise8
// Map virtual page 'pn' (address pn * PGSIZE) of the calling environment
// into environment 'envid' at the same address.
//
//  - Pages marked PTE_SHARE are mapped with their current permission bits
//    masked by PTE_SYSCALL.
//  - Pages that are neither PTE_W nor PTE_COW are mapped PTE_U | PTE_P.
//  - All remaining pages (PTE_W and/or PTE_COW) are mapped
//    PTE_COW | PTE_U | PTE_P in the target first, and then re-mapped with
//    the same permissions in the caller.
//
// Returns 0 on success; panics if any mapping call fails.
static int
duppage(envid_t envid, unsigned pn)
{
	envid_t self = sys_getenvid();
	void *page = (void *)(pn * PGSIZE);
	int err;

	// Case 1: explicitly shared page.
	if ((uvpt[pn] & PTE_SHARE) == PTE_SHARE) {
		err = sys_page_map(self, page, envid, page, uvpt[pn] & PTE_SYSCALL);
		if (err != 0)
			panic("duppage: %e", err);
		return 0;
	}

	// Case 2: neither writable nor copy-on-write.
	if ((uvpt[pn] & PTE_W) != PTE_W && (uvpt[pn] & PTE_COW) != PTE_COW) {
		err = sys_page_map(self, page, envid, page, PTE_U | PTE_P);
		if (err != 0)
			panic("duppage: %e", err);
		return 0;
	}

	// Case 3: writable or already copy-on-write. The target is mapped
	// before the caller's own mapping is replaced.
	err = sys_page_map(self, page, envid, page, PTE_COW | PTE_U | PTE_P);
	if (err != 0)
		panic("duppage: %e", err);

	err = sys_page_map(self, page, self, page, PTE_COW | PTE_U | PTE_P);
	if (err != 0)
		panic("duppage: %e", err);

	return 0;
}
// Map every page below USTACKTOP that is present and marked PTE_SHARE in
// the calling environment into 'child' at the same virtual address, using
// the page's current permission bits masked by PTE_SYSCALL.
//
// Returns 0 on success; panics if a mapping call fails.
static int
copy_shared_pages(envid_t child)
{
	envid_t self = sys_getenvid();
	uint32_t va;
	int err;

	for (va = 0; va < USTACKTOP; va += PGSIZE) {
		// Check the page-directory entry first; the page-table entry is
		// only consulted once the directory entry is known to be present.
		if ((uvpd[PDX(va)] & PTE_P) != PTE_P)
			continue;
		if ((uvpt[PGNUM(va)] & PTE_P) != PTE_P)
			continue;
		if ((uvpt[PGNUM(va)] & PTE_SHARE) != PTE_SHARE)
			continue;

		err = sys_page_map(self, (void *)va, child, (void *)va, uvpt[PGNUM(va)] & PTE_SYSCALL);
		if (err != 0)
			panic("copy_shared_pages: %e", err);
	}
	return 0;
}
The keyboard interface
Exercise9
// Fragment of the trap dispatcher; `tf` is the trap frame being handled.
//
// Keyboard interrupt: hand it to the keyboard interrupt routine and finish
// handling this trap.
if (tf->tf_trapno == IRQ_OFFSET + IRQ_KBD) {
kbd_intr();
return;
}
// Serial-port interrupt: hand it to the serial interrupt routine and finish
// handling this trap.
if (tf->tf_trapno == IRQ_OFFSET + IRQ_SERIAL) {
serial_intr();
return;
}
The Shell
Exercise10
case '>': // Output redirection
// Grab the filename from the argument list
// The token following '>' must be a word ('w'); anything else is a
// syntax error and terminates the command.
if (gettoken(0, &t) != 'w') {
cprintf("syntax error: > not followed by word\n");
exit();
}
// Open the target for writing, creating it if needed and truncating any
// existing contents; a negative result is an error code.
if ((fd = open(t, O_WRONLY|O_CREAT|O_TRUNC)) < 0) {
cprintf("open %s for write: %e", t, fd);
exit();
}
// Make the opened file descriptor 1, then drop the temporary descriptor.
// Skipped when open() already returned descriptor 1.
if (fd != 1) {
dup(fd, 1);
close(fd);
}
break;