/*
 * switch_to(n): switch execution to task number n.
 *
 * Asm operands: %0 = __tmp.a and %1 = __tmp.b (memory), edx = _TSS(n),
 * ecx = task[n].
 *
 *  - If ecx already equals _current, jump to label 1: nothing to do.
 *  - Otherwise store dx into __tmp.b, exchange ecx with _current (so
 *    _current now holds task[n] and ecx holds the previous value), and
 *    far-jump through the memory operand that starts at __tmp.a.
 *    NOTE(review): _TSS(n) is presumably the TSS selector of task n, in which
 *    case the ljmp performs an x86 hardware task switch -- confirm against
 *    the definition of _TSS.
 *  - The instructions after the ljmp execute only when control comes back
 *    here: ecx is compared with _last_task_used_math and, when they are
 *    equal, "clts" is executed.
 */
#define switch_to(n) {\
struct {long a,b;} __tmp; \
__asm__("cmpl %%ecx,_current\n\t" \
"je 1f\n\t" \
"movw %%dx,%1\n\t" \
"xchgl %%ecx,_current\n\t" \
"ljmp %0\n\t" \
"cmpl %%ecx,_last_task_used_math\n\t" \
"jne 1f\n\t" \
"clts\n" \
"1:" \
::"m" (*&__tmp.a),"m" (*&__tmp.b), \
"d" (_TSS(n)),"c" ((long) task[n])); \
}
前面分析进程切换时说过，switch_to 中最重要的一句是 “ljmp %0\n\t”：它把进程1的 TSS 数据以及 LDT 中代码段、数据段描述符的数据装入 CPU 的各个寄存器。此时寄存器 eip 的值，正是进程0调用 fork() 创建进程1时，int 0x80 中断使 CPU 硬件自动压栈的 ss、esp、eflags、cs、eip 中的 eip 值。这个值指向 int 0x80 的下一条指令，也即 if (__res >= 0) 这一句。
/*
 * fork(): user-side stub that issues the fork system call via int 0x80.
 *
 * Returns the value the interrupt left in eax when it is non-negative;
 * otherwise stores the negated value in errno and returns -1.
 */
int fork(void)
{
long __res;
__asm__ volatile ("int $0x80"
: "=a" (__res) /* output: eax is copied into __res after the interrupt */
: "0" (__NR_fork)); /* input: __NR_fork is loaded into the same register as operand 0 (eax) */
if (__res >= 0) /* execution resumes here once int 0x80 returns */
return (int) __res;
errno = -__res;
return -1;
}
这个 __res 就是 eax 的值，而进程1的这个值在 copy_process() 函数里已经被设置为 0：
p->tss.eax = 0;
所以fork函数返回0
void main(void) /* This really IS void, no error here. */
{
...
if (!fork()) { /* we count on this going ok */ //fork返回0 条件为真
init();
...
}
所以这时执行init()函数
//main.c
/*
 * init(): body of the first forked process.
 *
 * Runs setup(), opens /dev/tty0 as descriptors 0, 1 and 2, prints the buffer
 * and memory statistics, runs a shell once with /etc/rc as its standard
 * input, and then keeps respawning a shell on /dev/tty0 forever.
 */
void init(void)
{
	int pid, status;

	setup((void *) &drive_info);
	(void) open("/dev/tty0",O_RDWR,0);
	(void) dup(0);
	(void) dup(0);
	printf("%d buffers = %d bytes buffer space\n\r",NR_BUFFERS,
		NR_BUFFERS*BLOCK_SIZE);
	printf("Free mem: %d bytes\n\r",memory_end-main_memory_start);

	/* One-shot child: a shell whose stdin is /etc/rc. */
	pid = fork();
	if (pid == 0) {
		close(0);
		if (open("/etc/rc",O_RDONLY,0) != 0)
			_exit(1);
		execve("/bin/sh",argv_rc,envp_rc);
		_exit(2);
	}
	if (pid > 0) {
		/* Reap children until the rc shell itself has exited. */
		while (wait(&status) != pid)
			;
	}

	/* Respawn loop: a fresh session and shell each time the previous one dies. */
	for (;;) {
		pid = fork();
		if (pid < 0) {
			printf("Fork failed in init\r\n");
			continue;
		}
		if (pid == 0) {
			close(0);
			close(1);
			close(2);
			setsid();
			(void) open("/dev/tty0",O_RDWR,0);
			(void) dup(0);
			(void) dup(0);
			_exit(execve("/bin/sh",argv,envp));
		}
		/* Wait for exactly this child; other exits are reaped and ignored. */
		while (wait(&status) != pid)
			;
		printf("\n\rchild %d died with code %04x\n\r",pid,status);
		sync();
	}
	_exit(0); /* NOTE! _exit, not exit() */
}
init函数一开始是调用setup((void *) &drive_info);
setup() 与 fork()、pause() 一样也是系统调用，区别在于 setup() 不是通过 _syscall0() 而是通过 _syscall1() 实现的，最终调用到 sys_setup() 函数。
/*
 * Per-drive parameters. sys_setup() fills these from the BIOS table when
 * HD_TYPE is not defined.
 */
struct hd_i_struct {
int head,sect,cyl,wpcom,lzone,ctl; /* head = heads, sect = sectors per track, cyl = cylinders; NOTE(review): wpcom/lzone/ctl are presumably write-precompensation cylinder, landing zone and control byte -- confirm */
};
/* Two entries, one per drive, all zero until filled in. */
struct hd_i_struct hd_info[] = { {0,0,0,0,0,0},{0,0,0,0,0,0} };
/*
 * Sector extents, five entries per drive: for drive d, hd[5*d] describes the
 * whole disk and hd[5*d+1 .. 5*d+4] are set from the four partition-table
 * entries (see sys_setup()).
 */
static struct hd_struct {
long start_sect; /* first sector of the extent */
long nr_sects; /* number of sectors in the extent */
} hd[5*MAX_HD]={{0,0},};
/*
 * sys_setup(): one-time hard-disk setup, reached through the setup() system
 * call.
 *
 * BIOS points at the drive-parameter table (16 bytes per drive).
 * Steps, in order:
 *   1. Refuse any call after the first (returns -1).
 *   2. When HD_TYPE is not defined, copy the geometry of both drives out of
 *      the BIOS table into hd_info[] and derive NR_HD from it.
 *   3. Record the whole-disk extent of every drive in hd[5*i].
 *   4. Re-derive NR_HD from CMOS byte 0x12 and zero the whole-disk extent of
 *      drives that are not present.
 *   5. For each drive read block 0 through bread(), check the 0x55/0xAA
 *      signature at bytes 510/511 and copy the four partition entries found
 *      at offset 0x1BE into hd[].
 *   6. Call rd_load() and mount_root().
 * Returns 0 on success; panics if a partition table cannot be read or has a
 * bad signature.
 */
int sys_setup(void * BIOS)
{
static int callable = 1;
int i,drive;
unsigned char cmos_disks;
struct partition *p;
struct buffer_head * bh;
if (!callable)
return -1;
callable = 0;
#ifndef HD_TYPE /* HD_TYPE is not defined, so the code below runs: fill hd_info from the BIOS table */
for (drive=0 ; drive<2 ; drive++) {
hd_info[drive].cyl = *(unsigned short *) BIOS; /* number of cylinders */
hd_info[drive].head = *(unsigned char *) (2+BIOS); /* number of heads */
hd_info[drive].wpcom = *(unsigned short *) (5+BIOS);
hd_info[drive].ctl = *(unsigned char *) (8+BIOS);
hd_info[drive].lzone = *(unsigned short *) (12+BIOS);
hd_info[drive].sect = *(unsigned char *) (14+BIOS); /* sectors per track */
BIOS += 16;
}
if (hd_info[1].cyl) /* a non-zero cylinder count for the second entry means two disks */
NR_HD=2;
else
NR_HD=1;
#endif
/*
 * Each physical disk gets five hd[] slots: slot 0 is the whole disk and
 * slots 1-4 are its (at most four) partitions. The first disk starts at
 * index 0*5, the second at 1*5.
 */
for (i=0 ; i<NR_HD ; i++) {
hd[i*5].start_sect = 0;
hd[i*5].nr_sects = hd_info[i].head*
hd_info[i].sect*hd_info[i].cyl;
}
/*
We query CMOS about hard disks : it could be that
we have a SCSI/ESDI/etc controller that is BIOS
compatible with ST-506, and thus showing up in our
BIOS table, but not register compatible, and therefore
not present in CMOS.
Furthermore, we will assume that our ST-506 drives
<if any> are the primary drives in the system, and
the ones reflected as drive 1 or 2.
The first drive is stored in the high nibble of CMOS
byte 0x12, the second in the low nibble. This will be
either a 4 bit drive type or 0xf indicating use byte 0x19
for an 8 bit type, drive 1, 0x1a for drive 2 in CMOS.
Needless to say, a non-zero value means we have
an AT controller hard disk for that drive.
*/
if ((cmos_disks = CMOS_READ(0x12)) & 0xf0)
if (cmos_disks & 0x0f)
NR_HD = 2;
else
NR_HD = 1;
else
NR_HD = 0;
for (i = NR_HD ; i < 2 ; i++) {
hd[i*5].start_sect = 0;
hd[i*5].nr_sects = 0;
}
/*
 * The partition table is the basic piece of information about a disk; it
 * lives in the boot block, so read that block into the buffer cache.
 */
for (drive=0 ; drive<NR_HD ; drive++) {
if (!(bh = bread(0x300 + drive*5,0))) {
/* Device 0x300 is the first physical disk, 0x305 the second; bread() returns the buffer holding block 0. */
printk("Unable to read partition table of drive %d\n\r",
drive);
panic("");
}
if (bh->b_data[510] != 0x55 || (unsigned char)
bh->b_data[511] != 0xAA) {
printk("Bad partition table on drive %d\n\r",drive);
panic("");
}
p = 0x1BE + (void *)bh->b_data;
for (i=1;i<5;i++,p++) {
hd[i+5*drive].start_sect = p->start_sect;
hd[i+5*drive].nr_sects = p->nr_sects;
}
brelse(bh);
}
if (NR_HD)
printk("Partition table%s ok.\n\r",(NR_HD>1)?"s":"");
rd_load();
mount_root();
return (0);
}
/*
 * bread(): return an up-to-date buffer for block `block` of device `dev`.
 *
 * Obtains the buffer with getblk(); if its contents are not yet valid, a
 * READ is issued through ll_rw_block() and the caller waits until the
 * buffer is unlocked. Returns NULL (after releasing the buffer) when the
 * buffer is still not up to date after the wait.
 */
struct buffer_head * bread(int dev,int block)
{
	struct buffer_head * buf = getblk(dev,block);

	if (buf == NULL)
		panic("bread: getblk returned NULL\n");
	if (!buf->b_uptodate) {
		ll_rw_block(READ,buf);
		wait_on_buffer(buf);
		if (!buf->b_uptodate) {
			brelse(buf);
			return NULL;
		}
	}
	return buf;
}
/*
 * getblk(): return a buffer assigned to (dev, block).
 *
 * If get_hash_table() already finds the block, that buffer is returned.
 * Otherwise the free list is scanned for an unused buffer (b_count == 0)
 * with the lowest BADNESS; if none exists the caller sleeps on buffer_wait
 * and starts over. The chosen buffer is waited on until unlocked, synced
 * while dirty, and the whole search restarts whenever the buffer got used,
 * or the wanted block appeared in the hash table, in the meantime.
 * Finally the buffer is claimed (b_count = 1, not dirty, not up to date)
 * and re-queued under the new dev/block.
 *
 * The trailing remarks below come from the original walkthrough of the very
 * first call during boot.
 */
struct buffer_head * getblk(int dev,int block)
{
struct buffer_head * tmp, * bh;
repeat:
if (bh = get_hash_table(dev,block)) /* look in the hash table in case the block was already read in; on the first call this yields NULL */
return bh;
tmp = free_list;
do {
if (tmp->b_count) /* walkthrough: b_count is 0 at this point */
continue;
if (!bh || BADNESS(tmp)<BADNESS(bh)) { /* walkthrough: bh is NULL here */
bh = tmp;
if (!BADNESS(tmp)) /* BADNESS 0: a free buffer that needs no further work, stop searching */
break;
}
/* and repeat until we find something good */
} while ((tmp = tmp->b_next_free) != free_list);
if (!bh) { /* walkthrough: cannot happen yet, a free buffer is always found */
sleep_on(&buffer_wait);
goto repeat;
}
wait_on_buffer(bh); /* walkthrough: the buffer is not locked */
if (bh->b_count) /* walkthrough: the buffer is not in use yet */
goto repeat;
while (bh->b_dirt) { /* walkthrough: the buffer has not been modified */
sync_dev(bh->b_dev);
wait_on_buffer(bh);
if (bh->b_count)
goto repeat;
}
/* NOTE!! While we slept waiting for this block, somebody else might */
/* already have added "this" block to the cache. check it */
if (find_buffer(dev,block)) /* walkthrough: the block is not in the hash table */
goto repeat;
/* OK, FINALLY we know that this buffer is the only one of it's kind, */
/* and that it's unused (b_count=0), unlocked (b_lock=0), and clean */
bh->b_count=1; /* claim the buffer */
bh->b_dirt=0;
bh->b_uptodate=0;
remove_from_queues(bh); /* unlink first; it is re-inserted under the new dev/block below */
bh->b_dev=dev;
bh->b_blocknr=block;
insert_into_queues(bh);
return bh;
}
/*
 * get_hash_table(): look up (dev, block) in the buffer hash table.
 *
 * On a hit the buffer's reference count is raised and the caller waits for
 * the buffer to be unlocked. Because the buffer may have been reassigned
 * during the wait, its identity is re-checked; on a mismatch the reference
 * is dropped and the lookup is retried. Returns NULL when the block is not
 * cached.
 */
struct buffer_head * get_hash_table(int dev, int block)
{
	struct buffer_head * found;

	while ((found = find_buffer(dev,block)) != NULL) {
		found->b_count++;
		wait_on_buffer(found);
		if (found->b_dev == dev && found->b_blocknr == block)
			return found;
		/* Reassigned while we waited: give it back and look again. */
		found->b_count--;
	}
	return NULL;
}
/*
 * _hashfn(dev,block): bucket index for a (device, block) pair -- the XOR of
 * the two values, converted to unsigned, modulo NR_HASH.
 * Each argument is parenthesized so that an expression argument (for
 * example "a|b") is combined as a unit instead of by operator precedence.
 */
#define _hashfn(dev,block) (((unsigned)((dev)^(block)))%NR_HASH)
/* hash(dev,block): the hash_table slot (head of the chain) for (dev, block). */
#define hash(dev,block) hash_table[_hashfn((dev),(block))]
/*
 * find_buffer(): walk the hash chain selected by hash(dev,block) and return
 * the first buffer whose b_dev and b_blocknr both match, or NULL when the
 * chain holds no such buffer. No reference count is taken.
 */
static struct buffer_head * find_buffer(int dev, int block)
{
	struct buffer_head * cur = hash(dev,block);

	while (cur != NULL) {
		if (cur->b_dev == dev && cur->b_blocknr == block)
			return cur;
		cur = cur->b_next;
	}
	return NULL;
}
getblk() 取得空闲缓冲块、完成初始化并把它挂接到 hash_table 之后，缓冲块就获取完毕了。这时回到 bread() 函数继续执行：
ll_rw_block(READ,bh);
/*
 * ll_rw_block(): hand a buffer to its block device for a read or write.
 *
 * The major number of bh->b_dev must be below NR_BLK_DEV and the matching
 * blk_dev[] entry must have a request function; otherwise a message is
 * printed and nothing is queued. Valid requests go to make_request().
 */
void ll_rw_block(int rw, struct buffer_head * bh)
{
	unsigned int major = MAJOR(bh->b_dev);

	if (major < NR_BLK_DEV && blk_dev[major].request_fn) {
		make_request(major,rw,bh);
		return;
	}
	printk("Trying to read nonexistent block-device\n\r");
}
/*
 * add_request(): put req on the request queue of device dev.
 *
 * The queue is inspected and modified with interrupts disabled (cli).
 * The dirty flag of the attached buffer, if any, is cleared first.
 * If the device has no current request, req becomes the current request,
 * interrupts are re-enabled and the device's request_fn is called at once.
 * Otherwise req is linked into the list at the position selected by the
 * IN_ORDER comparisons (or at the tail), and interrupts are re-enabled.
 */
static void add_request(struct blk_dev_struct * dev, struct request * req)
{
struct request * tmp;
req->next = NULL;
cli();
if (req->bh)
req->bh->b_dirt = 0;
if (!(tmp = dev->current_request)) {
dev->current_request = req;
sti();
(dev->request_fn)(); /* per the original note: do_hd_request() for the hard disk */
return;
}
for ( ; tmp->next ; tmp=tmp->next) /* elevator ordering: stop at the node after which req belongs */
if ((IN_ORDER(tmp,req) ||
!IN_ORDER(tmp,tmp->next)) &&
IN_ORDER(req,tmp->next))
break;
req->next=tmp->next; /* link req in right after tmp */
tmp->next=req;
sti();
}
/*
 * make_request(): build a request for buffer bh and queue it on the device
 * with the given major number.
 *
 * rw is READ, WRITE, READA or WRITEA; anything else panics. The -ahead
 * variants are dropped silently when the buffer is already locked or when
 * no request slot is free; otherwise they are treated as READ/WRITE.
 * The buffer is locked, and unlocked again immediately when there is
 * nothing to do (WRITE of a clean buffer, READ of an up-to-date buffer).
 * A free slot (dev < 0) is searched backwards through request[]: reads may
 * use the whole table, writes only the first two thirds. If no slot is free
 * the caller sleeps on wait_for_request and retries.
 */
static void make_request(int major,int rw, struct buffer_head * bh)
{
struct request * req;
int rw_ahead;
/* WRITEA/READA is special case - it is not really needed, so if the */
/* buffer is locked, we just forget about it, else it's a normal read */
if (rw_ahead = (rw == READA || rw == WRITEA)) {
if (bh->b_lock)
return;
if (rw == READA)
rw = READ;
else
rw = WRITE;
}
if (rw!=READ && rw!=WRITE)
panic("Bad block dev command, must be R/W/RA/WA");
lock_buffer(bh); /* lock the buffer so it is not used elsewhere in the meantime */
if ((rw == WRITE && !bh->b_dirt) || (rw == READ && bh->b_uptodate)) { /* nothing to transfer; in the boot walkthrough the buffer is not up to date, so this is skipped */
unlock_buffer(bh);
return;
}
repeat:
/* we don't allow the write-requests to fill up the queue completely:
 * we want some room for reads: they take precedence. The last third
 * of the requests are only for reads.
 */
if (rw == READ)
req = request+NR_REQUEST; /* reads start searching from the end of the table */
else
req = request+((NR_REQUEST*2)/3); /* writes start searching from the two-thirds mark */
/* find an empty request */
while (--req >= request) /* scan backwards for a free slot */
if (req->dev<0) /* dev < 0 marks a free slot */
break;
/* if none found, sleep on new requests: check for rw_ahead */
if (req < request) {
if (rw_ahead) {
unlock_buffer(bh);
return;
}
sleep_on(&wait_for_request);
goto repeat;
}
/* fill up the request-info, and add it to the queue */
req->dev = bh->b_dev;
req->cmd = rw;
req->errors=0;
req->sector = bh->b_blocknr<<1;
req->nr_sectors = 2;
req->buffer = bh->b_data;
req->waiting = NULL;
req->bh = bh;
req->next = NULL;
add_request(major+blk_dev,req);
}
/*
 * do_hd_request(): start the hard-disk operation for the current request.
 *
 * Validates the minor device and the sector range against hd[], converts
 * the partition-relative sector into an absolute sector and then into
 * cylinder/head/sector using the drive geometry in hd_info[], and issues
 * the command through hd_out(). Pending reset and recalibrate flags are
 * handled first, one per call. Panics on a command other than READ/WRITE.
 *
 * NOTE(review): the "repeat" label used by the gotos is not defined in this
 * body; presumably the INIT_REQUEST macro supplies it -- confirm.
 */
void do_hd_request(void)
{
int i,r;
unsigned int block,dev;
unsigned int sec,head,cyl;
unsigned int nsect;
INIT_REQUEST;
dev = MINOR(CURRENT->dev);
block = CURRENT->sector;
if (dev >= 5*NR_HD || block+2 > hd[dev].nr_sects) {
end_request(0);
goto repeat;
}
block += hd[dev].start_sect;
dev /= 5;
/* edx:eax = 0:block divided by sectors per track: quotient back into block, remainder into sec */
__asm__("divl %4":"=a" (block),"=d" (sec):"0" (block),"1" (0),
"r" (hd_info[dev].sect));
/* divide again by the number of heads: quotient is the cylinder, remainder the head */
__asm__("divl %4":"=a" (cyl),"=d" (head):"0" (block),"1" (0),
"r" (hd_info[dev].head));
sec++;
nsect = CURRENT->nr_sectors;
if (reset) {
reset = 0;
recalibrate = 1;
reset_hd(CURRENT_DEV); /* per the original note: sends WIN_SPECIFY via hd_out to set up the parameters needed for disk access */
return;
}
if (recalibrate) {
recalibrate = 0;
hd_out(dev,hd_info[CURRENT_DEV].sect,0,0,0,
WIN_RESTORE,&recal_intr); /* per the original note: WIN_RESTORE moves the heads to cylinder 0 before reading */
return;
}
if (CURRENT->cmd == WRITE) {
hd_out(dev,nsect,sec,head,cyl,WIN_WRITE,&write_intr);
/* poll the status port (at most 3000 times) until DRQ_STAT is set */
for(i=0 ; i<3000 && !(r=inb_p(HD_STATUS)&DRQ_STAT) ; i++)
/* nothing */ ;
if (!r) {
bad_rw_intr();
goto repeat;
}
port_write(HD_DATA,CURRENT->buffer,256);
} else if (CURRENT->cmd == READ) {
hd_out(dev,nsect,sec,head,cyl,WIN_READ,&read_intr); /* read command; read_intr becomes the interrupt handler */
} else
panic("unknown hd-command");
}
/*
 * hd_out(): program the hard-disk controller with one command.
 *
 * drive      drive number; values above 1 panic
 * nsect      sector count
 * sect       starting sector
 * head       head number; values above 15 panic
 * cyl        cylinder (low byte and high byte are written separately)
 * cmd        command byte, written last
 * intr_addr  handler stored in do_hd for the next disk interrupt
 *
 * Panics when controller_ready() reports the controller is not ready.
 * The drive's ctl byte goes to HD_CMD; the remaining values go to the
 * consecutive ports HD_DATA+1 .. HD_DATA+7, in the order shown below.
 */
static void hd_out(unsigned int drive,unsigned int nsect,unsigned int sect,
unsigned int head,unsigned int cyl,unsigned int cmd,
void (*intr_addr)(void))
{
register int port asm("dx");
if (drive>1 || head>15)
panic("Trying to write bad sector");
if (!controller_ready())
panic("HD controller not ready");
do_hd = intr_addr; /* read_intr in the boot walkthrough */
outb_p(hd_info[drive].ctl,HD_CMD);
port=HD_DATA;
outb_p(hd_info[drive].wpcom>>2,++port); /* HD_DATA+1 */
outb_p(nsect,++port); /* HD_DATA+2: sector count */
outb_p(sect,++port); /* HD_DATA+3: starting sector */
outb_p(cyl,++port); /* HD_DATA+4: cylinder, low byte */
outb_p(cyl>>8,++port); /* HD_DATA+5: cylinder, high byte */
outb_p(0xA0|(drive<<4)|head,++port); /* HD_DATA+6: drive and head */
outb(cmd,++port); /* HD_DATA+7: the command itself */
}
这里就开始下达读盘命令了，硬盘将引导块的数据不断地读入缓存中；同时程序也逐层返回，最后回到 bread()，执行：
wait_on_buffer(bh);
/*
 * wait_on_buffer(): block the caller until bh is no longer locked.
 *
 * Interrupts are disabled around the test so the lock flag cannot change
 * between checking it and going to sleep on bh->b_wait; they are enabled
 * again before returning.
 */
static inline void wait_on_buffer(struct buffer_head * bh)
{
	cli();
	for (;;) {
		if (!bh->b_lock)
			break;
		sleep_on(&bh->b_wait);
	}
	sti();
}
/*
 * sleep_on(): put the current task to sleep on the wait slot *p.
 *
 * The task previously stored in *p is remembered in a local, the current
 * task takes its place, is marked TASK_UNINTERRUPTIBLE and gives up the CPU
 * through schedule(). Once this task runs again, the remembered task (if
 * any) has its state set to 0 as well. A NULL p is ignored; the task in
 * init_task must never sleep (panic).
 */
void sleep_on(struct task_struct **p)
{
	struct task_struct *earlier_waiter;

	if (!p)
		return;
	if (&(init_task.task) == current)
		panic("task[0] trying to sleep");

	earlier_waiter = *p;
	*p = current;
	current->state = TASK_UNINTERRUPTIBLE;
	schedule();

	/* Running again: pass the wake-up on to whoever was waiting before us. */
	if (earlier_waiter)
		earlier_waiter->state = 0;
}
循环一段时间后，硬盘把一个扇区的数据读出来了，这时产生硬盘中断：
# _hd_interrupt: entry point for the hard-disk interrupt.
# Saves eax/ecx/edx and ds/es/fs, loads selector 0x10 into ds and es and
# 0x17 into fs, writes 0x20 to ports 0xA0 and 0x20, then calls the handler
# currently stored in _do_hd (clearing _do_hd in the same step). When _do_hd
# is zero, _unexpected_hd_interrupt is called instead.
_hd_interrupt:
pushl %eax
pushl %ecx
pushl %edx
push %ds
push %es
push %fs
movl $0x10,%eax
mov %ax,%ds
mov %ax,%es
movl $0x17,%eax
mov %ax,%fs
movb $0x20,%al
outb %al,$0xA0 # EOI to interrupt controller #1
jmp 1f # give port chance to breathe
1: jmp 1f
1: xorl %edx,%edx
xchgl _do_hd,%edx # edx = handler, _do_hd = 0
testl %edx,%edx
jne 1f
movl $_unexpected_hd_interrupt,%edx # no handler registered
1: outb %al,$0x20 # al still holds 0x20
call *%edx # "interesting" way of handling intr.
pop %fs
pop %es
pop %ds
popl %edx
popl %ecx
popl %eax
iret
然后就执行 read_intr() 函数，将已经读到硬盘缓存中的数据复制到刚才被锁定的那个缓冲块中。此时只读取了512字节，还有512字节未读出，所以仍要把 read_intr 绑定到硬盘中断服务程序上，以待下次中断时使用。
/*
 * read_intr(): disk-interrupt handler used while a READ is in progress.
 *
 * On a controller error the failure is recorded through bad_rw_intr() and
 * the queue is restarted. Otherwise 256 words are copied from HD_DATA into
 * the request buffer and the request is advanced by one sector (512 bytes).
 * While sectors remain, the handler re-registers itself and returns; after
 * the last sector the request is completed with end_request(1) and the next
 * request is started.
 */
static void read_intr(void)
{
	if (win_result()) {
		bad_rw_intr();
	} else {
		port_read(HD_DATA,CURRENT->buffer,256);
		CURRENT->errors = 0;
		CURRENT->buffer += 512;
		CURRENT->sector++;
		if (--CURRENT->nr_sectors != 0) {
			/* More sectors to come: stay registered for the next interrupt. */
			do_hd = &read_intr;
			return;
		}
		end_request(1);
	}
	do_hd_request();
}
第二次硬盘中断到来、1024字节的内容全部读取完毕后，就不再进入 if (--CURRENT->nr_sectors) 分支，而是执行 end_request(1)：
/*
 * end_request(): finish the request at the head of the device queue.
 *
 * uptodate  value stored in the buffer's b_uptodate flag; zero means the
 *           transfer failed and an I/O error is reported.
 *
 * Runs DEVICE_OFF on the request's device, updates and unlocks the attached
 * buffer (if there is one), wakes the tasks waiting on this request and on
 * wait_for_request, marks the slot free (dev = -1) and advances CURRENT to
 * the next request.
 */
extern inline void end_request(int uptodate)
{
	DEVICE_OFF(CURRENT->dev);
	if (CURRENT->bh) {
		CURRENT->bh->b_uptodate = uptodate;
		unlock_buffer(CURRENT->bh);
	}
	if (!uptodate) {
		printk(DEVICE_NAME " I/O error\n\r");
		/*
		 * The request may have no buffer attached (see the check
		 * above), so the block number is only reported when one
		 * exists; dereferencing a NULL bh here would fault.
		 */
		if (CURRENT->bh)
			printk("dev %04x, block %d\n\r",CURRENT->dev,
				CURRENT->bh->b_blocknr);
		else
			printk("dev %04x\n\r",CURRENT->dev);
	}
	wake_up(&CURRENT->waiting);
	wake_up(&wait_for_request);
	CURRENT->dev = -1;
	CURRENT = CURRENT->next;
}
/*
 * unlock_buffer(): clear the lock flag of bh and wake the task waiting on
 * bh->b_wait. Unlocking a buffer that is not locked is reported with a
 * message but otherwise handled the same way.
 */
extern inline void unlock_buffer(struct buffer_head * bh)
{
	if (bh->b_lock == 0)
		printk(DEVICE_NAME ": free buffer being unlocked\n");
	bh->b_lock = 0;
	wake_up(&bh->b_wait);
}
/*
 * wake_up(): make the task stored in the wait slot *p runnable (state 0)
 * and empty the slot. Does nothing when p is NULL or the slot is empty.
 */
void wake_up(struct task_struct **p)
{
	if (!p || !*p)
		return;
	(*p)->state = 0;
	*p = NULL;
}
切换到进程1后又返回到bread()函数中执行
if (bh->b_uptodate)
return bh;
这时b_uptodate为1 所以返回到sys_setup函数
for (drive=0 ; drive<NR_HD ; drive++) {
if (!(bh = bread(0x300 + drive*5,0))) {
printk("Unable to read partition table of drive %d\n\r",
drive);
panic("");
}
if (bh->b_data[510] != 0x55 || (unsigned char)
bh->b_data[511] != 0xAA) {
printk("Bad partition table on drive %d\n\r",drive);
panic("");
}
p = 0x1BE + (void *)bh->b_data;
for (i=1;i<5;i++,p++) { //根据引导块中的分区信息设置hd[]
hd[i+5*drive].start_sect = p->start_sect;
hd[i+5*drive].nr_sects = p->nr_sects;
}
brelse(bh); //释放缓冲块
}
if (NR_HD)
printk("Partition table%s ok.\n\r",(NR_HD>1)?"s":"");
rd_load();
mount_root();
return (0);
接下来是格式化虚拟盘，并把根设备更换为虚拟盘。
此时的根设备还是软盘。
/*
 * rd_load(): if a ram disk is configured and the root device has major
 * number 2, look for a file-system image starting at block 256 of the root
 * device and copy it into the ram disk.
 *
 * Returns without changing anything when rd_length is 0, when the root
 * device's major is not 2, when the super block cannot be read or its magic
 * number is not SUPER_MAGIC, or when the image does not fit in the ram disk.
 * A read error in the middle of the copy aborts the load and leaves
 * ROOT_DEV untouched. After a complete copy ROOT_DEV is set to 0x0101.
 */
void rd_load(void)
{
struct buffer_head *bh;
struct super_block s;
int block = 256; /* Start at block 256 (a block number, not a sector number) */
int i = 1;
int nblocks;
char *cp; /* Move pointer */
if (!rd_length)
return;
printk("Ram disk: %d bytes, starting at 0x%x\n", rd_length,
(int) rd_start);
if (MAJOR(ROOT_DEV) != 2)
return;
/* NOTE(review): presumably breada() returns the buffer for block+1 (257) and reads ahead the other listed blocks into the buffer cache, with -1 ending the list -- confirm against breada() */
bh = breada(ROOT_DEV,block+1,block,block+2,-1);
if (!bh) {
printk("Disk error while looking for ramdisk!\n");
return;
}
/* copy the on-disk part of the super block out of the buffer */
*((struct d_super_block *) &s) = *((struct d_super_block *) bh->b_data);
brelse(bh); /* release the buffer */
if (s.s_magic != SUPER_MAGIC)
/* No ram disk image present, assume normal floppy boot */
return;
nblocks = s.s_nzones << s.s_log_zone_size;
if (nblocks > (rd_length >> BLOCK_SIZE_BITS)) {
printk("Ram disk image too big! (%d blocks, %d avail)\n",
nblocks, rd_length >> BLOCK_SIZE_BITS);
return;
}
printk("Loading %d bytes into ram disk... 0000k",
nblocks << BLOCK_SIZE_BITS);
cp = rd_start;
while (nblocks) { /* copy the root file-system image from the root device into the ram disk, one block per pass */
if (nblocks > 2)
bh = breada(ROOT_DEV, block, block+1, block+2, -1);
else
bh = bread(ROOT_DEV, block);
if (!bh) {
printk("I/O error on block %d, aborting load\n",
block);
return;
}
(void) memcpy(cp, bh->b_data, BLOCK_SIZE); /* buffer contents -> ram disk memory */
brelse(bh);
printk("\010\010\010\010\010%4dk",i);
cp += BLOCK_SIZE;
block++;
nblocks--;
i++;
}
printk("\010\010\010\010\010done \n");
ROOT_DEV=0x0101; /* per the original note: the ram disk becomes the root device */
}
设置虚拟盘为根设备之后 在根设备加载根文件系统
主要三个步骤
1.复制根设备的超级块到super_block[8]中,将根设备中的根i节点挂在super_block[8]中对应根设备的超级块上
2.将驻留在缓冲区中的16个缓冲块里的根设备逻辑块位图、i节点位图，分别挂接到super_block[8]中根设备超级块的s_zmap[8]、s_imap[8]上
3.将当前进程的pwd,root指针指向根设备的根i节点
/*
 * mount_root(): mount the file system found on ROOT_DEV as the root.
 *
 * Clears the reference counts in file_table[] and the super_block[] slots,
 * prompts for the root floppy when ROOT_DEV has major number 2, reads the
 * root device's super block and root inode (panicking when either fails),
 * makes that inode the super block's s_isup/s_imount and the current task's
 * pwd and root, and finally prints how many zone-map and inode-map bits
 * test as clear.
 */
void mount_root(void)
{
	int idx, nfree;
	struct super_block * sb;
	struct m_inode * root_inode;

	if (sizeof (struct d_inode) != 32)
		panic("bad i-node size");

	for (idx = 0; idx < NR_FILE; idx++)
		file_table[idx].f_count = 0;

	if (MAJOR(ROOT_DEV) == 2) {
		printk("Insert root floppy and press ENTER");
		wait_for_keypress();
	}

	for (sb = &super_block[0]; sb < &super_block[NR_SUPER]; sb++) {
		sb->s_dev = 0;
		sb->s_lock = 0;
		sb->s_wait = NULL;
	}

	sb = read_super(ROOT_DEV);
	if (!sb)
		panic("Unable to mount root");
	root_inode = iget(ROOT_DEV,ROOT_INO);
	if (!root_inode)
		panic("Unable to read root i-node");

	root_inode->i_count += 3 ;	/* NOTE! it is logically used 4 times, not 1 */
	sb->s_imount = root_inode;
	sb->s_isup = root_inode;
	current->pwd = root_inode;
	current->root = root_inode;

	/* Count clear bits in the zone bitmap, from the highest zone down to 0. */
	nfree = 0;
	for (idx = sb->s_nzones; idx-- > 0; )
		if (!set_bit(idx&8191,sb->s_zmap[idx>>13]->b_data))
			nfree++;
	printk("%d/%d free blocks\n\r",nfree,sb->s_nzones);

	/* Same for the inode bitmap, covering bits s_ninodes down to 0. */
	nfree = 0;
	for (idx = sb->s_ninodes+1; idx-- > 0; )
		if (!set_bit(idx&8191,sb->s_imap[idx>>13]->b_data))
			nfree++;
	printk("%d/%d free inodes\n\r",nfree,sb->s_ninodes);
}