关闭

linux0.12之内核代码之『深入追踪fork函数』

标签: fork内核
521人阅读 评论(0) 收藏 举报
分类:

在上一篇fork.c分析中简单分析了内核中fork的实现,这一篇从用户层出发,追踪fork函数是如何一步步调用到内核的。
目前已经知道这是一个系统调用函数,看看能不能找到fork函数原型,很不幸花了十分又十分钟,还是没有找到。
但是在内核init中的main.c有调用fork函数,就以此为线索吧。

void main(void)  /* This really IS void, no error here. */
{    /* The startup routine assumes (well, ...) this */
/*
 * Interrupts are still disabled. Do necessary setups, then
 * enable them
 */
  ROOT_DEV = ORIG_ROOT_DEV;
  SWAP_DEV = ORIG_SWAP_DEV;
 sprintf(term, "TERM=con%dx%d", CON_COLS, CON_ROWS);
 envp[1] = term;    
 envp_rc[1] = term;
  drive_info = DRIVE_INFO;
 memory_end = (1<<20) + (EXT_MEM_K<<10);
 memory_end &= 0xfffff000;
 if (memory_end > 16*1024*1024)
  memory_end = 16*1024*1024;
 if (memory_end > 12*1024*1024) 
  buffer_memory_end = 4*1024*1024;
 else if (memory_end > 6*1024*1024)
  buffer_memory_end = 2*1024*1024;
 else
  buffer_memory_end = 1*1024*1024;
 main_memory_start = buffer_memory_end;
#ifdef RAMDISK
 main_memory_start += rd_init(main_memory_start, RAMDISK*1024);
#endif
 mem_init(main_memory_start,memory_end);
 trap_init();
 blk_dev_init();
 chr_dev_init();
 tty_init();
 time_init();
 sched_init();
 buffer_init(buffer_memory_end);
 hd_init();
 floppy_init();
 sti();
 move_to_user_mode();
 if (!fork()) {  /* we count on this going ok */
  init();
 }

最后调用了fork,但是无论怎么搜索也找不到fork这个函数的定义,只在unistd.h中有声明。
那就搜索含有fork的关键字吧,
在main.c的最上面有

/*
 * we need this inline - forking from kernel space will result
 * in NO COPY ON WRITE (!!!), until an execve is executed. This
 * is no problem, but for the stack. This is handled by not letting
 * main() use the stack at all after fork(). Thus, no function
 * calls - which means inline code for fork too, as otherwise we
 * would use the stack upon exit from 'fork()'.
 *
 * Actually only pause and fork are needed inline, so that there
 * won't be any messing with the stack from main(), but we define
 * some others too.
 */
/*
 * Each line below is a macro invocation that expands into a complete
 * static inline function definition (see _syscall0 for the zero-argument
 * form).
 */
static inline _syscall0(int,fork)	/* defines int fork(void) */
static inline _syscall0(int,pause)	/* defines int pause(void) */
static inline _syscall1(int,setup,void *,BIOS)	/* one-argument form; _syscall1 is not shown here - presumably int setup(void * BIOS) */
static inline _syscall0(int,sync)	/* defines int sync(void) */

这里出现了fork:它是通过宏_syscall0生成的一个static inline(内联)函数,这个宏的定义如下:

/*
 * _syscall0(type,name) - define a zero-argument system-call stub.
 *
 * Expands to the function `type name(void)`. The stub passes the constant
 * __NR_<name> in eax (the "0" input constraint reuses operand 0, which
 * "=a" binds to eax), executes `int $0x80`, and reads the value left in
 * eax back into __res.
 *
 * Returns __res cast to `type` when __res >= 0. Otherwise stores -__res
 * in errno and returns -1.
 */
#define _syscall0(type,name) \
type name(void) \
{ \
long __res; \
__asm__ volatile ("int $0x80" \
 : "=a" (__res) \
 : "0" (__NR_##name)); \
if (__res >= 0) \
 return (type) __res; \
errno = -__res; \
return -1; \
}

将这个宏展开,就会得到fork的函数实现
来来展开看看

_syscall0(int,fork)

/*
 * fork - the function produced by expanding _syscall0(int,fork).
 *
 * Issues software interrupt 0x80 with the call number __NR_fork in eax
 * and takes the result back from eax.
 *
 * Returns the result unchanged when it is >= 0. A negative result is
 * negated and stored in errno, and -1 is returned instead.
 */
int fork(void)
{
 long __res;

 /*
  * "=a" binds __res to eax; the "0" input constraint reuses that same
  * operand, so __NR_fork is what eax holds when the interrupt fires.
  */
 __asm__ volatile ("int $0x80"
      : "=a" (__res)
      : "0" (__NR_fork));
 if (__res >= 0)
  return (int) __res;
 errno = -__res;
 return -1;
}

int 0x80就是触发系统调用的软中断,同时把调用号__NR_fork通过eax寄存器传递进去

#define __NR_fork   2

看看 int 0x80 会中断执行什么,
首先我们知道,linux0.12中会有一个中断描述符表(IDT),就类似于中断向量表,
那常规理解,肯定要去注册才能使用,
所以在sched.c调度代码中有一个初始化调度函数

/*
 * sched_init - one-time scheduler setup.
 *
 * Installs the TSS/LDT descriptors of init_task in the GDT, empties every
 * other task slot and its descriptor pair, programs the timer channel,
 * hooks the timer interrupt at vector 0x20 and installs system_call as
 * the handler for vector 0x80.
 */
void sched_init(void)
{
 int i;
 struct desc_struct * p;

 /* Sanity check: stop here if struct sigaction is not exactly 16 bytes. */
 if (sizeof(struct sigaction) != 16)
  panic("Struct sigaction MUST be 16 bytes");
 /* Descriptors for init_task's TSS and LDT go into their GDT slots. */
 set_tss_desc(gdt+FIRST_TSS_ENTRY,&(init_task.task.tss));
 set_ldt_desc(gdt+FIRST_LDT_ENTRY,&(init_task.task.ldt));
 /*
  * Start two descriptors past FIRST_TSS_ENTRY (presumably skipping the
  * pair just written for init_task), then for each task slot 1..NR_TASKS-1
  * clear the task pointer and zero two consecutive GDT descriptors.
  */
 p = gdt+2+FIRST_TSS_ENTRY;
 for(i=1;i<NR_TASKS;i++) {
  task[i] = NULL;
  p->a=p->b=0;
  p++;
  p->a=p->b=0;
  p++;
 }
/* Clear NT, so that we won't have troubles with that later on */
 __asm__("pushfl ; andl $0xffffbfff,(%esp) ; popfl");
 /* NOTE(review): presumably loads the task register and LDT register for task 0 - confirm against ltr/lldt. */
 ltr(0);
 lldt(0);
 /* Program the timer: control word to port 0x43, then the LATCH divisor to port 0x40, low byte first. */
 outb_p(0x36,0x43);  /* binary, mode 3, LSB/MSB, ch 0 */
 outb_p(LATCH & 0xff , 0x40);   /* LSB */
 outb(LATCH >> 8 , 0x40);   /* MSB */
 /* Vector 0x20 is handled by timer_interrupt. */
 set_intr_gate(0x20,&timer_interrupt);
 /* Clear bit 0 of the value read from port 0x21 and write it back (presumably unmasks the timer IRQ - confirm). */
 outb(inb_p(0x21)&~0x01,0x21);
 /* Vector 0x80 - the one used by `int $0x80` - is handled by system_call. */
 set_system_gate(0x80,&system_call);
}

其中最后一句 set_system_gate(0x80,&system_call); 很值得注意,
这里的0x80正好和 int 0x80 的中断号对应,
看看这个函数到底是什么,在system.h中,它是一个宏

/*
 * set_system_gate(n,addr) - make addr the handler of interrupt vector n.
 *
 * Fills idt[n] through _set_gate with type 15 and DPL 3. In the x86 gate
 * format that is a 32-bit trap gate reachable from privilege level 3,
 * which is what lets user-mode code execute `int n`.
 */
#define set_system_gate(n,addr) \
 _set_gate(&idt[n],15,3,addr)
将里面的宏再展开
/*
 * _set_gate(gate_addr,type,dpl,addr) - write one 8-byte gate descriptor.
 *
 * On entry edx holds addr and eax holds 0x00080000. The instructions then:
 *   movw %dx,%ax  - copy the low 16 bits of addr into ax, so that
 *                   eax = 0x0008 : addr[15:0] (0x0008 lands in the
 *                   selector field; presumably the kernel code segment)
 *   movw %0,%dx   - replace the low 16 bits of edx with the flags word
 *                   0x8000 + (dpl<<13) + (type<<8), so that
 *                   edx = addr[31:16] : flags
 *   movl %eax,%1  - store eax into bytes 0-3 at gate_addr
 *   movl %edx,%2  - store edx into bytes 4-7 at gate_addr
 */
#define _set_gate(gate_addr,type,dpl,addr) \
__asm__ ("movw %%dx,%%ax\n\t" \
 "movw %0,%%dx\n\t" \
 "movl %%eax,%1\n\t" \
 "movl %%edx,%2" \
 : \
 : "i" ((short) (0x8000+(dpl<<13)+(type<<8))), \
 "o" (*((char *) (gate_addr))), \
 "o" (*(4+(char *) (gate_addr))), \
 "d" ((char *) (addr)),"a" (0x00080000))

大致就是将addr地址注册到idt第0x80表项中。

所以到现在 明白了一点
执行int 0x80系统就会去调用 system_call函数。(其实这就是常规的中断调用机制)

接下来回到 fork函数
在调用int 0x80时,还通过eax寄存器传递了一个参数(系统调用号),
看看system_call到底是什么,在sys_call.s中

_system_call:
 push %ds
 push %es
 push %fs
 pushl %eax                       # save the orig_eax
 pushl %edx  
 pushl %ecx                       # push %ebx,%ecx,%edx as parameters
 pushl %ebx                       # to the system call
 movl $0x10,%edx             # set up ds,es to kernel space
 mov %dx,%ds
 mov %dx,%es
 movl $0x17,%edx             # fs points to local data space
 mov %dx,%fs
 cmpl _NR_syscalls,%eax
 jae bad_sys_call
 call _sys_call_table(,%eax,4)
 pushl %eax
其中这几句是关键
 cmpl _NR_syscalls,%eax
 jae bad_sys_call
 call _sys_call_table(,%eax,4)
在include/linux/sys.h中的
/* So we don't have to do any more manual updating.... */
int NR_syscalls = sizeof(sys_call_table)/sizeof(fn_ptr);

fn_ptr sys_call_table[] = { sys_setup, sys_exit, sys_fork, sys_read,
sys_write, sys_open, sys_close, sys_waitpid, sys_creat, sys_link,
sys_unlink, sys_execve, sys_chdir, sys_time, sys_mknod, sys_chmod,
sys_chown, sys_break, sys_stat, sys_lseek, sys_getpid, sys_mount,
sys_umount, sys_setuid, sys_getuid, sys_stime, sys_ptrace, sys_alarm,
sys_fstat, sys_pause, sys_utime, sys_stty, sys_gtty, sys_access,
sys_nice, sys_ftime, sys_sync, sys_kill, sys_rename, sys_mkdir,
sys_rmdir, sys_dup, sys_pipe, sys_times, sys_prof, sys_brk, sys_setgid,
sys_getgid, sys_signal, sys_geteuid, sys_getegid, sys_acct, sys_phys,
sys_lock, sys_ioctl, sys_fcntl, sys_mpx, sys_setpgid, sys_ulimit,
sys_uname, sys_umask, sys_chroot, sys_ustat, sys_dup2, sys_getppid,
sys_getpgrp, sys_setsid, sys_sigaction, sys_sgetmask, sys_ssetmask,
sys_setreuid,sys_setregid, sys_sigsuspend, sys_sigpending, sys_sethostname,
sys_setrlimit, sys_getrlimit, sys_getrusage, sys_gettimeofday, 
sys_settimeofday, sys_getgroups, sys_setgroups, sys_select, sys_symlink,
sys_lstat, sys_readlink, sys_uselib };
在sched.h中
typedef int (*fn_ptr)();

从这三个片段代码知道,sys_call_table就是一个函数指针数组,NR_syscalls就是这个数组的元素个数(也就是系统调用的总数)。
在system_call中怎么call一个数组呢,其实 call _sys_call_table(,%eax,4)
是一次间接调用:先算出表项地址 = sys_call_table + %eax*4,再取出该表项中保存的函数地址去调用;乘以四是因为,每个数组元素占4个字节(函数地址么)。
所以

cmpl _NR_syscalls,%eax
 jae bad_sys_call
 call _sys_call_table(,%eax,4)

的功能就是,判断调用参数是否超出系统调用的最大调用号,如果正常,调用传入调用号对应的系统函数。
整体流程为
这里写图片描述
下面的任务就是 研究sys_fork这个系统函数了.还在sys_call.s中

# _sys_fork: assembly entry for the fork system call (presumably the
# sys_fork slot of sys_call_table). It asks _find_empty_process for a
# result, gives up if that result is negative, and otherwise hands the
# work to _copy_process. Whatever is in %eax at the final ret is what
# the caller sees.
.align 2
_sys_fork:
 call _find_empty_process
 testl %eax,%eax	# set the sign flag from the value just returned
 js 1f			# negative: skip the copy and return that value as is
 push %gs		# the next five pushes are presumably the leading
 pushl %esi		# stack arguments of _copy_process; the value returned
 pushl %edi		# by _find_empty_process is pushed last
 pushl %ebp
 pushl %eax
 call _copy_process
 addl $20,%esp		# drop the five 4-byte values pushed above
1:  ret 

这里调用了两个函数 这在我上一篇已经分析了
http://blog.csdn.net/u010442328/article/details/46773737

2
0

查看评论
* 以上用户言论只代表其个人观点,不代表CSDN网站的观点或立场
    个人资料
    • 访问:61411次
    • 积分:2074
    • 等级:
    • 排名:第19165名
    • 原创:136篇
    • 转载:8篇
    • 译文:0篇
    • 评论:7条
    最新评论