其实一直都想知道系统调用到底是怎么跑的。最近想看一下Linux源码,但太复杂看不懂,所以就先从Linux 0.11开始。看了一部分之后,有所得、有所思,就在此记录下来给自己看。
以下内容是看代码过程中产生的疑惑,以及解开疑惑之后的记录。其中参考了《Linux内核设计的艺术》和《Linux内核0.11(0.95)完全注释 v3.0》。
以fork为例,看看fork最终会如何跑。
/* Prototype of the fork() wrapper; it takes no arguments and returns int. */
int fork(void);
/*
 * Each _syscall0(type,name) use expands to the full definition of a
 * zero-argument function called `name`; the `static inline` in front
 * becomes the storage class of that generated function.
 */
static inline _syscall0(int,fork)
static inline _syscall0(int,pause)
1. _syscall0 (疑惑点1. 记号粘贴操作符##的使用)
/*
 * _syscall0(type,name): generate a wrapper function `type name(void)`
 * for a system call that takes no arguments.
 *
 * The token-pasting operator ## joins "__NR_" with the macro argument,
 * so _syscall0(int,fork) refers to the constant __NR_fork.
 *
 * Inline-asm operands:
 *   "=a" (__res)       - output: __res receives the value left in %eax.
 *   "0" (__NR_##name)  - input: placed in the same register as operand 0
 *                        (%eax) before "int $0x80" is executed.
 *
 * Return convention: a non-negative result is returned cast to `type`;
 * a negative result is negated and stored in errno, and -1 is returned.
 */
#define _syscall0(type,name) \
type name(void) \
{ \
long __res; \
__asm__ volatile ("int $0x80" \
: "=a" (__res) \
: "0" (__NR_##name)); \
if (__res >= 0) \
return (type) __res; \
errno = -__res; \
return -1; \
}
/* Instantiate the wrapper for fork: defines `static inline int fork(void)`. */
static inline _syscall0(int,fork)
2. 展开_syscall0(int,fork)之后来看fork的实现。
/* System-call number of fork; must be defined before the wrapper uses it. */
#define __NR_fork 2

/*
 * fork() as it looks after expanding _syscall0(int,fork).
 *
 * The macro parameters are fully substituted here: `type` becomes int and
 * __NR_##name becomes __NR_fork. The line-continuation backslashes of the
 * macro definition are not part of the expansion and are dropped.
 *
 * Loads __NR_fork into %eax, executes "int $0x80", and reads the result
 * back from %eax.
 *
 * Returns the result as int when it is non-negative; otherwise stores the
 * negated result in errno and returns -1.
 */
int fork(void)
{
	long __res;

	__asm__ volatile ("int $0x80"
		: "=a" (__res)          /* output: %eax -> __res */
		: "0" (__NR_fork));     /* input: same register as operand 0 */
	if (__res >= 0)
		return (int) __res;
	errno = -__res;
	return -1;
}
3. "int $0x80"是软中断,为系统调用的总接口,这意味着系统调用都会从这里开始。(疑惑点2,c语言中的函数在汇编中函数名会有变化,system_call变为_system_call)
sched_init中会调用set_system_gate(0x80,&system_call);,把0x80号中断的处理入口设置为system_call。(内核源码注释:help-routines for the 'fork' system call, see also system_call.s)
_sys_call_table(,%eax,4)会根据传入的__NR_fork(即%eax=2,表中每项占4字节)定位到sys_fork,可以在system_call.s中找到_sys_fork的实现。
# Entry point installed for "int $0x80"; the system-call number arrives in %eax.
_system_call:
# Unsigned range check: numbers above nr_system_calls-1 go to bad_sys_call.
cmpl $nr_system_calls-1,%eax
ja bad_sys_call
# Save the segment registers that are reloaded below.
push %ds
push %es
push %fs
pushl %edx
pushl %ecx # push %ebx,%ecx,%edx as parameters
pushl %ebx # to the system call
movl $0x10,%edx # set up ds,es to kernel space
mov %dx,%ds
mov %dx,%es
movl $0x17,%edx # fs points to local data space
mov %dx,%fs
# Dispatch: call the handler stored at entry %eax of _sys_call_table
# (scale factor 4, i.e. 4-byte entries).
call _sys_call_table(,%eax,4)
# Keep the handler's return value on the stack; it is popped back into
# %eax at label 3 just before returning.
pushl %eax
# Go to reschedule if the current task's state field is non-zero or its
# counter field is zero. (state/counter are presumably task-structure
# offsets defined elsewhere in this file - not visible here.)
movl _current,%eax
cmpl $0,state(%eax) # state
jne reschedule
cmpl $0,counter(%eax) # counter
je reschedule
ret_from_sys_call:
# Signal handling is skipped (jump to 3) when the current task is the
# first entry of _task, or when the saved CS/SS values checked below do
# not match 0x0f / 0x17.
movl _current,%eax # task[0] cannot have signals
cmpl _task,%eax
je 3f
cmpw $0x0f,CS(%esp) # was old code segment supervisor ?
jne 3f
cmpw $0x17,OLDSS(%esp) # was stack segment = 0x17 ?
jne 3f
# %ecx = signal & ~blocked: the pending signals that are not blocked.
movl signal(%eax),%ebx
movl blocked(%eax),%ecx
notl %ecx
andl %ebx,%ecx
# bsfl finds the lowest set bit; it sets ZF when the source is zero,
# in which case there is nothing to deliver.
bsfl %ecx,%ecx
je 3f
# Clear that bit in the pending mask and write the mask back.
btrl %ecx,%ebx
movl %ebx,signal(%eax)
# Pass bit index + 1 as the argument to _do_signal
# (presumably because signal numbers start at 1 - confirm in do_signal).
incl %ecx
pushl %ecx
call _do_signal
# Discard the argument pushed for _do_signal.
popl %eax
# Restore everything in reverse order of the pushes above: the saved
# return value into %eax, then %ebx/%ecx/%edx and the segment registers,
# and return from the interrupt.
3: popl %eax
popl %ebx
popl %ecx
popl %edx
pop %fs
pop %es
pop %ds
iret
fn_ptr sys_call_table[] = { sys_setup, sys_exit, sys_fork, sys_read,
sys_write, sys_open, sys_close, sys_waitpid, sys_creat, sys_link,
sys_unlink, sys_execve, sys_chdir, sys_time, sys_mknod, sys_chmod,
sys_chown, sys_break, sys_stat, sys_lseek, sys_getpid, sys_mount,
sys_umount, sys_setuid, sys_getuid, sys_stime, sys_ptrace, sys_alarm,
sys_fstat, sys_pause, sys_utime, sys_stty, sys_gtty, sys_access,
sys_nice, sys_ftime, sys_sync, sys_kill, sys_rename, sys_mkdir,
sys_rmdir, sys_dup, sys_pipe, sys_times, sys_prof, sys_brk, sys_setgid,
sys_getgid, sys_signal, sys_geteuid, sys_getegid, sys_acct, sys_phys,
sys_lock, sys_ioctl, sys_fcntl, sys_mpx, sys_setpgid, sys_ulimit,
sys_uname, sys_umask, sys_chroot, sys_ustat, sys_dup2, sys_getppid,
sys_getpgrp, sys_setsid, sys_sigaction, sys_sgetmask, sys_ssetmask,
sys_setreuid,sys_setregid };
4. _sys_fork的实现,其中重要的两个函数find_empty_process和copy_process在fork.c中。其余部分主要为汇编的参数传递。
# Handler for the fork system call (entry 2 of the system-call table).
.align 2
_sys_fork:
# Ask for a free process slot; the result comes back in %eax.
call _find_empty_process
# A negative result (sign flag set) means failure: return it unchanged.
testl %eax,%eax
js 1f
# Push five values that _copy_process receives on the stack:
# %gs, %esi, %edi, %ebp and the result of _find_empty_process (pushed last).
push %gs
pushl %esi
pushl %edi
pushl %ebp
pushl %eax
call _copy_process
# Drop the five 4-byte values pushed above (5 * 4 = 20 bytes).
addl $20,%esp
# Return to the caller with whatever is in %eax.
1: ret
用于辅助理解的system_call.s的注释,尤其是stack的相关汇编操作需要用到。
* Stack layout in 'ret_from_system_call':
*
* 0(%esp) - %eax
* 4(%esp) - %ebx
* 8(%esp) - %ecx
* C(%esp) - %edx
* 10(%esp) - %fs
* 14(%esp) - %es
* 18(%esp) - %ds
* 1C(%esp) - %eip
* 20(%esp) - %cs
* 24(%esp) - %eflags
* 28(%esp) - %oldesp
* 2C(%esp) - %oldss
看了fork这样的系统调用之后,其他的系统调用大同小异。为了弄懂这个函数的执行过程,还需要继续学习汇编。目前为了不打断学习,先把整体框架看完,之后再慢慢补充学习中需要弄清楚的东西。