我们日常使用printf函数时,大家也许只知道它的功能是把一堆数据输出到你的眼前,但是在Linux中它具体是怎么实现的呢?
通过Linux kernel 0.11版本的代码,我们可以看到,应用程序调用printf函数时,实际调用的是C库中的printf函数,而库中的printf函数又会调用库中的write函数。然而到了这里,程序依旧没有进入操作系统的内核,那么我们该如何才能进入操作系统内核呢?
操作系统给用户进入内核的唯一方式就是调用中断。库中的write函数正是由宏_syscall3展开生成的;_syscall3这个宏还有两个表哥,分别为_syscall1和_syscall2。显而易见,这三个宏是根据所需要传入的参数个数来选择使用的,其中type是返回值类型,name是函数名字,atype是第一个参数的类型,a是第一个参数的名字,以此类推。大家请看下面的代码:
/*
 * _syscall1 - generate a user-space stub for a one-argument system call.
 *   type     return type of the generated function
 *   name     function name; __NR_##name supplies the system-call number
 *   atype,a  type and name of the argument
 * The stub places the call number in eax and the argument in ebx, raises
 * int $0x80 and reads the result back from eax. A negative result is
 * negated into errno and the stub returns -1; otherwise the result is
 * returned cast to 'type'.
 */
#define _syscall1(type,name,atype,a) \
type name(atype a) \
{ \
	long __res; \
	__asm__ volatile ("int $0x80" \
		: "=a" (__res) \
		: "0" (__NR_##name), "b" ((long)(a))); \
	if (__res < 0) { \
		errno = -__res; \
		return -1; \
	} \
	return (type) __res; \
}
/*
 * _syscall2 - generate a user-space stub for a two-argument system call.
 *   type     return type of the generated function
 *   name     function name; __NR_##name supplies the system-call number
 *   atype,a  type and name of the first argument
 *   btype,b  type and name of the second argument
 * The stub places the call number in eax and the arguments in ebx and ecx,
 * raises int $0x80 and reads the result back from eax. A negative result
 * is negated into errno and the stub returns -1; otherwise the result is
 * returned cast to 'type'.
 */
#define _syscall2(type,name,atype,a,btype,b) \
type name(atype a, btype b) \
{ \
	long __res; \
	__asm__ volatile ("int $0x80" \
		: "=a" (__res) \
		: "0" (__NR_##name), "b" ((long)(a)), "c" ((long)(b))); \
	if (__res < 0) { \
		errno = -__res; \
		return -1; \
	} \
	return (type) __res; \
}
/*
 * _syscall3 - generate a user-space stub for a three-argument system call.
 *   type     return type of the generated function
 *   name     function name; __NR_##name supplies the system-call number
 *   atype,a  type and name of the first argument
 *   btype,b  type and name of the second argument
 *   ctype,c  type and name of the third argument
 * The stub places the call number in eax and the arguments in ebx, ecx and
 * edx, raises int $0x80 and reads the result back from eax. A negative
 * result is negated into errno and the stub returns -1; otherwise the
 * result is returned cast to 'type'.
 */
#define _syscall3(type,name,atype,a,btype,b,ctype,c) \
type name(atype a, btype b, ctype c) \
{ \
	long __res; \
	__asm__ volatile ("int $0x80" \
		: "=a" (__res) \
		: "0" (__NR_##name), "b" ((long)(a)), "c" ((long)(b)), \
		  "d" ((long)(c))); \
	if (__res < 0) { \
		errno = -__res; \
		return -1; \
	} \
	return (type) __res; \
}
由于write函数原型为int write(int fd, char* buf, int count);有三个参数跟一个返回值,所以使用_syscall3。在_syscall3中可以看到内嵌汇编代码,在执行int 0x80之前需要往寄存器里面传值:输出约束"=a"中的'='表示这是一个输出操作数,'a'表示使用寄存器eax,也就是说int 0x80执行之后eax中的返回值会被存入__res;输入约束"0"表示该输入与第0个操作数(即eax)使用同一个寄存器,因此__NR_##name会在中断之前被装入eax。由于传入的name为write,所以传给eax的是宏__NR_write的值,而ebx,ecx,edx中分别保存了函数的三个参数。传完参之后开始执行int 0x80。
注:宏__NR_write为功能号。在调用系统中断之前,得先给系统中断传值:不仅要将参数传给中断,还要将所调用的系统调用的功能号传给中断。Linux 0.11的所有功能号(可在unistd.h文件中查看到)在文章结尾处列出。
OK在调用完int 0x80之后,需要去查IDT表。通过查询表中的段选择符,处理函数入口点偏移,进入system_call函数。。。
注:IDT表是在系统启动的时候进行初始化的。而0x80这个中断的初始化是在sched.c文件中通过调用set_system_gate(0x80,&system_call);完成的。
/*
 * set_system_gate - install handler 'addr' in IDT entry 'n'.
 * Forwards to _set_gate with type 15 and dpl 3; in the x86 gate-descriptor
 * encoding type 15 is a 32-bit trap gate, and DPL 3 lets privilege-level-3
 * code invoke the gate with a software interrupt.
 */
#define set_system_gate(n,addr) _set_gate(&idt[n], 15, 3, addr)
/*
 * _set_gate - write an 8-byte gate descriptor at gate_addr.
 *   gate_addr  address of the descriptor slot to fill
 *   type       gate type, placed in bits 8 and up of the flag word
 *   dpl        descriptor privilege level, placed in bits 13 and up
 *   addr       handler entry point
 *
 * Operands: %0 = immediate flag word 0x8000+(dpl<<13)+(type<<8)
 *           %1 = first 4 bytes of the descriptor (memory)
 *           %2 = last 4 bytes of the descriptor (memory)
 *           edx = addr, eax = 0x00080000 on entry
 * NOTE(review): 0x0008 in the upper half of eax ends up as the gate's
 * segment selector; presumably this is the kernel code segment — confirm
 * against the GDT setup, which is not shown here.
 * NOTE(review): the asm overwrites eax and edx although they are declared
 * as inputs only; newer compilers may need them listed as outputs.
 */
#define _set_gate(gate_addr,type,dpl,addr) \
__asm__ ("movw %%dx,%%ax\n\t" /* eax = 0x0008 : low 16 bits of addr */ \
"movw %0,%%dx\n\t" /* edx = high 16 bits of addr : flag word */ \
"movl %%eax,%1\n\t" /* store the low dword of the descriptor */ \
"movl %%edx,%2" /* store the high dword of the descriptor */ \
: \
: "i" ((short) (0x8000+(dpl<<13)+(type<<8))), \
"o" (*((char *) (gate_addr))), \
"o" (*(4+(char *) (gate_addr))), \
"d" ((char *) (addr)),"a" (0x00080000))
通过代码可以看到,gate_addr是IDT表中0x80号中断对应的门描述符的地址,type=15表示此中断号对应的是陷阱门,dpl=3使门描述符的DPL与用户态的CPL(3)相等,从而使得用户程序可以通过int 0x80进入内核,addr=&system_call,这里的system_call是一个汇编的子程序。初始化的时候先传参,'o'表示该操作数是可以加偏移量寻址的内存操作数,edx中先存放了system_call的地址,eax中先存放了0x00080000;然后把dx(也就是edx的低16位,即system_call地址的低16位)的值传给ax(此时eax的高16位为0x0008,低16位为system_call地址的低16位),再给dx传0xEF00(0xEF00 = 0x8000+(3<<13)+(15<<8),即二进制1110 1111 0000 0000,其中最高位的1是存在位P,接下来的11表示DPL设置成3,再往后的0 1111中的1111表示的是type=15;此时edx的高16位仍是system_call地址的高16位),之后将eax的值传给IDT表中0x80对应的位置(上图中的第二行),将edx传给之后的位置(上图中的第一行),这样就完成了中断描述符表中0x80的初始化。注:这个过程是在计算机启动的时候完成的。
在查完表之后就要进入system_call函数了(注:eax里面保存的是调用者功能号,别跟初始化中断那部分弄混淆了)
/*
 * Assembly-time constants.
 * NOTE(review): the sa_* values look like byte offsets of the fields of a
 * sigaction structure; confirm against the C definition (not shown here).
 */
sa_handler = 0
sa_mask = 4
sa_flags = 8
sa_restorer = 12
/*
 * Number of system calls. system_call rejects any call number greater
 * than nr_system_calls-1.
 */
nr_system_calls = 72
/*
* Ok, I get parallel printer interrupts while using the floppy for some
* strange reason. Urgel. Now I just ignore them.
*/
/* Symbols exported to the rest of the kernel. */
.globl system_call,sys_fork,timer_interrupt,sys_execve
.globl hd_interrupt,floppy_interrupt,parallel_interrupt
.globl device_not_available, coprocessor_error
/*
 * bad_sys_call: reached from system_call when the call number in %eax is
 * out of range. Nothing has been pushed at that point, so it just sets
 * the result to -1 and returns from the interrupt.
 */
.align 2
bad_sys_call:
movl $-1,%eax # result seen by the code that issued the interrupt
iret
/*
 * reschedule: reached from system_call when the current task's state is
 * non-zero or its counter is zero. Pushes ret_from_sys_call as the return
 * address and jumps (rather than calls) to schedule, so a return from
 * schedule resumes at ret_from_sys_call. Both schedule and
 * ret_from_sys_call are defined outside this excerpt.
 */
.align 2
reschedule:
pushl $ret_from_sys_call
jmp schedule
/*
 * system_call: handler entered through the int $0x80 gate.
 * In:  %eax = system-call number; %ebx, %ecx, %edx = up to three arguments.
 * The handler bounds-checks the number, saves the segment registers,
 * pushes the three argument registers, switches the segment registers and
 * calls the handler found in sys_call_table. The handler's return value
 * is then pushed, and the current task's state and counter decide
 * whether to branch to reschedule.
 * NOTE(review): this excerpt ends after the counter check; the code that
 * follows it (and the symbols current, state, counter) is not shown here.
 */
.align 2
system_call:
// validate the system-call number
cmpl $nr_system_calls-1,%eax
ja bad_sys_call # unsigned compare: anything above the last valid number is rejected
// save registers on the stack before making the system call
push %ds
push %es
push %fs
pushl %edx
pushl %ecx # push %ebx,%ecx,%edx as parameters
pushl %ebx # to the system call
movl $0x10,%edx # set up ds,es to kernel space
mov %dx,%ds
mov %dx,%es
movl $0x17,%edx # fs points to local data space
mov %dx,%fs
// call the function that implements the system call
call sys_call_table(,%eax,4) # indirect call through the entry at sys_call_table + 4*%eax
pushl %eax # keep the handler's return value on the stack
movl current,%eax # %eax = value of 'current'; used as the base for state/counter below
cmpl $0,state(%eax) # state
jne reschedule # state != 0: go through reschedule
cmpl $0,counter(%eax) # counter
je reschedule # counter == 0: go through reschedule
这么长的汇编代码,我看着都头疼,给大家讲重点吧:0x10是内核数据段的选择符,这里的call sys_call_table(,%eax,4)这段代码是去查sys_call_table这个数组,查询的地址为sys_call_table的地址+(4 * eax中功能号的值),然后调用该地址处保存的函数指针。
/*
 * System-call dispatch table. system_call indexes this array with the
 * call number passed in eax, so entry i must be the handler for the
 * system call whose __NR_* value is i. The trailing comments give the
 * index range of each row.
 *
 * Fix: sys_chown belongs at index 16 (__NR_chown), directly after
 * sys_chmod. It was previously listed after sys_mount, which shifted
 * sys_break..sys_mount down by one and made numbers 16-21 dispatch to
 * the wrong handlers.
 */
fn_ptr sys_call_table[] =
{
sys_setup, sys_exit, sys_fork, sys_read, sys_write, sys_open,			/*  0- 5 */
sys_close, sys_waitpid, sys_creat, sys_link, sys_unlink, sys_execve,		/*  6-11 */
sys_chdir, sys_time, sys_mknod, sys_chmod, sys_chown, sys_break,		/* 12-17 */
sys_stat, sys_lseek, sys_getpid, sys_mount, sys_umount, sys_setuid,		/* 18-23 */
sys_getuid, sys_stime, sys_ptrace, sys_alarm, sys_fstat, sys_pause,		/* 24-29 */
sys_utime, sys_stty, sys_gtty, sys_access, sys_nice, sys_ftime,			/* 30-35 */
sys_sync, sys_kill, sys_rename, sys_mkdir, sys_rmdir, sys_dup,			/* 36-41 */
sys_pipe, sys_times, sys_prof, sys_brk, sys_setgid, sys_getgid,			/* 42-47 */
sys_signal, sys_geteuid, sys_getegid, sys_acct, sys_phys, sys_lock,		/* 48-53 */
sys_ioctl, sys_fcntl, sys_mpx, sys_setpgid, sys_ulimit, sys_uname,		/* 54-59 */
sys_umask, sys_chroot, sys_ustat, sys_dup2, sys_getppid, sys_getpgrp,		/* 60-65 */
sys_setsid, sys_sigaction, sys_sgetmask, sys_ssetmask, sys_setreuid, sys_setregid	/* 66-71 */
};
查询完sys_call_table之后就要去执行sys_write函数了。。。。。接口的故事就完了,sys_write在I/O操作中继续
奉上功能号:
/*
 * System-call numbers. The value of __NR_<name> is the number a caller
 * loads into eax before int $0x80, and is also the index of the matching
 * handler in sys_call_table.
 */
#define __NR_setup      0	/* used only by init, to get system going */
#define __NR_exit       1
#define __NR_fork       2
#define __NR_read       3
#define __NR_write      4
#define __NR_open       5
#define __NR_close      6
#define __NR_waitpid    7
#define __NR_creat      8
#define __NR_link       9
#define __NR_unlink     10
#define __NR_execve     11
#define __NR_chdir      12
#define __NR_time       13
#define __NR_mknod      14
#define __NR_chmod      15
#define __NR_chown      16
#define __NR_break      17
#define __NR_stat       18
#define __NR_lseek      19
#define __NR_getpid     20
#define __NR_mount      21
#define __NR_umount     22
#define __NR_setuid     23
#define __NR_getuid     24
#define __NR_stime      25
#define __NR_ptrace     26
#define __NR_alarm      27
#define __NR_fstat      28
#define __NR_pause      29
#define __NR_utime      30
#define __NR_stty       31
#define __NR_gtty       32
#define __NR_access     33
#define __NR_nice       34
#define __NR_ftime      35
#define __NR_sync       36
#define __NR_kill       37
#define __NR_rename     38
#define __NR_mkdir      39
#define __NR_rmdir      40
#define __NR_dup        41
#define __NR_pipe       42
#define __NR_times      43
#define __NR_prof       44
#define __NR_brk        45
#define __NR_setgid     46
#define __NR_getgid     47
#define __NR_signal     48
#define __NR_geteuid    49
#define __NR_getegid    50
#define __NR_acct       51
#define __NR_phys       52
#define __NR_lock       53
#define __NR_ioctl      54
#define __NR_fcntl      55
#define __NR_mpx        56
#define __NR_setpgid    57
#define __NR_ulimit     58
#define __NR_uname      59
#define __NR_umask      60
#define __NR_chroot     61
#define __NR_ustat      62
#define __NR_dup2       63
#define __NR_getppid    64
#define __NR_getpgrp    65
#define __NR_setsid     66
#define __NR_sigaction  67
#define __NR_sgetmask   68
#define __NR_ssetmask   69
#define __NR_setreuid   70
#define __NR_setregid   71