1. 几个重要概念
- 系统调用和库函数
- 同步中断(异常)和异步中断(中断)
- 中断向量(0~255),中断号,中断处理程序,中断描述符表(IDT)
2. trap_init()
- called-by: init/main.c:start_kernel
- loc: arch/x86/kernel/traps.c:824
系统调用入口:
...
870 #ifdef CONFIG_X86_32
871 set_system_trap_gate(IA32_SYSCALL_VECTOR, entry_INT80_32);
872 set_bit(IA32_SYSCALL_VECTOR, used_vectors);
873 #endif
...
其中,arch/x86/include/asm/irq_vectors.h:
#define IA32_SYSCALL_VECTOR 0x80
entry_INT80_32就是系统调用的入口,用汇编写的,暂时也看不懂,位于arch/x86/entry/entry_32.S:
360 ENTRY(entry_INT80_32)
361 ASM_CLAC
362 pushl %eax # save orig_eax
363 SAVE_ALL
364 GET_THREAD_INFO(%ebp)
365 # system call tracing in operation / emulation
366 testl $_TIF_WORK_SYSCALL_ENTRY, TI_flags(%ebp)
367 jnz syscall_trace_entry
368 cmpl $(NR_syscalls), %eax
369 jae syscall_badsys
370 syscall_call:
371 call *sys_call_table(, %eax, 4)
...
451 ENDPROC(entry_INT80_32)
其中371行是关键:sys_call_table就是系统调用表,%eax中的系统调用号作为下标,从表中取出对应处理函数的地址并调用。不过它的初始化已经不像过去那样直接,现在如果要修改系统调用表,好像只需要修改模板文件,编译时会调用脚本进行处理(32位入口对应的模板是syscall_32.tbl,下面以64位的模板为例),arch/x86/entry/syscalls/syscall_64.tbl:
1 #
2 # 64-bit system call numbers and entry vectors
3 #
4 # The format is:
5 # <number> <abi> <name> <entry point>
6 #
7 # The abi is "common", "64" or "x32" for this file.
8 #
9 0 common read sys_read
10 1 common write sys_write
...
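sys_write这类函数与系统调用号的对应关系也出现在include/uapi/asm-generic/unistd.h(注意这里read/write的编号是63/64,与上面x86-64模板中的0/1不同,说明这是通用的系统调用表,并非x86所用;__SYSCALL只是表项宏,真正的函数*声明*在include/linux/syscalls.h):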
197 /* fs/read_write.c */
198 #define __NR3264_lseek 62
199 __SC_3264(__NR3264_lseek, sys_llseek, sys_lseek)
200 #define __NR_read 63
201 __SYSCALL(__NR_read, sys_read)
202 #define __NR_write 64
203 __SYSCALL(__NR_write, sys_write)
而它们的定义则出现在fs/read_write.c:
577 SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf,
578 size_t, count)
579 {
580 struct fd f = fdget_pos(fd);
581 ssize_t ret = -EBADF;
582
583 if (f.file) {
584 loff_t pos = file_pos_read(f.file);
585 ret = vfs_write(f.file, buf, count, &pos);
586 if (ret >= 0)
587 file_pos_write(f.file, pos);
588 fdput_pos(f);
589 }
590
591 return ret;
592 }
SYSCALL_DEFINE3当然又是宏定义了,在include/linux/syscalls.h中:
184 #define SYSCALL_DEFINE3(name, ...) SYSCALL_DEFINEx(3, _##name, __VA_ARGS__)
189 #define SYSCALL_DEFINEx(x, sname, ...) \
190 SYSCALL_METADATA(sname, x, __VA_ARGS__) \
191 __SYSCALL_DEFINEx(x, sname, __VA_ARGS__)
194 #define __SYSCALL_DEFINEx(x, name, ...) \
195 asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) \
196 __attribute__((alias(__stringify(SyS##name)))); \
197 static inline long SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \
198 asmlinkage long SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \
199 asmlinkage long SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \
200 { \
201 long ret = SYSC##name(__MAP(x,__SC_CAST,__VA_ARGS__)); \
202 __MAP(x,__SC_TEST,__VA_ARGS__); \
203 __PROTECT(x, ret,__MAP(x,__SC_ARGS,__VA_ARGS__)); \
204 return ret; \
205 } \
看到这里真是醉了,之所以把宏定义用得淋漓尽致,大概是为了支持多种体系结构吧(从上面的展开还可以看到,它顺带通过SYSCALL_METADATA生成了元数据,并借助__SC_LONG/__SC_CAST对参数做了类型转换)。
3. system_call_fastpath
566 # TASK-PID CPU# |||| TIMESTAMP FUNCTION
567 # | | | |||| | |
568 bash-1977 [000] .... 17284.993652: sys_close <-system_call_fastpath
见过这个函数吧?这个怪物暂时不明来历,grep没有抓到任何定义,只有这些:
Documentation/trace/ftrace.txt:574: bash-1977 [000] .... 17284.993652: sys_close <-system_call_fastpath
Documentation/trace/ftrace.txt:583: sshd-1974 [003] .... 17284.993658: sys_select <-system_call_fastpath
Documentation/trace/ftrace.txt:597:called this function "system_call_fastpath". The timestamp is the time
Documentation/trace/ftrace.txt:640: => system_call_fastpath
Documentation/trace/ftrace.txt:1053: => system_call_fastpath
Documentation/trace/ftrace.txt:1276: => system_call_fastpath
Documentation/trace/ftrace.txt:1376: => system_call_fastpath
Documentation/trace/ftrace.txt:2194: usleep-2665 [001] .... 4186.475355: sys_nanosleep <-system_call_fastpath
Documentation/trace/ftrace.txt:2575: bash-1994 [000] .... 5281.568967: sys_dup2 <-system_call_fastpath
Documentation/trace/ftrace.txt:2888: 17) 128 128 system_call_fastpath+0x16/0x1b
Documentation/kasan.txt:63: system_call_fastpath+0x12/0x17
Documentation/kasan.txt:104: [<ffffffff81cd3129>] system_call_fastpath+0x12/0x17
但是,比较老的内核版本3.0抓到了点东西:
arch/x86/kernel/entry_64.S:492:system_call_fastpath:
arch/x86/kernel/entry_64.S:573: jmp system_call_fastpath
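搞不懂,先这样吧。从上面的结果推测:system_call_fastpath并不是C函数,而是旧版arch/x86/kernel/entry_64.S中的一个汇编标号(64位系统调用入口中的快速路径);新内核的入口代码已移到arch/x86/entry/下,该标号不复存在,所以grep只能在文档里找到旧内核留下的trace输出。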