一、linux内核体系结构
1、内核模式与体系结构
操作系统的工作方式:
1、把操作系统从用户态切换到内核态(用户应用程序 到 内核的流程)
2、实现操作系统的系统调用(操作系统的服务层)
3、应用操作系统提供的底层函数,进行功能实现
3、1操作系统的驱动结构
4、退出后从内核态切换到用户态
上面的这个也算一种分层吧,但是我还是比较喜欢下面这一种分层结构。
操作系统内核中各级模块的依赖关系:
linux内核整体模块
1、进程调度模块 2、内存管理模块 3、文件系统模块
4、进程通信模块 5、驱动管理模块
(又一种分类)
每个模块之间的关系
1、内存管理和驱动管理模块 虚拟内存的缓存和回存机制
2、VFS虚拟文件系统 把硬件当成文件来进行使用
linux操作系统的独立性
(要注意的是文件系统与虚拟文件系统 文件系统范围很广,linux中用的是一种文件系统叫虚拟文件系统)
在低版本到高版本的升级过程中
进程的调度算法发生了改变 进程的管理方式没有巨大的改变
内核驱动的种类发生了改变 内核驱动的管理模式没有巨大的改变
二、内核中断体系概括
1、linux的中断机制
1、硬件的中断响应 ---------内核驱动中的中断
2、系统调用的函数相应(sys_call)------系统调用
3、自定义中断--------软件的软中断模式
4、信号中断(kill -signal_number)-----信号的使用创建
5、系统的异常和错误----------系统的异常获取 了解系统异常的作用 异常不代表错误
分类
硬中断、软中断
硬中断:由电脑主机中的8259A类似的硬件中断控制芯片发出的中断
再类似于ARM中断控制器发出的中断
软中断:异常 第一类 CPU自行保留的中断
第二类 系统调用异常
代码结构
中断在哪里
linux0.1.1来学习 代码在github上有 linux-0.1.1
kernal文件夹 asm.s trap.c
system_call.s fork.c signal.c exit.c sys.c
2、中断的工作流程
做CPU工作模式的转化
进行寄存器的拷贝和压栈
设置中断异常向量表
保存正常运行的函数返回值
跳转到对应的中断服务函数上运行
进行模式的复原以及寄存器的复原
跳转回正常工作的函数地址继续运行
用代码来讲讲
linux中中断的工作流程
1、将所有的寄存器值入栈
ss寄存器 EFLAGS寄存器 esp寄存器 cs寄存器 eip寄存器 (错误码)
2、将异常码入栈
异常码(中断号)
3、将当前的函数返回值进行入栈
(为了在中断执行后能够找到在哪中断的,能够复原)
---------------------------------------------------------------------------保存
4、调用对应的中断服务函数
---------------------------------------------------------------------------执行
5、出栈函数返回值
6、返回所有入栈的寄存器值
(异常码就不要了)
---------------------------------------------------------------------------复原
如图所示吧
中断前处理过程 中断的恢复过程 中断的执行过程
asm.s trap.c 硬件中断的处理过程
system_call.s fork.c signal.c exit.c sys.c 软件及系统调用的处理过程
3、中断的代码实现
asm.s trap.c 分析一下。
asm.s 1991 linux写的
又是一幅图
真正的原代码在这里。
pushl不提
然后do_divide_error是C语言一个函数,这个函数在trap.c里面
又发现
无错误码与有错误码,这是个啥意思?
压入do_divide_error之后,xchgl指令是交换指令,把刚刚压好的do那个给了eax
然后就是一堆压栈。
调用C函数,然后把之前压入的全部弹出。
popl 弹出32 pop弹出16
所以我们看的出来不同的中断其实就是将相应的C处理函数地址压入栈以后,调用no_error_code来进行一个处理。前面的divide_error跟后面的error_code就是说我们在中断的过程中出错啦。
而trap.c呢,就是说我们的一个C函数的具体实现过程。
里面放着各种的函数。
其实代码不长,把他直接拉下来算了
asm.s
/*
* linux/kernel/asm.s
*
* (C) 1991 Linus Torvalds
*/
/*
* asm.s contains the low-level code for most hardware faults.
* page_exception is handled by the mm, so that isn't here. This
* file also handles (hopefully) fpu-exceptions due to TS-bit, as
* the fpu must be properly saved/resored. This hasn't been tested.
*/
.globl _divide_error,_debug,_nmi,_int3,_overflow,_bounds,_invalid_op
.globl _double_fault,_coprocessor_segment_overrun
.globl _invalid_TSS,_segment_not_present,_stack_segment
.globl _general_protection,_coprocessor_error,_irq13,_reserved
_divide_error:
pushl $_do_divide_error
no_error_code:
xchgl %eax,(%esp)
pushl %ebx
pushl %ecx
pushl %edx
pushl %edi
pushl %esi
pushl %ebp
push %ds
push %es
push %fs
pushl $0 # "error code"
lea 44(%esp),%edx
pushl %edx
movl $0x10,%edx
mov %dx,%ds
mov %dx,%es
mov %dx,%fs
call *%eax
addl $8,%esp
pop %fs
pop %es
pop %ds
popl %ebp
popl %esi
popl %edi
popl %edx
popl %ecx
popl %ebx
popl %eax
iret
_debug:
pushl $_do_int3 # _do_debug
jmp no_error_code
_nmi:
pushl $_do_nmi
jmp no_error_code
_int3:
pushl $_do_int3
jmp no_error_code
_overflow:
pushl $_do_overflow
jmp no_error_code
_bounds:
pushl $_do_bounds
jmp no_error_code
_invalid_op:
pushl $_do_invalid_op
jmp no_error_code
_coprocessor_segment_overrun:
pushl $_do_coprocessor_segment_overrun
jmp no_error_code
_reserved:
pushl $_do_reserved
jmp no_error_code
_irq13:
pushl %eax
xorb %al,%al
outb %al,$0xF0
movb $0x20,%al
outb %al,$0x20
jmp 1f
1: jmp 1f
1: outb %al,$0xA0
popl %eax
jmp _coprocessor_error
_double_fault:
pushl $_do_double_fault
error_code:
xchgl %eax,4(%esp) # error code <-> %eax
xchgl %ebx,(%esp) # &function <-> %ebx
pushl %ecx
pushl %edx
pushl %edi
pushl %esi
pushl %ebp
push %ds
push %es
push %fs
pushl %eax # error code
lea 44(%esp),%eax # offset
pushl %eax
movl $0x10,%eax
mov %ax,%ds
mov %ax,%es
mov %ax,%fs
call *%ebx
addl $8,%esp
pop %fs
pop %es
pop %ds
popl %ebp
popl %esi
popl %edi
popl %edx
popl %ecx
popl %ebx
popl %eax
iret
_invalid_TSS:
pushl $_do_invalid_TSS
jmp error_code
_segment_not_present:
pushl $_do_segment_not_present
jmp error_code
_stack_segment:
pushl $_do_stack_segment
jmp error_code
_general_protection:
pushl $_do_general_protection
jmp error_code
trap.c也就两百多行。
/*
* linux/kernel/traps.c
*
* (C) 1991 Linus Torvalds
*/
/*
* 'Traps.c' handles hardware traps and faults after we have saved some
* state in 'asm.s'. Currently mostly a debugging-aid, will be extended
* to mainly kill the offending process (probably by giving it a signal,
* but possibly by killing it outright if necessary).
*/
#include <string.h>
#include <linux/head.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <asm/system.h>
#include <asm/segment.h>
#include <asm/io.h>
#define get_seg_byte(seg,addr) ({ \
register char __res; \
__asm__("push %%fs;mov %%ax,%%fs;movb %%fs:%2,%%al;pop %%fs" \
:"=a" (__res):"0" (seg),"m" (*(addr))); \
__res;})
#define get_seg_long(seg,addr) ({ \
register unsigned long __res; \
__asm__("push %%fs;mov %%ax,%%fs;movl %%fs:%2,%%eax;pop %%fs" \
:"=a" (__res):"0" (seg),"m" (*(addr))); \
__res;})
#define _fs() ({ \
register unsigned short __res; \
__asm__("mov %%fs,%%ax":"=a" (__res):); \
__res;})
int do_exit(long code);
void page_exception(void);
void divide_error(void);
void debug(void);
void nmi(void);
void int3(void);
void overflow(void);
void bounds(void);
void invalid_op(void);
void device_not_available(void);
void double_fault(void);
void coprocessor_segment_overrun(void);
void invalid_TSS(void);
void segment_not_present(void);
void stack_segment(void);
void general_protection(void);
void page_fault(void);
void coprocessor_error(void);
void reserved(void);
void parallel_interrupt(void);
void irq13(void);
static void die(char * str,long esp_ptr,long nr)
{
long * esp = (long *) esp_ptr;
int i;
printk("%s: %04x\n\r",str,nr&0xffff);
printk("EIP:\t%04x:%p\nEFLAGS:\t%p\nESP:\t%04x:%p\n",
esp[1],esp[0],esp[2],esp[4],esp[3]);
printk("fs: %04x\n",_fs());
printk("base: %p, limit: %p\n",get_base(current->ldt[1]),get_limit(0x17));
if (esp[4] == 0x17) {
printk("Stack: ");
for (i=0;i<4;i++)
printk("%p ",get_seg_long(0x17,i+(long *)esp[3]));
printk("\n");
}
str(i);
printk("Pid: %d, process nr: %d\n\r",current->pid,0xffff & i);
for(i=0;i<10;i++)
printk("%02x ",0xff & get_seg_byte(esp[1],(i+(char *)esp[0])));
printk("\n\r");
do_exit(11); /* play segment exception */
}
void do_double_fault(long esp, long error_code)
{
die("double fault",esp,error_code);
}
void do_general_protection(long esp, long error_code)
{
die("general protection",esp,error_code);
}
void do_divide_error(long esp, long error_code)
{
die("divide error",esp,error_code);
}
void do_int3(long * esp, long error_code,
long fs,long es,long ds,
long ebp,long esi,long edi,
long edx,long ecx,long ebx,long eax)
{
int tr;
__asm__("str %%ax":"=a" (tr):"0" (0));
printk("eax\t\tebx\t\tecx\t\tedx\n\r%8x\t%8x\t%8x\t%8x\n\r",
eax,ebx,ecx,edx);
printk("esi\t\tedi\t\tebp\t\tesp\n\r%8x\t%8x\t%8x\t%8x\n\r",
esi,edi,ebp,(long) esp);
printk("\n\rds\tes\tfs\ttr\n\r%4x\t%4x\t%4x\t%4x\n\r",
ds,es,fs,tr);
printk("EIP: %8x CS: %4x EFLAGS: %8x\n\r",esp[0],esp[1],esp[2]);
}
void do_nmi(long esp, long error_code)
{
die("nmi",esp,error_code);
}
void do_debug(long esp, long error_code)
{
die("debug",esp,error_code);
}
void do_overflow(long esp, long error_code)
{
die("overflow",esp,error_code);
}
void do_bounds(long esp, long error_code)
{
die("bounds",esp,error_code);
}
void do_invalid_op(long esp, long error_code)
{
die("invalid operand",esp,error_code);
}
void do_device_not_available(long esp, long error_code)
{
die("device not available",esp,error_code);
}
void do_coprocessor_segment_overrun(long esp, long error_code)
{
die("coprocessor segment overrun",esp,error_code);
}
void do_invalid_TSS(long esp,long error_code)
{
die("invalid TSS",esp,error_code);
}
void do_segment_not_present(long esp,long error_code)
{
die("segment not present",esp,error_code);
}
void do_stack_segment(long esp,long error_code)
{
die("stack segment",esp,error_code);
}
void do_coprocessor_error(long esp, long error_code)
{
if (last_task_used_math != current)
return;
die("coprocessor error",esp,error_code);
}
void do_reserved(long esp, long error_code)
{
die("reserved (15,17-47) error",esp,error_code);
}
void trap_init(void)
{
int i;
set_trap_gate(0,÷_error);
set_trap_gate(1,&debug);
set_trap_gate(2,&nmi);
set_system_gate(3,&int3); /* int3-5 can be called from all */
set_system_gate(4,&overflow);
set_system_gate(5,&bounds);
set_trap_gate(6,&invalid_op);
set_trap_gate(7,&device_not_available);
set_trap_gate(8,&double_fault);
set_trap_gate(9,&coprocessor_segment_overrun);
set_trap_gate(10,&invalid_TSS);
set_trap_gate(11,&segment_not_present);
set_trap_gate(12,&stack_segment);
set_trap_gate(13,&general_protection);
set_trap_gate(14,&page_fault);
set_trap_gate(15,&reserved);
set_trap_gate(16,&coprocessor_error);
for (i=17;i<48;i++)
set_trap_gate(i,&reserved);
set_trap_gate(45,&irq13);
outb_p(inb_p(0x21)&0xfb,0x21);
outb(inb_p(0xA1)&0xdf,0xA1);
set_trap_gate(39,¶llel_interrupt);
}
分析一波最常见的die函数
#define get_seg_byte(seg,addr) ({ \
register char __res; \
__asm__("push %%fs;mov %%ax,%%fs;movb %%fs:%2,%%al;pop %%fs" \
:"=a" (__res):"0" (seg),"m" (*(addr))); \
__res;})
#define get_seg_long(seg,addr) ({ \
register unsigned long __res; \
__asm__("push %%fs;mov %%ax,%%fs;movl %%fs:%2,%%eax;pop %%fs" \
:"=a" (__res):"0" (seg),"m" (*(addr))); \
__res;})
#define _fs() ({ \
register unsigned short __res; \
__asm__("mov %%fs,%%ax":"=a" (__res):); \
__res;})
esp指针是栈指针,根据栈指针来取一些栈里面的信息,打印出来。
再打印一些其他的栈内容。
get_seg_long 栈里面取一个四个字节的内容。
所以die函数干的事情就是说能帮你打印当前出的错的栈里面的信息。可以方程print函数。以及答应错误号。
再来看trap_init函数。
里面有两个函数 set_trap_gate set_system_gate。
区别在于他俩所设置的权限不同。
set_trap_gate特权级为0.只能由用户来。设置的权限比较高。
set_system_gate 设置的权限比较低,可以有用户跟系统啥的来调用。
system_call.s
/*
* linux/kernel/system_call.s
*
* (C) 1991 Linus Torvalds
*/
/*
* system_call.s contains the system-call low-level handling routines.
* This also contains the timer-interrupt handler, as some of the code is
* the same. The hd- and flopppy-interrupts are also here.
*
* NOTE: This code handles signal-recognition, which happens every time
* after a timer-interrupt and after each system call. Ordinary interrupts
* don't handle signal-recognition, as that would clutter them up totally
* unnecessarily.
*
* Stack layout in 'ret_from_system_call':
*
* 0(%esp) - %eax
* 4(%esp) - %ebx
* 8(%esp) - %ecx
* C(%esp) - %edx
* 10(%esp) - %fs
* 14(%esp) - %es
* 18(%esp) - %ds
* 1C(%esp) - %eip
* 20(%esp) - %cs
* 24(%esp) - %eflags
* 28(%esp) - %oldesp
* 2C(%esp) - %oldss
*/
SIG_CHLD = 17
EAX = 0x00
EBX = 0x04
ECX = 0x08
EDX = 0x0C
FS = 0x10
ES = 0x14
DS = 0x18
EIP = 0x1C
CS = 0x20
EFLAGS = 0x24
OLDESP = 0x28
OLDSS = 0x2C
state = 0 # these are offsets into the task-struct.
counter = 4
priority = 8
signal = 12
sigaction = 16 # MUST be 16 (=len of sigaction)
blocked = (33*16)
# offsets within sigaction
sa_handler = 0
sa_mask = 4
sa_flags = 8
sa_restorer = 12
nr_system_calls = 72
/*
* Ok, I get parallel printer interrupts while using the floppy for some
* strange reason. Urgel. Now I just ignore them.
*/
.globl _system_call,_sys_fork,_timer_interrupt,_sys_execve
.globl _hd_interrupt,_floppy_interrupt,_parallel_interrupt
.globl _device_not_available, _coprocessor_error
.align 2
bad_sys_call:
movl $-1,%eax
iret
.align 2
reschedule:
pushl $ret_from_sys_call
jmp _schedule
.align 2
_system_call:
cmpl $nr_system_calls-1,%eax
ja bad_sys_call
push %ds
push %es
push %fs
pushl %edx
pushl %ecx # push %ebx,%ecx,%edx as parameters
pushl %ebx # to the system call
movl $0x10,%edx # set up ds,es to kernel space
mov %dx,%ds
mov %dx,%es
movl $0x17,%edx # fs points to local data space
mov %dx,%fs
call _sys_call_table(,%eax,4)
pushl %eax
movl _current,%eax
cmpl $0,state(%eax) # state
jne reschedule
cmpl $0,counter(%eax) # counter
je reschedule
ret_from_sys_call:
movl _current,%eax # task[0] cannot have signals
cmpl _task,%eax
je 3f
cmpw $0x0f,CS(%esp) # was old code segment supervisor ?
jne 3f
cmpw $0x17,OLDSS(%esp) # was stack segment = 0x17 ?
jne 3f
movl signal(%eax),%ebx
movl blocked(%eax),%ecx
notl %ecx
andl %ebx,%ecx
bsfl %ecx,%ecx
je 3f
btrl %ecx,%ebx
movl %ebx,signal(%eax)
incl %ecx
pushl %ecx
call _do_signal
popl %eax
3: popl %eax
popl %ebx
popl %ecx
popl %edx
pop %fs
pop %es
pop %ds
iret
.align 2
_coprocessor_error:
push %ds
push %es
push %fs
pushl %edx
pushl %ecx
pushl %ebx
pushl %eax
movl $0x10,%eax
mov %ax,%ds
mov %ax,%es
movl $0x17,%eax
mov %ax,%fs
pushl $ret_from_sys_call
jmp _math_error
.align 2
_device_not_available:
push %ds
push %es
push %fs
pushl %edx
pushl %ecx
pushl %ebx
pushl %eax
movl $0x10,%eax
mov %ax,%ds
mov %ax,%es
movl $0x17,%eax
mov %ax,%fs
pushl $ret_from_sys_call
clts # clear TS so that we can use math
movl %cr0,%eax
testl $0x4,%eax # EM (math emulation bit)
je _math_state_restore
pushl %ebp
pushl %esi
pushl %edi
call _math_emulate
popl %edi
popl %esi
popl %ebp
ret
.align 2
_timer_interrupt:
push %ds # save ds,es and put kernel data space
push %es # into them. %fs is used by _system_call
push %fs
pushl %edx # we save %eax,%ecx,%edx as gcc doesn't
pushl %ecx # save those across function calls. %ebx
pushl %ebx # is saved as we use that in ret_sys_call
pushl %eax
movl $0x10,%eax
mov %ax,%ds
mov %ax,%es
movl $0x17,%eax
mov %ax,%fs
incl _jiffies
movb $0x20,%al # EOI to interrupt controller #1
outb %al,$0x20
movl CS(%esp),%eax
andl $3,%eax # %eax is CPL (0 or 3, 0=supervisor)
pushl %eax
call _do_timer # 'do_timer(long CPL)' does everything from
addl $4,%esp # task switching to accounting ...
jmp ret_from_sys_call
.align 2
_sys_execve:
lea EIP(%esp),%eax
pushl %eax
call _do_execve
addl $4,%esp
ret
.align 2
_sys_fork:
call _find_empty_process
testl %eax,%eax
js 1f
push %gs
pushl %esi
pushl %edi
pushl %ebp
pushl %eax
call _copy_process
addl $20,%esp
1: ret
_hd_interrupt:
pushl %eax
pushl %ecx
pushl %edx
push %ds
push %es
push %fs
movl $0x10,%eax
mov %ax,%ds
mov %ax,%es
movl $0x17,%eax
mov %ax,%fs
movb $0x20,%al
outb %al,$0xA0 # EOI to interrupt controller #1
jmp 1f # give port chance to breathe
1: jmp 1f
1: xorl %edx,%edx
xchgl _do_hd,%edx
testl %edx,%edx
jne 1f
movl $_unexpected_hd_interrupt,%edx
1: outb %al,$0x20
call *%edx # "interesting" way of handling intr.
pop %fs
pop %es
pop %ds
popl %edx
popl %ecx
popl %eax
iret
_floppy_interrupt:
pushl %eax
pushl %ecx
pushl %edx
push %ds
push %es
push %fs
movl $0x10,%eax
mov %ax,%ds
mov %ax,%es
movl $0x17,%eax
mov %ax,%fs
movb $0x20,%al
outb %al,$0x20 # EOI to interrupt controller #1
xorl %eax,%eax
xchgl _do_floppy,%eax
testl %eax,%eax
jne 1f
movl $_unexpected_floppy_interrupt,%eax
1: call *%eax # "interesting" way of handling intr.
pop %fs
pop %es
pop %ds
popl %edx
popl %ecx
popl %eax
iret
_parallel_interrupt:
pushl %eax
movb $0x20,%al
outb %al,$0x20
popl %eax
iret
最终调用的是sys_call_table[]。里面放了所有系统调用。