2.2:系统调用-实现

最新推荐文章于 2022-09-08 22:33:23 发布

18279216400

最新推荐文章于 2022-09-08 22:33:23 发布

阅读量144

点赞数

分类专栏： linux-0.11 文章标签：内核 linux 操作系统

本文链接：https://blog.csdn.net/weixin_43690845/article/details/105274706

版权

linux-0.11 专栏收录该内容

23 篇文章 1 订阅

订阅专栏

1：内核态和用户态

所有的程序都需要装入内存中使用，所以放置内核代码的那一段程序就是“内核态区域”，放置用户态的那一段代码就是“用户态区域”。用户态代码无法通过jump直接跳到内核态内存区域执行，或通过MOV获取到内核态内存中的数据。

那么是如何实现这种操作的呢？
操作系统会划定每个区域的特权级（数值越小，特权级越高），如果要访问的内存的特权级高于自己，CPU就会拒绝执行。具体来说，CPU会取出两个主要数值：当前特权级（CPL）和描述符特权级（DPL）进行对比，只有特权级满足要求，才会执行这条指令。用户态的CPL是3，内核态的CPL是0。
当系统初始化GDT时，会将系统内核处内存区域的DPL设置为0，而对在用户态执行的指令，操作系统会让其CPL=3，这就保证了用户态无法访问到内核态内存。

2：用户态进入内核态

用户程序可以通过int 0x80中断，进入相应的处理函数，进入内核。那么这其中的特权检查是如何处理的呢？
1）系统在初始化int 0x80中断时会将其DPL初始化为3，段选择符是0x0008，中断处理函数是system_call

void sched_init(void) ---->
set_system_gate(0x80,&system_call); ----->
#define set_system_gate(n,addr) \
        _set_gate(&idt[n],15,3,addr)	----->
#define _set_gate(gate_addr,type,dpl,addr)

/*
 * _set_gate - fill in one 8-byte gate descriptor at gate_addr.
 *
 * gate_addr: address of the descriptor to write (two 32-bit stores are
 *            made, one at offset 0 and one at offset 4).
 * type, dpl: combined into the 16-bit word 0x8000+(dpl<<13)+(type<<8).
 * addr:      handler address, loaded into %edx.
 *
 * %eax starts as 0x00080000. "movw %dx,%ax" copies the low 16 bits of
 * addr into %ax, so %eax holds 0x0008 in its upper half and addr[15:0]
 * in its lower half. "movw %0,%dx" then replaces the low half of %edx
 * with the flags word, leaving addr[31:16] in the upper half. Finally
 * %eax is stored at gate_addr and %edx at gate_addr+4.
 */
#define _set_gate(gate_addr,type,dpl,addr) \
__asm__ ("movw %%dx,%%ax\n\t" \
        "movw %0,%%dx\n\t" \
        "movl %%eax,%1\n\t" \
        "movl %%edx,%2" \
        : \
        : "i" ((short) (0x8000+(dpl<<13)+(type<<8))), \
        "o" (*((char *) (gate_addr))), \
        "o" (*(4+(char *) (gate_addr))), \
        "d" ((char *) (addr)),"a" (0x00080000))

2）此时，当执行int 0x80中断时，根据0x80查找IDT，进行特权检查，此时CPL=3，DPL=3，检查通过，继续执行。中断处理是跳到另一段程序去执行，所以是修改PC，具体是修改CS:EIP，从IDT中取出CS=0x0008，EIP=system_call函数的入口地址。接下来取出指令，通过GDT表获得基地址，加上system_call的偏移，就跳到了操作系统的system_call函数。由于此时CS设置为0x0008，最后两位为0，即CPL=0，因此接下来的指令具有内核态特权。

3：重新审视"Hello World"

那么整个printf("Hello World")背后发生了什么呢？
1：通过库函数处理，调用到write

char buf[20];
write(1,buf,11);	//将字符串填入buf中

2：系统调用
write需要展开成一段包含int 0x80的代码，即通过下面这个宏

/*
 * _syscall3 - expand to the definition of a three-argument function
 * `type name(atype a, btype b, ctype c)` that issues "int $0x80".
 *
 * Register setup comes from the asm constraints: __NR_##name goes in
 * %eax (operand "0" shares the register of the "=a" output), and a, b, c
 * are cast to long and placed in %ebx, %ecx, %edx. The value left in
 * %eax after the interrupt is read back into __res.
 *
 * Returns (type)__res when __res >= 0; otherwise stores -__res in errno
 * and returns -1.
 */
#define _syscall3(type,name,atype,a,btype,b,ctype,c) \
type name(atype a,btype b,ctype c) \
{ \
long __res; \
__asm__ volatile ("int $0x80" \
        : "=a" (__res) \
        : "0" (__NR_##name),"b" ((long)(a)),"c" ((long)(b)),"d" ((long)(c))); \
if (__res>=0) \
        return (type) __res; \
errno=-__res; \
return -1; \
}

即下面这样

/* Expands _syscall3 into the definition of int write(int fd, char* buf, int count). */
_syscall3(int,write,int,fd,char*,buf,int,count)

3：进入int 0x80，即跳到system_call中
1）将用户程序使用的数据段等压栈，设置内核相关段寄存器

# system_call (excerpt): handler installed for "int $0x80".
# On entry %eax holds the system-call number and %ebx, %ecx, %edx hold
# up to three arguments.
system_call:
        # Unsigned range check: any number above nr_system_calls-1 is rejected.
        cmpl $nr_system_calls-1,%eax
        ja bad_sys_call
        # Save the segment registers that are about to be reloaded.
        push %ds
        push %es
        push %fs
        # Arguments are pushed %edx, %ecx, %ebx so that %ebx ends up on top
        # of the stack when the handler is called.
        pushl %edx
        pushl %ecx              # push %ebx,%ecx,%edx as parameters
        pushl %ebx              # to the system call
        movl $0x10,%edx         # set up ds,es to kernel space
        mov %dx,%ds
        mov %dx,%es
        movl $0x17,%edx         # fs points to local data space
        mov %dx,%fs
        # Indirect call through the table entry at sys_call_table + %eax*4.
        call sys_call_table(,%eax,4)

2）调用sys_call_table
从下面的代码可以看出，这是一个函数指针数组，调用哪个函数由%eax决定，而%eax在write汇编展开时会进行相应的定义："0" (__NR_##name)，即__NR_write = 4，那么此时就会调用到sys_write

call sys_call_table(,%eax,4)
fn_ptr sys_call_table[] = { sys_setup, sys_exit, sys_fork, sys_read,
sys_write.......}
typedef int (*fn_ptr)();

3）write函数参数的传递
函数展开时，"b" ((long)(a)),"c" ((long)(b)),"d" ((long)(c))会将需要传递的信息放入%ebx、%ecx、%edx中，然后system_call将这些寄存器压入栈中，那么调用时自然就有了相应的参数
4）如何获取用户进程内存缓冲区中的ASCII字符
将fs设置为0x17（二进制10111），这样段选择符的最后三位为111：低两位为11，即特权级为3（用户态段）；TI=1说明需要查找的段描述符在LDT表中，对应的是用户态的代码段，数据段等信息（内核的代码段，数据段这些信息在GDT表中）。这样就可以利用FS段寄存器在操作系统内核中找到当前进程的用户态内存了。

movl $0x17,%edx         # fs points to local data space

5）调用sys_write

/*
 * sys_write - write `count` bytes from `buf` to the open file `fd` of the
 * current task, dispatching on the kind of inode behind the descriptor.
 *
 * Returns -EINVAL when fd is not below NR_OPEN, when count is negative,
 * when the current task has no file in slot fd, or when the inode mode
 * matches none of the handled kinds. Returns 0 when count is 0.
 * Otherwise returns whatever the selected helper returns; for a pipe
 * whose f_mode lacks bit value 2 the result is -EIO.
 */
int sys_write(unsigned int fd,char * buf,int count)
{
        struct file * filp;
        struct m_inode * node;

        /* Validate the arguments before touching the descriptor table. */
        if (fd>=NR_OPEN || count<0)
                return -EINVAL;
        filp=current->filp[fd];
        if (!filp)
                return -EINVAL;
        if (count==0)
                return 0;

        node=filp->f_inode;
        if (node->i_pipe) {
                /* NOTE(review): bit value 2 of f_mode is presumably the
                 * opened-for-writing flag - confirm against the open path. */
                if (filp->f_mode&2)
                        return write_pipe(node,buf,count);
                return -EIO;
        }
        if (S_ISCHR(node->i_mode))
                return rw_char(WRITE,node->i_zone[0],buf,count,&filp->f_pos);
        if (S_ISBLK(node->i_mode))
                return block_write(node->i_zone[0],&filp->f_pos,buf,count);
        if (S_ISREG(node->i_mode))
                return file_write(node,filp,buf,count);

        /* Unhandled inode kind: log the mode and reject the request. */
        printk("(Write)inode->i_mode=%06o\n\r",node->i_mode);
        return -EINVAL;
}