Linux内核设计的艺术-进程2的创建及执行

最新推荐文章于 2021-05-15 02:23:11 发布

jltxgcy

最新推荐文章于 2021-05-15 02:23:11 发布

阅读量1.7k

点赞数 1

分类专栏： Linux内核设计的艺术

本文链接：https://blog.csdn.net/jltxgcy/article/details/20355899

版权

Linux内核设计的艺术专栏收录该内容

18 篇文章 1 订阅

订阅专栏

1、打开标准输入设备
代码路径：init/main.c 目前处于进程1的3特权级

/*
 * init() (excerpt): executed by process 1 at privilege level 3.
 * Opens the terminal device and duplicates the descriptor twice.
 */
void init(void)
{
	int pid,i;

	setup((void *) &drive_info);
	(void) open("/dev/tty0",O_RDWR,0);	// first open in process 1; the walkthrough below shows it returns fd 0
	(void) dup(0);	// duplicate fd 0
	(void) dup(0);	// duplicate fd 0 once more
	...
}

open同样调用int 0x80进入进程1的0特权级，sys_open如下：
代码路径：fs/open.c

/*
 * sys_open() (first part of the excerpt): reserve a free descriptor slot in
 * the current process and a free file_table entry, link them together, then
 * resolve the path with open_namei(). On open_namei() failure the
 * reservation is undone and the negative error code is returned.
 */
int sys_open(const char * filename,int flag,int mode)
{
	struct m_inode * inode;
	struct file * f;
	int i,fd;

	mode &= 0777 & ~current->umask;// not considered in this walkthrough
	for(fd=0 ; fd<NR_OPEN ; fd++)// find the first free slot in the process's filp[] array
		if (!current->filp[fd])
			break;
	if (fd>=NR_OPEN)
		return -EINVAL;
	current->close_on_exec &= ~(1<<fd);// clear bit fd of close_on_exec (the lowest bit in this walkthrough)
	f=0+file_table;
	for (i=0 ; i<NR_FILE ; i++,f++)// find the first file_table entry with f_count == 0 (the first entry here)
		if (!f->f_count) break;
	if (i>=NR_FILE)
		return -EINVAL;
	(current->filp[fd]=f)->f_count++;// link process 1's filp[] slot (array of NR_OPEN=20) to the file_table entry (array of NR_FILE=64) and bump the reference count; f_count becomes 1
	if ((i=open_namei(filename,flag,mode,&inode))<0) {
		current->filp[fd]=NULL;	// undo the reservation on failure
		f->f_count=0;
		return i;
	}
	...
}

代码路径：include/linux/fs.h

#define NR_OPEN 20
#define NR_FILE 64

代码路径：fs/namei.c

/*
 * open_namei() (first part of the excerpt): resolve the directory that
 * contains the last path component, handle the "path ends in '/'" special
 * case, then look up the directory entry of the last component.
 */
int open_namei(const char * pathname, int flag, int mode,
	struct m_inode ** res_inode)
{
	const char * basename;
	int inr,dev,namelen;
	struct m_inode * dir, *inode;
	struct buffer_head * bh;
	struct dir_entry * de;

	if ((flag & O_TRUNC) && !(flag & O_ACCMODE))
		flag |= O_WRONLY;
	mode &= 0777 & ~current->umask;
	mode |= I_REGULAR;// not considered in this walkthrough
	if (!(dir = dir_namei(pathname,&namelen,&basename)))// get the inode of the innermost directory (dev); namelen is the length of "tty0", basename points at its first character 't'
		return -ENOENT;
	if (!namelen) {			/* special case: '/usr/' etc */
		if (!(flag & (O_ACCMODE|O_CREAT|O_TRUNC))) {
			*res_inode=dir;
			return 0;
		}
		iput(dir);
		return -EISDIR;
	}
	bh = find_entry(&dir,basename,namelen,&de);// look up the directory entry "tty0" inside the dev directory inode
        ...
}

代码路径：fs/namei.c

/*
 * dir_namei(): resolve the directory part of pathname through get_dir() and
 * report the last path component.
 *
 * On success returns the directory inode, stores the length of the last
 * component in *namelen and a pointer to its first character in *name.
 * Returns NULL when get_dir() fails.
 */
static struct m_inode * dir_namei(const char * pathname,
	int * namelen, const char ** name)
{
	char c;
	const char * basename;
	struct m_inode * dir;

	if (!(dir = get_dir(pathname)))
		return NULL;
	basename = pathname;
	while ((c=get_fs_byte(pathname++)))	// walk the string up to its terminating NUL
		if (c=='/')
			basename=pathname;	// remember the character following the most recent '/'
	*namelen = pathname-basename-1;// length of the name "tty0" (pathname has already stepped past the NUL, hence -1)
	*name = basename;// address of the first character 't' of "tty0"
	return dir;
}

代码路径：fs/namei.c

/*
 * get_dir(): walk pathname one component at a time, starting from the
 * process root (absolute path) or the current working directory (relative
 * path), and return the inode of the directory that holds the last
 * component. Returns NULL for an empty name, a non-directory or
 * non-searchable component, or a lookup failure.
 *
 * The trailing comments trace the call for "/dev/tty0" made by process 1.
 */
static struct m_inode * get_dir(const char * pathname)
{
	char c;
	const char * thisname;
	struct m_inode * inode;
	struct buffer_head * bh;
	int namelen,inr,idev;
	struct dir_entry * de;

	if (!current->root || !current->root->i_count)
		panic("No root inode");
	if (!current->pwd || !current->pwd->i_count)
		panic("No cwd inode");
	if ((c=get_fs_byte(pathname))=='/') {
		inode = current->root;// the root inode
		pathname++;// now points at the 'd' of "dev"
	} else if (c)
		inode = current->pwd;
	else
		return NULL;	/* empty name is bad */
	inode->i_count++;// the root inode's i_count becomes 5
	while (1) {
		thisname = pathname;
		if (!S_ISDIR(inode->i_mode) || !permission(inode,MAY_EXEC)) {
			iput(inode);// not executed in this walkthrough
			return NULL;
		}
		for(namelen=0;(c=get_fs_byte(pathname++))&&(c!='/');namelen++)// stop at '/' or at the end of the string
			/* nothing */ ;
		if (!c)
			return inode;// on the second iteration c is NUL, so we return here with the inode of dev
		if (!(bh = find_entry(&inode,thisname,namelen,&de))) {// first iteration: look up the entry "dev" in the root inode; thisname points at "dev", namelen is 3
			iput(inode);
			return NULL;
		}
		inr = de->inode;// inode number of dev
		idev = inode->i_dev;// the RAM disk, 0x101
		brelse(bh);
		iput(inode);// the root inode's i_count drops back to 4
		if (!(inode = iget(idev,inr)))// fetch the inode of dev (second entry of inode_table); its i_count is 1
			return NULL;
	}
}

代码路径：include/linux/fs.h

...
#define NAME_LEN 14
...
/* One directory entry as returned by find_entry(). */
struct dir_entry {
	unsigned short inode;	/* inode number of the entry (read as de->inode by get_dir/open_namei) */
	char name[NAME_LEN];	/* entry name, at most NAME_LEN characters */
};

代码路径：include/linux/fs.h

...
/* In-memory inode as used by the path lookup and open code shown in this article. */
struct m_inode {
	unsigned short i_mode;	/* file type and permissions; tested with S_ISDIR/S_ISCHR/S_ISBLK */
	unsigned short i_uid;
	unsigned long i_size;
	unsigned long i_mtime;
	unsigned char i_gid;
	unsigned char i_nlinks;
	unsigned short i_zone[9];	/* for device files sys_open reads the device number from i_zone[0] */
/* these are in memory also */
	struct task_struct * i_wait;
	unsigned long i_atime;	/* set to CURRENT_TIME by open_namei */
	unsigned long i_ctime;
	unsigned short i_dev;	/* device the inode lives on (0x101, the RAM disk, in this walkthrough) */
	unsigned short i_num;
	unsigned short i_count;	/* reference count tracked throughout the walkthrough */
	unsigned char i_lock;
	unsigned char i_dirt;
	unsigned char i_pipe;
	unsigned char i_mount;
	unsigned char i_seek;
	unsigned char i_update;
};
...

程序执行到了open_namei，找到了tty0的目录项，接下来继续执行：

/*
 * open_namei() (continuation of the excerpt): after the directory entry of
 * the last component has been found, release the directory inode, fetch the
 * target inode, check permissions and hand the inode back via *res_inode.
 */
int open_namei(const char * pathname, int flag, int mode,
	struct m_inode ** res_inode)
{
	...
	bh = find_entry(&dir,basename,namelen,&de);
	...
	inr = de->inode;// inode number of tty0
	dev = dir->i_dev;// 0x101
	brelse(bh);
	iput(dir);// releases the dev inode: the second inode_table entry's i_count becomes 0
	if (flag & O_EXCL)
		return -EEXIST;// not executed in this walkthrough
	if (!(inode=iget(dev,inr)))// obtains the inode of tty0 (the second inode_table entry is reused); i_count is 1
		return -EACCES;
	if ((S_ISDIR(inode->i_mode) && (flag & O_ACCMODE)) ||
	    !permission(inode,ACC_MODE(flag))) {
		iput(inode);// not executed in this walkthrough
		return -EPERM;
	}
	inode->i_atime = CURRENT_TIME;
	if (flag & O_TRUNC)
		truncate(inode);// not executed in this walkthrough
	*res_inode = inode;
	return 0;
}

返回sys_open继续执行：

...
/*
 * sys_open(): open filename for the current process.
 *
 * Reserves the lowest free descriptor and the first unused file_table
 * entry, resolves the path with open_namei(), applies the tty and
 * block-device special cases, then fills in the file structure.
 * Returns the descriptor on success or a negative error code.
 *
 * The trailing comments trace the call open("/dev/tty0") made by process 1.
 */
int sys_open(const char * filename,int flag,int mode)
{
	struct m_inode * inode;
	struct file * f;
	int i,fd;

	mode &= 0777 & ~current->umask;
	for(fd=0 ; fd<NR_OPEN ; fd++)
		if (!current->filp[fd])// find the first free slot in the process's filp[] array
			break;
	if (fd>=NR_OPEN)
		return -EINVAL;
	current->close_on_exec &= ~(1<<fd);// clear bit fd of close_on_exec (the lowest bit in this walkthrough)
	f=0+file_table;
	for (i=0 ; i<NR_FILE ; i++,f++)// find the first file_table entry with f_count == 0 (the first entry here)
		if (!f->f_count) break;
	if (i>=NR_FILE)
		return -EINVAL;
	(current->filp[fd]=f)->f_count++;// link process 1's filp[] slot (array of NR_OPEN=20) to the file_table entry (array of NR_FILE=64) and bump the reference count; f_count becomes 1
	if ((i=open_namei(filename,flag,mode,&inode))<0) {
		current->filp[fd]=NULL;	// undo the reservation on failure
		f->f_count=0;
		return i;
	}
/* ttys are somewhat special (ttyxx major==4, tty major==5) */
	if (S_ISCHR(inode->i_mode)) {// the mode of tty0's inode identifies it as a character device file
		if (MAJOR(inode->i_zone[0])==4) {
			if (current->leader && current->tty<0) {
				current->tty = MINOR(inode->i_zone[0]);
				tty_table[current->tty].pgrp = current->pgrp;
			}
		} else if (MAJOR(inode->i_zone[0])==5)
			if (current->tty<0) {
				iput(inode);
				current->filp[fd]=NULL;
				f->f_count=0;
				return -EPERM;
			}
	}
/* Likewise with block-devices: check for floppy_change */
	if (S_ISBLK(inode->i_mode))// not considered in this walkthrough
		check_disk_change(inode->i_zone[0]);
	f->f_mode = inode->i_mode;
	f->f_flags = flag;
	f->f_count = 1;
	f->f_inode = inode;
	f->f_pos = 0;
	return (fd);// fd is 0
}
...

至此进程1的current->filp[0]存放的file_table第一个元素地址，file_table第一个元素，又存放着inode的地址，f_count为1

2、打开标准输出、标准错误输出设备

又返回了进程1的3特权级，接着执行init()

代码路径：init/main.c

/*
 * init() (excerpt): back at privilege level 3 in process 1, the two dup(0)
 * calls run after open() has returned fd 0.
 */
void init(void)
{
	int pid,i;

	setup((void *) &drive_info);
	(void) open("/dev/tty0",O_RDWR,0);	// already executed: returned fd 0
	(void) dup(0);	// first duplicate of fd 0
	(void) dup(0);	// second duplicate of fd 0
        ...
}

执行dup(0)，又陷入了进程1的0特权级，开始执行sys_dup

代码路径：fs/fcntl.c

/*
 * dupfd(): duplicate descriptor fd into the lowest free descriptor that is
 * >= arg. Returns the new descriptor, -EBADF if fd is out of range or not
 * open, -EINVAL if arg is out of range, or -EMFILE if no slot is free.
 */
static int dupfd(unsigned int fd, unsigned int arg)// fd is 0, arg is 0
{
	if (fd >= NR_OPEN || !current->filp[fd])
		return -EBADF;
	if (arg >= NR_OPEN)
		return -EINVAL;
	while (arg < NR_OPEN)
		if (current->filp[arg])
			arg++;
		else
			break;// arg is 1
	if (arg >= NR_OPEN)
		return -EMFILE;
	current->close_on_exec &= ~(1<<arg);// clears bit arg: bits 0 and 1 are now clear; after the second dup(0) bits 0, 1 and 2 are all clear
	(current->filp[arg] = current->filp[fd])->f_count++;// descriptors 0 and 1 now point at the same file structure, and f_count becomes 2
	return arg;
}

然后又返回进程1的3特权级，又一次执行dup(0)，结果是current->filp[0]、current->filp[1]、current->filp[2]共同指向第一个file(f_count为3)，file指向第二个inode(i_count为1)，该inode此时对应/dev/tty0

3、进程1创建进程2并切换到进程2执行

代码路径：init/main.c

/*
 * init() (excerpt): process 1 forks process 2. The child redirects fd 0 to
 * /etc/rc and executes the shell; the parent waits for that child.
 */
void init(void)
{
	int pid,i;
        ...
	if (!(pid=fork())) {// process 1 creates process 2; this branch runs in the child, where fork() returns 0
		close(0);
		if (open("/etc/rc",O_RDONLY,0))	// a non-zero return (error, or any descriptor other than 0) makes the child exit
			_exit(1);
		execve("/bin/sh",argv_rc,envp_rc);
		_exit(2);	// reached only if execve() returns
	}
       	if (pid>0)
		while (pid != wait(&i))	// parent: loop until wait() returns the child's pid
			/* nothing */;
        ...
}

fork的本质是：从进程1的3特权级进入进程1的0特权级，然后：

(1)task[2]指向新的task_struct

(2)进程1的task_struct复制给进程2的task_struct，单独设置pid为2，father为1，其他的寄存器都放入前面push进来的参数

(3)设置进程2的分页管理

(4)进程2共享进程1的文件，进程2的filp[0]，filp[1]，filp[2]和进程1的filp[0]，filp[1]，filp[2]同指向一个file，此时f_count为6；进程2的pwd，root和进程1的pwd，root同指向根i节点(第一个inode)，根i节点i_count变为6。

最后从进程1的0特权级跳回进程1的3特权级，由于pid为2，进入下面的wait循环，又由进程1的3特权级跳到进程1的0特权级，执行sys_waitpid

代码路径：kernel/exit.c

/*
 * sys_waitpid(): wait for a child of the current process selected by pid
 * (>0: that pid, 0: same process group as the caller, -1: any child,
 * < -1: process group -pid).
 *
 * Returns the pid of a stopped child (only with WUNTRACED) or of a reaped
 * zombie child, 0 with WNOHANG when matching children exist but none has
 * changed state, -EINTR when woken by a signal other than SIGCHLD, or
 * -ECHILD when there is no matching child.
 *
 * The trailing comments trace the first call, made by process 1 while
 * process 2 is still runnable.
 */
int sys_waitpid(pid_t pid,unsigned long * stat_addr, int options)
{
	int flag, code;
	struct task_struct ** p;

	verify_area(stat_addr,4);
repeat:
	flag=0;
	for(p = &LAST_TASK ; p > &FIRST_TASK ; --p) {
		if (!*p || *p == current)// the current process is process 1
			continue;
		if ((*p)->father != current->pid)// p is process 2; its father is 1, which equals process 1's pid
			continue;                 // so process 2 passes this filter
		if (pid>0) {
			if ((*p)->pid != pid)
				continue;
		} else if (!pid) {
			if ((*p)->pgrp != current->pgrp)
				continue;
		} else if (pid != -1) {
			if ((*p)->pgrp != -pid)
				continue;
		}
		switch ((*p)->state) {
			case TASK_STOPPED:
				if (!(options & WUNTRACED))
					continue;
				put_fs_long(0x7f,stat_addr);
				return (*p)->pid;
			case TASK_ZOMBIE:
				current->cutime += (*p)->utime;
				current->cstime += (*p)->stime;
				flag = (*p)->pid;
				code = (*p)->exit_code;
				release(*p);
				put_fs_long(code,stat_addr);
				return flag;
			default:// process 2 is in the ready (runnable) state
				flag=1;// remember that a matching child exists
				continue;
		}
	}
	if (flag) {
		if (options & WNOHANG)
			return 0;
		current->state=TASK_INTERRUPTIBLE;// put process 1 into the interruptible wait state
		schedule();// reschedule: switches to process 2
		if (!(current->signal &= ~(1<<(SIGCHLD-1))))	// clear SIGCHLD; rescan if no other signal is pending
			goto repeat;
		else
			return -EINTR;
	}
	return -ECHILD;
}

4、加载Shell程序

切换到进程2的3特权级，此时fork的返回值为0（特意设置的），所以执行以下代码：

	/* Child branch of init(): executed by process 2, where fork() returned 0. */
	if (!(pid=fork())) {// process 1 creates process 2
		close(0);
		if (open("/etc/rc",O_RDONLY,0))	// a non-zero return makes the child exit
			_exit(1);
		execve("/bin/sh",argv_rc,envp_rc);
		_exit(2);	// reached only if execve() returns
	}

close(0)，切换到进程2的0特权级，执行sys_close

代码路径：fs/open.c

/*
 * sys_close(): close descriptor fd of the current process.
 *
 * Clears the descriptor slot and drops one reference on the file
 * structure; when the last reference goes away the inode is released with
 * iput(). Returns 0 on success, -EINVAL if fd is out of range or not open.
 *
 * The trailing comments trace close(0) executed by process 2.
 */
int sys_close(unsigned int fd)
{	
	struct file * filp;

	if (fd >= NR_OPEN)
		return -EINVAL;
	current->close_on_exec &= ~(1<<fd);// the close-on-exec bit for this fd was already 0 and stays 0
	if (!(filp = current->filp[fd]))// process 2's filp[0]
		return -EINVAL;
	current->filp[fd] = NULL;// process 2's filp[0] is now empty
	if (filp->f_count == 0)
		panic("Close: file count is 0");
	if (--filp->f_count)// f_count drops to 5
		return (0);
	iput(filp->f_inode);// not reached in this walkthrough; only executed once f_count has dropped to 0
	return (0);
}

又切换到进程2的3特权级，开始执行open，又切换到进程2的0特权级，执行sys_open......整体的流程和前面的open大体一致。
进程2，current->filp[0]指向了第二个file(f_count为1)，file指向了第三个inode(i_count为1)，inode此时为/etc/rc，
current->filp[1]和current->filp[2]指向了第一个file(f_count为5)，file指向了第二个inode(i_count为1)，inode此时为/dev/tty0。
进程1，current->filp[0]和current->filp[1]和current->filp[2]指向了第一个file(f_count为5)，file指向了第二个inode(i_count为1)，inode此时为/dev/tty0，执行完毕后返回进程2的3特权级。

接着开始执行execve，又进入了进程2的0特权级，执行sys_execve，接着执行do_execve()

代码路径：kernel/system_call.s

// sys_execve: pass the address of the saved EIP slot to do_execve, then drop the argument.
sys_execve:
	lea EIP(%esp),%eax  // eax = address of the slot on the kernel stack that holds the saved eip
	pushl %eax          // push that address as the argument of do_execve
	call do_execve
	addl $4,%esp        // pop the argument
	ret

代码路径：fs/exec.c

之后执行过程，请参考通过进程2加载shell进程，详解execve。

返回shell进程(进程2)的3特权级，此时线性地址为128MB，由于刚才我们已经清空了页目录表（或者认为U/S=0，为内核态，用户态访问不了）和对应的页表，所以

产生缺页中断，把寄存器参数压入用户栈中。

之后的执行过程，请参考通过开始执行shell进程，理解缺页异常。

目前读出了/etc/update &这条信息，解释并跳到对应的地方执行，创建了update进程（整体流程参考进程1和shell进程的今生来世），又跳回来。

然后又读出了echo "/dev/hd1" > /etc/mtab，解释并跳到对应的地方执行，然后跳回来。

再次读取已经没有信息了，所以调用exit，进入shell进程0特权级，开始执行下面的函数：

代码路径:kernel/exit.c

/*
 * do_exit(): terminate the current process with exit code `code`.
 *
 * Frees the process's page tables, reparents its children to process 1,
 * closes every open descriptor, releases the pwd/root/executable inodes,
 * marks the process a zombie, notifies the father and reschedules.
 *
 * The trailing comments trace the exit of the shell (process 2).
 */
int do_exit(long code)
{
	int i;
	free_page_tables(get_base(current->ldt[1]),get_limit(0x0f));// frees the shell's page-directory entries and page tables
	free_page_tables(get_base(current->ldt[2]),get_limit(0x17));// as well as the memory pages it occupies (the 4KB page just allocated)
	for (i=0 ; i<NR_TASKS ; i++)
		if (task[i] && task[i]->father == current->pid) {// check whether the shell has child processes
			task[i]->father = 1;// make process 1 the father of the update process
			if (task[i]->state == TASK_ZOMBIE)
				/* assumption task[1] is always init */
				(void) send_sig(SIGCHLD, task[1], 1);
		}
	for (i=0 ; i<NR_OPEN ; i++)
		if (current->filp[i])
			sys_close(i);// close every file still attached to the process
	iput(current->pwd);// root inode reference count minus 1
	current->pwd=NULL;// detach: no longer points at an inode
	iput(current->root);// root inode reference count minus 1
	current->root=NULL;// detach: no longer points at an inode
	iput(current->executable);
	current->executable=NULL;
	if (current->leader && current->tty >= 0)
		tty_table[current->tty].pgrp = 0;
	if (last_task_used_math == current)
		last_task_used_math = NULL;
	if (current->leader)
		kill_session();
	current->state = TASK_ZOMBIE;// the shell process becomes a zombie
	current->exit_code = code;
	tell_father(current->father);// send a signal to process 1
	schedule();  // reschedule; process 1 runs next
	return (-1);	/* just to suppress warnings */
}

所以现在进程1，current->filp[0]和current->filp[1]和current->filp[2]指向了第一个file(f_count为3)，file指向了第二个inode(i_count为1)，inode此时为/dev/tty0，shell进程(进程2)占用的都释放了。前面说过“进程2的current->filp[0]指向了第二个file(f_count为1)，file指向了第三个inode(i_count为1)，inode此时为/etc/rc”，这句话现在变成：第二个file的f_count为0，它指向的第三个inode(/etc/rc)的i_count为0。

进程1的pwd，root指向根i节点(第一个inode)，根i节点i_count变为4，shell进程(进程2)都释放了。

代码路径：kernel/exit.c

/*
 * tell_father(): set the SIGCHLD bit in the signal mask of the task whose
 * pid equals `pid`. If pid is 0 or no such task exists, print a warning
 * and release the current task instead.
 */
static void tell_father(int pid)
{
	int i;

	if (pid)
		for (i=0;i<NR_TASKS;i++) {
			if (!task[i])
				continue;
			if (task[i]->pid != pid)
				continue;
			task[i]->signal |= (1<<(SIGCHLD-1));// send SIGCHLD to process 1
			return;
		}
/* if we don't find any fathers, we just release ourselves */
/* This is not really OK. Must change it to make father 1 */
	printk("BAD BAD - no father found\n\r");
	release(current);
}

然后执行schedule()

代码路径：kernel/sched.c

/*
 * schedule(): first deliver expired alarms and wake interruptible tasks
 * that have a non-blocked signal pending, then pick the TASK_RUNNING task
 * with the largest counter and switch to it. When every runnable task has
 * counter 0, all counters are recomputed from priority and the selection
 * is repeated.
 *
 * The trailing comments trace the call made from do_exit() of the shell.
 */
void schedule(void)
{
	int i,next,c;
	struct task_struct ** p;

/* check alarm, wake up any interruptible tasks that have got a signal */

	for(p = &LAST_TASK ; p > &FIRST_TASK ; --p)
		if (*p) {
			if ((*p)->alarm && (*p)->alarm < jiffies) {
					(*p)->signal |= (1<<(SIGALRM-1));
					(*p)->alarm = 0;
				}
			if (((*p)->signal & ~(_BLOCKABLE & (*p)->blocked)) &&
			(*p)->state==TASK_INTERRUPTIBLE)// process 1 has received a signal and is in the interruptible wait state
				(*p)->state=TASK_RUNNING;// so process 1 is made ready to run
		}

/* this is the scheduler proper: */

	while (1) {
		c = -1;
		next = 0;
		i = NR_TASKS;
		p = &task[NR_TASKS];
		while (--i) {
			if (!*--p)
				continue;
			if ((*p)->state == TASK_RUNNING && (*p)->counter > c)
				c = (*p)->counter, next = i;
		}
		if (c) break;	// stop unless the best runnable counter is exactly 0
		for(p = &LAST_TASK ; p > &FIRST_TASK ; --p)
			if (*p)
				(*p)->counter = ((*p)->counter >> 1) +
						(*p)->priority;
	}
	switch_to(next);// switch to process 1
}

此时切换到了进程1的0特权级，进程1是在sys_waitpid时切换出去的

/*
 * sys_waitpid(), second pass: process 1 resumes after schedule() inside
 * this function, finds that the shell (process 2) is now a zombie, reaps
 * it and returns its pid.
 */
int sys_waitpid(pid_t pid,unsigned long * stat_addr, int options)
{
	int flag, code;
	struct task_struct ** p;

	verify_area(stat_addr,4);
repeat:
	flag=0;
	for(p = &LAST_TASK ; p > &FIRST_TASK ; --p) {
		if (!*p || *p == current)
			continue;
		if ((*p)->father != current->pid)// finds the shell process (process 2)
			continue;
		if (pid>0) {
			if ((*p)->pid != pid)
				continue;
		} else if (!pid) {
			if ((*p)->pgrp != current->pgrp)
				continue;
		} else if (pid != -1) {
			if ((*p)->pgrp != -pid)
				continue;
		}
		switch ((*p)->state) {// the shell process is a zombie
			case TASK_STOPPED:
				if (!(options & WUNTRACED))
					continue;
				put_fs_long(0x7f,stat_addr);
				return (*p)->pid;
			case TASK_ZOMBIE:
				current->cutime += (*p)->utime;
				current->cstime += (*p)->stime;
				flag = (*p)->pid;// the value to return: 2
				code = (*p)->exit_code;
				release(*p);// free the shell process's task_struct
				put_fs_long(code,stat_addr);
				return flag;// returns 2
			default:
				flag=1;
				continue;
		}
	}
	if (flag) {
		if (options & WNOHANG)
			return 0;
		current->state=TASK_INTERRUPTIBLE;
		schedule();// process 1 was switched out here last time and resumes from this point
		if (!(current->signal &= ~(1<<(SIGCHLD-1))))
			goto repeat;// SIGCHLD is cleared and no other signal is pending for process 1, so rescan the children
		else
			return -EINTR;
	}
	return -ECHILD;
}

返回进程1的3特权级，继续执行init

/*
 * init() (excerpt): after the first shell (process 2) has exited, process 1
 * enters an endless loop that forks a new shell on /dev/tty0, waits for it
 * to die, reports its exit code and starts over.
 */
void init(void)
{
	...
	if (!(pid=fork())) {
		close(0);
		if (open("/etc/rc",O_RDONLY,0))
			_exit(1);
		execve("/bin/sh",argv_rc,envp_rc);
		_exit(2);
	}
	if (pid>0)
		while (pid != wait(&i))// 2 != 2 is false, so the loop ends and execution continues below, re-creating the shell (process 4)
			/* nothing */;
	while (1) {
		if ((pid=fork())<0) {
			printf("Fork failed in init\r\n");
			continue;
		}
		if (!pid) {
			/* child: drop the inherited descriptors and reopen the terminal as fds 0, 1 and 2 */
			close(0);close(1);close(2);
			setsid();
			(void) open("/dev/tty0",O_RDWR,0);
			(void) dup(0);
			(void) dup(0);
			_exit(execve("/bin/sh",argv,envp));
		}
		while (1)
			if (pid == wait(&i))	/* parent: wait until this particular child has exited */
				break;
		printf("\n\rchild %d died with code %04x\n\r",pid,i);
		sync();
	}
	_exit(0);	/* NOTE! _exit, not exit() */
}

重建shell进程，此时的进程号为4，但是task[2]指向这个进程

进程4，current->filp[0]和current->filp[1]和current->filp[2]指向了第二个file(f_count为3)，file指向了第三个inode(i_count为1)，inode此时为/dev/tty0。
进程1，current->filp[0]和current->filp[1]和current->filp[2]指向了第一个file(f_count为3)，file指向了第二个inode(i_count为1)，inode此时为/dev/tty0。

进程4的pwd，root和进程1的pwd，root同指向根i节点(第一个inode)，根i节点i_count变为6

jltxgcy

关注

1
点赞
踩
2

收藏

觉得还不错? 一键收藏
1
评论
Linux内核设计的艺术-进程2的创建及执行

1、打开终端设备文件及复制文件句柄代码路径：init/main.c 目前处于进程1的3特权级void init(void){ int pid,i; setup((void *) &drive_info); (void) open("/dev/tty0",O_RDWR,0); (void) dup(0); (void) dup(0); ...} op
复制链接

扫一扫

专栏目录