Linux内核源代码情景分析-execve()

本文详细介绍了Linux内核中的execve()系统调用的工作过程,从用户空间到内核空间的转换,涉及copy_strings_kernel、search_binary_handler、flush_old_exec等多个关键步骤。内容涵盖进程如何放弃父进程的用户空间、内存映射的建立以及页面映射等,深入探讨了内存管理和执行新程序的细节。
摘要由CSDN通过智能技术生成

    子进程开始执行execve:

    execve("/bin/echo", args, NULL});

    系统调用execve内核入口是sys_execve,代码如下:

asmlinkage int sys_execve(struct pt_regs regs)
{
	int error;
	char * filename;

	filename = getname((char *) regs.ebx);//ebx为"/bin/echo",把字符串从用户空间拷贝到系统空间
	error = PTR_ERR(filename);
	if (IS_ERR(filename))
		goto out;
	error = do_execve(filename, (char **) regs.ecx, (char **) regs.edx, &regs);//ecx为args,edx为NULL
	if (error == 0)
		current->ptrace &= ~PT_DTRACE;
	putname(filename);
out:
	return error;
}
int do_execve(char * filename, char ** argv, char ** envp, struct pt_regs * regs)
{
	struct linux_binprm bprm;
	struct file *file;
	int retval;
	int i;

	file = open_exec(filename);//打开目标文件 /bin/echo

	retval = PTR_ERR(file);
	if (IS_ERR(file))
		return retval;

	bprm.p = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *);//1024*32减去一个指针的大小
	memset(bprm.page, 0, MAX_ARG_PAGES*sizeof(bprm.page[0])); //page指针数组初始化为0

	bprm.file = file;
	bprm.filename = filename;
	bprm.sh_bang = 0;
	bprm.loader = 0;
	bprm.exec = 0;
	if ((bprm.argc = count(argv, bprm.p / sizeof(void *))) < 0) {//对指针数组argv[]中参数的个数进行计数,而bprm.p / sizeof(void *)表示允许的最大值,由于agrv[]是在用户空间而不在系统空间	
		allow_write_access(file);
		fput(file);
		return bprm.argc;
	}

	if ((bprm.envc = count(envp, bprm.p / sizeof(void *))) < 0) {//同上
		allow_write_access(file);
		fput(file);
		return bprm.envc;
	}

	retval = prepare_binprm(&bprm);//见下面的代码
	if (retval < 0) 
		goto out; 

	retval = copy_strings_kernel(1, &bprm.filename, &bprm);//由于bprm.filename已经在系统空间了,所以copy_strings_kernel从系统空间中拷贝
	if (retval < 0) 
		goto out; 

	bprm.exec = bprm.p;
	retval = copy_strings(bprm.envc, envp, &bprm);//从用户空间拷贝,参考copy_strings_kernel
	if (retval < 0) 
		goto out; 

	retval = copy_strings(bprm.argc, argv, &bprm);//从用户空间拷贝,参考copy_strings_kernel
	if (retval < 0) 
		goto out; 

	retval = search_binary_handler(&bprm,regs);//已经从可执行文件头部读入了128个字节存放在bprm的缓冲区,而且运行所需的参数和环境变量也已经搜集在bprm中,现在就由formats队列中的成员逐个来认领,谁要识别到它代表的可执行文件格式,运行的时候就交给它
	if (retval >= 0)
		/* execve success */
		return retval;

out:
	/* Something went wrong, return the inode and free the argument pages*/
	allow_write_access(bprm.file);
	if (bprm.file)
		fput(bprm.file);

	for (i = 0 ; i < MAX_ARG_PAGES ; i++) {
		struct page * page = bprm.page[i];
		if (page)
			__free_page(page);
	}

	return retval;
}

struct linux_binprm{
	char buf[BINPRM_BUF_SIZE];//128
	struct page *page[MAX_ARG_PAGES];//32
	unsigned long p; /* current top of mem */
	int sh_bang;
	struct file * file;
	int e_uid, e_gid;
	kernel_cap_t cap_inheritable, cap_permitted, cap_effective;
	int argc, envc;
	char * filename;	/* Name of binary */
	unsigned long loader, exec;
};
int prepare_binprm(struct linux_binprm *bprm)
{
	int mode;
	struct inode * inode = bprm->file->f_dentry->d_inode;

	mode = inode->i_mode;
	/* Huh? We had already checked for MAY_EXEC, WTF do we check this? */
	if (!(mode & 0111))	/* with at least _one_ execute bit set */
		return -EACCES;
	if (bprm->file->f_op == NULL)
		return -EACCES;

	bprm->e_uid = current->euid;
	bprm->e_gid = current->egid;

	if(!IS_NOSUID(inode)) {
		/* Set-uid? */
		if (mode & S_ISUID)
			bprm->e_uid = inode->i_uid;

		/* Set-gid? */
		/*
		 * If setgid is set but no group execute bit then this
		 * is a candidate for mandatory locking, not a setgid
		 * executable.
		 */
		if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP))
			bprm->e_gid = inode->i_gid;
	}

	/* We don't have VFS support for capabilities yet */
	cap_clear(bprm->cap_inheritable);
	cap_clear(bprm->cap_permitted);
	cap_clear(bprm->cap_effective);

	/*  To support inheritance of root-permissions and suid-root
         *  executables under compatibility mode, we raise all three
         *  capability sets for the file.
         *
         *  If only the real uid is 0, we only raise the inheritable
         *  and permitted sets of the executable file.
         */

	if (!issecure(SECURE_NOROOT)) {
		if (bprm->e_uid == 0 || current->uid == 0) {
			cap_set_full(bprm->cap_inheritable);
			cap_set_full(bprm->cap_permitted);
		}
		if (bprm->e_uid == 0) 
			cap_set_full(bprm->cap_effective);
	}

	memset(bprm->buf,0,BINPRM_BUF_SIZE);//从可执行文件中读入开头的128个字节到linux_binprm结构bprm中的缓冲区
	return kernel_read(bprm->file,0,bprm->buf,BINPRM_BUF_SIZE);
}


    copy_strings_kernel从系统空间拷贝,代码如下:

int copy_strings_kernel(int argc,char ** argv, struct linux_binprm *bprm)
{
	int r;
	mm_segment_t oldfs = get_fs();
	set_fs(KERNEL_DS); 
	r = copy_strings(argc, argv, bprm);
	set_fs(oldfs);
	return r; 
}
int copy_strings(int argc,char ** argv, struct linux_binprm *bprm) 
{
	while (argc-- > 0) {//参数的个数
		char *str;
		int len;
		unsigned long pos;

		if (get_user(str, argv+argc) || !str || !(len = strnlen_user(str, bprm->p))) 
			return -EFAULT;
		if (bprm->p < len) 
			return -E2BIG; 

		bprm->p -= len;//堆栈的指针再减去参数的长度
		/* XXX: add architecture specific overflow check here. */ 

		pos = bprm->p;
		while (len > 0) {
			char *kaddr;
			int i, new, err;
			struct page *page;
			int offset, bytes_to_copy;

			offset = pos % PAGE_SIZE;//offset为在本页中的偏移
			i = pos/PAGE_SIZE;//i为31,最后一页
			page = bprm->page[i];
			new = 0;
			if (!page) {
				page = alloc_page(GFP_HIGHUSER);//分配页面
				bprm->page[i] = page;
				if (!page)
					return -ENOMEM;
				new = 1;
			}
			kaddr = kmap(page);//得到页面低端地址

			if (new && offset)//新分配的页面
				memset(kaddr, 0, offset);//从页面低端地址到(页面低端地址 + offset)都清0
			bytes_to_copy = PAGE_SIZE - offset;
			if (bytes_to_copy > len) {
				bytes_to_copy = len;
				if (new)
					memset(kaddr+offset+len, 0, PAGE_SIZE-offset-len);
			}
			err =
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值