子进程开始执行execve:
execve("/bin/echo", args, NULL});
系统调用execve内核入口是sys_execve,代码如下:
asmlinkage int sys_execve(struct pt_regs regs)
{
int error;
char * filename;
filename = getname((char *) regs.ebx);//ebx为"/bin/echo",把字符串从用户空间拷贝到系统空间
error = PTR_ERR(filename);
if (IS_ERR(filename))
goto out;
error = do_execve(filename, (char **) regs.ecx, (char **) regs.edx, ®s);//ecx为args,edx为NULL
if (error == 0)
current->ptrace &= ~PT_DTRACE;
putname(filename);
out:
return error;
}
int do_execve(char * filename, char ** argv, char ** envp, struct pt_regs * regs)
{
struct linux_binprm bprm;
struct file *file;
int retval;
int i;
file = open_exec(filename);//打开目标文件 /bin/echo
retval = PTR_ERR(file);
if (IS_ERR(file))
return retval;
bprm.p = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *);//1024*32减去一个指针的大小
memset(bprm.page, 0, MAX_ARG_PAGES*sizeof(bprm.page[0])); //page指针数组初始化为0
bprm.file = file;
bprm.filename = filename;
bprm.sh_bang = 0;
bprm.loader = 0;
bprm.exec = 0;
if ((bprm.argc = count(argv, bprm.p / sizeof(void *))) < 0) {//对指针数组argv[]中参数的个数进行计数,而bprm.p / sizeof(void *)表示允许的最大值,由于agrv[]是在用户空间而不在系统空间
allow_write_access(file);
fput(file);
return bprm.argc;
}
if ((bprm.envc = count(envp, bprm.p / sizeof(void *))) < 0) {//同上
allow_write_access(file);
fput(file);
return bprm.envc;
}
retval = prepare_binprm(&bprm);//见下面的代码
if (retval < 0)
goto out;
retval = copy_strings_kernel(1, &bprm.filename, &bprm);//由于bprm.filename已经在系统空间了,所以copy_strings_kernel从系统空间中拷贝
if (retval < 0)
goto out;
bprm.exec = bprm.p;
retval = copy_strings(bprm.envc, envp, &bprm);//从用户空间拷贝,参考copy_strings_kernel
if (retval < 0)
goto out;
retval = copy_strings(bprm.argc, argv, &bprm);//从用户空间拷贝,参考copy_strings_kernel
if (retval < 0)
goto out;
retval = search_binary_handler(&bprm,regs);//已经从可执行文件头部读入了128个字节存放在bprm的缓冲区,而且运行所需的参数和环境变量也已经搜集在bprm中,现在就由formats队列中的成员逐个来认领,谁要识别到它代表的可执行文件格式,运行的时候就交给它
if (retval >= 0)
/* execve success */
return retval;
out:
/* Something went wrong, return the inode and free the argument pages*/
allow_write_access(bprm.file);
if (bprm.file)
fput(bprm.file);
for (i = 0 ; i < MAX_ARG_PAGES ; i++) {
struct page * page = bprm.page[i];
if (page)
__free_page(page);
}
return retval;
}
struct linux_binprm{
char buf[BINPRM_BUF_SIZE];//128
struct page *page[MAX_ARG_PAGES];//32
unsigned long p; /* current top of mem */
int sh_bang;
struct file * file;
int e_uid, e_gid;
kernel_cap_t cap_inheritable, cap_permitted, cap_effective;
int argc, envc;
char * filename; /* Name of binary */
unsigned long loader, exec;
};
int prepare_binprm(struct linux_binprm *bprm)
{
int mode;
struct inode * inode = bprm->file->f_dentry->d_inode;
mode = inode->i_mode;
/* Huh? We had already checked for MAY_EXEC, WTF do we check this? */
if (!(mode & 0111)) /* with at least _one_ execute bit set */
return -EACCES;
if (bprm->file->f_op == NULL)
return -EACCES;
bprm->e_uid = current->euid;
bprm->e_gid = current->egid;
if(!IS_NOSUID(inode)) {
/* Set-uid? */
if (mode & S_ISUID)
bprm->e_uid = inode->i_uid;
/* Set-gid? */
/*
* If setgid is set but no group execute bit then this
* is a candidate for mandatory locking, not a setgid
* executable.
*/
if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP))
bprm->e_gid = inode->i_gid;
}
/* We don't have VFS support for capabilities yet */
cap_clear(bprm->cap_inheritable);
cap_clear(bprm->cap_permitted);
cap_clear(bprm->cap_effective);
/* To support inheritance of root-permissions and suid-root
* executables under compatibility mode, we raise all three
* capability sets for the file.
*
* If only the real uid is 0, we only raise the inheritable
* and permitted sets of the executable file.
*/
if (!issecure(SECURE_NOROOT)) {
if (bprm->e_uid == 0 || current->uid == 0) {
cap_set_full(bprm->cap_inheritable);
cap_set_full(bprm->cap_permitted);
}
if (bprm->e_uid == 0)
cap_set_full(bprm->cap_effective);
}
memset(bprm->buf,0,BINPRM_BUF_SIZE);//从可执行文件中读入开头的128个字节到linux_binprm结构bprm中的缓冲区
return kernel_read(bprm->file,0,bprm->buf,BINPRM_BUF_SIZE);
}
copy_strings_kernel从系统空间拷贝,代码如下:
int copy_strings_kernel(int argc,char ** argv, struct linux_binprm *bprm)
{
int r;
mm_segment_t oldfs = get_fs();
set_fs(KERNEL_DS);
r = copy_strings(argc, argv, bprm);
set_fs(oldfs);
return r;
}
int copy_strings(int argc,char ** argv, struct linux_binprm *bprm)
{
while (argc-- > 0) {//参数的个数
char *str;
int len;
unsigned long pos;
if (get_user(str, argv+argc) || !str || !(len = strnlen_user(str, bprm->p)))
return -EFAULT;
if (bprm->p < len)
return -E2BIG;
bprm->p -= len;//堆栈的指针再减去参数的长度
/* XXX: add architecture specific overflow check here. */
pos = bprm->p;
while (len > 0) {
char *kaddr;
int i, new, err;
struct page *page;
int offset, bytes_to_copy;
offset = pos % PAGE_SIZE;//offset为在本页中的偏移
i = pos/PAGE_SIZE;//i为31,最后一页
page = bprm->page[i];
new = 0;
if (!page) {
page = alloc_page(GFP_HIGHUSER);//分配页面
bprm->page[i] = page;
if (!page)
return -ENOMEM;
new = 1;
}
kaddr = kmap(page);//得到页面低端地址
if (new && offset)//新分配的页面
memset(kaddr, 0, offset);//从页面低端地址到(页面低端地址 + offset)都清0
bytes_to_copy = PAGE_SIZE - offset;
if (bytes_to_copy > len) {
bytes_to_copy = len;
if (new)
memset(kaddr+offset+len, 0, PAGE_SIZE-offset-len);
}
err =