/*
* This creates a new process as a copy of the old one,
* but does not actually start it yet.
*
* It copies the registers, and all the appropriate
* parts of the process environment (as per the clone
* flags). The actual kick-off is left to the caller.
*/
static task_t *copy_process(unsigned long clone_flags,
unsigned long stack_start,
struct pt_regs *regs,
unsigned long stack_size,
int __user *parent_tidptr,
int __user *child_tidptr,
int pid)
{ /*copy_process()的第一个参数是clone_flags,这个参数一开始是由sys_fork()或sys_clone()或是sys_vfork()传进来的.*/
int retval;
struct task_struct *p = NULL;
/*检查clone_flags具体的是什么*/
if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
return ERR_PTR(-EINVAL);
/*
* Thread groups must share signals as well, and detached threads
* can only be started up within the thread group.
*/
if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND))
return ERR_PTR(-EINVAL);
/*
* Shared signal handlers imply shared VM. By way of the above,
* thread groups also imply shared VM. Blocking this case allows
* for various simplifications in other code.
*/
if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM))
return ERR_PTR(-EINVAL);
/*检查参数clone_flags所传递的一致性.如果1.CLONE_NEWNS和CLONE_FS都被设。2.CLONE_THREAD被置位,但是CLONE_SIGHAND标志被清0(也就是说同一线程组的轻量级进程必须共享信号)3.CLONE_SIGHAND标志被设置,但CLONE_VM被清0(也就是要求共享信//号处理程序的轻量级进程必须共享内存描述符)。以上的三种情况会返回错误代码*/
retval = security_task_create(clone_flags);
/*安全性检查,2.6内核的改进*/
if (retval)
goto fork_out;
retval = -ENOMEM;
p = dup_task_struct(current);
/*接拷贝当前进程的task_t和thread_info到子进程 */
if (!p)
goto fork_out;
retval = -EAGAIN;
if (atomic_read(&p->user->processes) >=
p->signal->rlim[RLIMIT_NPROC].rlim_cur) {
if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) &&
p->user != &root_user)
goto bad_fork_free;
}
/*获得用户所拥有的进程数*/
atomic_inc(&p->user->__count);
atomic_inc(&p->user->processes);
get_group_info(p->group_info);
/*如果系统的进程数量(nr_threads)大于了系统所能拥有的最大进程数目值时,失败。而系统所能容纳的最大进程数取决于系统内存的大小,32位机器上,每个进程所占的页面大小理论上为2^32,不过一般来说内村栈和线程信息描述符所占用的空间不超过物理内存的1/8,不过可以修改/proc/sys/kernel/threads-max文件来改变这个值*/
if (nr_threads >= max_threads)
goto bad_fork_cleanup_count;
if (!try_module_get(task_thread_info(p)->exec_domain->module))
goto bad_fork_cleanup_count;
if (p->binfmt && !try_module_get(p->binfmt->module))
goto bad_fork_cleanup_put_domain;
/*子进程还在进行初始化,没有execve,因此次数置为0*/
p->did_exec = 0;
//copy父进程的所有标志
copy_flags(clone_flags, p);
p->pid = pid;//子进程号
retval = -EFAULT;
//如果clone_flags参数中的CLONE_PARENT_SETTID被设置了
if (clone_flags & CLONE_PARENT_SETTID)
//把子进程的PID复制到parent_tidptr所指向的用户态变量
if (put_user(p->pid, parent_tidptr))
goto bad_fork_cleanup;
p->proc_dentry = NULL;
INIT_LIST_HEAD(&p->children);//初始化几个链表(表头指向自己)
INIT_LIST_HEAD(&p->sibling);
p->vfork_done = NULL;
spin_lock_init(&p->alloc_lock);
spin_lock_init(&p->proc_lock);
clear_tsk_thread_flag(p, TIF_SIGPENDING);
init_sigpending(&p->pending);
//初始化时间信息
p->utime = cputime_zero;
p->stime = cputime_zero;
p->sched_time = 0;
p->rchar = 0; /* I/O counter: bytes read */
p->wchar = 0; /* I/O counter: bytes written */
p->syscr = 0; /* I/O counter: read syscalls */
p->syscw = 0; /* I/O counter: write syscalls */
acct_clear_integrals(p);
p->it_virt_expires = cputime_zero;
p->it_prof_expires = cputime_zero;
p->it_sched_expires = 0;
INIT_LIST_HEAD(&p->cpu_timers[0]);
INIT_LIST_HEAD(&p->cpu_timers[1]);
INIT_LIST_HEAD(&p->cpu_timers[2]);
p->lock_depth = -1; /* -1 = no lock */
do_posix_clock_monotonic_gettime(&p->start_time);
p->security = NULL;
p->io_context = NULL;
p->io_wait = NULL;
p->audit_context = NULL;
#ifdef CONFIG_NUMA
p->mempolicy = mpol_copy(p->mempolicy);
if (IS_ERR(p->mempolicy)) {
retval = PTR_ERR(p->mempolicy);
p->mempolicy = NULL;
goto bad_fork_cleanup;
}
#endif
p->tgid = p->pid;
//拷贝其他信息
if (clone_flags & CLONE_THREAD)
p->tgid = current->tgid;
//调用一些copy_*函数来创建新的数据结构,并把父进程的相应的数据结构的值复制到新的数据结构中
if ((retval = security_task_alloc(p)))
goto bad_fork_cleanup_policy;
if ((retval = audit_alloc(p)))
goto bad_fork_cleanup_security;
/* copy all the process information */
if ((retval = copy_semundo(clone_flags, p)))
goto bad_fork_cleanup_audit;
if ((retval = copy_files(clone_flags, p)))
goto bad_fork_cleanup_semundo;
if ((retval = copy_fs(clone_flags, p)))
goto bad_fork_cleanup_files;
if ((retval = copy_sighand(clone_flags, p)))
goto bad_fork_cleanup_fs;
if ((retval = copy_signal(clone_flags, p)))
goto bad_fork_cleanup_sighand;
if ((retval = copy_mm(clone_flags, p)))
goto bad_fork_cleanup_signal;
if ((retval = copy_keys(clone_flags, p)))
goto bad_fork_cleanup_mm;
if ((retval = copy_namespace(clone_flags, p)))
goto bad_fork_cleanup_keys;
/*调用copy_thread,并发出clone()系统调用时CPU的值,这些值在父进程的内核栈中,这样就初始化了子进程的内核栈*/
retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs);
if (retval)
goto bad_fork_cleanup_namespace;
/*copy_thread后,它把eax寄存器对应字段的值置为0,这也是clone(),fork(),vfork()的对应子进程返回值。所以我们看到的上面三个系统调用对与子进程来说返为0,而父进程的内核栈中对应的值被置为子进程的ID号*/
p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
/*
* Clear TID on mm_release()?
*/
p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL;
/*清空子进程的thread_info结构的TIF_SYSCALL_TRACE标志,目的是为了使ret_from_fork()函数不会把系统调用结束的消息通知给调试进程,其中ret_from_fork()的地址在上面调用copy_thread()时已经保存到了子进程thread.eip中了,那么当fork()结束的时候子进程就会根据自己的eip来ret_from_fork,这就是所谓的地址转移*/
#ifdef TIF_SYSCALL_EMU
clear_tsk_thread_flag(p, TIF_SYSCALL_EMU);
#endif
/* Our parent execution domain becomes current domain
These must match for thread signalling to apply */
p->parent_exec_id = p->self_exec_id;
/*初始化p->exit_signal,如果CLONE_THREAD标志被置位了,就把exit_signal置为-1*/
p->exit_signal = (clone_flags & CLONE_THREAD) ? -1 : (clone_flags & CSIGNAL);
p->pdeath_signal = 0;
p->exit_state = 0;
/*
* Ok, make it visible to the rest of the system.
* We dont wake it up yet.
*/
p->group_leader = p;
INIT_LIST_HEAD(&p->ptrace_children);
INIT_LIST_HEAD(&p->ptrace_list);
/*完成对新进程调度程序数据结构的初始化,它把新进程的状态置为TASK_RUNNING,分配时间片,并且当父进程只有唯一的时间片用于分配时调用scheduler_tick())*/
sched_fork(p, clone_flags);
//为父进程锁住任务列表
write_lock_irq(&tasklist_lock);
/*
* The task hasn't been attached yet, so its cpus_allowed mask will
* not be changed, nor will its assigned CPU.
*
* The cpus_allowed mask of the parent may have changed after it was
* copied first time - so re-copy it here, then check the child's CPU
* to ensure it is on a valid CPU (and if not, just force it back to
* parent's CPU). This avoids alot of nasty races.
*/
p->cpus_allowed = current->cpus_allowed;
if (unlikely(!cpu_isset(task_cpu(p), p->cpus_allowed) ||
!cpu_online(task_cpu(p))))
/*把新进程的thread_info结构中的cpu子段设置为本地CPU号*/
set_task_cpu(p, smp_processor_id());
/*
* Check for pending SIGKILL! The new thread should not be allowed
* to slip out of an OOM kill. (or normal SIGKILL.)
*/
if (sigismember(¤t->pending.signal, SIGKILL)) {
write_unlock_irq(&tasklist_lock);
retval = -EINTR;
goto bad_fork_cleanup_namespace;
}
/* 初始化表示亲子关系的字段,特别是CLONE_PARENT||CLONE_THREAD被设置了,就用current->real_parent初始化p->real_parent和p->parent*/
if (clone_flags & (CLONE_PARENT|CLONE_THREAD))
p->real_parent = current->real_parent;
else
p->real_parent = current;
p->parent = p->real_parent;
if (clone_flags & CLONE_THREAD) {
spin_lock(¤t->sighand->siglock);
/*
* Important: if an exit-all has been started then
* do not create this new thread - the whole thread
* group is supposed to exit anyway.
*/
if (current->signal->flags & SIGNAL_GROUP_EXIT) {
spin_unlock(¤t->sighand->siglock);
write_unlock_irq(&tasklist_lock);
retval = -EAGAIN;
goto bad_fork_cleanup_namespace;
}
p->group_leader = current->group_leader;/* 设置p->group_leader初值*/
if (current->signal->group_stop_count > 0) {
/*
* There is an all-stop in progress for the group.
* We ourselves will stop as soon as we check signals.
* Make the new thread part of that group stop too.
*/
current->signal->group_stop_count++;
set_tsk_thread_flag(p, TIF_SIGPENDING);
}
if (!cputime_eq(current->signal->it_virt_expires,
cputime_zero) ||
!cputime_eq(current->signal->it_prof_expires,
cputime_zero) ||
current->signal->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY ||
!list_empty(¤t->signal->cpu_timers[0]) ||
!list_empty(¤t->signal->cpu_timers[1]) ||
!list_empty(¤t->signal->cpu_timers[2])) {
/*
* Have child wake up on its first tick to check
* for process CPU timers.
*/
p->it_prof_expires = jiffies_to_cputime(1);
}
spin_unlock(¤t->sighand->siglock);
}
/*
* inherit ioprio
*/
p->ioprio = current->ioprio;
SET_LINKS(p);
/*下面就是有关跟踪的设置了*/
if (unlikely(p->ptrace & PT_PTRACED))
__ptrace_link(p, current->parent);
attach_pid(p, PIDTYPE_PID, p->pid);
attach_pid(p, PIDTYPE_TGID, p->tgid);
if (thread_group_leader(p)) {
attach_pid(p, PIDTYPE_PGID, process_group(p));
attach_pid(p, PIDTYPE_SID, p->signal->session);
if (p->pid)
__get_cpu_var(process_counts)++;
}
if (!current->signal->tty && p->signal->tty)
p->signal->tty = NULL;
nr_threads++;
total_forks++;
write_unlock_irq(&tasklist_lock);
proc_fork_connector(p);
cpuset_fork(p);
retval = 0;
fork_out:
if (retval)
return ERR_PTR(retval);
return p;
bad_fork_cleanup_namespace:
exit_namespace(p);
bad_fork_cleanup_keys:
exit_keys(p);
bad_fork_cleanup_mm:
if (p->mm)
mmput(p->mm);
bad_fork_cleanup_signal:
exit_signal(p);
bad_fork_cleanup_sighand:
exit_sighand(p);
bad_fork_cleanup_fs:
exit_fs(p); /* blocking */
bad_fork_cleanup_files:
exit_files(p); /* blocking */
bad_fork_cleanup_semundo:
exit_sem(p);
bad_fork_cleanup_audit:
audit_free(p);
bad_fork_cleanup_security:
security_task_free(p);
bad_fork_cleanup_policy:
#ifdef CONFIG_NUMA
mpol_free(p->mempolicy);
#endif
bad_fork_cleanup:
if (p->binfmt)
module_put(p->binfmt->module);
bad_fork_cleanup_put_domain:
module_put(task_thread_info(p)->exec_domain->module);
bad_fork_cleanup_count:
put_group_info(p->group_info);
atomic_dec(&p->user->processes);
free_uid(p->user);
bad_fork_free:
free_task(p);
goto fork_out;
}