Linux中关于进程创建的主要函数有fork、vfork、clone,它们的实现都是由do_fork来完成的,只是传入的参数有差别。do_fork中调用函数copy_process,从父进程中复制相关内容到子进程;其中复制哪些内容,是根据传入的参数clone_flags来确定的:对每一类资源,要么重新申请内存进行复制,要么与父进程共享。下面对具体的代码进行分析。
/*
 * fork() entry point.
 *
 * SIGCHLD is the only flag handed to do_fork(); it is the signal
 * delivered to the parent once the child terminates. The child starts
 * on the caller's current stack pointer (regs->sp).
 */
int sys_fork(struct pt_regs *regs)
{
	const unsigned long fork_flags = SIGCHLD;

	return do_fork(fork_flags, regs->sp, regs, 0, NULL, NULL);
}
/*
 * vfork() entry point.
 *
 * Although this looks simple enough to be implemented in user mode,
 * it cannot be, because of register pressure: a user-mode vfork()
 * may not have a stack frame, and issuing the "clone()" system call
 * directly would leave too few call-clobbered registers to hold all
 * the required information.
 *
 * The flags passed on are CLONE_VFORK | CLONE_VM | SIGCHLD, with the
 * caller's current stack pointer (regs->sp).
 */
int sys_vfork(struct pt_regs *regs)
{
	const unsigned long vfork_flags = CLONE_VFORK | CLONE_VM | SIGCHLD;

	return do_fork(vfork_flags, regs->sp, regs, 0, NULL, NULL);
}
/*
 * clone() entry point.
 *
 * Unlike sys_fork()/sys_vfork(), the flags, the new stack pointer and
 * both TID pointers are taken from the caller's saved registers
 * (bx, cx, dx, di), i.e. they are whatever the user requested.
 */
int sys_clone(struct pt_regs *regs)
{
	unsigned long clone_flags = regs->bx;
	unsigned long requested_sp = regs->cx;
	/* A zero stack pointer means "keep using the caller's stack pointer". */
	unsigned long newsp = requested_sp ? requested_sp : regs->sp;
	int __user *parent_tidptr = (int __user *)regs->dx;
	int __user *child_tidptr = (int __user *)regs->di;

	return do_fork(clone_flags, newsp, regs, 0, parent_tidptr, child_tidptr);
}
下面是具体的do_fork函数
/*
* Ok, this is the main fork-routine.
*
* It copies the process, and if successful kick-starts
* it and waits for it to finish using the VM if required.
*/
/*
* Parameters:
* clone_flags   - set of flags controlling the copy. The lowest byte
*                 holds the number of the signal sent to the parent
*                 when the child terminates; the remaining higher
*                 bits hold the various CLONE_* constants.
* stack_start   - start address of the user-mode stack.
* regs          - pointer to the register set holding the call
*                 arguments in raw form. The type is the
*                 architecture-specific struct pt_regs, which stores
*                 all registers in the order they are saved on the
*                 kernel stack when the system call is executed.
* stack_size    - size of the user-mode stack; usually unnecessary
*                 and passed as 0.
* parent_tidptr,
* child_tidptr  - pointers to user-space addresses for the TIDs of
*                 the parent and the child respectively. The NPTL
*                 (Native POSIX Threads Library) thread
*                 implementation needs these two parameters.
*
* Returns the new task's pid number (from task_pid_vnr()) on success.
* On failure returns -EINVAL or -EPERM from the early flag checks, or
* PTR_ERR() of the value returned by copy_process().
*/
long do_fork(unsigned long clone_flags,
unsigned long stack_start,
struct pt_regs *regs,
unsigned long stack_size,
int __user *parent_tidptr,
int __user *child_tidptr)
{
struct task_struct *p;
int trace = 0;
long nr;
/*
* Do some preliminary argument and permissions checking before we
* actually start allocating stuff
*/
/* Validate the flag combination. */
if (clone_flags & CLONE_NEWUSER) {
if (clone_flags & CLONE_THREAD)
return -EINVAL;
/* hopefully this check will go away when userns support is
* complete
*/
/* Permission check: all three capabilities are required. */
if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SETUID) ||
!capable(CAP_SETGID))
return -EPERM;
}
/*
* We hope to recycle these flags after 2.6.26
*/
/*
* If the deprecated CLONE_STOPPED flag is set, log a notice naming
* the calling process. The static counter limits this to 100
* messages in total, in addition to printk_ratelimit().
*/
if (unlikely(clone_flags & CLONE_STOPPED)) {
static int __read_mostly count = 100;
/* printk_ratelimit() throttles how often the message is printed. */
if (count > 0 && printk_ratelimit()) {
char comm[TASK_COMM_LEN];
count--;
printk(KERN_INFO "fork(): process `%s' used deprecated "
"clone flags 0x%lx\n",
get_task_comm(comm, current),
clone_flags & CLONE_STOPPED);
}
}
/*
* When called from kernel_thread, don't do user tracing stuff.
*/
/* user_mode(regs): whether the register set came from user mode. */
if (likely(user_mode(regs)))
trace = tracehook_prepare_clone(clone_flags);/* The result is passed on to copy_process() and tracehook_report_clone_complete() below. */
/*
* Create the new task as a copy of the current one; the clone flags
* decide how the copy is made.
*/
p = copy_process(clone_flags, stack_start, regs, stack_size,
child_tidptr, NULL, trace);
/*
* Do this prior waking up the new thread - the thread pointer
* might get invalid after that point, if the thread exits quickly.
*/
if (!IS_ERR(p)) {/* copy_process() succeeded: p is a valid task pointer */
struct completion vfork;
/* Presumably emits the sched_process_fork trace event for parent and child - confirm. */
trace_sched_process_fork(current, p);
nr = task_pid_vnr(p);/* pid number of the new task (namespace-relative; confirm which namespace task_pid_vnr() uses) */
/* Write a simple value into user space */
if (clone_flags & CLONE_PARENT_SETTID)/* caller asked for the TID to be stored in the parent */
put_user(nr, parent_tidptr);/* store nr at the user-space address parent_tidptr.
NOTE(review): the return value of put_user() is not checked here. */
/*
* For CLONE_VFORK, hook the on-stack completion into the child
* and initialise it.
*/
if (clone_flags & CLONE_VFORK) {
p->vfork_done = &vfork;
init_completion(&vfork);
}
/* Presumably finishes copying the audit context for the child - confirm against audit_finish_fork(). */
audit_finish_fork(p);
/*
* Report the clone to the tracing layer. Presumably, if the child
* is being traced, this queues a SIGSTOP for it; the child has not
* run yet, so the signal can only be left pending - confirm against
* tracehook_report_clone().
*/
tracehook_report_clone(regs, clone_flags, nr, p);
/*
* We set PF_STARTING at creation in case tracing wants to
* use this to distinguish a fully live task from one that
* hasn't gotten to tracehook_report_clone() yet. Now we
* clear it and set the child going.
*/
p->flags &= ~PF_STARTING;
/*
* If the child is to start out stopped, queue SIGSTOP for it. The
* child has not run yet and cannot handle the signal now, so
* TIF_SIGPENDING is set and its state is set to TASK_STOPPED.
*/
if (unlikely(clone_flags & CLONE_STOPPED)) {
/*
* We'll start up with an immediate SIGSTOP.
*/
sigaddset(&p->pending.signal, SIGSTOP);
set_tsk_thread_flag(p, TIF_SIGPENDING);
__set_task_state(p, TASK_STOPPED);
} else {/* otherwise put the child on a runqueue and wake it for the first time */
wake_up_new_task(p, clone_flags);
}
tracehook_report_clone_complete(trace, regs,
clone_flags, nr, p);
/*
* With CLONE_VFORK the caller blocks here until the vfork
* completion set up above is signalled.
*/
if (clone_flags & CLONE_VFORK) {
freezer_do_not_count();
wait_for_completion(&vfork);
freezer_count();
tracehook_report_vfork_done(p, nr);
}
} else {
/* copy_process() failed: convert the error pointer into an errno value. */
nr = PTR_ERR(p);
}
return nr;
}
对于设置了CLONE_VFORK标志的情况,do_fork会调用下面的函数来初始化completion结构:
/**
 * init_completion - set up a dynamically allocated completion
 * @x: the completion structure to initialise
 *
 * Resets the done counter of @x to zero and initialises its wait
 * queue head, leaving the completion ready for use.
 */
static inline void init_completion(struct completion *x)
{
	/* Nothing has been completed yet. */
	x->done = 0;

	/* Start with a freshly initialised wait queue. */
	init_waitqueue_head(&x->wait);
}
它与do_fork中的下面这段代码相呼应:
/*
* Excerpt from do_fork(): when CLONE_VFORK is set, the calling
* process blocks on the vfork completion.
*/
if (clone_flags & CLONE_VFORK) {
freezer_do_not_count();
wait_for_completion(&vfork);
freezer_count();
tracehook_report_vfork_done(p, nr);
}
下面是唤醒函数,它将新创建的进程加入运行队列:
/*
 * wake_up_new_task - first wake-up of a newly created task.
 *
 * Performs the initial scheduler statistics housekeeping that every
 * newly created context needs, then places the task on the runqueue
 * and wakes it.
 *
 * @p:           the new task; it must be in TASK_RUNNING state.
 * @clone_flags: not referenced in this function body.
 */
void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
{
	unsigned long lock_flags;
	struct rq *runq;

	/* Look up the runqueue of p; released by task_rq_unlock() below. */
	runq = task_rq_lock(p, &lock_flags);
	BUG_ON(p->state != TASK_RUNNING);
	/* Bring the runqueue clock up to date. */
	update_rq_clock(runq);

	if (p->sched_class->task_new && current->se.on_rq) {
		/*
		 * The scheduling class has its own start-up handling
		 * for new tasks, so let it do that management.
		 */
		p->sched_class->task_new(runq, p);
		inc_nr_running(runq);
	} else {
		/* No class hook, or current is not on a runqueue: enqueue directly. */
		activate_task(runq, p, 0);
	}

	trace_sched_wakeup_new(runq, p, 1);

	/* Decide whether the new task may preempt the currently running one. */
	check_preempt_curr(runq, p, WF_FORK);
#ifdef CONFIG_SMP
	if (p->sched_class->task_wake_up)
		p->sched_class->task_wake_up(runq, p);
#endif
	task_rq_unlock(runq, &lock_flags);
}
copy_process函数比较繁琐,它是do_fork的主要执行函数,负责完成进程资源的复制;根据相关的标志位,有的资源需要和父进程共享,具体内容我们将在下一篇中看到。