/* do_fork source code: */
/*
 * do_fork - create a new task as a copy of the current one.
 *
 * @clone_flags: the bits covered by CSIGNAL are stored in the child's
 *               exit_signal; the remaining bits are CLONE_* flags that are
 *               handed to the copy_*() helpers and tested directly below.
 * @stack_start: passed through unchanged to copy_thread().
 * @regs:        passed through unchanged to copy_thread().
 * @stack_size:  passed through unchanged to copy_thread().
 *
 * Returns the new task's pid on success.  On failure returns:
 *   -EPERM   CLONE_PID requested by a task whose pid is non-zero;
 *   -ENOMEM  no task_struct could be allocated, or one of copy_files(),
 *            copy_fs(), copy_sighand(), copy_mm() failed;
 *   -EAGAIN  the per-user process limit or the global max_threads limit
 *            has been reached;
 *   or whatever non-zero value copy_thread() returned.
 *
 * Flag values, kept here for reference:
 *   CSIGNAL       0x000000ff  signal mask to be sent at exit
 *   CLONE_VM      0x00000100  set if VM shared between processes
 *   CLONE_FS      0x00000200  set if fs info shared between processes
 *   CLONE_FILES   0x00000400  set if open files shared between processes
 *   CLONE_SIGHAND 0x00000800  set if signal handlers and blocked signals shared
 *   CLONE_PID     0x00001000  set if pid shared
 *   CLONE_PTRACE  0x00002000  set if we want to let tracing continue on the child too
 *   CLONE_VFORK   0x00004000  set if the parent wants the child to wake it up on mm_release
 *   CLONE_PARENT  0x00008000  set if we want to have the same parent as the cloner
 *   CLONE_THREAD  0x00010000  Same thread group?
 */
int do_fork(unsigned long clone_flags, unsigned long stack_start,
	    struct pt_regs *regs, unsigned long stack_size)
{
	int retval;
	struct task_struct *p;
	/* Lives on the parent's stack; only used when CLONE_VFORK is set. */
	struct completion vfork;

	retval = -EPERM;

	/*
	 * CLONE_PID is only allowed for the initial SMP swapper
	 * calls
	 */
	if (clone_flags & CLONE_PID) {
		if (current->pid)
			goto fork_out;
	}

	retval = -ENOMEM;
	/*
	 * NOTE(review): the original annotation says this allocates two
	 * contiguous pages shared by the task_struct and the kernel stack;
	 * that depends on the architecture's alloc_task_struct() - confirm.
	 */
	p = alloc_task_struct();
	if (!p)
		goto fork_out;

	/* Start the child as a structure copy of the parent's task_struct. */
	*p = *current;

	retval = -EAGAIN;
	/*
	 * Check if we are over our maximum process limit, but be sure to
	 * exclude root. This is needed to make it possible for login and
	 * friends to set the per-user process limit to something lower
	 * than the amount of processes root is running. -- Rik
	 *
	 * The fork is refused only when the user's process count has reached
	 * RLIMIT_NPROC and the caller has neither CAP_SYS_ADMIN nor
	 * CAP_SYS_RESOURCE.
	 */
	if (atomic_read(&p->user->processes) >= p->rlim[RLIMIT_NPROC].rlim_cur
	              && !capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE))
		goto bad_fork_free;

	/*
	 * Account the child against its user: one more reference on the
	 * user structure and one more process.  Both are undone at
	 * bad_fork_cleanup_count (atomic_dec() + free_uid()).
	 */
	atomic_inc(&p->user->__count);
	atomic_inc(&p->user->processes);

	/*
	 * Counter increases are protected by
	 * the kernel lock so nr_threads can't
	 * increase under us (but it may decrease).
	 *
	 * This is the system-wide limit, checked in addition to the
	 * per-user limit above.
	 */
	if (nr_threads >= max_threads)
		goto bad_fork_cleanup_count;

	/* Released again by put_exec_domain() at bad_fork_cleanup. */
	get_exec_domain(p->exec_domain);

	/*
	 * If the binary-format handler belongs to a module, bump that
	 * module's use count; the matching decrement is at bad_fork_cleanup.
	 */
	if (p->binfmt && p->binfmt->module)
		__MOD_INC_USE_COUNT(p->binfmt->module);

	p->did_exec = 0;	/* presumably: the child has not exec'd yet */
	p->swappable = 0;	/* set back to 1 once setup has finished */
	/*
	 * Keep the child in TASK_UNINTERRUPTIBLE while it is being set up;
	 * wake_up_process() is only called on it at the very end.
	 */
	p->state = TASK_UNINTERRUPTIBLE;

	copy_flags(clone_flags, p);	/* fills in the child's flags from clone_flags */
	p->pid = get_pid(clone_flags);

	/* Not linked into any run list yet. */
	p->run_list.next = NULL;
	p->run_list.prev = NULL;

	p->p_cptr = NULL;
	init_waitqueue_head(&p->wait_chldexit);
	p->vfork_done = NULL;
	if (clone_flags & CLONE_VFORK) {
		/* The parent blocks on this completion at the end of do_fork(). */
		p->vfork_done = &vfork;
		init_completion(&vfork);
	}
	spin_lock_init(&p->alloc_lock);

	/* Pending-signal state is reset rather than inherited. */
	p->sigpending = 0;
	init_sigpending(&p->pending);

	/*
	 * Interval-timer state is reset rather than inherited: clear the
	 * real/virtual/profiling values and reload increments and
	 * reinitialise the real timer, pointing its data at the child.
	 * (Presumably these back ITIMER_REAL, ITIMER_VIRTUAL and ITIMER_PROF;
	 * see setitimer(2) for how each of those counts time.)
	 */
	p->it_real_value = p->it_virt_value = p->it_prof_value = 0;
	p->it_real_incr = p->it_virt_incr = p->it_prof_incr = 0;
	init_timer(&p->real_timer);
	p->real_timer.data = (unsigned long) p;

	p->leader = 0;		/* session leadership doesn't inherit */
	p->tty_old_pgrp = 0;
	/*
	 * CPU-time accounting starts from zero: utime/stime are the task's
	 * own user/system time, cutime/cstime those of its children.
	 */
	p->times.tms_utime = p->times.tms_stime = 0;
	p->times.tms_cutime = p->times.tms_cstime = 0;
#ifdef CONFIG_SMP
	{
		int i;
		p->cpus_runnable = ~0UL;	/* all bits set */
		p->processor = current->processor;	/* same CPU as the parent */
		/* ?? should we just memset this ?? */
		for(i = 0; i < smp_num_cpus; i++)
			p->per_cpu_utime[i] = p->per_cpu_stime[i] = 0;
		spin_lock_init(&p->sigmask_lock);
	}
#endif
	p->lock_depth = -1;		/* -1 = no lock */
	p->start_time = jiffies;	/* creation time = current jiffies */

	INIT_LIST_HEAD(&p->local_pages);	/* start with an empty list */

	retval = -ENOMEM;
	/*
	 * copy all the process information
	 *
	 * Each helper receives clone_flags (presumably to choose between
	 * sharing and copying).  A failure jumps to the label that undoes
	 * everything acquired so far, in reverse order.
	 */
	if (copy_files(clone_flags, p))		/* open files */
		goto bad_fork_cleanup;
	if (copy_fs(clone_flags, p))		/* filesystem info */
		goto bad_fork_cleanup_files;
	if (copy_sighand(clone_flags, p))	/* signal handlers */
		goto bad_fork_cleanup_fs;
	if (copy_mm(clone_flags, p))		/* address space */
		goto bad_fork_cleanup_sighand;
	/* Architecture-specific thread setup; its error code is returned as-is. */
	retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs);
	if (retval)
		goto bad_fork_cleanup_mm;
	p->semundo = NULL;

	/* Our parent execution domain becomes current domain
	   These must match for thread signalling to apply */
	p->parent_exec_id = p->self_exec_id;

	/* ok, now we should be set up.. */
	p->swappable = 1;
	p->exit_signal = clone_flags & CSIGNAL;
	p->pdeath_signal = 0;

	/*
	 * "share" dynamic priority between parent and child, thus the
	 * total amount of dynamic priorities in the system doesnt change,
	 * more scheduling fairness. This is only important in the first
	 * timeslice, on the long run the scheduling behaviour is unchanged.
	 */
	p->counter = (current->counter + 1) >> 1;
	current->counter >>= 1;
	if (!current->counter)
		current->need_resched = 1;

	/*
	 * Ok, add it to the run-queues and make it
	 * visible to the rest of the system.
	 *
	 * Let it rip!
	 */
	retval = p->pid;	/* success: the caller gets the child's pid */
	p->tgid = retval;	/* overridden below for CLONE_THREAD */
	INIT_LIST_HEAD(&p->thread_group);

	/* Need tasklist lock for parent etc handling! */
	write_lock_irq(&tasklist_lock);

	/* CLONE_PARENT and CLONE_THREAD re-use the old parent */
	p->p_opptr = current->p_opptr;
	p->p_pptr = current->p_pptr;
	if (!(clone_flags & (CLONE_PARENT | CLONE_THREAD))) {
		p->p_opptr = current;
		/* A traced child keeps the p_pptr it inherited. */
		if (!(p->ptrace & PT_PTRACED))
			p->p_pptr = current;
	}

	if (clone_flags & CLONE_THREAD) {
		/* Join the creator's thread group instead of starting a new one. */
		p->tgid = current->tgid;
		list_add(&p->thread_group, &current->thread_group);
	}

	SET_LINKS(p);
	hash_pid(p);
	nr_threads++;
	write_unlock_irq(&tasklist_lock);

	/* A traced child gets SIGSTOP queued before it is woken. */
	if (p->ptrace & PT_PTRACED)
		send_sig(SIGSTOP, p, 1);

	wake_up_process(p);		/* do this last */
	++total_forks;
	/* vfork semantics: the parent waits here until the completion fires. */
	if (clone_flags & CLONE_VFORK)
		wait_for_completion(&vfork);

fork_out:
	return retval;

	/*
	 * Error unwinding: the labels fall through, releasing resources in
	 * the reverse order of their acquisition, then return via fork_out
	 * with the retval that was set before the failing step.
	 */
bad_fork_cleanup_mm:
	exit_mm(p);
bad_fork_cleanup_sighand:
	exit_sighand(p);
bad_fork_cleanup_fs:
	exit_fs(p); /* blocking */
bad_fork_cleanup_files:
	exit_files(p); /* blocking */
bad_fork_cleanup:
	put_exec_domain(p->exec_domain);
	if (p->binfmt && p->binfmt->module)
		__MOD_DEC_USE_COUNT(p->binfmt->module);
bad_fork_cleanup_count:
	atomic_dec(&p->user->processes);
	free_uid(p->user);
bad_fork_free:
	free_task_struct(p);
	goto fork_out;
}