六: wait4 ()系统调用
在父进程中,用wait4()可以获得子进程的退出状态,并且防止在父进程退出前,子进程退出造成僵死状态。
关于wait4()在用户空间的调用方式可以自行参考相关资料,在这里只是讨论内核对这个系统调用的实现过程。
wait4()的系统调用入口为sys_wait4()。代码如下所示:
/*
 * sys_wait4 - kernel entry point of the wait4() system call.
 *
 * @pid:       child selector, passed through to do_wait().
 * @stat_addr: user pointer that receives the status word (may be NULL).
 * @options:   bitwise OR of WNOHANG, WUNTRACED, WCONTINUED, __WNOTHREAD,
 *             __WCLONE and __WALL; any other bit is rejected.
 * @ru:        user pointer that receives resource usage (may be NULL).
 *
 * Returns -EINVAL for an unsupported option bit, otherwise whatever
 * do_wait() returns.  WEXITED is always added to the options and no
 * siginfo pointer is passed (infop == NULL).
 */
asmlinkage long sys_wait4(pid_t pid, int __user *stat_addr,
			  int options, struct rusage __user *ru)
{
	/* Every option bit wait4() accepts from user space. */
	const unsigned int accepted = WNOHANG | WUNTRACED | WCONTINUED |
				      __WNOTHREAD | __WCLONE | __WALL;
	long ret;

	if ((options & ~accepted) != 0)
		return -EINVAL;

	ret = do_wait(pid, options | WEXITED, NULL, stat_addr, ru);

	/* avoid REGPARM breakage on x86: */
	prevent_tail_call(ret);
	return ret;
}
do_wait()是其中的核心处理函数。代码如下:
/*
 * do_wait - common implementation behind the wait-family system calls.
 *
 * @pid:       child selector, interpreted by eligible_child().
 * @options:   W* option bits (WNOHANG, WUNTRACED, WCONTINUED, WEXITED,
 *             WNOWAIT, __WNOTHREAD, ...).
 * @infop:     optional user siginfo to fill in; sys_wait4() passes NULL.
 * @stat_addr: optional user pointer for the status word.
 * @ru:        optional user pointer for resource usage.
 *
 * Scans the children of the calling task and, unless __WNOTHREAD is set,
 * of every other thread in its thread group, looking for a child whose
 * state can be reported.  If there are eligible children but none is
 * reportable yet, the caller sleeps on signal->wait_chldexit and rescans
 * after being woken.
 *
 * Returns:
 *   the non-zero value produced by wait_task_stopped(), wait_task_zombie()
 *     or wait_task_continued() (forced to 0 on success when @infop is set);
 *   0 when WNOHANG is set and nothing is reportable yet;
 *   -ERESTARTSYS when a signal is pending instead of sleeping;
 *   -ECHILD when no eligible child exists, or the negative code returned
 *     by eligible_child() when every candidate was denied.
 */
static long do_wait(pid_t pid, int options, struct siginfo __user *infop,
		    int __user *stat_addr, struct rusage __user *ru)
{
	/* Wait-queue entry representing the calling task. */
	DECLARE_WAITQUEUE(wait, current);
	struct task_struct *tsk;
	int flag, retval;
	int allowed, denied;

	/*
	 * Queue ourselves on the per-signal-struct child-exit wait queue
	 * (__wake_up_parent() wakes this queue).
	 */
	add_wait_queue(&current->signal->wait_chldexit, &wait);
repeat:
	/*
	 * flag:    set once an eligible child exists that may become
	 *          reportable later, i.e. it is worth sleeping.
	 * allowed: at least one child passed eligible_child().
	 * denied:  last negative code returned by eligible_child().
	 */
	flag = 0;
	allowed = denied = 0;
	/*
	 * Mark ourselves sleeping before scanning, so a wake-up that
	 * arrives during the scan is not lost when schedule() is called.
	 */
	current->state = TASK_INTERRUPTIBLE;
	read_lock(&tasklist_lock);
	tsk = current;
	do {
		struct task_struct *p;
		struct list_head *_p;
		int ret;

		/* Walk the children of this thread. */
		list_for_each(_p, &tsk->children) {
			p = list_entry(_p, struct task_struct, sibling);
			/* 0: not a candidate, <0: denied, 1 or 2: candidate. */
			ret = eligible_child(pid, options, p);
			if (!ret)
				continue;
			if (unlikely(ret < 0)) {
				denied = ret;
				continue;
			}
			allowed = 1;
			switch (p->state) {
			case TASK_TRACED:
				flag = 1;
				/*
				 * Only continue into the stopped handling when
				 * my_ptrace_child() accepts p (helper not shown
				 * here; presumably "p is ptraced by us").
				 */
				if (!my_ptrace_child(p))
					continue;
				/*FALLTHROUGH*/
			case TASK_STOPPED:
				flag = 1;
				/*
				 * A stopped child is reported when the caller
				 * asked for it with WUNTRACED, or when
				 * my_ptrace_child() accepts it; otherwise skip.
				 */
				if (!(options & WUNTRACED) &&
				    !my_ptrace_child(p))
					continue;
				/* WNOWAIT: report without consuming the status. */
				retval = wait_task_stopped(p, ret == 2,
							   (options & WNOWAIT),
							   infop,
							   stat_addr, ru);
				/*
				 * -EAGAIN: the helper dropped tasklist_lock and
				 * found nothing to report; rescan from scratch.
				 */
				if (retval == -EAGAIN)
					goto repeat;
				if (retval != 0) /* He released the lock. */
					goto end;
				break;
			default:
			// case EXIT_DEAD:
				/* Nothing to do for a task that is already dead. */
				if (p->exit_state == EXIT_DEAD)
					continue;
			// case EXIT_ZOMBIE:
				if (p->exit_state == EXIT_ZOMBIE) {
					/*
					 * ret == 2: delayed thread-group leader
					 * (see eligible_child()); do not reap it,
					 * treat it like a running child instead.
					 */
					if (ret == 2)
						goto check_continued;
					if (!likely(options & WEXITED))
						continue;
					retval = wait_task_zombie(
						p, (options & WNOWAIT),
						infop, stat_addr, ru);
					/* He released the lock. */
					if (retval != 0)
						goto end;
					break;
				}
check_continued:
				/*
				 * It's running now, so it might later
				 * exit, stop, or stop and then continue.
				 */
				flag = 1;
				/* Continued children are reported only on request. */
				if (!unlikely(options & WCONTINUED))
					continue;
				retval = wait_task_continued(
					p, (options & WNOWAIT),
					infop, stat_addr, ru);
				if (retval != 0) /* He released the lock. */
					goto end;
				break;
			}
		}
		/*
		 * Nothing worth waiting for on the children list: an eligible
		 * entry on the ptrace_children list still counts as something
		 * to wait for, so that we sleep instead of returning -ECHILD.
		 */
		if (!flag) {
			list_for_each(_p, &tsk->ptrace_children) {
				p = list_entry(_p, struct task_struct,
					       ptrace_list);
				if (!eligible_child(pid, options, p))
					continue;
				flag = 1;
				break;
			}
		}
		/* __WNOTHREAD: look only at the caller's own children. */
		if (options & __WNOTHREAD)
			break;
		/* Otherwise repeat for the next thread in our thread group. */
		tsk = next_thread(tsk);
		BUG_ON(tsk->signal != current->signal);
	} while (tsk != current);

	read_unlock(&tasklist_lock);
	if (flag) {
		retval = 0;
		/* WNOHANG: never sleep, return 0 immediately. */
		if (options & WNOHANG)
			goto end;
		retval = -ERESTARTSYS;
		if (signal_pending(current))
			goto end;
		/* Sleep until woken, then rescan. */
		schedule();
		goto repeat;
	}
	retval = -ECHILD;
	/* Every candidate was denied: report that error, not -ECHILD. */
	if (unlikely(denied) && !allowed)
		retval = denied;
end:
	/* Back to runnable and off the child-exit wait queue. */
	current->state = TASK_RUNNING;
	remove_wait_queue(&current->signal->wait_chldexit, &wait);
	if (infop) {
		if (retval > 0)
			retval = 0;
		else {
			/*
			 * For a WNOHANG return, clear out all the fields
			 * we would set so the user can easily tell the
			 * difference.
			 */
			if (!retval)
				retval = put_user(0, &infop->si_signo);
			if (!retval)
				retval = put_user(0, &infop->si_errno);
			if (!retval)
				retval = put_user(0, &infop->si_code);
			if (!retval)
				retval = put_user(0, &infop->si_pid);
			if (!retval)
				retval = put_user(0, &infop->si_uid);
			if (!retval)
				retval = put_user(0, &infop->si_status);
		}
	}
	return retval;
}
这段代码还是比较简单。先遍历进程的子进程,再遍历被跟踪出去的子进程,然后对线程组中的其它线程重复同样的过程。我们分析一下里面用到的几个重要的子函数。
eligible_child()用来判断子进程是否是我们想要wait的子进程.代码如下:
/*
 * eligible_child - decide whether @p is a child the caller is waiting for.
 *
 * @pid:     child selector:
 *             > 0   only the child whose pid equals @pid
 *             == 0  any child in the caller's process group
 *             < -1  any child whose process group equals -@pid
 *             == -1 any child
 * @options: wait option bits; only __WCLONE and __WALL are examined here.
 * @p:       candidate child.
 *
 * Returns 0 when @p is not a candidate, 2 when @p is a thread-group
 * leader whose reporting must be delayed, a non-zero code from
 * security_task_wait() when that hook returns one, and 1 otherwise.
 */
static int eligible_child(pid_t pid, int options, struct task_struct *p)
{
	int sec_err;

	/* Filter by the pid selector. */
	if (pid > 0 && p->pid != pid)
		return 0;
	if (pid == 0 && process_group(p) != process_group(current))
		return 0;
	if (pid < -1 && process_group(p) != -pid)
		return 0;

	/* A child with no exit signal that is not being ptraced is skipped. */
	if (p->exit_signal == -1 && !p->ptrace)
		return 0;

	/*
	 * Without __WALL, a child whose exit signal is not SIGCHLD is
	 * matched only when __WCLONE is set, and a SIGCHLD child only
	 * when it is not.
	 */
	if (!(options & __WALL)) {
		int non_sigchld = (p->exit_signal != SIGCHLD);
		int wants_clone = ((options & __WCLONE) != 0);

		if (non_sigchld != wants_clone)
			return 0;
	}

	/*
	 * Do not consider thread group leaders that are
	 * in a non-empty thread group:
	 */
	if (delay_group_leader(p))
		return 2;

	sec_err = security_task_wait(p);
	return sec_err ? sec_err : 1;
}
对TASK_TRACED和TASK_STOPPED状态的子进程操作是在wait_task_stopped()中完成的。它的代码如下:
/*
 * wait_task_stopped - report a child found in TASK_STOPPED/TASK_TRACED.
 *
 * Called by do_wait() with tasklist_lock read-held.
 *
 * @p:                    the stopped or traced child.
 * @delayed_group_leader: non-zero when eligible_child() returned 2 for @p.
 * @noreap:               non-zero (WNOWAIT) to report without clearing
 *                        p->exit_code.
 * @infop, @stat_addr, @ru: optional user buffers to fill in.
 *
 * Returns 0 with tasklist_lock still held when there is nothing to
 * report.  Every other return happens after tasklist_lock was released:
 * -EAGAIN asks the caller to rescan; otherwise the value is p->pid on
 * success or the error from getrusage()/put_user().
 */
static int wait_task_stopped(struct task_struct *p, int delayed_group_leader,
int noreap, struct siginfo __user *infop,
int __user *stat_addr, struct rusage __user *ru)
{
int retval, exit_code;
// exit_code is zero: there is nothing to report for this child
if (!p->exit_code)
return 0;
// A delayed group leader that is not ptraced is not reported while
// its signal struct still has a group stop in progress
// (group_stop_count > 0).
if (delayed_group_leader && !(p->ptrace & PT_PTRACED) &&
p->signal && p->signal->group_stop_count > 0)
return 0;
// Take a reference before dropping tasklist_lock so that p stays
// valid while its fields are read below.
get_task_struct(p);
read_unlock(&tasklist_lock);
// WNOWAIT: report the state but leave p->exit_code in place
if (unlikely(noreap)) {
pid_t pid = p->pid;
uid_t uid = p->uid;
int why = (p->ptrace & PT_PTRACED) ? CLD_TRAPPED : CLD_STOPPED;
exit_code = p->exit_code;
// The code has been cleared in the meantime, or the task has entered
// an exit state: drop the reference and make the caller rescan.
if (unlikely(!exit_code) || unlikely(p->exit_state))
goto bail_ref;
// Hand the snapshot to wait_noreap_copyout() and return its result
// (helper not shown here; presumably it copies to user space, drops
// the reference and returns the pid -- TODO confirm).
return wait_noreap_copyout(p, pid, uid,
why, exit_code,
infop, ru);
}
write_lock_irq(&tasklist_lock);
// Take exit_code and zero it in a single exchange.  If the task has
// entered an exit state in the meantime, put the code back and treat
// this as "nothing to report".
exit_code = xchg(&p->exit_code, 0);
if (unlikely(p->exit_state)) {
p->exit_code = exit_code;
exit_code = 0;
}
if (unlikely(exit_code == 0)) {
write_unlock_irq(&tasklist_lock);
bail_ref:
put_task_struct(p);
return -EAGAIN;
}
// Unlink p from its parent and link it again (helpers not shown;
// presumably this moves p to the tail of the parent's child list).
remove_parent(p);
add_parent(p);
write_unlock_irq(&tasklist_lock);
// Copy the results out to user space; stop at the first failure.
retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
// Status word: exit_code shifted into bits 8 and up, 0x7f in the low byte.
if (!retval && stat_addr)
retval = put_user((exit_code << 8) | 0x7f, stat_addr);
if (!retval && infop)
retval = put_user(SIGCHLD, &infop->si_signo);
if (!retval && infop)
retval = put_user(0, &infop->si_errno);
if (!retval && infop)
retval = put_user((short)((p->ptrace & PT_PTRACED)
? CLD_TRAPPED : CLD_STOPPED),
&infop->si_code);
if (!retval && infop)
retval = put_user(exit_code, &infop->si_status);
if (!retval && infop)
retval = put_user(p->pid, &infop->si_pid);
if (!retval && infop)
retval = put_user(p->uid, &infop->si_uid);
if (!retval)
retval = p->pid;
// Drop the reference taken above
put_task_struct(p);
BUG_ON(!retval);
return retval;
}
对僵尸进程的操作是由wait_task_zombie()完成的。代码如下:
/*
 * wait_task_zombie - report, and normally reap, a child in EXIT_ZOMBIE.
 *
 * Called by do_wait() with tasklist_lock read-held.
 *
 * @p:      the zombie child.
 * @noreap: non-zero (WNOWAIT) to report the exit status without
 *          releasing the child.
 * @infop, @stat_addr, @ru: optional user buffers to fill in.
 *
 * Returns 0 with tasklist_lock still held when @p turned out not to be
 * reportable.  Otherwise tasklist_lock has been released and the value
 * is p->pid on success or the error from getrusage()/put_user().
 */
static int wait_task_zombie(struct task_struct *p, int noreap,
struct siginfo __user *infop,
int __user *stat_addr, struct rusage __user *ru)
{
unsigned long state;
int retval;
int status;
// WNOWAIT: only read the exit information, do not release the child
if (unlikely(noreap)) {
pid_t pid = p->pid;
uid_t uid = p->uid;
int exit_code = p->exit_code;
// note: this 'status' shadows the function-level one
int why, status;
// No longer a zombie: nothing to report
if (unlikely(p->exit_state != EXIT_ZOMBIE))
return 0;
// No exit signal and not ptraced: nothing to report
if (unlikely(p->exit_signal == -1 && p->ptrace == 0))
return 0;
// Take a reference before dropping tasklist_lock
get_task_struct(p);
read_unlock(&tasklist_lock);
// Low 7 bits zero: normal exit, status is in bits 8 and up.
// Otherwise killed by the signal in the low 7 bits; bit 0x80 selects
// CLD_DUMPED instead of CLD_KILLED.
if ((exit_code & 0x7f) == 0) {
why = CLD_EXITED;
status = exit_code >> 8;
} else {
why = (exit_code & 0x80) ? CLD_DUMPED : CLD_KILLED;
status = exit_code & 0x7f;
}
// Hand the snapshot to wait_noreap_copyout() and return its result
// (helper not shown here).
return wait_noreap_copyout(p, pid, uid, why,
status, infop, ru);
}
/*
* Try to move the task's state to DEAD
* only one thread is allowed to do this:
*/
// Exchange exit_state for EXIT_DEAD and inspect the previous value
state = xchg(&p->exit_state, EXIT_DEAD);
// Previous state was not EXIT_ZOMBIE (it must then have been
// EXIT_DEAD): nothing to do here
if (state != EXIT_ZOMBIE) {
BUG_ON(state != EXIT_DEAD);
return 0;
}
// No exit signal and not ptraced: nothing to report
if (unlikely(p->exit_signal == -1 && p->ptrace == 0)) {
return 0;
}
// real_parent == parent (presumably: p is not currently re-parented to
// a tracer) and p still has a signal struct: fold p's accounting into
// the parent's cumulative child counters.
if (likely(p->real_parent == p->parent) && likely(p->signal)) {
struct signal_struct *psig;
struct signal_struct *sig;
// Updates to the parent's signal struct are done under its siglock
spin_lock_irq(&p->parent->sighand->siglock);
psig = p->parent->signal;
sig = p->signal;
psig->cutime =
cputime_add(psig->cutime,
cputime_add(p->utime,
cputime_add(sig->utime,
sig->cutime)));
psig->cstime =
cputime_add(psig->cstime,
cputime_add(p->stime,
cputime_add(sig->stime,
sig->cstime)));
psig->cmin_flt +=
p->min_flt + sig->min_flt + sig->cmin_flt;
psig->cmaj_flt +=
p->maj_flt + sig->maj_flt + sig->cmaj_flt;
psig->cnvcsw +=
p->nvcsw + sig->nvcsw + sig->cnvcsw;
psig->cnivcsw +=
p->nivcsw + sig->nivcsw + sig->cnivcsw;
psig->cinblock +=
task_io_get_inblock(p) +
sig->inblock + sig->cinblock;
psig->coublock +=
task_io_get_oublock(p) +
sig->oublock + sig->coublock;
spin_unlock_irq(&p->parent->sighand->siglock);
}
/*
* Now we are sure this task is interesting, and no other
* thread can reap it because we set its state to EXIT_DEAD.
*/
// Drop tasklist_lock, then copy the exit information to user space;
// stop at the first failure.
read_unlock(&tasklist_lock);
retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
// Use the group exit code when the whole group exited together
status = (p->signal->flags & SIGNAL_GROUP_EXIT)
? p->signal->group_exit_code : p->exit_code;
if (!retval && stat_addr)
retval = put_user(status, stat_addr);
if (!retval && infop)
retval = put_user(SIGCHLD, &infop->si_signo);
if (!retval && infop)
retval = put_user(0, &infop->si_errno);
if (!retval && infop) {
int why;
if ((status & 0x7f) == 0) {
why = CLD_EXITED;
status >>= 8;
} else {
why = (status & 0x80) ? CLD_DUMPED : CLD_KILLED;
status &= 0x7f;
}
retval = put_user((short)why, &infop->si_code);
if (!retval)
retval = put_user(status, &infop->si_status);
}
if (!retval && infop)
retval = put_user(p->pid, &infop->si_pid);
if (!retval && infop)
retval = put_user(p->uid, &infop->si_uid);
if (retval) {
// TODO: is this safe?
// Copy-out failed: put the child back into EXIT_ZOMBIE and return
// the error.
p->exit_state = EXIT_ZOMBIE;
return retval;
}
retval = p->pid;
// real_parent differs from parent (presumably: p is attached to a
// tracer rather than to its natural parent).
// Article author's note: an exiting child only signals its current parent.
if (p->real_parent != p->parent) {
write_lock_irq(&tasklist_lock);
/* Double-check with lock held. */
if (p->real_parent != p->parent) {
// Detach p from ptrace (helper not shown; presumably this hands p
// back to its real parent).
__ptrace_unlink(p);
// TODO: is this safe?
// Make p a zombie again
p->exit_state = EXIT_ZOMBIE;
/*
* If this is not a detached task, notify the parent.
* If it's still not detached after that, don't release
* it now.
*/
// With an exit signal configured, notify the parent; if the exit
// signal is still set afterwards, clear p so that it is not released
// below.
if (p->exit_signal != -1) {
do_notify_parent(p, p->exit_signal);
if (p->exit_signal != -1)
p = NULL;
}
}
write_unlock_irq(&tasklist_lock);
}
// Release the child unless p was cleared above
if (p != NULL)
release_task(p);
BUG_ON(!retval);
return retval;
}
至此,我们看到了子进程退出之后的完整处理。在此,值得注意的是:子进程在退出的时候会给父进程发送相应的信号(例如SIGCHLD),默认的信号处理函数也会进行相应的处理。
七:等待队列的操作
在这里,我们第一次接触到了等待队列,我们就以上面的代码做为例子来分析一下。
1:申请一个等待队列:
DECLARE_WAITQUEUE():
/*
 * DECLARE_WAITQUEUE(name, tsk) declares a wait_queue_t variable called
 * 'name', initialised by __WAITQUEUE_INITIALIZER():
 *   .private   = tsk (the task this entry refers to)
 *   .func      = default_wake_function (callback run on wake-up)
 *   .task_list = { NULL, NULL } (not linked into any queue yet)
 */
#define DECLARE_WAITQUEUE(name, tsk) \
wait_queue_t name = __WAITQUEUE_INITIALIZER(name, tsk)
#define __WAITQUEUE_INITIALIZER(name, tsk) { \
.private = tsk, \
.func = default_wake_function, \
.task_list = { NULL, NULL } }
default_wake_function()为默认的唤醒处理函数。
2:添加等待队列。
在上面的代码中,有:
add_wait_queue(&current->signal->wait_chldexit,&wait);
它的意思是将wait添加至&current->signal->wait_chldexit中。代码如下:
/*
 * add_wait_queue - link a wait-queue entry into a wait queue.
 *
 * @q:    wait-queue head to add to.
 * @wait: entry to add; its WQ_FLAG_EXCLUSIVE bit is cleared first.
 *
 * The insertion is done under q->lock with the interrupt state saved
 * and restored around it.
 */
void fastcall add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
{
	unsigned long irq_state;

	/* Entries added through this function are never exclusive. */
	wait->flags = wait->flags & ~WQ_FLAG_EXCLUSIVE;

	spin_lock_irqsave(&q->lock, irq_state);
	__add_wait_queue(q, wait);
	spin_unlock_irqrestore(&q->lock, irq_state);
}
3:唤醒操作:
在do_notify_parent()中有这样的代码片段:
……
__wake_up_parent(tsk, tsk->parent);
……
__wake_up_parent()的代码如下:
/*
 * __wake_up_parent - wake waiters on the parent's child-exit wait queue.
 *
 * @p:      the child (not used in this body).
 * @parent: task whose signal->wait_chldexit queue is woken; do_wait()
 *          adds its wait-queue entry to this same queue.
 */
static inline void __wake_up_parent(struct task_struct *p,
struct task_struct *parent)
{
wake_up_interruptible_sync(&parent->signal->wait_chldexit);
}
parent->signal->wait_chldexit这个队列很熟吧?我们在父进程中添加的等待队列就是添加在这里哦。^_^
唤醒队列的操作是由wake_up_interruptible_sync()完成的,代码如下:
wake_up_interruptible_sync() → __wake_up_sync() → __wake_up_common():
/*
 * __wake_up_common - run the wake callback of each entry on a wait queue.
 *
 * @q:            wait-queue head to walk.
 * @mode, @sync, @key: passed unchanged to every entry's ->func callback.
 * @nr_exclusive: number of exclusive entries to wake before stopping.
 *
 * The walk stops as soon as a callback returned non-zero for an entry
 * flagged WQ_FLAG_EXCLUSIVE and that decrement brought @nr_exclusive
 * to zero.  Entries without that flag never end the walk.
 */
static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
			     int nr_exclusive, int sync, void *key)
{
	struct list_head *pos, *following;

	list_for_each_safe(pos, following, &q->task_list) {
		wait_queue_t *entry = list_entry(pos, wait_queue_t, task_list);
		/*
		 * Read the flags before invoking the callback (presumably
		 * because the callback may unlink the entry -- the walk
		 * uses the _safe iterator for the same reason).
		 */
		unsigned entry_flags = entry->flags;

		if (!entry->func(entry, mode, sync, key))
			continue;
		if (!(entry_flags & WQ_FLAG_EXCLUSIVE))
			continue;
		if (--nr_exclusive == 0)
			break;
	}
}
上述操作会遍历整个等待队列,然后运行对应的函数。我们在前面申请等待队列的时候,默认的函数为:default_wake_function()。它会将操作的task放入运行队列,并将状态设为TASK_RUNNING。这个函数等之后我们分析进程切换与调度的时候再来分析。
八:小结
通过分析进程的创建、执行与消亡等过程,可以对进程管理子系统有一个大概的了解。该子系统与其它子系统关系十分密切。对进程资源的管理和释放是理解这个子系统的难点。在下一个小节,我们接着分析进程的切换与调度。