分析Linux信号投递流程
背景
最近在学习CMU的计算机系统课程,CSAPP —— Computer Systems: A Programmer's Perspective
,这英文名估计很多人没听说,但是其书的中文名《深入理解计算机系统》,相信大家一定听过。
我学习的是Fall 2018版,课程schedule地址
在ECF: Signals & Nonlocal Jumps
这一章中提到: 如果进程有信号k处于pending
状态,那么,其它投递到该进程的信号k,将被丢弃。
由于对这块只有个模糊的印象,所以怀着探索精神,决定深入源码一探究竟。
信号投递
不管是glibc的kill函数,还是linux的kill命令,最终都是调用得kill
系统调用,且其函数名为sys_kill
,只不过新版本的内核,从安全性角度考虑,用宏对其做了复杂包装。
一般定义为SYSCALL_DEFINEx
,其中x为参数个数
就拿kill
系统调用来说,其glibc函数原型为:
int kill(pid_t pid, int sig);
可知其参数个数为2
,所以推断在内核中kill
系统调用,其声明为SYSCALL_DEFINE2(kill
得形式,很快就能定位到其所定义的文件kernel/signal.c
。
/**
* sys_kill - send a signal to a process
* @pid: the PID of the process
* @sig: signal to be sent
*/
SYSCALL_DEFINE2(kill, pid_t, pid, int, sig)
{
struct siginfo info;
info.si_signo = sig;
info.si_errno = 0;
info.si_code = SI_USER;
info.si_pid = task_tgid_vnr(current);
info.si_uid = current_uid();
return kill_something_info(sig, &info, pid);
}
其将发送信号的pid
、uid
做了记录,然后转发调用kill_something_info
/*
* kill_something_info() interprets pid in interesting ways just like kill(2).
*
* POSIX specifies that kill(-1,sig) is unspecified, but what we have
* is probably wrong. Should make it like BSD or SYSV.
*/
static int kill_something_info(int sig, struct siginfo *info, pid_t pid)
{
int ret;
if (pid > 0) {
rcu_read_lock();
ret = kill_pid_info(sig, info, find_vpid(pid));
rcu_read_unlock();
return ret;
}
read_lock(&tasklist_lock);
if (pid != -1) {
ret = __kill_pgrp_info(sig, info,
pid ? find_vpid(-pid) : task_pgrp(current));
} else {
int retval = 0, count = 0;
struct task_struct * p;
for_each_process(p) {
if (task_pid_vnr(p) > 1 &&
!same_thread_group(p, current)) {
int err = group_send_sig_info(sig, info, p);
++count;
if (err != -EPERM)
retval = err;
}
}
ret = count ? retval : -ESRCH;
}
read_unlock(&tasklist_lock);
return ret;
}
我们这里假定给特定进程发信号,所以pid > 0
,然后调用kill_pid_info
int kill_pid_info(int sig, struct siginfo *info, struct pid *pid)
{
int error = -ESRCH;
struct task_struct *p;
rcu_read_lock();
retry:
p = pid_task(pid, PIDTYPE_PID);
if (p) {
error = group_send_sig_info(sig, info, p);
if (unlikely(error == -ESRCH))
/*
* The task was unhashed in between, try again.
* If it is dead, pid_task() will return NULL,
* if we race with de_thread() it will find the
* new leader.
*/
goto retry;
}
rcu_read_unlock();
return error;
}
其调用group_send_sig_info
/*
* send signal info to all the members of a group
*/
int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
{
int ret;
rcu_read_lock();
ret = check_kill_permission(sig, info, p);
rcu_read_unlock();
if (!ret && sig)
ret = do_send_sig_info(sig, info, p, true);
return ret;
}
其调用do_send_sig_info
,并将第4个参数设置为true
int do_send_sig_info(int sig, struct siginfo *info, struct task_struct *p,
bool group)
{
unsigned long flags;
int ret = -ESRCH;
if (lock_task_sighand(p, &flags)) {
ret = send_signal(sig, info, p, group);
unlock_task_sighand(p, &flags);
}
return ret;
}
记住,group
参数为true
static int send_signal(int sig, struct siginfo *info, struct task_struct *t,
int group)
{
int from_ancestor_ns = 0;
#ifdef CONFIG_PID_NS
from_ancestor_ns = si_fromuser(info) &&
!task_pid_nr_ns(current, task_active_pid_ns(t));
#endif
return __send_signal(sig, info, t, group, from_ancestor_ns);
}
__send_signal
关键函数来了,就是这货
static int __send_signal(int sig, struct siginfo *info, struct task_struct *t,
int group, int from_ancestor_ns)
{
struct sigpending *pending;
struct sigqueue *q;
int override_rlimit;
trace_signal_generate(sig, info, t);
assert_spin_locked(&t->sighand->siglock);
if (!prepare_signal(sig, t, from_ancestor_ns))
return 0;
pending = group ? &t->signal->shared_pending : &t->pending;
/*
* Short-circuit ignored signals and support queuing
* exactly one non-rt signal, so that we can get more
* detailed information about the cause of the signal.
*/
if (legacy_queue(pending, sig))
return 0;
/*
* fast-pathed signals for kernel-internal things like SIGSTOP
* or SIGKILL.
*/
if (info == SEND_SIG_FORCED)
goto out_set;
/*
* Real-time signals must be queued if sent by sigqueue, or
* some other real-time mechanism. It is implementation
* defined whether kill() does so. We attempt to do so, on
* the principle of least surprise, but since kill is not
* allowed to fail with EAGAIN when low on memory we just
* make sure at least one signal gets delivered and don't
* pass on the info struct.
*/
if (sig < SIGRTMIN)
override_rlimit = (is_si_special(info) || info->si_code >= 0);
else
override_rlimit = 0;
q = __sigqueue_alloc(sig, t, GFP_ATOMIC | __GFP_NOTRACK_FALSE_POSITIVE,
override_rlimit);
if (q) {
list_add_tail(&q->list, &pending->list);
switch ((unsigned long) info) {
case (unsigned long) SEND_SIG_NOINFO:
q->info.si_signo = sig;
q->info.si_errno = 0;
q->info.si_code = SI_USER;
q->info.si_pid = task_tgid_nr_ns(current,
task_active_pid_ns(t));
q->info.si_uid = current_uid();
break;
case (unsigned long) SEND_SIG_PRIV:
q->info.si_signo = sig;
q->info.si_errno = 0;
q->info.si_code = SI_KERNEL;
q->info.si_pid = 0;
q->info.si_uid = 0;
break;
default:
copy_siginfo(&q->info, info);
if (from_ancestor_ns)
q->info.si_pid = 0;
break;
}
} else if (!is_si_special(info)) {
if (sig >= SIGRTMIN && info->si_code != SI_USER) {
/*
* Queue overflow, abort. We may abort if the
* signal was rt and sent by user using something
* other than kill().
*/
trace_signal_overflow_fail(sig, group, info);
return -EAGAIN;
} else {
/*
* This is a silent loss of information. We still
* send the signal, but the *info bits are lost.
*/
trace_signal_lose_info(sig, group, info);
}
}
out_set:
signalfd_notify(t, sig);
sigaddset(&pending->signal, sig);
complete_signal(sig, t, group);
return 0;
}
由于group
为true
,所以
pending = group ? &t->signal->shared_pending : &t->pending;
选择将信号投递到进程的shared_pending
中
接着往下看
if (legacy_queue(pending, sig))
return 0;
static inline int legacy_queue(struct sigpending *signals, int sig)
{
return (sig < SIGRTMIN) && sigismember(&signals->signal, sig);
}
可以看到,如果sig < SIGRTMIN
,也就是常说的传统信号、非实时信号、非可靠信号。 并且,该进程已经投递过该信号sigismember(&signals->signal, sig)
, legacy_queue
将返回true
,这时,__send_signal
将直接结束,不会再次投递该信号。
至此心中的谜团解开
而对于sig >= SIGRTMIN
,也就是常说的实时信号,会进行排队。