Linux: 定时器实现简析

JiMoKuangXiangQu

已于 2024-03-21 10:00:34 修改

阅读量842

点赞数

分类专栏： # 时间 & clk 文章标签： linux timer

于 2023-05-11 16:26:38 首次发布

本文链接：https://blog.csdn.net/JiMoKuangXiangQu/article/details/130610856

版权

时间 & clk 专栏收录该内容

3 篇文章 0 订阅

订阅专栏

1. 前言

限于作者能力水平，本文可能存在谬误，因此而给读者带来的损失，作者不做任何承诺。

2. Linux Timer 使用和实现

2.1 线程组内共享的唯一定时器

Linux 内核提供一个线程组内所有进程共享的 hrtimer 给用户空间使用，这个 timer 在定时到期时，会发出 SIGALRM 信号。

2.1.1 用户空间接口

2.1.1.1 alarm()

alarm() 函数原型：

#include <unistd.h>
unsigned int alarm(unsigned int seconds);

示例代码：

#include <unistd.h>
#include <signal.h>
#include <stdio.h>

/*
 * Signal handler used by the demo: prints the number of the signal that
 * was delivered.
 * NOTE: stdio is not async-signal-safe; it is used here only to keep the
 * example short.
 */
void sighandler(int signo)
{
	fprintf(stdout, "signo = %d\n", signo);
}

/*
 * Demo entry point: arm a one-shot alarm and wait for SIGALRM to be
 * delivered to sighandler().
 */
int main(void)
{
	const unsigned int alarm_secs = 1;	/* delay until SIGALRM is sent */
	const unsigned int wait_secs = 3;	/* upper bound on how long we wait */

	/* Install the handler before arming the alarm. */
	signal(SIGALRM, sighandler);
	alarm(alarm_secs);

	/* sleep() returns early once a signal handler has run. */
	sleep(wait_secs);

	return 0;
}

2.1.1.2 setitimer()

函数原型和相关数据结构：

/* Timer setting passed to setitimer(): first expiration plus repeat period. */
struct itimerval {
    /* Period for the second and later expirations; a zero value means the timer is not restarted */
    struct timeval it_interval; /* next value */
    /* Time until the first expiration */
    struct timeval it_value;    /* current value */
};

/* Time value split into whole seconds and microseconds. */
struct timeval {
    time_t      tv_sec;         /* seconds */
    suseconds_t tv_usec;        /* microseconds */
};

#include <sys/time.h>

int setitimer(int which, const struct itimerval *new_value,
              struct itimerval *old_value);

示例代码：

#include <sys/time.h>
#include <signal.h>
#include <unistd.h>
#include <stdio.h>

/*
 * Signal handler used by the demo: prints the number of the signal that
 * was delivered.
 * NOTE: stdio is not async-signal-safe; it is used here only to keep the
 * example short.
 */
void sighandler(int signo)
{
	fprintf(stdout, "signo = %d\n", signo);
}

/*
 * Demo entry point: arm ITIMER_REAL with a 1 s initial delay and a 100 ms
 * period, then wait long enough to observe the periodic SIGALRM deliveries.
 *
 * Returns 0 on success, 1 if the handler or the timer could not be set up.
 */
int main(void)
{
	/* 1 s initial delay + 20 periods of 100 ms is roughly 3 s in total. */
	const int expiries_to_wait = 21;
	struct itimerval itv;
	int i;

	if (signal(SIGALRM, sighandler) == SIG_ERR) {
		perror("signal");
		return 1;
	}

	itv.it_value.tv_sec = 1; /* first expired time */
	itv.it_value.tv_usec = 0;

	itv.it_interval.tv_sec = 0;
	itv.it_interval.tv_usec = 100 * 1000; /* period in 100ms */

	if (setitimer(ITIMER_REAL, &itv, NULL) == -1) {
		perror("setitimer");
		return 1;
	}

	/*
	 * A single sleep(3) would return as soon as the first SIGALRM handler
	 * has run, so the program would exit before any periodic expiry.
	 * Re-calling sleep() with its return value is no better: the remaining
	 * time is reported in whole seconds and a 100 ms period keeps
	 * interrupting it. Waiting for a fixed number of signals with pause()
	 * avoids both problems.
	 */
	for (i = 0; i < expiries_to_wait; i++)
		pause();

	return 0;
}

2.1.2 `alarm()` 和 `setitimer()` 的实现

本小节对线程组内共享 timer 的实现进行分析，对实现细节不感兴趣的读者，可跳过此节。

2.1.2.1 timer 的定义

/* include/linux/sched.h */

struct task_struct {
	...
	/*
	 * 同一线程组内的所有进程，使用同一个 signal_struct 处理共享信号
	 * (信号的目标进程不是线程组内特定线程)。
	 * 这里的 @signal 指向处理共享信号的、线程组共享的 signal_struct 。
	 */
	struct signal_struct  *signal;
	...
};

/* include/linux/sched/signal.h */

struct signal_struct {
	...
#ifdef CONFIG_POSIX_TIMERS	
	/* ITIMER_REAL timer for the process */
	struct hrtimer real_timer;
	ktime_t it_real_incr;
#endif	
	...
	struct pid *leader_pid; /* 指代线程组 leader 进程 */
	...
};

struct signal_struct 包含的 struct hrtimer real_timer; ，就是线程组内共享的 timer ，也是 alarm() 和 setitimer() 使用的 timer 。

2.1.2.2 timer 的初始化

这个 timer 在线程组 leader 进程（struct signal_struct 内 struct pid *leader_pid; 指代的进程）创建时初始化：

/* kernel/fork.c */

/* 进程创建时，不管是 fork(), vfork(), clone() 都要调用 copy_process() */
copy_process()
	p = dup_task_struct(current, node); /* 创建进程 task_struct */
	
	...
	
	retval = copy_signal(clone_flags, p);
		struct signal_struct *sig;
		
		sig = kmem_cache_zalloc(signal_cachep, GFP_KERNEL);
 		tsk->signal = sig;
 		
	#ifdef CONFIG_POSIX_TIMERS
		/* 初始化线程组内共享的 hrtimer */
		hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 		sig->real_timer.function = it_real_fn; /* 定时器超期回调设置: 该回调发送 SIGALRM */
	#endif
		...

	if (likely(p->pid)) {
		if (thread_group_leader(p)) {
			...
			p->signal->leader_pid = pid; /* 设置线程组 leader */
			...
		}
	}

再来看线程组内非 leader 进程的创建过程，非 leader 进程 是由 clone() 系统调用 发起(通常是用户空间的 pthread_create() 间接发起)，传递了 CLONE_THREAD 标志位。

/* kernel/fork.c */

copy_process()
	p = dup_task_struct(current, node); /* 创建进程 task_struct */

	...
	
	retval = copy_signal(clone_flags, p);
		if (clone_flags & CLONE_THREAD) /* 共享 leader 进程创建的 signal_struct */
  			return 0;
  		...

	...

	p->pid = pid_nr(pid);
 	if (clone_flags & CLONE_THREAD) { /* 线程组 【非 leader 进程】 创建路径 (即 clone() 场景) */
 		p->exit_signal = -1;
 		p->group_leader = current->group_leader; /* 设置新进程的 线程组 leader */
 		p->tgid = current->tgid;
 	} else {
 		...
 	}

	...

	if (likely(p->pid)) {
		if (thread_group_leader(p)) { /* 线程组 【leader 进程】 创建路径，前面已经描述 */
			...
		} else { /* 线程组 【非 leader 进程】 创建路径 (即 clone() 场景) */
			...
			/* 添加新的 【非 leader 进程】 到 【leader 进程】 的线程组 */
			list_add_tail_rcu(&p->thread_group,
       				&p->group_leader->thread_group);
       			...
		}
	}

2.1.2.3 timer 的配置

/* kernel/time/itimer.c */

/* alarm() */
sys_alarm()
	alarm_setitimer(seconds)
		it_new.it_value.tv_sec = seconds;
		it_new.it_value.tv_usec = 0;
		it_new.it_interval.tv_sec = it_new.it_interval.tv_usec = 0; /* 这意味 alarm() 仅触发一次 timer */

		do_setitimer(ITIMER_REAL, &it_new, &it_old); /* 详见后面的分析 */

/* setitimer() */
sys_setitimer()
	do_setitimer(which, &set_buffer, ovalue ? &get_buffer : NULL) /* 详见后面的分析 */

/* 仅分析 ITIMER_REAL 情形，感兴趣的读者可自行分析其它情形 */
do_setitimer()
	switch (which) {
	case ITIMER_REAL: /* 用真实时间计时 timer, timer 时间到期后发送 SIGALRM 信号 */
	again:
		spin_lock_irq(&tsk->sighand->siglock);
		timer = &tsk->signal->real_timer;
		
		/* timer 当前可能正在运行，如果正在运行则先停下它，重新配置之后再启动 */
		if (hrtimer_try_to_cancel(timer) < 0) {
			spin_unlock_irq(&tsk->sighand->siglock);
   			goto again;
		}
		expires = timeval_to_ktime(value->it_value);
		if (expires != 0) {
			tsk->signal->it_real_incr =
			    timeval_to_ktime(value->it_interval);
			   /* 在当前 CPU 上 启动 或 重启 hrtimer */
			   hrtimer_start(timer, expires, HRTIMER_MODE_REL);
		} else /* struct itimerval::it_value 为 0，不启动 timer */
			tsk->signal->it_real_incr = 0;
		spin_unlock_irq(&tsk->sighand->siglock);
  		break;
  	...
	}
	return 0;

2.1.2.4 timer 的触发

定时超期到达时，将进入 it_real_fn() 回调，发送 SIGALRM 信号：

/* kernel/time/itimer.c */

it_real_fn()
	struct signal_struct *sig =
  		container_of(timer, struct signal_struct, real_timer);

	kill_pid_info(SIGALRM, SEND_SIG_PRIV, sig->leader_pid);

	return HRTIMER_NORESTART;

线程组处理 SIGALRM 信号：

/* arch/arm/kernel/signal.c */
do_work_pending()
	do_signal(regs, syscall)
		if (get_signal(&ksig)) { /* 取信号 */
			...
			handle_signal(&ksig, regs); /* 处理信号 */
		}

/* kernel/signal.c */
get_signal()
	signr = dequeue_signal(current, &current->blocked, &ksig->info);
		signr = __dequeue_signal(&tsk->pending, mask, info, &resched_timer); /* 优先处理进程私有信号 */
		if (!signr) { /* 如果进程没有私有信号要处理 */
			signr = __dequeue_signal(&tsk->signal->shared_pending,
	     	 			mask, info, &resched_timer); /* 取线程组共享信号 */
	     	 #ifdef CONFIG_POSIX_TIMERS
	     	 	if (unlikely(signr == SIGALRM)) { /* itimer 超时到期后，发送的 SIGALRM 信号 */
	     	 		struct hrtimer *tmr = &tsk->signal->real_timer; /* 线程组共享的 timer 对象 */

				if (!hrtimer_is_queued(tmr) &&
				       tsk->signal->it_real_incr != 0) { /* 需要重启 timer */
					/* 设置   hrtimer @timer 的下一次超时时间点 */
					hrtimer_forward(tmr, tmr->base->get_time(),
							tsk->signal->it_real_incr);
					/* 重启 hrtimer */
					hrtimer_restart(tmr);
				}
			}
		#endif
     	 	}

/* arch/arm/kernel/signal.c */
handle_signal()
	...
	/* 调用用户空间注册的信号处理接口 sighandler() */

更多的信号处理细节，可参考博文 Linux信号处理简析。

2.1.3 `alarm()` 和 `setitimer()` 小结

2.1.3.1 共同的限制

alarm() 和 setitimer() 使用的都是内核提供给线程组内所有进程共享的 hrtimer ，也就是它们使用的是同一个 timer ，所以它们彼此之间是互斥的，不能同时使用；另外，这种 timer 只有一个，所以不适用于需要多个 timer 的场景。

2.1.3.2 彼此的差异

alarm() 的精度是秒级；
setitimer() 可以指定微秒级的精度，但这还要取决于系统支持的最小精度；
alarm() 仅触发一次，setitimer() 可配置成周期触发。

2.2 posix timer

当需要多个独立的 timer 时，或者想选择非 SIGALRM 方式处理 timer 超时事件时，posix timer 给你提供了另一个选择。posix timer 提供纳秒级的精度，能够达到的精度受硬件精度限制。

2.2.1 用户接口

使用 posix timer 至少需要用到 timer_create() 和 timer_settime() 接口，先看下它们的原型定义：

#include <signal.h>           /* Definition of SIGEV_* constants */
#include <time.h>

int timer_create(clockid_t clockid, struct sigevent *restrict sevp,
		 timer_t *restrict timerid); /* 创建 timer */

#include <time.h>

int timer_delete(timer_t timerid); /* 停止 和 删除 timer */

int timer_settime(timer_t timerid, int flags,
		  const struct itimerspec *restrict new_value,
		  struct itimerspec *restrict old_value); /* 启动 timer */
int timer_gettime(timer_t timerid, struct itimerspec *curr_value); /* 同步读取 timer */

int timer_getoverrun(timer_t timerid); /* 读取 timer 的超限 (overrun) 次数 */

posix timer 可以以多种方式来处理超时事件，先看下信号方式。

2.2.1.1 超时事件信号处理方式

示例代码（代码来源于 man 手册），使用信号（本例为 SIGRTMIN）处理 timer 超时事件：

/* posixtimer_demo.c */

#include <stdint.h>
#include <stdlib.h>
#include <unistd.h>
#include <stdio.h>
#include <signal.h>
#include <time.h>

#define CLOCKID CLOCK_REALTIME
#define SIG SIGRTMIN

#define errExit(msg)    do { perror(msg); exit(EXIT_FAILURE); \
                       } while (0)

/*
 * Print the payload carried by a timer signal: the sival_ptr value, the
 * timer ID it points to, and that timer's overrun count.
 * Terminates the process via errExit() if timer_getoverrun() fails.
 */
static void
print_siginfo(siginfo_t *si)
{
	timer_t *timer_id_ptr = si->si_value.sival_ptr;
	int overruns;

	printf("    sival_ptr = %p; ", si->si_value.sival_ptr);
	printf("    *sival_ptr = %#jx\n", (uintmax_t) *timer_id_ptr);

	overruns = timer_getoverrun(*timer_id_ptr);
	if (overruns == -1)
		errExit("timer_getoverrun");

	/* Only reached on success: errExit() does not return. */
	printf("    overrun count = %d\n", overruns);
}

/*
 * SA_SIGINFO-style handler for the timer signal: reports the signal number,
 * dumps its siginfo payload, then sets the signal's disposition to SIG_IGN.
 */
static void
handler(int sig, siginfo_t *si, void *uc)
{
	(void) uc;	/* the ucontext argument is not used by this demo */

	/*
	 * printf() is not async-signal-safe (see signal-safety(7)) and must
	 * not be called from a handler in production code; it is used here
	 * only as a simple way to show that the handler ran.
	 */
	printf("Caught signal %d\n", sig);
	print_siginfo(si);

	/* Ignore any further instance of this signal. */
	signal(sig, SIG_IGN);
}

int
main(int argc, char *argv[])
{
	timer_t timerid;
	struct sigevent sev;
	struct itimerspec its;
	long long freq_nanosecs;
	sigset_t mask;
	struct sigaction sa;

	if (argc != 3) {
		fprintf(stderr, "Usage: %s <sleep-secs> <freq-nanosecs>\n",
			argv[0]);
		exit(EXIT_FAILURE);
	}

	/* Establish handler for timer signal. */
	printf("Establishing handler for signal %d\n", SIG);
	sa.sa_flags = SA_SIGINFO;
	sa.sa_sigaction = handler;
	sigemptyset(&sa.sa_mask);
	if (sigaction(SIG, &sa, NULL) == -1)
		errExit("sigaction");
	
	/* Block timer signal temporarily. */
	printf("Blocking signal %d\n", SIG);
	sigemptyset(&mask);
	sigaddset(&mask, SIG);
	if (sigprocmask(SIG_SETMASK, &mask, NULL) == -1)
		errExit("sigprocmask");
	
	/* Create the timer. */
	sev.sigev_notify = SIGEV_SIGNAL;
	sev.sigev_signo = SIG;
	sev.sigev_value.sival_ptr = &timerid;
	if (timer_create(CLOCKID, &sev, &timerid) == -1)
		errExit("timer_create");
	
	printf("timer ID is %#jx\n", (uintmax_t) timerid);

	/* Start the timer. */
	freq_nanosecs = atoll(argv[2]);
	its.it_value.tv_sec = freq_nanosecs / 1000000000;
	its.it_value.tv_nsec = freq_nanosecs % 1000000000;
	its.it_interval.tv_sec = its.it_value.tv_sec;
	its.it_interval.tv_nsec = its.it_value.tv_nsec;

	if (timer_settime(timerid, 0, &its, NULL) == -1)
		errExit("timer_settime");
	
	/* Sleep for a while; meanwhile, the timer may expire
	   multiple times. */
	printf("Sleeping for %d seconds\n", atoi(argv[1]));
	sleep(atoi(argv[1]));

	/* Unlock the timer signal, so that timer notification
	   can be delivered. */
	printf("Unblocking signal %d\n", SIG);
	if (sigprocmask(SIG_UNBLOCK, &mask, NULL) == -1)
		errExit("sigprocmask");
	
	exit(EXIT_SUCCESS);
}

简要的分析下测试代码的逻辑：

1. 通过 sigaction() 函数注册定时器信号（本例为 SIGRTMIN，即代码中的 SIG）的处理接口
2. 通过 sigprocmask() 接口屏蔽该信号
3. 通过 timer_create() 创建并初始化一个 posix timer
4. 通过 timer_settime() 启动 posix timer
5. 睡眠一段时间，期间 timer 可能多次到期，信号因被屏蔽而保持挂起
6. 通过 sigprocmask() 解除对该信号的屏蔽，让程序去处理信号

这中间，可能给人造成困惑的地方，是程序初始时屏蔽了定时器信号（本例为 SIGRTMIN），直到程序最后才解除屏蔽。你可能会想到，解除屏蔽之前，如果 timer 到期，那么发送的信号会不会丢掉？答案是不会：屏蔽信号，只是让程序暂不处理信号，信号仍会挂起在进程的信号队列里。当然，对于传统（非实时）信号，如果进程已有同一信号挂起未被处理，后续的同一信号会被丢弃；对 posix timer 而言，挂起期间多出的到期次数可通过 timer_getoverrun() 读取。更多信号处理细节可参考博文 Linux信号处理简析。
有必要说明下上述测试代码的编译和运行步骤：

$ gcc -o posixtimer_demo posixtimer_demo.c -lrt
$ ./posixtimer_demo 1 100

2.2.1.2 超时事件处理方式小结

上面的代码演示了以信号方式处理 posix timer 超时事件，那么由什么来决定 posix timer 超时事件的处理方式呢？答案是 timer_create() 调用期间，参数 struct sigevent 的 sigev_notify 成员的值：

/* Notification description passed to timer_create(): selects how a timer expiration is reported. */
typedef struct sigevent {
	sigval_t sigev_value;
	int sigev_signo; /* Signal sent when @sigev_notify is SIGEV_SIGNAL or SIGEV_THREAD_ID */
	/*
	 * SIGEV_NONE     : no asynchronous handling of the expiration; the timer
	 *                  can be read with timer_gettime().
	 * SIGEV_SIGNAL   : send signal @sigev_signo on expiration.
	 *                  The user must install a signal handler; the kernel sets
	 *                  siginfo_t::si_code to SI_TIMER.
	 * SIGEV_THREAD   : create a new thread whose entry function is
	 *                  _sigev_thread::_function.
	 * SIGEV_THREAD_ID: like SIGEV_SIGNAL, but the signal is sent to the thread
	 *                  given by _sigev_un::_tid, which must be in the same
	 *                  thread group as the current process.
	 */
	int sigev_notify;
	union {
		int _pad[SIGEV_PAD_SIZE];
		int _tid; /* Target thread ID for the signal when @sigev_notify is SIGEV_THREAD_ID */

		/* Thread-creation parameters used when @sigev_notify is SIGEV_THREAD */
		struct {
			void (*_function)(sigval_t);
			void *_attribute; /* really pthread_attr_t */
		} _sigev_thread;
	} _sigev_un;
} sigevent_t;

SIGEV_NONE: 
非异步处理方式，通过 timer_gettime() 读取超时事件。

SIGEV_SIGNAL，SIGEV_THREAD_ID：
都是经由信号（sigev_signo 指定的信号）处理超时事件。
不过 SIGEV_THREAD_ID 将信号发送到线程组内特定线程；
而 SIGEV_SIGNAL 是将信号发送到线程组。

SIGEV_THREAD：
创建独立的线程来处理超时事件。

2.2.2 实现

本小节是分析 posix timer 实现，对实现细节不感兴趣的读者可跳过此节。下面就 SIGEV_SIGNAL 超时事件方式来进行分析，对其它方式感兴趣的读者，可自行阅读源码进行分析。
首先看一下 posix timer 线程组相关数据结构：

/* include/linux/sched.h */

struct task_struct {
	...
	/*
	 * 同一线程组内的所有进程，使用同一个 signal_struct 处理共享信号
	 * (信号的目标进程不是线程组内特定线程)。
	 * 这里的 @signal 指向处理共享信号的、线程组共享的 signal_struct 。
	 */
	struct signal_struct  *signal; 
	...
};

/* include/linux/sched/signal.h */

struct signal_struct {
	...
#ifdef CONFIG_POSIX_TIMERS
	/* POSIX.1b Interval Timers */
	/*
	 * posix_timers 列表里的 posix timer id, 它是线程组内共享的.
	 * 线程组 leader 进程创建时，初始值为 0 .
	 * @posix_timer_id 指向下一个可用的 posix timer id
	 */
	int   posix_timer_id;
	struct list_head posix_timers; /* posix timer 列表 (timer_create()) */
	...
#endif
	...
};

2.2.2.1 创建 timer

2.2.2.1.1 用户空间部分

用户空间部分，主要分为 (1) 使用信号接口处理 和 (2) 使用线程处理 定时器到期事件两大类情形，看代码细节：

/* glibc 代码 */

/*
 * Abridged glibc timer_create() ("..." marks elided code).
 *
 * Creates a POSIX timer on @clock_id and stores its ID in *@timerid.
 * Returns 0 on success and -1 on failure.
 *
 * Two paths:
 *  - @evp is NULL or its sigev_notify is not SIGEV_THREAD: the request is
 *    passed to the kernel's timer_create system call as is (a NULL @evp
 *    defaults to SIGEV_SIGNAL with SIGALRM).
 *  - SIGEV_THREAD: a kernel timer is created that signals the helper thread
 *    with SIGTIMER, and a struct timer records the user's callback.
 */
int
___timer_create (clockid_t clock_id, struct sigevent *evp, timer_t *timerid)
{
	...
	if (evp == NULL
		|| __builtin_expect (evp->sigev_notify != SIGEV_THREAD, 1)) /* not SIGEV_THREAD: kernel notifies directly */
	{
		struct sigevent local_evp;

		if (evp == NULL)
		{
			/* Default notification: SIGALRM sent as a plain signal. */
			local_evp.sigev_notify = SIGEV_SIGNAL;
			local_evp.sigev_signo = SIGALRM;
			local_evp.sigev_value.sival_ptr = NULL;

			evp = &local_evp;
		}

		kernel_timer_t ktimerid;
		if (INLINE_SYSCALL_CALL (timer_create, syscall_clockid, evp,
							&ktimerid) == -1)
			return -1;
		
		*timerid = kernel_timer_to_timerid (ktimerid);
	}
	else /* SIGEV_THREAD: expiration is handled in a thread */
	{
		/* Create the helper thread.  */
		/* pthread_once: the helper thread is started at most once, not once per timer. */
		__pthread_once (&__timer_helper_once, __timer_start_helper_thread);
		...

		/* Allocate the user-space timer object before filling it in. */
		struct timer *newp = malloc (sizeof (struct timer));
		...

		/* Copy the thread parameters the user provided.  */
		newp->sival = evp->sigev_value;
		newp->thrfunc = evp->sigev_notify_function; /* user's thread callback */

		...

		/* Create the event structure for the kernel timer.  */
		struct sigevent sev =
		{
			.sigev_value.sival_ptr = newp,
			.sigev_signo = SIGTIMER,
			.sigev_notify = SIGEV_SIGNAL | SIGEV_THREAD_ID,
			._sigev_un = { ._pad = { [0] = __timer_helper_tid } }
		};

		/* Create the timer.  */
		int res;
		res = INTERNAL_SYSCALL_CALL (timer_create, syscall_clockid, &sev,
							&newp->ktimerid);
		...

		/* Add to the queue of active timers with thread delivery.  */
		/*
		 * Every SIGEV_THREAD timer is put on the
		 * __timer_active_sigev_thread list. Per the article author, all of
		 * them share one entry function, timer_helper_thread(), which
		 * handles the expiration of every such timer.
		 * The expired timer is identified by the address of its timer
		 * object: that address is handed to the kernel through
		 * sev.sigev_value.sival_ptr and comes back to user space in
		 * siginfo_t::si_ptr when the expiration is signalled.
		 */
		__pthread_mutex_lock (&__timer_active_sigev_thread_lock);
		newp->next = __timer_active_sigev_thread;
		__timer_active_sigev_thread = newp;
		__pthread_mutex_unlock (&__timer_active_sigev_thread_lock);

		*timerid = timer_to_timerid (newp);
	}

	return 0;
}

2.2.2.1.2 内核空间部分

/* kernel/time/posix-timers.c */

sys_timer_create() /* timer_create() */
	sigevent_t event;
	
	if (copy_from_user(&event, timer_event_spec, sizeof (event)))
		return -EFAULT;
	return do_timer_create(which_clock, &event, created_timer_id);

do_timer_create()
	const struct k_clock *kc = clockid_to_kclock(which_clock); /* 获取基准时钟 */
	struct k_itimer *new_timer;

	new_timer = alloc_posix_timer(); /* 创建 posix timer 对象 */
		struct k_itimer *tmr;
		tmr = kmem_cache_zalloc(posix_timers_cache, GFP_KERNEL);
		tmr->sigq = sigqueue_alloc(); /* 创建 timer 信号队列 */
		memset(&tmr->sigq->info, 0, sizeof(siginfo_t));
		return tmr;

	/*
	 * 为新的 posix timer @timer 分配一个 id , 通过哈希公式 hash(sig, sig->posix_timer_id),
	 * 试图将其插入到全局哈希链表 posix_timers_hashtablep[] 中, 如果插入成功, 则返回
	 * 为 posix timer @timer 分配的 id; 否则重新为 @timer 分配一个 id , 再次尝试插入，
	 * 直到插入成功为止。如果尝试了当前所有可能的 id , 仍没能插入成功，则返回 EAGAIN
	 * 错误指示用户重新尝试。
	 */
	new_timer_id = posix_timer_add(new_timer);

	/* 配置 timer */
	new_timer->it_id = (timer_t) new_timer_id;
	new_timer->it_clock = which_clock; /* CLOCK_REALTIME, ... */
	new_timer->kclock = kc; /* 设置 timer 时钟基准: posix_clocks[@it_clock]: clock_realtime, ... */
	new_timer->it_overrun = -1LL;

	if (event) {
		rcu_read_lock();
		new_timer->it_pid = get_pid(good_sigevent(event));
		rcu_read_unlock();
		...
		new_timer->it_sigev_notify     = event->sigev_notify;
  		new_timer->sigq->info.si_signo = event->sigev_signo;
  		new_timer->sigq->info.si_value = event->sigev_value;
	} else {
		...
	}

	new_timer->sigq->info.si_tid   = new_timer->it_id;
	new_timer->sigq->info.si_code  = SI_TIMER; /* 设定 si_code 为 SI_TIMER , 表示信号队列用于 timer 超时 */

	/* 从 @created_timer_id 返回 posix timer id @new_timer_id */
	if (copy_to_user(created_timer_id,
			&new_timer_id, sizeof (new_timer_id))) {
		...
	}

	/* 时钟基准的 timer 初始化 */
	error = kc->timer_create(new_timer) = common_timer_create()
		hrtimer_init(&new_timer->it.real.timer, new_timer->it_clock, 0)
	
	spin_lock_irq(&current->sighand->siglock);
	new_timer->it_signal = current->signal;
	/*
	 * 添加 posix timer 到 当前进程 所在 线程组 的 posix timer 列表:
	 * task_struct::signal::posix_timers
	 */
	list_add(&new_timer->list, &current->signal->posix_timers);
	spin_unlock_irq(&current->sighand->siglock);

	return 0;

2.2.2.2 启动 timer

/* kernel/time/posix-timers.c */

sys_timer_settime() /* timer_settime() */
	if (get_itimerspec64(&new_spec, new_setting))
		return -EFAULT;
	error = do_timer_settime(timer_id, flags, &new_spec, rtn);
	...
	return error;

do_timer_settime()
retry:
	timr = lock_timer(timer_id, &flag);
	kc = timr->kclock; /* posix_clocks[x]: clock_realtime, ... */
	error = kc->timer_set(timr, flags, new_spec64, old_spec64);
		common_timer_set()
			/* Prevent rearming by clearing the interval */
			timr->it_interval = 0;
			if (kc->timer_try_to_cancel(timr) < 0) /* 停止可能正在运行的 timer */
				return TIMER_RETRY;
			timr->it_active = 0;
			timr->it_requeue_pending = (timr->it_requeue_pending + 2) &
				~REQUEUE_PENDING;
			timr->it_overrun_last = 0;
			...
			timr->it_interval = timespec64_to_ktime(new_setting->it_interval); /* 设置 timer 触发周期 */
			expires = timespec64_to_ktime(new_setting->it_value); /* 计算第1次的超期时间 */
			sigev_none = timr->it_sigev_notify == SIGEV_NONE;

			/* 启动 timer */
			kc->timer_arm(timr, expires, flags & TIMER_ABSTIME, sigev_none);
				common_hrtimer_arm()
					mode = absolute ? HRTIMER_MODE_ABS : HRTIMER_MODE_REL;
					hrtimer_init(&timr->it.real.timer, timr->it_clock, mode);
					timr->it.real.timer.function = posix_timer_fn; /* posix hrtimer 超时回调 */
					hrtimer_set_expires(timer, expires);
					if (!sigev_none)
						hrtimer_start_expires(timer, HRTIMER_MODE_ABS); /* 启动 hrtimer */
			timr->it_active = !sigev_none;
			return 0;
	unlock_timer(timr, flag);
	
	return error;

2.2.2.3 超时事件触发

/* kernel/time/posix-timers.c */

posix_timer_fn() /* posix timer 超期回调 */
	struct k_itimer *timr;

	timr = container_of(timer, struct k_itimer, it.real.timer);
	spin_lock_irqsave(&timr->it_lock, flags);

	timr->it_active = 0;
	if (timr->it_interval != 0) /* timr->it_interval != 0 指示要重启 timer */
		si_private = ++timr->it_requeue_pending;
		
	if (posix_timer_event(timr, si_private)) { /* 发送 timer 超时事件信号 */
		...
	}

	unlock_timer(timr, flags);
	return ret;

/* 发送 timer 超时事件信号 */
posix_timer_event()
	timr->sigq->info.si_sys_private = si_private;

	rcu_read_lock();
	task = pid_task(timr->it_pid, PIDTYPE_PID); /* 信号目标进程 */
	if (task) {
		/* SIGEV_THREAD_ID 指示将信号发送给特定进程，否则发送给线程组 */
		shared = !(timr->it_sigev_notify & SIGEV_THREAD_ID);
		/* 将 posix timer 超时信号 放置到目标进程 @task 的挂起信号队列 */
		ret = send_sigqueue(timr->sigq, task, shared);
	}
	rcu_read_unlock();
	return ret > 0;

2.2.2.4 超时事件处理

/* arch/arm/kernel/signal.c */
do_work_pending()
	do_signal(regs, syscall)
		if (get_signal(&ksig)) { /* 取信号 */
			...
			handle_signal(&ksig, regs); /* 处理信号 */
		}

/* kernel/signal.c */
get_signal() /* 取 timer 超时信号，根据 posix timer 配置情况，【可能】重启 posix timer */
	signr = dequeue_signal(current, &current->blocked, &ksig->info);
		bool resched_timer = false; /* 决定是否重启 posix timer */

		signr = __dequeue_signal(&tsk->pending, mask, info, &resched_timer); /* 优先处理进程私有信号 */
		if (!signr) { /* 如果进程没有私有信号要处理 */
			signr = __dequeue_signal(&tsk->signal->shared_pending,
					mask, info, &resched_timer); /* 取线程组共享信号 */
			...
		}
		
		...
		
#ifdef CONFIG_POSIX_TIMERS
	if (resched_timer) { /* 重启 posix timer */
		spin_unlock(&tsk->sighand->siglock);
		posixtimer_rearm(info);
		spin_lock(&tsk->sighand->siglock);
	}
#endif

/* 从信号队列取出信号时，决定是否重启 posix timer 的逻辑 */
__dequeue_signal()
	int sig = next_signal(pending, mask);
	
	if (sig)
		collect_signal(sig, pending, info, resched_timer);
	return sig;

collect_signal()
	struct sigqueue *q, *first = NULL;

	list_for_each_entry(q, &list->list, list) {
		if (q->info.si_signo == sig) {
			if (first)
				goto still_pending;
			first = q;
		}
	}
	
	...

	if (first) {
still_pending:
		list_del_init(&first->list);
		copy_siginfo(info, &first->info);

		/* 
		 * 看到了吗？这个就是 posix timer 的信号： 
		 * . 信号队列 SIGQUEUE_PREALLOC 预分配标记，创建 posix timer 时预分配的信号队列：
		 *  alloc_posix_timer() -> sigqueue_alloc() -> q->flags |= SIGQUEUE_PREALLOC;
		 * . info->si_code == SI_TIMER: 也是在创建  posix timer 时设置
		 * . info->si_sys_private： 触发 timer 超时时经由 posix_timer_event() 设置
		 * 满足这些条件，就会重启 posix timer ，但是为什么？其实很简单，从 timer 超时处
		 * 理代码路径：
		 * posix_timer_fn()
		 * 	if (timr->it_interval != 0) // timr->it_interval != 0 指示要重启 timer
		 * 		si_private = ++timr->it_requeue_pending;
		 * 	posix_timer_event(timr, si_private)
		 * 		timr->sigq->info.si_sys_private = si_private;
		 * 可以看到，info->si_sys_private 的值，是 timer_settime() 调用参数
		 * struct itimerspec::it_interval 的反馈：
		 * struct itimerspec::it_interval != 0 (表示要周期重启 posix timer)，
		 * 则 info->si_sys_private != 0 。
		 */
		*resched_timer =
			(first->flags & SIGQUEUE_PREALLOC) &&
			(info->si_code == SI_TIMER) &&
			(info->si_sys_private);
		
		__sigqueue_free(first);
	} else {
		...
	}

handle_signal(&ksig, regs) /* 处理 timer 超时信号 */
	...
	/* 调用 用户空间注册的信号处理接口 */

2.2.3 观察

可通过 cat /proc/<pid>/timers 查看进程的 posix timer 。

$ cat /proc/3700/timers
ID: 0
signal: 34/00007ffd09d64530
notify: signal/pid.3700
ClockID: 0

2.3 timerfd

Linux 还提供了一套 timerfd_create(), timerfd_settime(), timerfd_gettime() 定时器接口，实现在 fs/timerfd.c 中，感兴趣的读者可自行研究相关文档和代码。

3. 参考资料

[1] https://linux.die.net/man/2/alarm
[2] https://linux.die.net/man/2/setitimer
[3] https://man7.org/linux/man-pages/man2/timer_create.2.html
[4] https://man7.org/linux/man-pages/man2/timer_delete.2.html
[5] https://man7.org/linux/man-pages/man2/timer_settime.2.html
[6] https://man7.org/linux/man-pages/man2/timer_getoverrun.2.html
[7] https://man7.org/linux/man-pages/man2/timerfd_create.2.html