以下分析基于5.15内核版本
1、watchdog初始化调用关系
start_kernel
arch_call_rest_init
rest_init
kernel_init
kernel_init_freeable
lockup_detector_init (watchdog内核初始化接口)
2、lockup_detector_init内核实现
/*
 * lockup_detector_init() - boot-time entry point of the lockup detectors.
 *
 * Logs a notice when nohz_full is active, seeds watchdog_cpumask from the
 * HK_FLAG_TIMER housekeeping mask, records whether an NMI watchdog could be
 * probed, and finally calls lockup_detector_setup().
 */
void __init lockup_detector_init(void)
{
if (tick_nohz_full_enabled()) // NOTE(review): upstream this is a runtime check for active nohz_full CPUs, not a test of the CONFIG_NO_HZ_FULL macro itself - confirm
pr_info("Disabling watchdog on nohz_full cores by default\n");
// The if above has no braces: only the pr_info() is conditional, the mask copy below always runs.
cpumask_copy(&watchdog_cpumask, // watchdog_cpumask = the CPU mask returned for HK_FLAG_TIMER
housekeeping_cpumask(HK_FLAG_TIMER));
if (!watchdog_nmi_probe()) // the probe returns 0 when an NMI watchdog (hardlockup detection) is available
nmi_watchdog_available = true;
lockup_detector_setup();
}
相关注解:
(1)CONFIG_NO_HZ_FULL选项允许内核实现比CONFIG_NO_HZ_IDLE更激进的NO-HZ行为:除了空闲的CPU不再接收周期性定时器中断之外,通过nohz_full=启动参数指定的CPU在只运行单个任务时也会停止周期性定时器中断,从而减少对该任务的打扰,节省电力并减少不必要的计算负载。
(2)HK_FLAG_TIMER是在Linux内核的housekeeping机制中使用的一个标志,它主要用于控制和标识哪些 CPU 需要周期性的定时器中断。
3、watchdog_nmi_probe
检测NMI看门狗是否可用,属于hardlockup检测机制
/*
 * Return 0, if a NMI watchdog is available. Error code otherwise.
 *
 * Declared __weak, so this is the default implementation; it simply
 * forwards the result of hardlockup_detector_perf_init().
 */
int __weak __init watchdog_nmi_probe(void)
{
return hardlockup_detector_perf_init(); // probe through the perf-based hardlockup detector
}
/*
 * hardlockup_detector_perf_init() - probe whether the perf-based detector works.
 *
 * Creates the watchdog perf event once as a test. On failure the condition is
 * logged; on success the event is released again and this CPU's watchdog_ev
 * slot is cleared, so nothing is left armed after the probe.
 *
 * Returns 0 on success, otherwise the error code from
 * hardlockup_detector_event_create().
 */
int __init hardlockup_detector_perf_init(void)
{
int ret = hardlockup_detector_event_create(); // try to create the perf event the hardlockup detector needs
if (ret) {
pr_info("Perf NMI watchdog permanently disabled\n");
} else {
// Creation worked: drop the test event and reset the per-CPU pointer.
perf_event_release_kernel(this_cpu_read(watchdog_ev));
this_cpu_write(watchdog_ev, NULL);
}
return ret;
}
/*
 * hardlockup_detector_event_create() - create the watchdog perf event on the
 * CPU this function is running on.
 *
 * Returns 0 and stores the event in this CPU's watchdog_ev on success, or the
 * PTR_ERR() code of the failed creation.
 */
static int hardlockup_detector_event_create(void)
{
unsigned int cpu = smp_processor_id(); // number of the CPU executing this function
struct perf_event_attr *wd_attr;
struct perf_event *evt;
wd_attr = &wd_hw_attr; // attribute block (defined outside this excerpt) that configures the event
wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh); // sample period derived from watchdog_thresh; presumably the overflow callback fires once per period
/* Try to register using hardware perf events */
evt = perf_event_create_kernel_counter(wd_attr, cpu, NULL,
watchdog_overflow_callback, NULL); // create a kernel counter for this CPU with watchdog_overflow_callback as its overflow handler
if (IS_ERR(evt)) {
pr_debug("Perf event create on CPU %d failed with %ld\n", cpu,
PTR_ERR(evt));
return PTR_ERR(evt);
}
this_cpu_write(watchdog_ev, evt); // remember the new event in this CPU's watchdog_ev
return 0;
}
/*
 * watchdog_overflow_callback() - perf overflow handler that performs the
 * hardlockup check.
 *
 * Registered as the overflow handler in hardlockup_detector_event_create().
 * Each invocation asks is_hardlockup() whether a hard lockup is suspected.
 * A lockup is reported only once while it persists (guarded by the per-CPU
 * hard_watchdog_warn flag) and is escalated to nmi_panic() when
 * hardlockup_panic is set. The flag is cleared again on a healthy check.
 *
 * The "***" lines are the article's markers for omitted upstream code.
 */
static void watchdog_overflow_callback(struct perf_event *event,
struct perf_sample_data *data,
struct pt_regs *regs)
{
*** 跳过部分接口
/* check for a hardlockup
* This is done by making sure our timer interrupt
* is incrementing. The timer interrupt should have
* fired multiple times before we overflow'd. If it hasn't
* then this is a good indication the cpu is stuck
*/
if (is_hardlockup()) { // has a hard lockup been detected?
int this_cpu = smp_processor_id();
/* only print hardlockups once */
if (__this_cpu_read(hard_watchdog_warn) == true)
return;
pr_emerg("Watchdog detected hard LOCKUP on cpu %d\n",
this_cpu); // the key log line emitted when a hard lockup is detected
*** 跳过部分接口
if (hardlockup_panic) // panic on hard lockup only if configured to do so
nmi_panic(regs, "Hard LOCKUP");
__this_cpu_write(hard_watchdog_warn, true); // suppress repeated reports for this lockup
return;
}
__this_cpu_write(hard_watchdog_warn, false); // healthy: allow a future lockup to be reported
return;
}
/*
 * is_hardlockup() - report whether this CPU's hrtimer interrupt count stalled.
 *
 * Compares the per-CPU hrtimer_interrupts counter with the snapshot taken by
 * the previous call. Equal values mean the counter did not advance between
 * two checks, which is treated as a hard lockup; otherwise the snapshot is
 * refreshed. The counter is presumably incremented by
 * watchdog_interrupt_count() from watchdog_timer_fn() - its body is not
 * shown in this excerpt.
 *
 * Returns true when a hard lockup is suspected, false otherwise.
 */
bool is_hardlockup(void)
{
unsigned long hrint = __this_cpu_read(hrtimer_interrupts); // current hrtimer interrupt count of this CPU
if (__this_cpu_read(hrtimer_interrupts_saved) == hrint) // unchanged since the previous check: no hrtimer interrupt was counted during the sample period
return true;
__this_cpu_write(hrtimer_interrupts_saved, hrint); // progress was made: save the new snapshot
return false;
}
4、lockup_detector_setup
softlockup检测机制
lockup_detector_setup
lockup_detector_reconfigure
/*
 * lockup_detector_reconfigure() - stop, re-parameterise and restart the
 * lockup detectors.
 *
 * Inside the cpus_read_lock() section it stops the NMI watchdog and the
 * softlockup detectors, refreshes the sample period and the enable state,
 * restarts the softlockup side only when watchdog_enabled and watchdog_thresh
 * are both non-zero, and calls watchdog_nmi_start(). Cleanup runs after the
 * lock has been dropped.
 */
static void lockup_detector_reconfigure(void)
{
cpus_read_lock();
watchdog_nmi_stop(); // stop the NMI watchdog so it does not fire while the configuration changes
softlockup_stop_all(); // stop the softlockup detectors
set_sample_period(); // recompute the sample period used by the softlockup timer
// NOTE(review): upstream this only recomputes the watchdog_enabled bit mask
// from the user settings; no hardware register access is involved - confirm.
lockup_detector_update_enable(); // refresh which detectors are enabled
if (watchdog_enabled && watchdog_thresh) // restart only if a detector is enabled and the threshold is non-zero
softlockup_start_all(); // start the softlockup detectors again
watchdog_nmi_start(); // counterpart of watchdog_nmi_stop() above
cpus_read_unlock();
/*
* Must be called outside the cpus locked section to prevent
* recursive locking in the perf code.
*/
__lockup_detector_cleanup();
}
/*
 * softlockup_start_all() - run softlockup_start_fn() on every CPU in
 * watchdog_cpumask.
 *
 * NOTE(review): per the upstream prototype the last argument of
 * smp_call_on_cpu() is `phys` (pin to the physical CPU under virtualisation),
 * not a "do not wait" flag, and the call returns only after the function has
 * run on the target CPU - confirm against kernel/smp.c.
 */
static void softlockup_start_all(void)
{
int cpu;
cpumask_copy(&watchdog_allowed_mask, &watchdog_cpumask); // snapshot the CPU set; only CPUs in this copy are visited below
for_each_cpu(cpu, &watchdog_allowed_mask)
smp_call_on_cpu(cpu, softlockup_start_fn, NULL, false); // execute softlockup_start_fn on `cpu`; NULL means no argument is passed
}
/*
 * softlockup_start_fn() - per-CPU callback used by softlockup_start_all().
 * @data: unused.
 *
 * Enables the watchdog on the CPU it is executing on. Always returns 0.
 */
static int softlockup_start_fn(void *data)
{
watchdog_enable(smp_processor_id()); // start the watchdog timer (and optionally the NMI watchdog) for this CPU
return 0;
}
/*
 * watchdog_enable() - arm the watchdog hrtimer and, if enabled, the NMI
 * watchdog on the current CPU.
 * @cpu: CPU to enable; expected to be the CPU this code is running on.
 *
 * The hrtimer is started before the NMI watchdog on purpose (see the comment
 * in the body), so the statement order must be preserved.
 */
static void watchdog_enable(unsigned int cpu)
{
struct hrtimer *hrtimer = this_cpu_ptr(&watchdog_hrtimer);
struct completion *done = this_cpu_ptr(&softlockup_completion);
WARN_ON_ONCE(cpu != smp_processor_id()); // warn (once) if called for a CPU other than the current one
init_completion(done);
complete(done); // leave the completion in the "done" state so the first watchdog_timer_fn() run passes its completion_done() check
/*
* Start the timer first to prevent the NMI watchdog triggering
* before the timer has a chance to fire.
*/
hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); // monotonic clock, relative expiry; presumably the _HARD suffix selects hard-interrupt context
hrtimer->function = watchdog_timer_fn; // timer callback
hrtimer_start(hrtimer, ns_to_ktime(sample_period),
HRTIMER_MODE_REL_PINNED_HARD); // first expiry sample_period ns from now; presumably pinned to this CPU, per the mode name
/* Initialize timestamp */
__touch_watchdog(); // set the watchdog's last-touch timestamp
/* Enable the perf event */
if (watchdog_enabled & NMI_WATCHDOG_ENABLED)
watchdog_nmi_enable(cpu); // only when the NMI watchdog bit is set in watchdog_enabled
}
/*
 * watchdog_timer_fn() - watchdog hrtimer callback; feeds the hardlockup
 * detector and performs the softlockup check.
 * @hrtimer: the per-CPU watchdog timer that expired.
 *
 * Returns HRTIMER_NORESTART when the watchdog is disabled, HRTIMER_RESTART
 * otherwise.
 *
 * The "***" lines are the article's markers for omitted upstream code.
 */
static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
{
unsigned long touch_ts = __this_cpu_read(watchdog_touch_ts);
struct pt_regs *regs = get_irq_regs();
int duration;
int softlockup_all_cpu_backtrace = sysctl_softlockup_all_cpu_backtrace;
if (!watchdog_enabled) // watchdog disabled: do not re-arm the timer
return HRTIMER_NORESTART;
/* kick the hardlockup detector */
watchdog_interrupt_count(); // presumably bumps the hrtimer_interrupts counter that is_hardlockup() compares - body not shown
/* kick the softlockup detector */
// If the previously queued softlockup work has completed, re-arm the
// completion and queue softlockup_fn on this CPU without waiting for it.
// NOTE(review): upstream softlockup_fn runs in the high-priority stopper
// thread, touches the watchdog timestamp and completes
// softlockup_completion; this does not stop the detector - confirm.
if (completion_done(this_cpu_ptr(&softlockup_completion))) {
reinit_completion(this_cpu_ptr(&softlockup_completion));
stop_one_cpu_nowait(smp_processor_id(),
softlockup_fn, NULL,
this_cpu_ptr(&softlockup_stop_work));
}
*** 跳过部分接口
/* check for a softlockup
* This is done by making sure a high priority task is
* being scheduled. The task touches the watchdog to
* indicate it is getting cpu time. If it hasn't then
* this is a good indication some task is hogging the cpu
*/
duration = is_softlockup(touch_ts); // non-zero (time since the last touch) when the softlockup threshold is exceeded
if (unlikely(duration)) {
*** 跳过部分接口
// The key log line emitted when a soft lockup is detected.
pr_emerg("BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
smp_processor_id(), duration,
current->comm, task_pid_nr(current));
__this_cpu_write(softlockup_task_ptr_saved, current); // remember the task that was current when the lockup was reported
*** 跳过部分接口
add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
if (softlockup_panic)
panic("softlockup: hung tasks");
__this_cpu_write(soft_watchdog_warn, true);
} else
__this_cpu_write(soft_watchdog_warn, false);
return HRTIMER_RESTART;
}
/*
 * is_softlockup() - check whether the softlockup threshold has been exceeded.
 * @touch_ts: timestamp of the last watchdog touch on this CPU.
 *
 * Returns now - touch_ts when the soft watchdog is enabled, watchdog_thresh
 * is non-zero and more than get_softlockup_thresh() has elapsed since
 * @touch_ts; returns 0 otherwise. The value is in the units of
 * get_timestamp(), presumably seconds given the "%us" log format in
 * watchdog_timer_fn().
 */
static int is_softlockup(unsigned long touch_ts)
{
unsigned long now = get_timestamp(); // current timestamp
if ((watchdog_enabled & SOFT_WATCHDOG_ENABLED) && watchdog_thresh){ // soft watchdog enabled and a non-zero threshold configured
/* Warn about unreasonable delays. */
if (time_after(now, touch_ts + get_softlockup_thresh())) // true once `now` is past touch_ts + threshold
return now - touch_ts; // how long the watchdog has gone untouched
}
return 0;
}
5、采样周期
softlockup:
/* Base watchdog threshold, initialised to 10 (presumably seconds). */
int __read_mostly watchdog_thresh = 10;
/*
 * get_softlockup_thresh() - softlockup threshold.
 *
 * Returns twice watchdog_thresh, i.e. 20 with the initial value of 10.
 */
static int get_softlockup_thresh(void)
{
return watchdog_thresh * 2;
}
可配置:/proc/sys/kernel/watchdog_thresh (watchdog_thresh默认值为10,softlockup阈值为watchdog_thresh*2,即默认10*2=20s)
hardlockup:
#ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF
/*
 * hw_nmi_get_sample_period() - perf sample period for the hardlockup detector.
 * @watchdog_thresh: threshold to convert (the parameter shadows the global of
 *                   the same name).
 *
 * Returns cpu_khz * 1000 * watchdog_thresh, computed in 64 bits. Assuming
 * cpu_khz is the CPU frequency in kHz and the threshold is in seconds, this
 * is the number of CPU cycles in watchdog_thresh seconds - TODO confirm that
 * the perf event counts CPU cycles.
 */
u64 hw_nmi_get_sample_period(int watchdog_thresh)
{
return (u64)(cpu_khz) * 1000 * watchdog_thresh;
}
#endif
6、总结
softlockup:软狗检测机制:hrtimer定时器按采样周期触发,在定时器回调中让高优先级任务更新时间戳(watchdog_touch_ts),并检查该时间戳距当前时间是否超过阈值;主要检测单核上是否发生死锁,导致task不能正常调度;
典型原因:抢占被长时间关闭而导致其余进程无法调度
hardlockup:硬狗检测机制:perf事件按采样周期触发nmi中断,在nmi回调中检测hrtimer定时器中断上报次数是否增长;主要检测单核上中断是否不能正常响应;
典型原因:中断被长时间关闭而导致hrtimer定时器中断无法触发