内核死锁分为D状态死锁和R状态死锁,之前已经介绍过D状态死锁的检测,接下来看看kernel对R状态死锁的检测.
其入口函数是lockup_detector_init:
/*
 * Entry point of the lockup detector.
 *
 * Computes the sample period (4s with the default watchdog_thresh),
 * selects the set of CPUs that will get a watchdog thread and, if the
 * watchdog is enabled, starts it on those CPUs.
 */
void __init lockup_detector_init(void)
{
	/*
	 * One watchdog thread is created per CPU; watchdog_cpumask decides
	 * which CPUs get one. Default to every possible CPU.
	 */
	const struct cpumask *mask = cpu_possible_mask;

	/* Set how often the detector samples. */
	set_sample_period();

#ifdef CONFIG_NO_HZ_FULL
	/* With nohz_full active, restrict the watchdog to housekeeping CPUs. */
	if (tick_nohz_full_enabled()) {
		pr_info("Disabling watchdog on nohz_full cores by default\n");
		mask = housekeeping_mask;
	}
#endif
	cpumask_copy(&watchdog_cpumask, mask);

	if (watchdog_enabled)
		watchdog_enable_all_cpus();
}
为啥是4s检测一次呢?
/*
 * Watchdog threshold in seconds (set_sample_period() converts it to ns).
 * get_softlockup_thresh() doubles it to obtain the soft-lockup threshold.
 */
int __read_mostly watchdog_thresh = 10;
/*
 * Soft-lockup threshold: twice watchdog_thresh
 * (20 with the default watchdog_thresh of 10).
 */
static int get_softlockup_thresh(void)
{
	return 2 * watchdog_thresh;
}
/*
 * Derive the hrtimer sample period (in ns) from the soft-lockup threshold.
 *
 * The threshold is in seconds; it is converted to ns and divided by 5 so
 * that the hrtimer gets several chances (two or three with the current
 * relation between the soft and hard thresholds) to increment before the
 * hardlockup detector generates a warning.
 *
 * With the defaults this yields 10 * 2 * 1s / 5 = 4s.
 */
static void set_sample_period(void)
{
	sample_period = ((u64)NSEC_PER_SEC / 5) * get_softlockup_thresh();

	/* Propagate the new period to the hrtimer threshold. */
	watchdog_update_hrtimer_threshold(sample_period);
}
可以看出sample_period=10*2*1s/5 =4s
回到lockup_detector_init 函数中,最终通过watchdog_enable_all_cpus 来在每个cpu上创建thread来检测R状态死锁
/*
 * Start the watchdog on every CPU in watchdog_cpumask, or refresh it if
 * it is already running.
 *
 * Returns 0 on success or the error from the failing helper; on any
 * error watchdog_enabled is cleared.
 */
static int watchdog_enable_all_cpus(void)
{
	int ret;

	if (watchdog_running) {
		/*
		 * Enable/disable the lockup detectors or
		 * change the sample period 'on the fly'.
		 */
		ret = update_watchdog_all_cpus();
		if (ret) {
			watchdog_disable_all_cpus();
			pr_err("Failed to update lockup detectors, disabled\n");
		}
	} else {
		/*
		 * First call: watchdog_running is still 0, so register the
		 * per-CPU threads on the CPUs in watchdog_cpumask (by default
		 * a copy of cpu_possible_mask).
		 */
		ret = smpboot_register_percpu_thread_cpumask(&watchdog_threads,
							     &watchdog_cpumask);
		if (ret)
			pr_err("Failed to create watchdog threads, disabled\n");
		else
			watchdog_running = 1;
	}

	if (ret)
		watchdog_enabled = 0;

	return ret;
}
我们先看看watchdog_threads
/*
 * Per-CPU thread descriptor passed to
 * smpboot_register_percpu_thread_cpumask() by watchdog_enable_all_cpus().
 */
static struct smp_hotplug_thread watchdog_threads = {
	.thread_comm		= "watchdog/%u",
	.store			= &softlockup_watchdog,

	/* Lifecycle hooks: watchdog_enable() serves as both setup and unpark. */
	.setup			= watchdog_enable,
	.unpark			= watchdog_enable,
	.park			= watchdog_disable,
	.cleanup		= watchdog_cleanup,

	/* Run condition and thread body. */
	.thread_should_run	= watchdog_should_run,
	.thread_fn		= watchdog,
};
smpboot_register_percpu_thread_cpumask 会为每个cpu创建thread,正常情况下会先调用watchdog_threads的setup来初始化
/*
 * Setup/unpark hook of the per-CPU watchdog thread.
 *
 * Arms this CPU's hrtimer with watchdog_timer_fn() as its callback,
 * enables the NMI (perf event) side, raises the calling thread to
 * SCHED_FIFO and performs the first "touch" of the watchdog.
 */
static void watchdog_enable(unsigned int cpu)
{
	/* Every CPU has its own high-resolution timer. */
	struct hrtimer *timer = raw_cpu_ptr(&watchdog_hrtimer);

	/* kick off the timer for the hardlockup detector */
	hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	timer->function = watchdog_timer_fn;

	/* Enable the perf event */
	watchdog_nmi_enable(cpu);

	/* done here because hrtimer_start can only pin to smp_processor_id() */
	hrtimer_start(timer, ns_to_ktime(sample_period),
		      HRTIMER_MODE_REL_PINNED);

	/*
	 * Run this thread as SCHED_FIFO at priority MAX_RT_PRIO - 1 so that
	 * other threads do not preempt it.
	 */
	watchdog_set_prio(SCHED_FIFO, MAX_RT_PRIO - 1);

	/* initialize timestamp (first touch of the watchdog) */
	__touch_watchdog();
}
我们看看如何喂狗的?
/* Commands for resetting the watchdog */
/*
 * "Touch" the watchdog: store the current get_timestamp() value in this
 * CPU's watchdog_touch_ts.
 */
static void __touch_watchdog(void)
{
__this_cpu_write(watchdog_touch_ts, get_timestamp());
}
原来喂狗就是把当前时间戳get_timestamp()写入每个cpu的变量watchdog_touch_ts
smpboot_register_percpu_thread_cpumask 创建的thread执行完setup后,就会调用thread_should_run 来判断thread是否要继续运行
/*
 * thread_should_run hook: report whether the watchdog thread has work.
 *
 * Returns 1 when this CPU's hrtimer_interrupts differs from
 * soft_lockup_hrtimer_cnt (the value watchdog() last recorded),
 * 0 when the two are equal.
 */
static int watchdog_should_run(unsigned int cpu)
{
	if (__this_cpu_read(hrtimer_interrupts) ==
	    __this_cpu_read(soft_lockup_hrtimer_cnt))
		return 0;

	return 1;
}
从这里可以看出只有hrtimer_interrupts 不等于 soft_lockup_hrtimer_cnt才是正常的情况,这说明hrtimer_interrupts 一直在更新啊,说明系统没有处于R状态死锁
那hrtimer_interrupts 是在高精度定时器的回调函数中更新的,
即在watchdog_timer_fn->watchdog_interrupt_count 中更新的
/* Increment this CPU's hrtimer_interrupts counter. */
static void watchdog_interrupt_count(void)
{
__this_cpu_inc(hrtimer_interrupts);
}
smpboot_register_percpu_thread_cpumask 创建的thread在thread_should_run 返回true后,就会调用thread_fn 函数来运行thread
/*
 * thread_fn of the per-CPU watchdog thread.
 *
 * Records the current hrtimer_interrupts value in soft_lockup_hrtimer_cnt
 * (so watchdog_should_run() sees them as equal until the counter changes
 * again) and touches the watchdog timestamp.
 */
static void watchdog(unsigned int cpu)
{
__this_cpu_write(soft_lockup_hrtimer_cnt,
__this_cpu_read(hrtimer_interrupts));
__touch_watchdog();
}
可以看到这个函数会调用__touch_watchdog 来喂狗,然后会将hrtimer_interrupts的值写到soft_lockup_hrtimer_cnt 中。这样如果系统处于R状态死锁了,
watchdog线程就得不到调度:高精度定时器仍然会到期并更新hrtimer_interrupts,thread_should_run 的条件也成立,但thread_fn(即watchdog 函数)一直没有机会运行,
就不会调用__touch_watchdog 来喂狗,watchdog_touch_ts 长时间得不到更新,这样就等于检测到R状态死锁呀
检测到死锁后,会在下一次高精度定时器到期后在
/*
 * hrtimer callback installed by watchdog_enable().
 *
 * Uses is_softlockup(touch_ts) to decide whether this CPU is soft locked
 * up; if so it emits the "BUG: soft lockup" report (once per lockup),
 * optionally dumps backtraces of the other CPUs and optionally panics.
 * Every path shown returns HRTIMER_RESTART.
 *
 * NOTE(review): this is an abridged excerpt - the declarations of
 * touch_ts, duration and regs, and whatever precedes the is_softlockup()
 * call, are not shown here.
 */
static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
{
/*
 * duration is non-zero when a soft lockup is detected; it is printed
 * below as the number of seconds the CPU has been stuck.
 */
duration = is_softlockup(touch_ts);
if (unlikely(duration)) {
/*
* If a virtual machine is stopped by the host it can look to
* the watchdog like a soft lockup, check to see if the host
* stopped the vm before we issue the warning
*/
if (kvm_check_and_clear_guest_paused())
return HRTIMER_RESTART;
/* only warn once */
if (__this_cpu_read(soft_watchdog_warn) == true) {
/*
* When multiple processes are causing softlockups the
* softlockup detector only warns on the first one
* because the code relies on a full quiet cycle to
* re-arm. The second process prevents the quiet cycle
* and never gets reported. Use task pointers to detect
* this.
*/
if (__this_cpu_read(softlockup_task_ptr_saved) !=
current) {
__this_cpu_write(soft_watchdog_warn, false);
__touch_watchdog();
}
return HRTIMER_RESTART;
}
if (softlockup_all_cpu_backtrace) {
/* Prevent multiple soft-lockup reports if one cpu is already
* engaged in dumping cpu back traces
*/
if (test_and_set_bit(0, &soft_lockup_nmi_warn)) {
/* Someone else will report us. Let's give up */
__this_cpu_write(soft_watchdog_warn, true);
return HRTIMER_RESTART;
}
}
就看看到下面这段log
/* Report the lockup: CPU id, stuck duration, and the current task. */
pr_emerg("BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
smp_processor_id(), duration,
current->comm, task_pid_nr(current));
/* Remember the reported task so a different offender can re-arm the warning. */
__this_cpu_write(softlockup_task_ptr_saved, current);
print_modules();
print_irqtrace_events(current);
if (regs)
show_regs(regs);
else
dump_stack();
if (softlockup_all_cpu_backtrace) {
/* Avoid generating two back traces for current
* given that one is already made above
*/
trigger_allbutself_cpu_backtrace();
clear_bit(0, &soft_lockup_nmi_warn);
/* Barrier to sync with other cpus */
smp_mb__after_atomic();
}
add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
if (softlockup_panic)
panic("softlockup: hung tasks");
/* Mark this lockup as reported so it is only warned about once. */
__this_cpu_write(soft_watchdog_warn, true);
} else
/* No lockup: re-arm the warning for the next occurrence. */
__this_cpu_write(soft_watchdog_warn, false);
return HRTIMER_RESTART;
}
有人可能有疑问,为啥到R状态死锁了,还可以执行定时器的到期函数?这是因为高精度定时器的到期函数watchdog_timer_fn 是在中断上下文中执行的,不需要等待线程被调度;而我们在watchdog_enable 中将watchdog线程的优先级设置到最高,是为了让喂狗线程在正常情况下不被其他thread 抢占
kernel对R状态task在4s不被调度的检测
最新推荐文章于 2023-11-28 14:45:42 发布