softlock机制

最新推荐文章于 2024-03-26 08:00:00 发布

shuai_wen

最新推荐文章于 2024-03-26 08:00:00 发布

阅读量1.8k

点赞数

分类专栏： crash

本文链接：https://blog.csdn.net/u011279649/article/details/11523765

版权

crash 专栏收录该内容

51 篇文章 10 订阅

订阅专栏

kernel/watchdog.c

有关lockup_detector初始化

创建 kick timer（hrtimer）和 watchdog thread
/*
 * Set up the lockup detector for the CPU executing this function.
 *
 * The CPU_UP_PREPARE and CPU_ONLINE notifications are delivered to
 * cpu_callback() by hand for this CPU, then cpu_nfb is registered so that
 * cpu_callback() is also invoked for later CPU notifications.
 */
void __init lockup_detector_init(void)
{
   void *this_cpu = (void *)(long)smp_processor_id();
   int rc = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, this_cpu);

   /* A failing prepare step is only warned about, not propagated. */
   WARN_ON(notifier_to_errno(rc));

   cpu_callback(&cpu_nfb, CPU_ONLINE, this_cpu);
   register_cpu_notifier(&cpu_nfb);
}

/*
 * CPU notifier callback: prepares, enables or disables the watchdog of a
 * CPU according to the notified action.
 *
 * @nfb:    notifier block the callback was invoked through (not read here)
 * @action: CPU notification code (CPU_UP_PREPARE, CPU_ONLINE, ...)
 * @hcpu:   CPU number, carried in a pointer-sized value
 *
 * Always returns NOTIFY_OK.
 */
static int __cpuinit
cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
{
   int cpu = (unsigned long)hcpu;

   switch (action) {
   case CPU_UP_PREPARE:
   case CPU_UP_PREPARE_FROZEN:
       watchdog_prepare_cpu(cpu);
       break;
   case CPU_ONLINE:
   case CPU_ONLINE_FROZEN:
       /* Start the watchdog only if watchdog_enabled is set. */
       if (watchdog_enabled)
           watchdog_enable(cpu);
       break;
#ifdef CONFIG_HOTPLUG_CPU
   /* A cancelled bring-up and a dead CPU both disable the watchdog. */
   case CPU_UP_CANCELED:
   case CPU_UP_CANCELED_FROZEN:
   case CPU_DEAD:
   case CPU_DEAD_FROZEN:
       watchdog_disable(cpu);
       break;
#endif /* CONFIG_HOTPLUG_CPU */
   }

   /*
    * Lockup detection is not important enough to block CPU bring-up,
    * so success is reported unconditionally; problems are only
    * flagged through printk output.
    */
   return NOTIFY_OK;
}

cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu) -> watchdog_prepare_cpu(hotcpu);
/* prepare/enable/disable routines */
/*
 * Prepare the watchdog hrtimer of @cpu: initialise it as a relative
 * CLOCK_MONOTONIC timer and install watchdog_timer_fn() as its callback.
 * Warns if a watchdog thread is already recorded for @cpu.
 */
static void watchdog_prepare_cpu(int cpu)
{
   struct hrtimer *timer;

   WARN_ON(per_cpu(softlockup_watchdog, cpu));

   timer = &per_cpu(watchdog_hrtimer, cpu);
   hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
   timer->function = watchdog_timer_fn;
}

cpu_callback(&cpu_nfb, CPU_ONLINE, cpu) -> watchdog_enable(hotcpu);
static int watchdog_enable(int cpu)
{
   struct task_struct *p = per_cpu(softlockup_watchdog, cpu);
   int err = 0;

   /* enable the perf event */
   err = watchdog_nmi_enable(cpu);

   /* Regardless of err above, fall through and start softlockup */

   /* create the watchdog thread */
   if (!p) {
       struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
       p = kthread_create_on_node(watchdog, NULL, cpu_to_node(cpu), "watchdog/%d", cpu);
       if (IS_ERR(p)) {
           pr_err("softlockup watchdog for %i failed\n", cpu);
           if (!err) {
               /* if hardlockup hasn't already set this */
               err = PTR_ERR(p);
               /* and disable the perf event */
               watchdog_nmi_disable(cpu);
           }
           goto out;
       }
       sched_setscheduler(p, SCHED_FIFO, &param);
       kthread_bind(p, cpu);
       per_cpu(watchdog_touch_ts, cpu) = 0;
       per_cpu(softlockup_watchdog, cpu) = p;
       wake_up_process(p);
   }

out:
   return err;
}

watchdog thread 和 timer 回调函数（watchdog_timer_fn）

/*
 * The watchdog thread - touches the timestamp.
 *
 * watchdog thread 的工作就是 touch the timestamp，也就是更新一下 watchdog_touch_ts，
 * 然后主动睡眠，等待 timer 回调函数（watchdog_timer_fn）把它唤醒。
 * 如果 timer 回调已经执行了 wakeup 动作，但 watchdog thread 一直没有得到运行，
 * 那么之后 timer 回调运行时检查 watchdog_touch_ts，如果距上次更新的时间间隔 > 20s
 * （即 2 * watchdog_thresh），就执行相应操作：
 *
 *    duration = is_softlockup(touch_ts);
 *    if (unlikely(duration)) {
 *        if (softlockup_panic)
 *            panic("softlockup: hung tasks");
 *    }
 */

/*
 * Body of the watchdog thread (the function handed to
 * kthread_create_on_node() by watchdog_enable()).
 *
 * @unused: thread argument; never read.
 *
 * Touches the watchdog timestamp, starts the watchdog_hrtimer instance
 * obtained through __raw_get_cpu_var(), then repeatedly touches the
 * timestamp and sleeps until kthread_should_stop() becomes true.
 * Returns 0.
 */
static int watchdog(void *unused)
{
   /* priority 0 is used when dropping back to SCHED_NORMAL on exit */
   struct sched_param param = { .sched_priority = 0 };
   struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer);

   /* initialize timestamp */
   __touch_watchdog();

   /* kick off the timer for the hardlockup detector */
   /* done here because hrtimer_start can only pin to smp_processor_id() */
   hrtimer_start(hrtimer, ns_to_ktime(get_sample_period()),
              HRTIMER_MODE_REL_PINNED);

   /* mark the thread as sleeping before the first schedule() below */
   set_current_state(TASK_INTERRUPTIBLE);
   /*
   * Run briefly (kicked by the hrtimer callback function) once every
   * get_sample_period() seconds (4 seconds by default) to reset the
   * softlockup timestamp. If this gets delayed for more than
   * 2*watchdog_thresh seconds then the debug-printout triggers in
   * watchdog_timer_fn().
   */
   while (!kthread_should_stop()) {
       __touch_watchdog();
       /* give up the CPU; watchdog_timer_fn() wakes this thread again */
       schedule();

       if (kthread_should_stop())
           break;

       /* re-arm the sleeping state for the next iteration */
       set_current_state(TASK_INTERRUPTIBLE);
   }
   /*
   * Drop the policy/priority elevation during thread exit to avoid a
   * scheduling latency spike.
   */
   __set_current_state(TASK_RUNNING);
   sched_setscheduler(current, SCHED_NORMAL, &param);
   return 0;
}

/* Commands for resetting the watchdog */

/*
 * Store the get_timestamp() value of the executing CPU into that CPU's
 * watchdog_touch_ts.
 */
static void __touch_watchdog(void)
{
   __this_cpu_write(watchdog_touch_ts, get_timestamp(smp_processor_id()));
}

/* watchdog kicker functions */

/*
 * hrtimer callback of the watchdog (installed as hrtimer->function by
 * watchdog_prepare_cpu()).
 *
 * @hrtimer: the timer that fired; it is forwarded by one sample period.
 *
 * On every expiry this counts the interrupt for the hardlockup detector,
 * wakes the watchdog thread and checks the timestamp that thread last
 * wrote. When is_softlockup() reports a non-zero duration, a "soft lockup"
 * report is printed (once until the condition clears) and, if
 * softlockup_panic is set, the kernel panics.
 *
 * Always returns HRTIMER_RESTART.
 */
static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
{
   /* sampled before the watchdog thread is woken below */
   unsigned long touch_ts = __this_cpu_read(watchdog_touch_ts);
   struct pt_regs *regs = get_irq_regs();
   int duration;

   /* kick the hardlockup detector */
   watchdog_interrupt_count();

   /* kick the softlockup detector */
   wake_up_process(__this_cpu_read(softlockup_watchdog));

   /* .. and repeat */
   hrtimer_forward_now(hrtimer, ns_to_ktime(get_sample_period()));

   /*
   * A zero timestamp (the value watchdog_enable() stores) is not
   * checked; the timestamp is refreshed and the check is skipped.
   */
   if (touch_ts == 0) {
       if (unlikely(__this_cpu_read(softlockup_touch_sync))) {
           /*
           * If the time stamp was touched atomically
           * make sure the scheduler tick is up to date.
           */
           __this_cpu_write(softlockup_touch_sync, false);
           sched_clock_tick();
       }
       __touch_watchdog();
       return HRTIMER_RESTART;
   }

   /* check for a softlockup
   * This is done by making sure a high priority task is
   * being scheduled. The task touches the watchdog to
   * indicate it is getting cpu time. If it hasn't then
   * this is a good indication some task is hogging the cpu
   */
   duration = is_softlockup(touch_ts);
   if (unlikely(duration)) {
       /* only warn once */
       if (__this_cpu_read(soft_watchdog_warn) == true)
           return HRTIMER_RESTART;

       printk(KERN_EMERG "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
           smp_processor_id(), duration,
           current->comm, task_pid_nr(current));
       print_modules();
       print_irqtrace_events(current);
       /* prefer the interrupted register state when it is available */
       if (regs)
           show_regs(regs);
       else
           dump_stack();

       if (softlockup_panic)
           panic("softlockup: hung tasks");
       /* remember that this lockup has been reported */
       __this_cpu_write(soft_watchdog_warn, true);
   } else
       /* no lockup: allow the next one to be reported again */
       __this_cpu_write(soft_watchdog_warn, false);

   return HRTIMER_RESTART;
}
/*
 * Decide whether the watchdog timestamp is older than the softlockup
 * threshold.
 *
 * @touch_ts: timestamp last written by the watchdog
 *
 * Returns 0 while the threshold has not been exceeded, otherwise the
 * difference between the current timestamp and @touch_ts.
 */
static int is_softlockup(unsigned long touch_ts)
{
   unsigned long now = get_timestamp(smp_processor_id());
   unsigned long deadline = touch_ts + get_softlockup_thresh();

   if (!time_after(now, deadline))
       return 0;

   /* Unreasonable delay: report how long it has lasted. */
   return now - touch_ts;
}

/*
 * Hard lockups should be reported after just a few seconds, whereas soft
 * lockups can produce false positives under extreme conditions and so get
 * a higher threshold. The two are coupled by a fixed factor: the soft
 * lockup threshold is twice the hard lockup threshold (watchdog_thresh).
 */
static int get_softlockup_thresh(void)
{
   return 2 * watchdog_thresh;
}
/*
 * Base lockup threshold, in seconds; get_softlockup_thresh() returns twice
 * this value. Initialised to 10.
 */
int __read_mostly watchdog_thresh = 10;

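怎样保存当时的 crash 现场？

检测到 softlockup 后，如果 softlockup_panic 不为 0，watchdog_timer_fn() 会直接触发 panic，这样就可以在 panic 时保留现场：

if (softlockup_panic)
    panic("softlockup: hung tasks");

可以通过下面的内核配置让 softlockup_panic 默认打开：

CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=1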

有关cpu_chain

/* Notifier block whose callback is cpu_callback(). */
static struct notifier_block __cpuinitdata cpu_nfb = {
   .notifier_call = cpu_callback
};
/*
 * Register @nb on cpu_chain for callers that need to know about CPUs
 * going up or down. The registration is bracketed by
 * cpu_maps_update_begin() / cpu_maps_update_done().
 *
 * Returns the result of raw_notifier_chain_register().
 */
int __ref register_cpu_notifier(struct notifier_block *nb)
{
   int rc;

   cpu_maps_update_begin();
   rc = raw_notifier_chain_register(&cpu_chain, nb);
   cpu_maps_update_done();

   return rc;
}

什么情况下会发生 CPU 状态的变化？调用 cpu_up()/cpu_down() 时（例如 CPU 热插拔）。

没搞明白