softlockup 机制

kernel/watchdog.c

有关lockup_detector初始化

创建 kick timer（per-cpu hrtimer）和 watchdog thread
/*
 * Initialize the lockup detector on the boot CPU and register for CPU
 * hotplug notifications so CPUs brought up later get the same setup.
 *
 * The CPU_UP_PREPARE / CPU_ONLINE callbacks are invoked by hand here
 * because the boot CPU is already online and will never receive those
 * notifier events through the chain itself.
 */
void __init lockup_detector_init(void)
{
    void *cpu = (void *)(long)smp_processor_id();
    int err;

    /* Set up (but do not arm) this CPU's watchdog hrtimer. */
    err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
    WARN_ON(notifier_to_errno(err));

    /* Start the watchdog thread / perf event on this CPU. */
    cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
    /* From here on, hotplugged CPUs are handled via the notifier chain. */
    register_cpu_notifier(&cpu_nfb);
}

/*
 * CPU hotplug callback: create/enable/disable the per-cpu watchdog
 * machinery as CPUs come and go.
 */
static int __cpuinit
cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
{
    int hotcpu = (unsigned long)hcpu;

    switch (action) {
    case CPU_UP_PREPARE:
    case CPU_UP_PREPARE_FROZEN:
        watchdog_prepare_cpu(hotcpu);
        break;
    case CPU_ONLINE:
    case CPU_ONLINE_FROZEN:
        if (watchdog_enabled)
            watchdog_enable(hotcpu);
        break;
#ifdef CONFIG_HOTPLUG_CPU
    case CPU_UP_CANCELED:
    case CPU_UP_CANCELED_FROZEN:
    case CPU_DEAD:
    case CPU_DEAD_FROZEN:
        /* A cancelled bring-up and a dead CPU are torn down identically. */
        watchdog_disable(hotcpu);
        break;
#endif /* CONFIG_HOTPLUG_CPU */
    }

    /*
     * hardlockup and softlockup are not important enough
     * to block cpu bring up.  Just always succeed and
     * rely on printk output to flag problems.
     */
    return NOTIFY_OK;
}

cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu) -> watchdog_prepare_cpu(hotcpu);
/* prepare/enable/disable routines */
/*
 * Set up (but do not start) the softlockup hrtimer for @cpu.
 * The timer is armed later from the watchdog thread itself, because
 * hrtimer_start() can only pin to the CPU it runs on (see watchdog()).
 */
static void watchdog_prepare_cpu(int cpu)
{
    struct hrtimer *hrtimer = &per_cpu(watchdog_hrtimer, cpu);

    /* The watchdog thread for this CPU must not exist yet. */
    WARN_ON(per_cpu(softlockup_watchdog, cpu));
    hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
    hrtimer->function = watchdog_timer_fn;
}

cpu_callback(&cpu_nfb, CPU_ONLINE, cpu) -> watchdog_enable(hotcpu);
/*
 * Bring up both lockup detectors on @cpu: the perf-based hardlockup
 * event and the softlockup watchdog thread.  Returns 0 on success;
 * otherwise the NMI-enable result or the thread-creation error.
 */
static int watchdog_enable(int cpu)
{
    struct task_struct *p = per_cpu(softlockup_watchdog, cpu);
    int err = 0;

    /* enable the perf event */
    err = watchdog_nmi_enable(cpu);

    /* Regardless of err above, fall through and start softlockup */

    /* create the watchdog thread */
    if (!p) {
        struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
        p = kthread_create_on_node(watchdog, NULL, cpu_to_node(cpu), "watchdog/%d", cpu);
        if (IS_ERR(p)) {
            pr_err("softlockup watchdog for %i failed\n", cpu);
            if (!err) {
                /* if hardlockup hasn't already set this */
                err = PTR_ERR(p);
                /* and disable the perf event */
                watchdog_nmi_disable(cpu);
            }
            goto out;
        }
        /* Top RT priority: the thread must preempt any normal task. */
        sched_setscheduler(p, SCHED_FIFO, &param);
        /* Pin to @cpu so it refreshes that CPU's own timestamp. */
        kthread_bind(p, cpu);
        /* 0 means "not touched yet"; watchdog_timer_fn() special-cases it. */
        per_cpu(watchdog_touch_ts, cpu) = 0;
        per_cpu(softlockup_watchdog, cpu) = p;
        wake_up_process(p);
    }

out:
    return err;
}


watchdog thread 与 watchdog_timer_fn

/*
 * The watchdog thread - touches the timestamp.
 *watchdog thread 执行 touch timestamp 动作,也就是更新一下 watchdog_touch_ts,然后就主动
 *睡眠,由 watchdog_timer_fn 在每个采样周期里唤醒该 thread。如果执行了 wakeup 动作,但是
 *watchdog thread 一直没有得到运行,那么 watchdog_timer_fn 检查 watchdog_touch_ts 时,
 *发现时间间隔超过软锁阈值(2*watchdog_thresh,默认 20s),就执行相应操作:
   duration = is_softlockup(touch_ts);
    if (unlikely(duration)) {
       if (softlockup_panic)
        panic("softlockup: hung tasks");
    }

 */


/*
 * Per-cpu watchdog thread.  All it does is refresh this CPU's
 * watchdog_touch_ts each time the hrtimer callback wakes it.  If the
 * thread cannot get scheduled, the timestamp goes stale and
 * watchdog_timer_fn() reports a soft lockup.
 */
static int watchdog(void *unused)
{
    /* priority 0: used at exit to drop back to SCHED_NORMAL */
    struct sched_param param = { .sched_priority = 0 };
    struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer);

    /* initialize timestamp */
    __touch_watchdog();

    /* kick off the timer for the hardlockup detector */
    /* done here because hrtimer_start can only pin to smp_processor_id() */
    hrtimer_start(hrtimer, ns_to_ktime(get_sample_period()),
              HRTIMER_MODE_REL_PINNED);

    /* Mark interruptible before testing the stop condition, so a wakeup
     * arriving in between is not lost (standard kthread idle pattern). */
    set_current_state(TASK_INTERRUPTIBLE);
    /*
     * Run briefly (kicked by the hrtimer callback function) once every
     * get_sample_period() seconds (4 seconds by default) to reset the
     * softlockup timestamp. If this gets delayed for more than
     * 2*watchdog_thresh seconds then the debug-printout triggers in
     * watchdog_timer_fn().
     */
    while (!kthread_should_stop()) {
        __touch_watchdog();
        schedule();

        if (kthread_should_stop())
            break;

        set_current_state(TASK_INTERRUPTIBLE);
    }
    /*
     * Drop the policy/priority elevation during thread exit to avoid a
     * scheduling latency spike.
     */
    __set_current_state(TASK_RUNNING);
    sched_setscheduler(current, SCHED_NORMAL, &param);
    return 0;
}

/* Commands for resetting the watchdog */

/* Refresh the current CPU's softlockup timestamp to "now". */
static void __touch_watchdog(void)
{
    int cpu = smp_processor_id();

    __this_cpu_write(watchdog_touch_ts, get_timestamp(cpu));
}

/* watchdog kicker functions */

/*
 * Per-cpu hrtimer callback, re-armed every get_sample_period() ns.
 * It wakes the watchdog thread, then checks whether that thread managed
 * to refresh watchdog_touch_ts; if the timestamp is older than the
 * softlockup threshold, the lockup is reported (and, with
 * softlockup_panic set, the machine panics).
 */
static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
{
    unsigned long touch_ts = __this_cpu_read(watchdog_touch_ts);
    struct pt_regs *regs = get_irq_regs();
    int duration;

    /* kick the hardlockup detector */
    watchdog_interrupt_count();

    /* kick the softlockup detector */
    wake_up_process(__this_cpu_read(softlockup_watchdog));

    /* .. and repeat */
    hrtimer_forward_now(hrtimer, ns_to_ktime(get_sample_period()));

    /* touch_ts == 0 means the timestamp was explicitly reset:
     * re-initialize it and skip the lockup check this period. */
    if (touch_ts == 0) {
        if (unlikely(__this_cpu_read(softlockup_touch_sync))) {
            /*
             * If the time stamp was touched atomically
             * make sure the scheduler tick is up to date.
             */
            __this_cpu_write(softlockup_touch_sync, false);
            sched_clock_tick();
        }
        __touch_watchdog();
        return HRTIMER_RESTART;
    }

    /* check for a softlockup
     * This is done by making sure a high priority task is
     * being scheduled.  The task touches the watchdog to
     * indicate it is getting cpu time.  If it hasn't then
     * this is a good indication some task is hogging the cpu
     */
    duration = is_softlockup(touch_ts);
    if (unlikely(duration)) {
        /* only warn once */
        if (__this_cpu_read(soft_watchdog_warn) == true)
            return HRTIMER_RESTART;

        printk(KERN_EMERG "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
            smp_processor_id(), duration,
            current->comm, task_pid_nr(current));
        print_modules();
        print_irqtrace_events(current);
        if (regs)
            show_regs(regs);
        else
            dump_stack();

        if (softlockup_panic)
            panic("softlockup: hung tasks");
        /* Latch the warn flag so the same lockup is not re-reported. */
        __this_cpu_write(soft_watchdog_warn, true);
    } else
        /* Timestamp is fresh again: re-arm the warn-once latch. */
        __this_cpu_write(soft_watchdog_warn, false);

    return HRTIMER_RESTART;
}
/*
 * Return how far (in seconds) the watchdog timestamp is overdue, or 0
 * if it is still within the softlockup threshold.
 */
static int is_softlockup(unsigned long touch_ts)
{
    unsigned long now = get_timestamp(smp_processor_id());

    /* Still within the allowed window: no lockup. */
    if (!time_after(now, touch_ts + get_softlockup_thresh()))
        return 0;

    /* Unreasonable delay: report how long the CPU has been stuck. */
    return now - touch_ts;
}

/* Hardlockup threshold in seconds (the base of both thresholds). */
int __read_mostly watchdog_thresh = 10;

/*
 * Hard-lockup warnings should be triggered after just a few seconds. Soft-
 * lockups can have false positives under extreme conditions. So we generally
 * want a higher threshold for soft lockups than for hard lockups. So we couple
 * the thresholds with a factor: we make the soft threshold twice the amount of
 * time the hard threshold is.
 */
static int get_softlockup_thresh(void)
{
    return watchdog_thresh * 2;
}

/*怎样保存当时的crash?*/

        if (softlockup_panic)
            panic("softlockup: hung tasks");
CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=1

有关cpu_chain

/* CPU hotplug notifier block: routes hotplug events to cpu_callback(). */
static struct notifier_block __cpuinitdata cpu_nfb = {
    .notifier_call = cpu_callback
};
/*
 * Hook @nb into the CPU hotplug notifier chain so it is told about
 * CPUs going up/down.  Registration is serialized against hotplug
 * operations via cpu_maps_update_begin()/done().
 */
int __ref register_cpu_notifier(struct notifier_block *nb)
{
    int err;

    cpu_maps_update_begin();
    err = raw_notifier_chain_register(&cpu_chain, nb);
    cpu_maps_update_done();

    return err;
}

什么情况下会发生 CPU状态的变化? cpu_up/down

没搞明白

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值