连续调用usleep_range导致的lockup

现象

crng_finalize_init后出现死锁

[    3.610168] random: crng init done
[   13.615447] suspected lockup task = kworker/0:3, pid: 42
[   13.620788] pc = 0x0
[   23.615447] suspected lockup task = kworker/0:3, pid: 42
[   23.620788] pc = 0x0
[   33.615447] suspected lockup task = kworker/0:3, pid: 42
[   33.620788] pc = 0x0
[   43.615447] suspected lockup task = kworker/0:3, pid: 42
[   43.620788] pc = 0x0

分析

在最后一句打印("crng init done")的地方添加栈回溯(dump_stack),看看是什么路径触发的

--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -829,6 +829,7 @@ static void crng_finalize_init(struct crng_state *crng)
        wake_up_interruptible(&crng_init_wait);
        kill_fasync(&fasync, SIGIO, POLL_IN);
        pr_notice("crng init done\n");
+       dump_stack();
        if (unseeded_warning.missed) {
                pr_notice("random: %d get_random_xx warning(s) missed "
                                "due to ratelimiting\n",

也即初始化的时候,发送mmc的request的时候,触发了中断

[    3.636016] Workqueue: events_freezable mmc_rescan
[    3.640838] [<c0013245>] (unwind_backtrace) from [<c00109b7>] (show_stack+0xb/0xc)
[    3.648468] [<c00109b7>] (show_stack) from [<c0208e49>] (crng_finalize_init.constprop.18+0xc9/0xfc)
[    3.657592] [<c0208e49>] (crng_finalize_init.constprop.18) from [<c0209285>] (crng_reseed.constprop.14+0xe9/0x174)
[    3.668029] [<c0209285>] (crng_reseed.constprop.14) from [<c020981b>] (add_interrupt_randomness+0x10b/0x160)
[    3.677917] [<c020981b>] (add_interrupt_randomness) from [<c004e5e7>] (handle_irq_event_percpu+0x1b/0x3c)
[    3.687561] [<c004e5e7>] (handle_irq_event_percpu) from [<c004e62f>] (handle_irq_event+0x27/0x3c)
[    3.696502] [<c004e62f>] (handle_irq_event) from [<c0050ae3>] (handle_level_irq+0x67/0xb8)
[    3.704833] [<c0050ae3>] (handle_level_irq) from [<c004dea3>] (generic_handle_irq+0x17/0x20)
[    3.713317] [<c004dea3>] (generic_handle_irq) from [<c004e0c9>] (__handle_domain_irq+0x35/0x70)
[    3.722076] [<c004e0c9>] (__handle_domain_irq) from [<c0009ad3>] (__irq_svc+0x53/0x94)
[    3.730072] Exception stack(0xc5de5d00 to 0xc5de5d48)
[    3.735137] 5d00: 00008001 ffff8c9d ffffff9c 00000000 c5c3d000 c066ac40 00000001 c5c3d810
[    3.743377] 5d20: c5c3d640 00000013 00000000 c5de5dc0 10624dd3 c5de5d50 c042a2b9 c02dbc96
[    3.751617] 5d40: a0000033 ffffffff
[    3.755126] [<c0009ad3>] (__irq_svc) from [<c02dbc96>] (sdhci_request+0xd2/0x270)
[    3.762664] [<c02dbc96>] (sdhci_request) from [<c02c97bb>] (mmc_start_request+0x5f/0x7c)
[    3.770812] [<c02c97bb>] (mmc_start_request) from [<c02c990d>] (mmc_wait_for_req+0x45/0x8c)
[    3.779235] [<c02c990d>] (mmc_wait_for_req) from [<c02d05a3>] (mmc_app_send_scr+0x8f/0xb8)
[    3.787567] [<c02d05a3>] (mmc_app_send_scr) from [<c02cfbfb>] (mmc_sd_setup_card+0x2f/0x250)
[    3.796051] [<c02cfbfb>] (mmc_sd_setup_card) from [<c02cfec9>] (mmc_sd_init_card+0xad/0x2fc)
[    3.804565] [<c02cfec9>] (mmc_sd_init_card) from [<c02d023f>] (mmc_attach_sd+0x77/0xf4)
[    3.812622] [<c02d023f>] (mmc_attach_sd) from [<c02caba5>] (mmc_rescan+0x21d/0x290)
[    3.820312] [<c02caba5>] (mmc_rescan) from [<c0034293>] (process_one_work+0x127/0x2e4)
[    3.828308] [<c0034293>] (process_one_work) from [<c003454b>] (worker_thread+0xfb/0x3e4)
[    3.836456] [<c003454b>] (worker_thread) from [<c0037a01>] (kthread+0x105/0x108)
[    3.843902] [<c0037a01>] (kthread) from [<c00090e9>] (ret_from_fork+0x11/0x28)
[    3.851165] Exception stack(0xc5de5fb0 to 0xc5de5ff8)
[    3.856262] 5fa0:                                     00000000 00000000 00000000 00000000
[    3.864471] 5fc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
[    3.872711] 5fe0: 00000000 00000000 00000000 00000000 00000013 00000000
[   13.875732] suspected lockup task = kworker/0:2, pid: 39
[   13.881072] pc = 0x0
[   23.875732] suspected lockup task = kworker/0:2, pid: 39
[   23.881072] pc = 0x0

添加打印,看看触发了什么中断,发现是不同的中断触发的,那就不是某个中断处理引发的问题

--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -664,7 +664,7 @@ int __handle_domain_irq(struct irq_domain *domain, unsigned int hwirq,
        struct pt_regs *old_regs = set_irq_regs(regs);
        unsigned int irq = hwirq;
        int ret = 0;
-
+       printk("__handle_domain_irq irq = %d", irq);
        irq_enter();

add_interrupt_randomness

在中断处理中,add_interrupt_randomness()函数利用设备两次中断的间隔时间作为噪声源将随机数据加入熵池。这个产生的随机值,可供usleep_range使用

handle_irq

分析下这个中断随机值的产生流程

/*
 * Look up the descriptor for @irq and run the flow handler installed on it.
 *
 * Returns -EINVAL when irq_to_desc() yields no descriptor, 0 otherwise.
 */
int generic_handle_irq(unsigned int irq)
{
	struct irq_desc *desc = irq_to_desc(irq);

	if (!desc)
		return -EINVAL;
	/* Dispatch through the descriptor's handle_irq callback. */
	generic_handle_irq_desc(desc);
	return 0;
}

/*
 * Invoke the handle_irq callback stored in @desc, passing @desc itself.
 * No NULL check is done here on either @desc or the callback.
 */
static inline void generic_handle_irq_desc(struct irq_desc *desc)
{
	desc->handle_irq(desc);
}

irq_set_chip_and_handler

中断控制器通过irq_set_chip_and_handler将上面的desc->handle_irq赋值为handle_level_irq,例如下面的mmp_irq_domain_map

/*
 * Domain map callback: bind @irq to icu_irq_chip and install
 * handle_level_irq as its flow handler.
 *
 * @d and @hw are not used in this body. Always returns 0.
 */
static int mmp_irq_domain_map(struct irq_domain *d, unsigned int irq,
                              irq_hw_number_t hw)
{
        irq_set_chip_and_handler(irq, &icu_irq_chip, handle_level_irq);
        return 0;
}

//
/*
 * Flow handler for @desc: mask and ack the line, run the registered
 * actions via handle_irq_event(), then conditionally unmask.
 *
 * desc->lock is taken on entry and released on every exit path.
 */
void handle_level_irq(struct irq_desc *desc)
{
	raw_spin_lock(&desc->lock);
	mask_ack_irq(desc);

	/* Bail out (still masked) when irq_may_run() says not to proceed. */
	if (!irq_may_run(desc))
		goto out_unlock;

	desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING);

	/*
	 * If its disabled or no action available
	 * keep it masked and get out of here
	 */
	if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data))) {
		/* Record the interrupt as pending before leaving. */
		desc->istate |= IRQS_PENDING;
		goto out_unlock;
	}

	kstat_incr_irqs_this_cpu(desc);
	/* handle_irq_event() drops desc->lock around the handlers and retakes it. */
	handle_irq_event(desc);

	cond_unmask_irq(desc);

out_unlock:
	raw_spin_unlock(&desc->lock);
}

//
/*
 * Run the per-CPU event handling for @desc with desc->lock dropped.
 *
 * The body unlocks desc->lock first and re-locks it before returning, so
 * the caller is expected to hold the lock on entry and holds it again on
 * return. IRQD_IRQ_INPROGRESS is set for the duration of the handlers.
 *
 * Returns whatever handle_irq_event_percpu() returned.
 */
irqreturn_t handle_irq_event(struct irq_desc *desc)
{
	irqreturn_t ret;

	/* The interrupt is being serviced now, so it is no longer pending. */
	desc->istate &= ~IRQS_PENDING;
	irqd_set(&desc->irq_data, IRQD_IRQ_INPROGRESS);
	raw_spin_unlock(&desc->lock);

	ret = handle_irq_event_percpu(desc);

	raw_spin_lock(&desc->lock);
	irqd_clear(&desc->irq_data, IRQD_IRQ_INPROGRESS);
	return ret;
}

handle_irq_event_percpu

__handle_irq_event_percpu执行中断处理函数;随后调用的add_interrupt_randomness函数利用设备两次中断的间隔时间作为噪声源,将随机数据加入熵池,供产生随机数使用

/*
 * Run the actions of @desc, then pass the interrupt to the randomness
 * code and, unless noirqdebug is set, to note_interrupt().
 *
 * Returns the combined result from __handle_irq_event_percpu().
 */
irqreturn_t handle_irq_event_percpu(struct irq_desc *desc)
{
	irqreturn_t retval;
	unsigned int flags = 0;

	/* flags accumulates action->flags of the actions that handled the irq. */
	retval = __handle_irq_event_percpu(desc, &flags);

	/* Called on every pass through here, regardless of retval. */
	add_interrupt_randomness(desc->irq_data.irq, flags);

	if (!noirqdebug)
		note_interrupt(desc, retval);
	return retval;
}

action->handler执行request_irq注册的中断处理函数handler

/*
 * Call the handler of every action attached to @desc.
 *
 * @flags: output; action->flags is OR-ed in for each action whose handler
 *         returned IRQ_HANDLED, or IRQ_WAKE_THREAD with a thread_fn set.
 *
 * Returns the bitwise OR of all handler return values (IRQ_NONE if no
 * action ran).
 */
irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc, unsigned int *flags)
{
	irqreturn_t retval = IRQ_NONE;
	unsigned int irq = desc->irq_data.irq;
	struct irqaction *action;

	record_irq_time(desc);

	for_each_action_of_desc(desc, action) {
		irqreturn_t res;

		trace_irq_handler_entry(irq, action);
		res = action->handler(irq, action->dev_id);
		trace_irq_handler_exit(irq, action, res);

		/* A handler returned with interrupts enabled: warn once and disable them again. */
		if (WARN_ONCE(!irqs_disabled(),"irq %u handler %pS enabled interrupts\n",
			      irq, action->handler))
			local_irq_disable();

		switch (res) {
		case IRQ_WAKE_THREAD:
			/*
			 * Catch drivers which return WAKE_THREAD but
			 * did not set up a thread function
			 */
			if (unlikely(!action->thread_fn)) {
				warn_no_thread(irq, action);
				break;
			}

			__irq_wake_thread(desc, action);

			/* Fall through - to add to randomness */
		case IRQ_HANDLED:
			*flags |= action->flags;
			break;

		default:
			break;
		}

		/* Accumulated even for the warn_no_thread() case above. */
		retval |= res;
	}

	return retval;
}

结论

正常情况下,"crng init done"这条打印之后应该接着打印pcie相关的信息;最终发现是pcie主机驱动里添加复位操作的时候,连续调用usleep_range引起的lockup

  • 4
    点赞
  • 5
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值