BUG: scheduling while atomic经典bug

static int handle_en_event(struct hns_roce_dev *hr_dev, u8 port,
                           unsigned long event)
{
        struct device *dev = &hr_dev->pdev->dev;
        struct net_device *netdev;

        netdev = hr_dev->iboe.netdevs[port];
        if (!netdev) {
                dev_err(dev, "port(%d) can't find netdev\n", port);
                return -ENODEV;
        }

        spin_lock_bh(&hr_dev->iboe.lock);

        switch (event) {
        case NETDEV_UP:
        case NETDEV_CHANGE:
        case NETDEV_REGISTER:
        case NETDEV_CHANGEADDR:
                hns_roce_set_mac(hr_dev, port, netdev->dev_addr);
                break;
        case NETDEV_DOWN:
                /*
                 * In v1 engine, only support all ports closed together.
                 */
                break;
        default:
                dev_dbg(dev, "NETDEV event = 0x%x!\n", (u32)(event));
                break;
        }

        spin_unlock_bh(&hr_dev->iboe.lock);
        return 0;
}
这段code中在spin_lock_bh/spin_unlock_bh 之间调用hns_roce_set_mac,而hns_roce_set_mac->hns_roce_set_mac->hns_roce_v1_set_mac
void hns_roce_v1_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port, u8 *addr)
{
        u32 reg_smac_l;
        u16 reg_smac_h;
        u16 *p_h;
        u32 *p;
        u32 val;

        /*
         * When mac changed, loopback may fail
         * because of smac not equal to dmac.
         * We Need to release and create reserved qp again.
         */
        if (hr_dev->hw->dereg_mr && hns_roce_v1_recreate_lp_qp(hr_dev))
                dev_warn(&hr_dev->pdev->dev, "recreate lp qp timeout!\n");

 }
hns_roce_v1_set_mac 中又会调用hns_roce_v1_recreate_lp_qp
static int hns_roce_v1_recreate_lp_qp(struct hns_roce_dev *hr_dev)
{
        struct device *dev = &hr_dev->pdev->dev;
        struct hns_roce_recreate_lp_qp_work *lp_qp_work;
        struct hns_roce_free_mr *free_mr;
        struct hns_roce_v1_priv *priv;
        struct completion comp;
        unsigned long end =
          msecs_to_jiffies(HNS_ROCE_V1_RECREATE_LP_QP_TIMEOUT_MSECS) + jiffies;

        

        while (time_before_eq(jiffies, end)) {
                if (try_wait_for_completion(&comp))
                        return 0;
                msleep(HNS_ROCE_V1_RECREATE_LP_QP_WAIT_VALUE);
        }
}
在hns_roce_v1_recreate_lp_qp 中调用msleep函数,就违反了不能在atomic环境中sleep的情况,这样在
__schedule->schedule_debug
static inline void schedule_debug(struct task_struct *prev)
{
#ifdef CONFIG_SCHED_STACK_END_CHECK
    if (task_stack_end_corrupted(prev))
        panic("corrupted stack end detected inside scheduler\n");
#endif
//这个条件就会符合,所以调用__schedule_bug
    if (unlikely(in_atomic_preempt_off())) {
        __schedule_bug(prev);
        preempt_count_set(PREEMPT_DISABLED);
    }
    rcu_sleep_check();

    profile_hit(SCHED_PROFILING, __builtin_return_address(0));

    schedstat_inc(this_rq()->sched_count);
}
static noinline void __schedule_bug(struct task_struct *prev)
{
    /* Save this before calling printk(), since that will clobber it */
    unsigned long preempt_disable_ip = get_preempt_disable_ip(current);

    if (oops_in_progress)
        return;

    printk(KERN_ERR "BUG: scheduling while atomic: %s/%d/0x%08x\n",
        prev->comm, prev->pid, preempt_count());
}
这样就会打印BUG: scheduling while atomic 这样的log。并出给当前京进程的name和pid。
而#define in_atomic_preempt_off() (preempt_count() != PREEMPT_DISABLE_OFFSET)
可见就是就是检测preempt_count是否等于PREEMPT_DISABLE_OFFSET==0.不为0也就是说当前是原子环境
那么为啥spin_lock_bh/spin_unlock_bh 之间的环境是原子环境呢?

static __always_inline void spin_lock_bh(spinlock_t *lock)
{
    raw_spin_lock_bh(&lock->rlock);
}
#define raw_spin_lock_bh(lock)        _raw_spin_lock_bh(lock)
void __lockfunc _raw_spin_lock_bh(raw_spinlock_t *lock)
{
    __raw_spin_lock_bh(lock);
}
static inline void __raw_spin_lock_irq(raw_spinlock_t *lock)
{
    local_irq_disable();
    preempt_disable();
    spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
    LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock);
}
重点是调用preempt_disable
#define preempt_disable() \
do { \
    preempt_count_inc(); \
    barrier(); \
} while (0)

原来在preempt_disable 中调用preempt_count_inc来让#define preempt_count_inc() preempt_count_add(1) preempt加一,这样再通过in_atomic_preempt_off()判断preempt是否为0肯定就是就是不成立的。因此kernel就会打印BUG: scheduling while atomic



  • 3
    点赞
  • 10
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值