static int handle_en_event(struct hns_roce_dev *hr_dev, u8 port,
unsigned long event)
{
struct device *dev = &hr_dev->pdev->dev;
struct net_device *netdev;
netdev = hr_dev->iboe.netdevs[port];
if (!netdev) {
dev_err(dev, "port(%d) can't find netdev\n", port);
return -ENODEV;
}
spin_lock_bh(&hr_dev->iboe.lock);
switch (event) {
case NETDEV_UP:
case NETDEV_CHANGE:
case NETDEV_REGISTER:
case NETDEV_CHANGEADDR:
hns_roce_set_mac(hr_dev, port, netdev->dev_addr);
break;
case NETDEV_DOWN:
/*
* In v1 engine, only support all ports closed together.
*/
break;
default:
dev_dbg(dev, "NETDEV event = 0x%x!\n", (u32)(event));
break;
}
spin_unlock_bh(&hr_dev->iboe.lock);
return 0;
}
In this code, hns_roce_set_mac() is called between spin_lock_bh() and spin_unlock_bh(), and the call chain continues as hns_roce_set_mac -> hns_roce_v1_set_mac:
void hns_roce_v1_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port, u8 *addr)
{
u32 reg_smac_l;
u16 reg_smac_h;
u16 *p_h;
u32 *p;
u32 val;
/*
* When mac changed, loopback may fail
* because of smac not equal to dmac.
* We Need to release and create reserved qp again.
*/
if (hr_dev->hw->dereg_mr && hns_roce_v1_recreate_lp_qp(hr_dev))
dev_warn(&hr_dev->pdev->dev, "recreate lp qp timeout!\n");
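/* ... programming of the SMAC low/high registers (which uses the locals above) is elided in this excerpt ... */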
}
hns_roce_v1_set_mac() in turn calls hns_roce_v1_recreate_lp_qp():
static int hns_roce_v1_recreate_lp_qp(struct hns_roce_dev *hr_dev)
{
struct device *dev = &hr_dev->pdev->dev;
struct hns_roce_recreate_lp_qp_work *lp_qp_work;
struct hns_roce_free_mr *free_mr;
struct hns_roce_v1_priv *priv;
struct completion comp;
unsigned long end =
msecs_to_jiffies(HNS_ROCE_V1_RECREATE_LP_QP_TIMEOUT_MSECS) + jiffies;
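/* ... allocation of lp_qp_work, initialization of the completion and queuing of the work are elided in this excerpt ... */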
while (time_before_eq(jiffies, end)) {
if (try_wait_for_completion(&comp))
return 0;
msleep(HNS_ROCE_V1_RECREATE_LP_QP_WAIT_VALUE);
}
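/* ... timeout handling (warn and return an error to the caller) is elided in this excerpt ... */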
}
hns_roce_v1_recreate_lp_qp() calls msleep(), which violates the rule that code must not sleep in an atomic context. When the sleep eventually reaches __schedule() -> schedule_debug():
static inline void schedule_debug(struct task_struct *prev)
{
#ifdef CONFIG_SCHED_STACK_END_CHECK
if (task_stack_end_corrupted(prev))
panic("corrupted stack end detected inside scheduler\n");
#endif
// this condition is met here, so __schedule_bug() is called
if (unlikely(in_atomic_preempt_off())) {
__schedule_bug(prev);
preempt_count_set(PREEMPT_DISABLED);
}
rcu_sleep_check();
profile_hit(SCHED_PROFILING, __builtin_return_address(0));
schedstat_inc(this_rq()->sched_count);
}
static noinline void __schedule_bug(struct task_struct *prev)
{
/* Save this before calling printk(), since that will clobber it */
unsigned long preempt_disable_ip = get_preempt_disable_ip(current);
if (oops_in_progress)
return;
printk(KERN_ERR "BUG: scheduling while atomic: %s/%d/0x%08x\n",
prev->comm, prev->pid, preempt_count());
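/* ... remainder elided: the full function also reports where preemption was disabled and calls dump_stack() ... */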
}
This is what prints the "BUG: scheduling while atomic" message, along with the name, pid and preempt count of the current process.
The check itself is:
#define in_atomic_preempt_off() (preempt_count() != PREEMPT_DISABLE_OFFSET)
That is, it tests whether preempt_count() is exactly PREEMPT_DISABLE_OFFSET, i.e. whether the only contribution to the count is the single preempt_disable() the scheduler itself takes before calling __schedule(). Any additional count means the task was already in an atomic context when it tried to schedule.
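For reference, PREEMPT_DISABLE_OFFSET comes from include/linux/preempt.h, where it is defined along these lines (it is only zero when CONFIG_PREEMPT_COUNT is disabled):
#if defined(CONFIG_PREEMPT_COUNT)
# define PREEMPT_DISABLE_OFFSET PREEMPT_OFFSET
#else
# define PREEMPT_DISABLE_OFFSET 0
#endif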
So why is the region between spin_lock_bh() and spin_unlock_bh() an atomic context?
static __always_inline void spin_lock_bh(spinlock_t *lock)
{
raw_spin_lock_bh(&lock->rlock);
}
#define raw_spin_lock_bh(lock) _raw_spin_lock_bh(lock)
void __lockfunc _raw_spin_lock_bh(raw_spinlock_t *lock)
{
__raw_spin_lock_bh(lock);
}
static inline void __raw_spin_lock_bh(raw_spinlock_t *lock)
{
	__local_bh_disable_ip(_RET_IP_, SOFTIRQ_LOCK_OFFSET);
	spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
	LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock);
}
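The call that matters is __local_bh_disable_ip(). In the common configuration (CONFIG_TRACE_IRQFLAGS disabled) it is essentially just a preempt-count update, along the lines of include/linux/bottom_half.h:
static __always_inline void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
{
	preempt_count_add(cnt);
	barrier();
}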
So spin_lock_bh() adds SOFTIRQ_LOCK_OFFSET to the preempt count, in the same way that an ordinary preempt_disable() adds one:
#define preempt_disable() \
do { \
preempt_count_inc(); \
barrier(); \
} while (0)
preempt_count_inc() is simply preempt_count_add(1). Either way, once the count has been raised, preempt_count() is no longer equal to the bare PREEMPT_DISABLE_OFFSET that schedule_debug() expects, in_atomic_preempt_off() evaluates to true, and the kernel prints "BUG: scheduling while atomic".
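To make the arithmetic concrete, here is a small standalone sketch (userspace C, not kernel code) that mimics the preempt-count bookkeeping on this path. The constant values are assumptions that mirror the usual include/linux/preempt.h layout with CONFIG_PREEMPT_COUNT enabled:

#include <stdio.h>

/* assumed values, mirroring include/linux/preempt.h with CONFIG_PREEMPT_COUNT=y */
#define PREEMPT_OFFSET          (1UL << 0)
#define SOFTIRQ_OFFSET          (1UL << 8)
#define SOFTIRQ_DISABLE_OFFSET  (2 * SOFTIRQ_OFFSET)
#define PREEMPT_DISABLE_OFFSET  PREEMPT_OFFSET
#define SOFTIRQ_LOCK_OFFSET     (SOFTIRQ_DISABLE_OFFSET + PREEMPT_DISABLE_OFFSET)

static unsigned long preempt_count; /* stands in for the real per-task/per-cpu counter */

int main(void)
{
	printf("process context:      0x%03lx\n", preempt_count);

	/* spin_lock_bh() -> __local_bh_disable_ip(_RET_IP_, SOFTIRQ_LOCK_OFFSET) */
	preempt_count += SOFTIRQ_LOCK_OFFSET;
	printf("after spin_lock_bh(): 0x%03lx\n", preempt_count);

	/* msleep() -> schedule(); the scheduler disables preemption once itself */
	preempt_count += PREEMPT_DISABLE_OFFSET;
	printf("inside __schedule():  0x%03lx\n", preempt_count);

	/* schedule_debug(): in_atomic_preempt_off() */
	if (preempt_count != PREEMPT_DISABLE_OFFSET)
		printf("BUG: scheduling while atomic (0x%03lx != 0x%03lx)\n",
		       preempt_count, (unsigned long)PREEMPT_DISABLE_OFFSET);

	return 0;
}

Had the lock been dropped before anything that can sleep, the count seen by schedule_debug() would have been exactly PREEMPT_DISABLE_OFFSET and no warning would fire.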
This post has looked at what goes wrong when a sleeping function is called from an atomic context in the Linux kernel, and analyzed in detail why the region between spin_lock_bh() and spin_unlock_bh() is atomic and how that triggers the "BUG: scheduling while atomic" report.