Linux网络设备之注销

本文解析了网络设备注销时PCI驱动unregister_netdev函数的工作原理,涉及回滚注册、设备关闭和net_todo_list处理等关键步骤。注销过程包括初步注销、设备关闭和最终注销通知,确保引用计数归零并释放资源。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

在注销网络设备时,会调用pci_driver->remove函数;以e100网卡驱动为例,实际调用的是e100_remove,该函数再调用unregister_netdev完成设备的注销操作;

函数调用关系图如下,注销分为两步:

(1) 回滚注册操作,进行初步注销,过程中如果设备尚未关闭,则需先调用dev_close_many将设备关闭;

(2) 将设备加入到net_todo_list,然后通过netdev_run_todo进行设备最终的注销,其中包括调用netdev_wait_allrefs来等待所有对设备的引用结束;

/**
 * 设备注销第一步,回滚注册调用关系
 * e100_remove
 *     |-->unregister_netdev
 *          |-->unregister_netdevice
 *                 |-->unregister_netdevice_queue
 *                        |-->rollback_registered
 *                        |       |-->rollback_registered_many
 *                        |              |-->dev_close_many-->__dev_close_many
 *                        |---->net_set_todo
 *
 * 设备注销第二步,todo_list处理调用关系
 * rtnl_unlock
 *     |-->netdev_run_todo
 *             |-->netdev_wait_allrefs
 */

unregister_netdev为注销设备的直接调用函数,其中在锁的包围之下,调用unregister_netdevice进行注销设备,而rtnl_unlock解锁函数调用时,也会触发对net_todo_list的处理;

/**
 *    unregister_netdev - remove device from the kernel
 *    @dev: device
 *
 *    This function shuts down a device interface and removes it
 *    from the kernel tables.
 *
 *    This is just a wrapper for unregister_netdevice that takes
 *    the rtnl semaphore.  In general you want to use this and not
 *    unregister_netdevice.
 */
void unregister_netdev(struct net_device *dev)
{
    /* Hold the rtnl lock for the duration of the unregister call. */
    rtnl_lock();
    unregister_netdevice(dev);
    /* Releasing the lock goes through rtnl_unlock(), which runs
     * netdev_run_todo() to process net_todo_list.
     */
    rtnl_unlock();
}
/* Unregister a single device right away: forwards to
 * unregister_netdevice_queue() with a NULL list head, so the device is
 * not queued on a caller-supplied list for later processing.
 */
static inline void unregister_netdevice(struct net_device *dev)
{
     unregister_netdevice_queue(dev, NULL);
}

unregister_netdevice_queue函数在head为NULL时执行两个操作:rollback_registered对注册过程进行回滚,net_set_todo将设备加入到net_todo_list;若head不为NULL,则只是把设备移到head链表的尾部,留待之后再统一注销;

/**
 *    unregister_netdevice_queue - remove device from the kernel
 *    @dev: device
 *    @head: list
 *
 *    This function shuts down a device interface and removes it
 *    from the kernel tables.
 *    If head not NULL, device is queued to be unregistered later.
 *
 *    Callers must hold the rtnl semaphore.  You may want
 *    unregister_netdev() instead of this.
 */

void unregister_netdevice_queue(struct net_device *dev, struct list_head *head)
{
    ASSERT_RTNL();

    if (head) {
        list_move_tail(&dev->unreg_list, head);
    }
    /*
        可见,下面注销分两个步骤:
        1. rollback_registered
        2. 加入todo_list,后续在rtnl_unlock中调用netdev_run_todo
    */
    else {
        /* 回滚注册 */
        rollback_registered(dev);
        /* Finish processing unregister after unlock */
        /* 设备的todolist加入到todolist中 */
        net_set_todo(dev);
    }
}

rollback_registered将设备加入到新链表中,然后调用回滚多个设备的函数;

/* Roll back the registration of a single device by wrapping it in a
 * temporary one-element list and handing that list to
 * rollback_registered_many().
 */
static void rollback_registered(struct net_device *dev)
{
    /* Declare the temporary list head "single". */
    LIST_HEAD(single);

    /* Link the device onto it through its unreg_list member. */
    list_add(&dev->unreg_list, &single);

    /* Perform the actual rollback on the list. */
    rollback_registered_many(&single);

    /* Unlink the temporary list head. */
    list_del(&single);
}

rollback_registered_many为核心的回滚注册函数,其会遍历传入设备链表,对每个设备进行注销操作,并且发送相关通知消息;

/* Roll back the registration of every device linked on @head through
 * its unreg_list member.
 *
 * Visible steps: drop never-registered devices from the list, mark the
 * rest as being dismantled, close them via dev_close_many(), unlink them
 * and set NETREG_UNREGISTERING, then per device shut down queueing, send
 * NETDEV_UNREGISTER, flush address lists, call ndo_uninit and remove the
 * kobject entries, and finally drop one reference per device.
 *
 * Must not run during boot phase (BUG_ON) and requires the rtnl lock
 * (ASSERT_RTNL).
 */
static void rollback_registered_many(struct list_head *head)
{
    struct net_device *dev, *tmp;
    LIST_HEAD(close_head);

    BUG_ON(dev_boot_phase);
    ASSERT_RTNL();

    list_for_each_entry_safe(dev, tmp, head, unreg_list) {
        /* Some devices call without registering
         * for initialization unwind. Remove those
         * devices and proceed with the remaining.
         */
        /* The device is still in the uninitialized state. */
        if (dev->reg_state == NETREG_UNINITIALIZED) {
            pr_debug("unregister_netdevice: device %s/%p never was registered\n",
                 dev->name, dev);

            WARN_ON(1);
            /* Drop this node from the list and continue with the next device. */
            list_del(&dev->unreg_list);
            continue;
        }

        /* Mark the device as being torn down. */
        dev->dismantle = true;
        BUG_ON(dev->reg_state != NETREG_REGISTERED);
    }

    /* If device is running, close it first. */
    /* Collect every device on a local close list; dev_close_many()
     * itself filters out the ones that are not up.
     */
    list_for_each_entry(dev, head, unreg_list)
        list_add_tail(&dev->close_list, &close_head);

    /* Close the devices; "true" asks for each one to be unlinked from close_head. */
    dev_close_many(&close_head, true);

    /* Unlink each device and move it to the UNREGISTERING state. */
    list_for_each_entry(dev, head, unreg_list) {
        /* And unlink it from device chain. */
        unlist_netdevice(dev);

        dev->reg_state = NETREG_UNREGISTERING;
    }

    /* Flush backlogs (helper defined elsewhere). */
    flush_all_backlogs();

    /* Wait in synchronize_net() — presumably an RCU grace period; helper not shown here. */
    synchronize_net();

    list_for_each_entry(dev, head, unreg_list) {
        struct sk_buff *skb = NULL;

        /* Shutdown queueing discipline. */
        dev_shutdown(dev);


        /* Notify protocols, that we are about to destroy
         * this device. They should clean all the things.
         */
        /* Send the NETDEV_UNREGISTER notification. */
        call_netdevice_notifiers(NETDEV_UNREGISTER, dev);

        /* Build the RTM_DELLINK message, but only when the device has no
         * rtnl_link_ops or its link state is RTNL_LINK_INITIALIZED.
         */
        if (!dev->rtnl_link_ops ||
            dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
            skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U,
                             GFP_KERNEL);

        /*
         *    Flush the unicast and multicast chains
         */
        dev_uc_flush(dev);
        dev_mc_flush(dev);

        /* Call the driver's optional uninit hook. */
        if (dev->netdev_ops->ndo_uninit)
            dev->netdev_ops->ndo_uninit(dev);

        /* Send the netlink message if one was built above. */
        if (skb)
            rtmsg_ifinfo_send(skb, dev, GFP_KERNEL);

        /* Notifier chain MUST detach us all upper devices. */
        WARN_ON(netdev_has_any_upper_dev(dev));
        WARN_ON(netdev_has_any_lower_dev(dev));

        /* Remove entries from kobject tree */
        netdev_unregister_kobject(dev);
#ifdef CONFIG_XPS
        /* Remove XPS queueing entries */
        netif_reset_xps_queues_gt(dev, 0);
#endif
    }

    /* Second synchronize_net() wait before references are dropped. */
    synchronize_net();

    /* Drop one reference on each device. */
    list_for_each_entry(dev, head, unreg_list)
        dev_put(dev);
}

注销过程中会调用dev_close_many:它先把未处于IFF_UP状态(即未运行)的设备从关闭链表中移除,再对其余仍在运行的设备执行关闭操作;

/* Close every running device on @head (linked through close_list).
 *
 * Devices without IFF_UP are dropped from the list first.  The rest are
 * closed via __dev_close_many(); afterwards an RTM_NEWLINK message and a
 * NETDEV_DOWN notification are issued for each of them.  When @unlink is
 * true each device is also removed from the list once notified.
 *
 * Always returns 0.
 */
int dev_close_many(struct list_head *head, bool unlink)
{
    struct net_device *cur, *next;

    /* Keep only the devices that are actually up. */
    list_for_each_entry_safe(cur, next, head, close_list) {
        if (cur->flags & IFF_UP)
            continue;
        list_del_init(&cur->close_list);
    }

    /* Bring the remaining devices down. */
    __dev_close_many(head);

    list_for_each_entry_safe(cur, next, head, close_list) {
        /* Netlink message first, then the NETDEV_DOWN notifier. */
        rtmsg_ifinfo(RTM_NEWLINK, cur, IFF_UP|IFF_RUNNING, GFP_KERNEL);
        call_netdevice_notifiers(NETDEV_DOWN, cur);

        /* Leave the device on the list unless the caller wants it unlinked. */
        if (!unlink)
            continue;
        list_del_init(&cur->close_list);
    }

    return 0;
}
/* Close every device on @head (linked through close_list).
 *
 * First pass: disable netpoll, send NETDEV_GOING_DOWN and clear the
 * __LINK_STATE_START bit for each device.  Then dev_deactivate_many() is
 * called on the whole list.  Second pass: call the driver's ndo_stop (if
 * set), clear IFF_UP and re-enable netpoll.
 *
 * Requires the rtnl lock (ASSERT_RTNL) and may sleep.  Always returns 0.
 */
static int __dev_close_many(struct list_head *head)
{
    struct net_device *dev;

    ASSERT_RTNL();
    might_sleep();

    list_for_each_entry(dev, head, close_list) {
        /* Temporarily disable netpoll until the interface is down */
        netpoll_poll_disable(dev);

        /* Notify listeners that the device is going down. */
        call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);

        /* Clear the START bit in the device state. */
        clear_bit(__LINK_STATE_START, &dev->state);

        /* Synchronize to scheduled poll. We cannot touch poll list, it
         * can be even on different cpu. So just clear netif_running().
         *
         * dev->stop() will invoke napi_disable() on all of it's
         * napi_struct instances on this device.
         */
        smp_mb__after_atomic(); /* Commit netif_running(). */
    }

    /* Deactivate the devices (helper defined elsewhere; presumably quiesces
     * their transmit queues — confirm against dev_deactivate_many()).
     */
    dev_deactivate_many(head);

    list_for_each_entry(dev, head, close_list) {
        const struct net_device_ops *ops = dev->netdev_ops;

        /*
         *    Call the device specific close. This cannot fail.
         *    Only if device is UP
         *
         *    We allow it to be called even after a DETACH hot-plug
         *    event.
         */
        /* Invoke the driver's stop hook when one is provided. */
        if (ops->ndo_stop)
            ops->ndo_stop(dev);

        /* Mark the device as down. */
        dev->flags &= ~IFF_UP;
        /* Re-enable netpoll. */
        netpoll_poll_enable(dev);
    }

    return 0;
}
/* NOTE(review): this definition is a verbatim repeat of the
 * __dev_close_many() listing immediately before it; it appears to be a
 * copy/paste duplicate and would be a redefinition if compiled.
 *
 * Close every device on @head (linked through close_list): disable
 * netpoll, send NETDEV_GOING_DOWN and clear __LINK_STATE_START for each
 * device, call dev_deactivate_many() on the list, then call ndo_stop (if
 * set), clear IFF_UP and re-enable netpoll.  Always returns 0.
 */
static int __dev_close_many(struct list_head *head)
{
    struct net_device *dev;

    ASSERT_RTNL();
    might_sleep();

    list_for_each_entry(dev, head, close_list) {
        /* Temporarily disable netpoll until the interface is down */
        netpoll_poll_disable(dev);

        /* Notify listeners that the device is going down. */
        call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);

        /* Clear the START bit in the device state. */
        clear_bit(__LINK_STATE_START, &dev->state);

        /* Synchronize to scheduled poll. We cannot touch poll list, it
         * can be even on different cpu. So just clear netif_running().
         *
         * dev->stop() will invoke napi_disable() on all of it's
         * napi_struct instances on this device.
         */
        smp_mb__after_atomic(); /* Commit netif_running(). */
    }

    /* Deactivate the devices (helper defined elsewhere; presumably quiesces
     * their transmit queues — confirm against dev_deactivate_many()).
     */
    dev_deactivate_many(head);

    list_for_each_entry(dev, head, close_list) {
        const struct net_device_ops *ops = dev->netdev_ops;

        /*
         *    Call the device specific close. This cannot fail.
         *    Only if device is UP
         *
         *    We allow it to be called even after a DETACH hot-plug
         *    event.
         */
        /* Invoke the driver's stop hook when one is provided. */
        if (ops->ndo_stop)
            ops->ndo_stop(dev);

        /* Mark the device as down. */
        dev->flags &= ~IFF_UP;
        /* Re-enable netpoll. */
        netpoll_poll_enable(dev);
    }

    return 0;
}

对于net_todo_list的处理,则是在解锁的时候进行,它将调用netdev_run_todo来处理todo_list;

/* Release the rtnl lock.  The release is delegated to netdev_run_todo(),
 * which calls __rtnl_unlock() itself and then processes net_todo_list.
 */
void rtnl_unlock(void)
{
    /* This fellow will unlock it for us. */    
     netdev_run_todo();
}

netdev_run_todo需要在等待所有对设备的引用结束之后,再对设备进行销毁; 

/* The sequence is:
 *
 *    rtnl_lock();
 *    ...
 *    register_netdevice(x1);
 *    register_netdevice(x2);
 *    ...
 *    unregister_netdevice(y1);
 *    unregister_netdevice(y2);
 *      ...
 *    rtnl_unlock();
 *    free_netdev(y1);
 *    free_netdev(y2);
 *
 * We are invoked by rtnl_unlock().
 * This allows us to deal with problems:
 * 1) We can delete sysfs objects which invoke hotplug
 *    without deadlocking with linkwatch via keventd.
 * 2) Since we run with the RTNL semaphore not held, we can sleep
 *    safely in order to wait for the netdev refcnt to drop to zero.
 *
 * We must not return until all unregister events added during
 * the interval the lock was held have been completed.
 */
void netdev_run_todo(void)
{
    struct list_head list;

    /* Snapshot list, allow later requests */
    list_replace_init(&net_todo_list, &list);

    __rtnl_unlock();


    /* Wait for rcu callbacks to finish before next phase */
    if (!list_empty(&list))
        rcu_barrier();

    /* Process the snapshot one device at a time until it is empty. */
    while (!list_empty(&list)) {
        struct net_device *dev
            = list_first_entry(&list, struct net_device, todo_list);

        /* Take the device off the snapshot list. */
        list_del(&dev->todo_list);

        /* Send NETDEV_UNREGISTER_FINAL with the rtnl lock re-taken. */
        rtnl_lock();
        call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
        __rtnl_unlock();

        /* Sanity check: only devices in UNREGISTERING state are expected
         * here; anything else is logged and skipped.
         */
        if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
            pr_err("network todo '%s' but state %d\n",
                   dev->name, dev->reg_state);
            dump_stack();
            continue;
        }

        /* Move the device to the UNREGISTERED state. */
        dev->reg_state = NETREG_UNREGISTERED;

        /* Block until every reference to the device has been dropped. */
        netdev_wait_allrefs(dev);

        /* paranoia */
        BUG_ON(netdev_refcnt_read(dev));
        BUG_ON(!list_empty(&dev->ptype_all));
        BUG_ON(!list_empty(&dev->ptype_specific));
        WARN_ON(rcu_access_pointer(dev->ip_ptr));
        WARN_ON(rcu_access_pointer(dev->ip6_ptr));
        WARN_ON(dev->dn_ptr);

        /* Run the driver's private destructor when one is set. */
        if (dev->priv_destructor)
            dev->priv_destructor(dev);

        /* Call free_netdev() only for devices flagged needs_free_netdev. */
        if (dev->needs_free_netdev)
            free_netdev(dev);

        /* Report a network device has been unregistered */
        rtnl_lock();
        dev_net(dev)->dev_unreg_count--;
        __rtnl_unlock();
        wake_up(&netdev_unregistering_wq);

        /* Free network device */
        /* Drop the reference on the embedded device kobject. */
        kobject_put(&dev->dev.kobj);
    }
}

netdev_wait_allrefs会循环等待,直到设备的引用计数降为0;等待期间会周期性地重新广播注销通知,并在长时间未归零时打印告警;

/**
 * netdev_wait_allrefs - wait until all references are gone.
 * @dev: target net_device
 *
 * This is called when unregistering network devices.
 *
 * Any protocol or device that holds a reference should register
 * for netdevice notification, and cleanup and put back the
 * reference if they receive an UNREGISTER event.
 * We can get stuck here if buggy protocols don't correctly
 * call dev_put.
 */
/* Loops until netdev_refcnt_read(dev) returns 0. */
static void netdev_wait_allrefs(struct net_device *dev)
{
    unsigned long rebroadcast_time, warning_time;
    int refcnt;

    /* Have linkwatch forget this device (helper defined elsewhere). */
    linkwatch_forget_dev(dev);

    rebroadcast_time = warning_time = jiffies;

    /* Read the current reference count. */
    refcnt = netdev_refcnt_read(dev);

    /* Keep waiting while references remain. */
    while (refcnt != 0) {
        /* Rebroadcast once more than 1 * HZ jiffies (one second) have
         * passed since the previous broadcast.
         */
        if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
            rtnl_lock();

            /* Rebroadcast unregister notification */
            call_netdevice_notifiers(NETDEV_UNREGISTER, dev);

            __rtnl_unlock();
            rcu_barrier();
            rtnl_lock();

            /* Follow up with the final-unregister notification. */
            call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);

            /* Handle a still-pending linkwatch event. */
            if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
                     &dev->state)) {
                /* We must not have linkwatch events
                 * pending on unregister. If this
                 * happens, we simply run the queue
                 * unscheduled, resulting in a noop
                 * for this device.
                 */
                linkwatch_run_queue();
            }

            __rtnl_unlock();

            /* Restart the rebroadcast interval from now. */
            rebroadcast_time = jiffies;
        }

        /* Sleep 250 ms between checks of the reference count. */
        msleep(250);

        /* Re-read the reference count. */
        refcnt = netdev_refcnt_read(dev);

        /* Emit a warning when more than 10 * HZ jiffies (ten seconds) have
         * passed since the last warning and the loop is still running.
         */
        if (time_after(jiffies, warning_time + 10 * HZ)) {
            pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n",
                 dev->name, refcnt);
            warning_time = jiffies;
        }
    }
}

评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值