JJJ:unregister_netdev

This post walks through two key Linux kernel functions, unregister_netdev and unregister_netdevice_queue, which remove a network device from the kernel tables: shutting down the interface, tearing down its queues, cleaning up associated data, and notifying the protocol stack. Along the way it also covers device state management, workqueue operations, and the packet-processing path.


9362 /**
9363  *  unregister_netdev - remove device from the kernel
9364  *  @dev: device
9365  *
9366  *  This function shuts down a device interface and removes it
9367  *  from the kernel tables.
9368  *
9369  *  This is just a wrapper for unregister_netdevice that takes
9370  *  the rtnl semaphore.  In general you want to use this and not
9371  *  unregister_netdevice.
9372  */
9373 void unregister_netdev(struct net_device *dev)
9374 {
9375     rtnl_lock();
9376     unregister_netdevice(dev);
9377     rtnl_unlock();
9378 }
9379 EXPORT_SYMBOL(unregister_netdev);
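To see where this wrapper sits in a driver's lifecycle, here is a minimal, hedged sketch of the usual register/unregister pairing in a module's init/exit; everything prefixed mydrv_ is hypothetical:

#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>

static struct net_device *mydrv_dev;

static const struct net_device_ops mydrv_netdev_ops = {
	/* a real driver fills in ndo_open/ndo_stop/ndo_start_xmit here */
};

static int __init mydrv_init(void)
{
	int err;

	mydrv_dev = alloc_etherdev(0);  /* no private area, for brevity */
	if (!mydrv_dev)
		return -ENOMEM;

	mydrv_dev->netdev_ops = &mydrv_netdev_ops;

	err = register_netdev(mydrv_dev);  /* takes rtnl_lock internally, like unregister_netdev */
	if (err)
		free_netdev(mydrv_dev);
	return err;
}

static void __exit mydrv_exit(void)
{
	unregister_netdev(mydrv_dev);  /* the wrapper above: rtnl_lock + unregister_netdevice */
	free_netdev(mydrv_dev);        /* memory is released only after the todo list drains references */
}

module_init(mydrv_init);
module_exit(mydrv_exit);
MODULE_LICENSE("GPL");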


2665 static inline void unregister_netdevice(struct net_device *dev)
2666 {
2667     unregister_netdevice_queue(dev, NULL);
2668 }




9315 /**
9316  *  unregister_netdevice_queue - remove device from the kernel
9317  *  @dev: device
9318  *  @head: list
9319  *
9320  *  This function shuts down a device interface and removes it
9321  *  from the kernel tables.
9322  *  If head not NULL, device is queued to be unregistered later.
9323  *
9324  *  Callers must hold the rtnl semaphore.  You may want
9325  *  unregister_netdev() instead of this.
9326  */
9327
9328 void unregister_netdevice_queue(struct net_device *dev, struct list_head *head)
9329 {
9330     ASSERT_RTNL();
9331
9332     if (head) {
9333         list_move_tail(&dev->unreg_list, head);
9334     } else {
9335         rollback_registered(dev);
9336         /* Finish processing unregister after unlock */
9337         net_set_todo(dev);
9338     }
9339 }
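Note the head parameter: when it is non-NULL, the device is only moved onto the caller's list and nothing is torn down yet; the caller later hands the whole list to unregister_netdevice_many(). A hedged sketch of that batching pattern, as seen in rtnl_link_ops->dellink() paths (dev_a/dev_b are placeholders):

#include <linux/netdevice.h>
#include <linux/rtnetlink.h>

static void teardown_pair(struct net_device *dev_a, struct net_device *dev_b)
{
	LIST_HEAD(unreg_list);

	rtnl_lock();
	unregister_netdevice_queue(dev_a, &unreg_list);  /* just moves dev->unreg_list */
	unregister_netdevice_queue(dev_b, &unreg_list);
	unregister_netdevice_many(&unreg_list);  /* one rollback_registered_many() pass for the batch */
	rtnl_unlock();
}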



8259 static void rollback_registered(struct net_device *dev)
8260 {
8261     LIST_HEAD(single);
8262
8263     list_add(&dev->unreg_list, &single);
8264     rollback_registered_many(&single);
8265     list_del(&single);
8266 }



8171 static void rollback_registered_many(struct list_head *head)
8172 {
8173     struct net_device *dev, *tmp;
8174     LIST_HEAD(close_head);
8175
8176     BUG_ON(dev_boot_phase);
8177     ASSERT_RTNL();
8178
8179     list_for_each_entry_safe(dev, tmp, head, unreg_list) {
8180         /* Some devices call without registering
8181          * for initialization unwind. Remove those
8182          * devices and proceed with the remaining.
8183          */
8184         if (dev->reg_state == NETREG_UNINITIALIZED) {
8185             pr_debug("unregister_netdevice: device %s/%p never was registered\n",
8186                  dev->name, dev);
8187
8188             WARN_ON(1);
8189             list_del(&dev->unreg_list);
8190             continue;
8191         }
8192         dev->dismantle = true;
8193         BUG_ON(dev->reg_state != NETREG_REGISTERED);
8194     }
8195
8196     /* If device is running, close it first. */
8197     list_for_each_entry(dev, head, unreg_list)
8198         list_add_tail(&dev->close_list, &close_head);
8199     dev_close_many(&close_head, true);
8200
8201     list_for_each_entry(dev, head, unreg_list) {
8202         /* And unlink it from device chain. */
8203         unlist_netdevice(dev);
8204
8205         dev->reg_state = NETREG_UNREGISTERING;
8206     }
8207     flush_all_backlogs();
8208
8209     synchronize_net();
8210
8211     list_for_each_entry(dev, head, unreg_list) {
8212         struct sk_buff *skb = NULL;
8213
8214         /* Shutdown queueing discipline. */
8215         dev_shutdown(dev);
8216
8217         dev_xdp_uninstall(dev);
8218
8219         /* Notify protocols, that we are about to destroy
8220          * this device. They should clean all the things.
8221          */
8222         call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
8223
8224         if (!dev->rtnl_link_ops ||
8225             dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
8226             skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, 0,
8227                              GFP_KERNEL, NULL, 0);
8228
8229         /*
8230          *  Flush the unicast and multicast chains
8231          */
8232         dev_uc_flush(dev);   // flush dev's unicast hardware address list
8233         dev_mc_flush(dev);   // flush dev's multicast hardware address list
8234
8235         if (dev->netdev_ops->ndo_uninit)
8236             dev->netdev_ops->ndo_uninit(dev);
8237
8238         if (skb)
8239             rtmsg_ifinfo_send(skb, dev, GFP_KERNEL);
8240
8241         /* Notifier chain MUST detach us all upper devices. */
8242         WARN_ON(netdev_has_any_upper_dev(dev));
8243         WARN_ON(netdev_has_any_lower_dev(dev));
8244
8245         /* Remove entries from kobject tree */
8246         netdev_unregister_kobject(dev);
8247 #ifdef CONFIG_XPS
8248         /* Remove XPS queueing entries */
8249         netif_reset_xps_queues_gt(dev, 0);
8250 #endif
8251     }
8252
8253     synchronize_net();
8254
8255     list_for_each_entry(dev, head, unreg_list)
8256         dev_put(dev);
8257 }
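The call_netdevice_notifiers(NETDEV_UNREGISTER, dev) step above is how other subsystems learn that the device is going away. A minimal hypothetical module that subscribes to this event could look like the following sketch (demo_* names are invented):

#include <linux/module.h>
#include <linux/netdevice.h>

static int demo_netdev_event(struct notifier_block *nb,
			     unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);

	if (event == NETDEV_UNREGISTER)
		pr_info("demo: %s is being unregistered\n", dev->name);
	return NOTIFY_DONE;
}

static struct notifier_block demo_nb = {
	.notifier_call = demo_netdev_event,
};

static int __init demo_init(void)
{
	return register_netdevice_notifier(&demo_nb);
}

static void __exit demo_exit(void)
{
	unregister_netdevice_notifier(&demo_nb);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");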



1501 void dev_close_many(struct list_head *head, bool unlink)
1502 {
1503     struct net_device *dev, *tmp;
1504
1505     /* Remove the devices that don't need to be closed */
1506     list_for_each_entry_safe(dev, tmp, head, close_list)
1507         if (!(dev->flags & IFF_UP))
1508             list_del_init(&dev->close_list);
1509
1510     __dev_close_many(head);
1511
1512     list_for_each_entry_safe(dev, tmp, head, close_list) {
1513         rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING, GFP_KERNEL);
1514         call_netdevice_notifiers(NETDEV_DOWN, dev);
1515         if (unlink)
1516             list_del_init(&dev->close_list);
1517     }
1518 }




1448 static void __dev_close_many(struct list_head *head)
1449 {
1450     struct net_device *dev;
1451
1452     ASSERT_RTNL();
1453     might_sleep();
1454
1455     list_for_each_entry(dev, head, close_list) {
1456         /* Temporarily disable netpoll until the interface is down */
1457         netpoll_poll_disable(dev);
1458
1459         call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
1460
1461         clear_bit(__LINK_STATE_START, &dev->state);
1462
1463         /* Synchronize to scheduled poll. We cannot touch poll list, it
1464          * can be even on different cpu. So just clear netif_running().
1465          *
1466          * dev->stop() will invoke napi_disable() on all of its
1467          * napi_struct instances on this device.
1468          */
1469         smp_mb__after_atomic(); /* Commit netif_running(). */
1470     }
1471
1472     dev_deactivate_many(head);
1473
1474     list_for_each_entry(dev, head, close_list) {
1475         const struct net_device_ops *ops = dev->netdev_ops;
1476
1477         /*
1478          *  Call the device specific close. This cannot fail.
1479          *  Only if device is UP
1480          *
1481          *  We allow it to be called even after a DETACH hot-plug
1482          *  event.
1483          */
1484         if (ops->ndo_stop)
1485             ops->ndo_stop(dev);
1486
1487         dev->flags &= ~IFF_UP;
1488         netpoll_poll_enable(dev);  // inverse of netpoll_poll_disable(): releases the semaphore taken there
1489     }
1490 }




202 // Responsible for temporarily disabling polling on a network device.
203 // Polling is normally used to check a device for pending packets periodically, without relying on interrupts.
204 void netpoll_poll_disable(struct net_device *dev)
205 {
206     struct netpoll_info *ni;
207     int idx;
208     might_sleep();
209     idx = srcu_read_lock(&netpoll_srcu);
210     ni = srcu_dereference(dev->npinfo, &netpoll_srcu);
211     if (ni)
212         down(&ni->dev_lock);  // the main effect: take (and hold) this per-device semaphore
213     srcu_read_unlock(&netpoll_srcu, idx);
214 }
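Its counterpart netpoll_poll_enable(), called at the end of __dev_close_many() above, simply releases that semaphore. Approximately (quoted from memory, so the exact body may differ slightly in this tree):

void netpoll_poll_enable(struct net_device *dev)
{
	struct netpoll_info *ni;

	rcu_read_lock();
	ni = rcu_dereference(dev->npinfo);
	if (ni)
		up(&ni->dev_lock);  /* release the semaphore netpoll_poll_disable() took */
	rcu_read_unlock();
}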




1218 /**
1219  *  dev_deactivate_many - deactivate transmissions on several devices
1220  *  @head: list of devices to deactivate
1221  *
1222  *  This function returns only when all outstanding transmissions
1223  *  have completed, unless all devices are in dismantle phase.
1224  */
1225 void dev_deactivate_many(struct list_head *head)  // heavily qdisc-related; I'll come back to it in detail after finishing part three
1226 {
1227     struct net_device *dev;
1228
1229     list_for_each_entry(dev, head, close_list) {
1230         netdev_for_each_tx_queue(dev, dev_deactivate_queue,
1231                      &noop_qdisc);
1232         if (dev_ingress_queue(dev))
1233             dev_deactivate_queue(dev, dev_ingress_queue(dev),
1234                          &noop_qdisc);
1235
1236         dev_watchdog_down(dev);
1237     }
1238
1239     /* Wait for outstanding qdisc-less dev_queue_xmit calls or
1240      * outstanding qdisc enqueuing calls.
1241      * This is avoided if all devices are in dismantle phase :
1242      * Caller will call synchronize_net() for us
1243      */
1244     synchronize_net();
1245
1246     list_for_each_entry(dev, head, close_list) {
1247         netdev_for_each_tx_queue(dev, dev_reset_queue, NULL);
1248
1249         if (dev_ingress_queue(dev))
1250             dev_reset_queue(dev, dev_ingress_queue(dev), NULL);
1251     }
1252
1253     /* Wait for outstanding qdisc_run calls. */
1254     list_for_each_entry(dev, head, close_list) {
1255         while (some_qdisc_is_busy(dev))
1256             yield();
1257         /* The new qdisc is assigned at this point so we can safely
1258          * unwind stale skb lists and qdisc statistics
1259          */
1260         netdev_for_each_tx_queue(dev, dev_qdisc_reset, NULL);
1261         if (dev_ingress_queue(dev))
1262             dev_qdisc_reset(dev, dev_ingress_queue(dev), NULL);
1263     }
1264 }




 83 // Returns the ingress queue of a network device (a struct net_device instance).
 84 static inline struct netdev_queue *dev_ingress_queue(struct net_device *dev)
 85 {
 86     return rtnl_dereference(dev->ingress_queue);
 87 }



In a thread-safe way, dev_deactivate_queue() deactivates the Qdisc attached to the given device queue and replaces it with the supplied default, for traffic-management purposes. Once deactivated, the queue no longer feeds new packets through the old Qdisc; traffic is handled according to the new default Qdisc instead.
1142 static void dev_deactivate_queue(struct net_device *dev,
1143                  struct netdev_queue *dev_queue,
1144                  void *_qdisc_default)
1145 {
1146     struct Qdisc *qdisc = rtnl_dereference(dev_queue->qdisc);
1147     struct Qdisc *qdisc_default = _qdisc_default;
1148
1149     if (qdisc) {
1150         if (!(qdisc->flags & TCQ_F_BUILTIN))
1151             set_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state);
1152
1153         rcu_assign_pointer(dev_queue->qdisc, qdisc_default);
1154     }
1155 }
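noop_qdisc, the replacement installed here, is safe precisely because its enqueue path drops every packet. Approximately (from sch_generic.c; may vary by kernel version):

static int noop_enqueue(struct sk_buff *skb, struct Qdisc *qdisc,
			struct sk_buff **to_free)
{
	__qdisc_drop(skb, to_free);  /* free the skb and account it as dropped */
	return NET_XMIT_CN;          /* tell the caller to back off */
}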





 497 static void dev_watchdog_down(struct net_device *dev)
 498 {
 499     netif_tx_lock_bh(dev);
 500     if (del_timer(&dev->watchdog_timer)) // set up in dev_init_scheduler(): timer_setup(&dev->watchdog_timer, dev_watchdog, 0);
 501         dev_put(dev);
 502     netif_tx_unlock_bh(dev);
 503 }





 437 static void dev_watchdog(struct timer_list *t)
 438 {
 439     struct net_device *dev = from_timer(dev, t, watchdog_timer);
 440
 441     netif_tx_lock(dev);
 442     if (!qdisc_tx_is_noop(dev)) {
 443         if (netif_device_present(dev) &&
 444             netif_running(dev) &&
 445             netif_carrier_ok(dev)) {
 446             int some_queue_timedout = 0;
 447             unsigned int i;
 448             unsigned long trans_start;
 449
 450             for (i = 0; i < dev->num_tx_queues; i++) { // walk each of the device's TX queues, checking for a timeout:
 451                 struct netdev_queue *txq;
 452
 453                 txq = netdev_get_tx_queue(dev, i);
 454                 trans_start = txq->trans_start;
 455                 if (netif_xmit_stopped(txq) &&
 456                     time_after(jiffies, (trans_start +
 457                              dev->watchdog_timeo))) {
 458                     some_queue_timedout = 1;
 459                     txq->trans_timeout++;
 460                     break;
 461                 }
 462             }
 463
 464             if (some_queue_timedout) {
 465                 WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit queue %u timed out\n",
 466                        dev->name, netdev_drivername(dev), i);
 467                 dev->netdev_ops->ndo_tx_timeout(dev); // at least one TX queue timed out: print a warning and call the driver's ndo_tx_timeout callback so it can handle the event
 468             }
 469             if (!mod_timer(&dev->watchdog_timer, // timed out or not, re-arm the watchdog to fire again watchdog_timeo jiffies from now
 470                        round_jiffies(jiffies +
 471                              dev->watchdog_timeo)))
 472                 dev_hold(dev);
 473         }
 474     }
 475     netif_tx_unlock(dev);
 476
 477     dev_put(dev);
 478 }
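On the driver side, the ndo_tx_timeout callback invoked above is typically a last-resort reset hook. A hedged sketch of a hypothetical implementation (mydrv_* names are invented; newer kernels also pass the timed-out queue index as a second argument):

static void mydrv_tx_timeout(struct net_device *dev)
{
	struct mydrv_priv *priv = netdev_priv(dev);  /* hypothetical driver-private state */

	netdev_warn(dev, "TX watchdog timeout, scheduling reset\n");
	schedule_work(&priv->reset_work);  /* defer the heavy reset out of the timer path */
}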




flush_works is initialized in net_dev_init() with INIT_WORK(flush, flush_backlog);
5281 static void flush_all_backlogs(void)
5282 {
5283     unsigned int cpu;
5284
5285     get_online_cpus();
5286
5287     for_each_online_cpu(cpu)
5288         queue_work_on(cpu, system_highpri_wq,
5289                   per_cpu_ptr(&flush_works, cpu)); // queue the work items for asynchronous execution
5290
5291     for_each_online_cpu(cpu)
5292         flush_work(per_cpu_ptr(&flush_works, cpu)); // wait for the async work queued above to complete
5293
5294     put_online_cpus();
5295 }
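The shape of flush_all_backlogs() is a general kernel pattern: fan a per-CPU work item out to every online CPU, then join on each one. A hedged, self-contained sketch of the same pattern (demo_* names are invented; newer kernels spell get_online_cpus()/put_online_cpus() as cpus_read_lock()/cpus_read_unlock()):

#include <linux/cpu.h>
#include <linux/percpu.h>
#include <linux/workqueue.h>

static DEFINE_PER_CPU(struct work_struct, demo_works);

static void demo_fn(struct work_struct *work)
{
	pr_info("demo work on cpu %d\n", raw_smp_processor_id());
}

static void demo_run_on_all_cpus(void)
{
	unsigned int cpu;

	for_each_possible_cpu(cpu)
		INIT_WORK(per_cpu_ptr(&demo_works, cpu), demo_fn);

	get_online_cpus();  /* keep the set of online CPUs stable */
	for_each_online_cpu(cpu)
		queue_work_on(cpu, system_highpri_wq,
			      per_cpu_ptr(&demo_works, cpu));  /* fan out */
	for_each_online_cpu(cpu)
		flush_work(per_cpu_ptr(&demo_works, cpu));  /* join */
	put_online_cpus();
}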




1489 /**
1490  * queue_work_on - queue work on specific cpu
1491  * @cpu: CPU number to execute work on
1492  * @wq: workqueue to use
1493  * @work: work to queue
1494  *
1495  * We queue the work to a specific CPU, the caller must ensure it
1496  * can't go away.
1497  *
1498  * Return: %false if @work was already on a queue, %true otherwise.
1499  */
1500 bool queue_work_on(int cpu, struct workqueue_struct *wq,
1501                    struct work_struct *work)



2959 /**
2960  * flush_work - wait for a work to finish executing the last queueing instance
2961  * @work: the work to flush
2962  *
2963  * Wait until @work has finished execution.  @work is guaranteed to be idle
2964  * on return if it hasn't been requeued since flush started.
2965  *
2966  * Return:
2967  * %true if flush_work() waited for the work to finish execution,
2968  * %false if it was already idle.
2969  */
2970 bool flush_work(struct work_struct *work)




flush_backlog() walks two lists: the queue of incoming packets and the queue of packets being processed.
5250 /* Network device is going away, flush any packets still pending */
5251 static void flush_backlog(struct work_struct *work)
5252 {
5253     struct sk_buff *skb, *tmp;
5254     struct softnet_data *sd;
5255
5256     local_bh_disable();
5257     sd = this_cpu_ptr(&softnet_data);
5258
5259     local_irq_disable();
5260     rps_lock(sd);
5261     skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) { // input_pkt_queue holds incoming packets the network subsystem has not yet processed
5262         if (skb->dev->reg_state == NETREG_UNREGISTERING) {
5263             __skb_unlink(skb, &sd->input_pkt_queue);  // detach this skb from the input_pkt_queue list
5264             dev_kfree_skb_irq(skb); // kfree_skb() must not be called in interrupt context; this is the drop-in replacement
5265             input_queue_head_incr(sd);
5266         }
5267     }
5268     rps_unlock(sd);
5269     local_irq_enable();
5270
5271     skb_queue_walk_safe(&sd->process_queue, skb, tmp) { // process_queue holds packets that have already received initial processing and await further handling by higher layers (e.g. the upper protocol stack); during softirq processing, packets taken from input_pkt_queue may be moved here after initial handling
5272         if (skb->dev->reg_state == NETREG_UNREGISTERING) {
5273             __skb_unlink(skb, &sd->process_queue);
5274             kfree_skb(skb);
5275             input_queue_head_incr(sd);
5276         }
5277     }
5278     local_bh_enable();
5279 }






1911 static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list)
1912 {
1913     struct sk_buff *next, *prev;
1914
1915     WRITE_ONCE(list->qlen, list->qlen - 1);
1916     next       = skb->next;
1917     prev       = skb->prev;
1918     skb->next  = skb->prev = NULL;
1919     WRITE_ONCE(next->prev, prev);
1920     WRITE_ONCE(prev->next, next);
1921 }





/**
 * dev_kfree_skb_irq(skb) when caller drops a packet from irq context,
 *  replacing kfree_skb(skb)
 **/
3597 static inline void dev_kfree_skb_irq(struct sk_buff *skb)
3598 {
3599     __dev_kfree_skb_irq(skb, SKB_REASON_DROPPED);
3600 }
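__dev_kfree_skb_irq() is IRQ-safe because it defers the actual free: it parks the skb on the per-CPU softnet_data completion_queue and raises NET_TX_SOFTIRQ, so net_tx_action() frees it later in softirq context. A condensed, paraphrased sketch (refcount handling omitted, not verbatim kernel source):

static void __dev_kfree_skb_irq_sketch(struct sk_buff *skb)
{
	unsigned long flags;

	local_irq_save(flags);
	skb->next = __this_cpu_read(softnet_data.completion_queue);
	__this_cpu_write(softnet_data.completion_queue, skb);  /* park the skb per-CPU */
	raise_softirq_irqoff(NET_TX_SOFTIRQ);  /* net_tx_action() will free it */
	local_irq_restore(flags);
}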





1344 void dev_shutdown(struct net_device *dev)
1345 {
1346     netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc);
1347     if (dev_ingress_queue(dev))
1348         shutdown_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc);
1349     qdisc_put(dev->qdisc);
1350     dev->qdisc = &noop_qdisc;
1351
1352     WARN_ON(timer_pending(&dev->watchdog_timer));
1353 }





1329 static void shutdown_scheduler_queue(struct net_device *dev,
1330                      struct netdev_queue *dev_queue,
1331                      void *_qdisc_default)
1332 {
1333     struct Qdisc *qdisc = dev_queue->qdisc_sleeping;
1334     struct Qdisc *qdisc_default = _qdisc_default;
1335
1336     if (qdisc) {
1337         rcu_assign_pointer(dev_queue->qdisc, qdisc_default);
1338         dev_queue->qdisc_sleeping = qdisc_default;
1339
1340         qdisc_put(qdisc);
1341     }
1342 }




8165 static void net_set_todo(struct net_device *dev)
8166 {
8167     list_add_tail(&dev->todo_list, &net_todo_list);
8168     dev_net(dev)->dev_unreg_count++;
8169 }
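net_set_todo() only defers the final teardown: the device lands on net_todo_list, and rtnl_unlock() later runs netdev_run_todo() to finish the job. A heavily condensed, paraphrased sketch of what that function does (not verbatim kernel source):

static void netdev_run_todo_sketch(void)
{
	LIST_HEAD(list);

	list_replace_init(&net_todo_list, &list);  /* snapshot and clear the global list */

	while (!list_empty(&list)) {
		struct net_device *dev =
			list_first_entry(&list, struct net_device, todo_list);

		list_del(&dev->todo_list);
		dev->reg_state = NETREG_UNREGISTERED;

		netdev_wait_allrefs(dev);  /* block until all references are dropped */

		if (dev->needs_free_netdev)
			free_netdev(dev);  /* the memory finally goes away here */
	}
}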
