网络设备对象的注册与注销

linux网络设备接口层 专栏收录该内容
25 篇文章 1 订阅


每个网络设备对象由各自的驱动程序维护,但是驱动程序只有将网络设备对象(struct net_device)注册给系统,系统才能感知到它的存在。这篇笔记分析了网络设备对象的注册与去注册流程。

注册: register_netdev()

分配好net_device对象并进行初始化后,驱动程序就可以通过register_netdev()向系统注册该网络设备对象了。

/**
 *	register_netdev	- register a network device
 *	@dev: device to register
 *
 *	Take a completed network device structure and add it to the kernel
 *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
 *	chain. 0 is returned on success. A negative errno code is returned
 *	on a failure to set up the device, or if the name is a duplicate.
 *
 *	This is a wrapper around register_netdevice that takes the rtnl semaphore
 *	and expands the device name if you passed a format string to
 *	alloc_netdev.
 */
int register_netdev(struct net_device *dev)
{
	int rc = 0;

	rtnl_lock();

	/*
	 * A '%' in the name means the driver passed a format string such as
	 * "eth%d"; dev_alloc_name() expands it into a concrete name
	 * ("eth0", "eth1", ...). A negative result aborts the registration.
	 */
	if (strchr(dev->name, '%') != NULL)
		rc = dev_alloc_name(dev, dev->name);

	/* The actual registration only runs if name expansion did not fail. */
	if (rc >= 0)
		rc = register_netdevice(dev);

	rtnl_unlock();
	return rc;
}

Linux内核使用互斥锁rtnl_mutex将系统中所有网络设备的注册与去注册操作串行化,代码如下:

/* The mutex behind rtnl_lock() / __rtnl_unlock() / rtnl_unlock(). */
static DEFINE_MUTEX(rtnl_mutex);

/* Acquire rtnl_mutex. */
void rtnl_lock(void)
{
	mutex_lock(&rtnl_mutex);
}

/*
 * Release rtnl_mutex and nothing else: unlike rtnl_unlock(), this does not
 * call netdev_run_todo().
 */
void __rtnl_unlock(void)
{
	mutex_unlock(&rtnl_mutex);
}

/*
 * rtnl_unlock - release the RTNL mutex and process deferred unregistrations.
 *
 * The mutex is deliberately not unlocked here. netdev_run_todo() has to be
 * entered with rtnl_mutex still held so that it can take its snapshot of
 * net_todo_list, and it then drops the mutex itself via __rtnl_unlock().
 * Unlocking here as well would release rtnl_mutex twice and let the
 * snapshot race with concurrent unregister_netdevice() callers.
 */
void rtnl_unlock(void)
{
	/* netdev_run_todo() unlocks rtnl_mutex for us. */
	netdev_run_todo();
}

register_netdevice()

/*
 * register_netdevice - register a network device; caller holds the RTNL mutex.
 * @dev: device to register; its reg_state must be NETREG_UNINITIALIZED
 *
 * Returns 0 on success. On failure returns the error from ndo_init()
 * (positive values are mapped to -EIO), -EINVAL for an invalid name,
 * -EEXIST for a duplicate name, the error from netdev_register_kobject(),
 * or the errno derived from the NETDEV_REGISTER notifier chain.
 */
int register_netdevice(struct net_device *dev)
{
	struct hlist_head *head;
	struct hlist_node *p;
	int ret;
	struct net *net = dev_net(dev);
	
	// The device interface layer must be past its boot phase
	// (per the original note: net_dev_init() has already completed)
	BUG_ON(dev_boot_phase);
	ASSERT_RTNL(); // the caller must hold the RTNL mutex

	might_sleep();

	// Only a device in the UNINITIALIZED state (the state of a freshly
	// allocated net_device) may be registered
	BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
	BUG_ON(!net);

	// Initialize the address-list lock and the queue locks
	spin_lock_init(&dev->addr_list_lock);
	netdev_set_addr_lockdep_class(dev);
	netdev_init_queue_locks(dev);

	// -1 marks iflink as "not set"; ndo_init() below may assign it,
	// otherwise it defaults to ifindex further down
	dev->iflink = -1;

	// Call the driver's optional ndo_init() hook
	if (dev->netdev_ops->ndo_init) {
		ret = dev->netdev_ops->ndo_init(dev);
		if (ret) {
			// Normalize a positive return value to a negative errno
			if (ret > 0)
				ret = -EIO;
			goto out;
		}
	}
	// Validate the device name
	if (!dev_valid_name(dev->name)) {
		ret = -EINVAL;
		goto err_uninit;
	}
	// Allocate the interface index
	dev->ifindex = dev_new_index(net);
	if (dev->iflink == -1)
		dev->iflink = dev->ifindex;

	// Reject the device if another one in the same name-hash bucket
	// already uses this name
	head = dev_name_hash(net, dev->name);
	hlist_for_each(p, head) {
		struct net_device *d = hlist_entry(p, struct net_device, name_hlist);
		if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
			ret = -EEXIST;
			goto err_uninit;
		}
	}

	/* Fix illegal checksum combinations */
	if ((dev->features & NETIF_F_HW_CSUM) &&
	    (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
		printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n",
		       dev->name);
		dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
	}

	if ((dev->features & NETIF_F_NO_CSUM) &&
	    (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
		printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n",
		       dev->name);
		dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
	}

	dev->features = netdev_fix_features(dev->features, dev->name);

	/* Enable software GSO if SG is supported. */
	if (dev->features & NETIF_F_SG)
		dev->features |= NETIF_F_GSO;

	// Register the device with the device model (per the original note:
	// this makes its directory appear under /sys/class/net in user space)
	netdev_initialize_kobject(dev);
	ret = netdev_register_kobject(dev);
	if (ret)
		goto err_uninit;
	// Mark the device as registered
	dev->reg_state = NETREG_REGISTERED;

	/*
	 *	Default initial state at registry is that the
	 *	device is present.
	 */
	// Set the initial link state bit to "PRESENT"
	set_bit(__LINK_STATE_PRESENT, &dev->state);
	// Initialize the transmit queueing discipline (traffic control)
	dev_init_scheduler(dev);
	// Take the reference that belongs to the registration itself; it is
	// dropped by the dev_put() at the end of rollback_registered()
	dev_hold(dev);
	// Link the device into the system-wide tables (per the original note:
	// the name table, the index table and the device list)
	list_netdevice(dev);

	// Notify other subsystems of the NETDEV_REGISTER event
	ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
	ret = notifier_to_errno(ret);
	// If a notifier reported an error, undo the registration steps above
	// and mark the device as UNREGISTERED
	if (ret) {
		rollback_registered(dev);
		dev->reg_state = NETREG_UNREGISTERED;
	}

out:
	return ret;
err_uninit:
	// Failure after ndo_init(): give the driver a chance to undo it
	if (dev->netdev_ops->ndo_uninit)
		dev->netdev_ops->ndo_uninit(dev);
	goto out;
}

注销: unregister_netdev()

当驱动程序需要将网络设备对象从系统中移除时,可以调用unregister_netdev()来完成:

/**
 *	unregister_netdev - remove device from the kernel
 *	@dev: device
 *
 *	This function shuts down a device interface and removes it
 *	from the kernel tables.
 *
 *	This is just a wrapper for unregister_netdevice that takes
 *	the rtnl semaphore.  In general you want to use this and not
 *	unregister_netdevice.
 */
void unregister_netdev(struct net_device *dev)
{
	// As with registration, the RTNL mutex must be held around the call
	rtnl_lock();
	unregister_netdevice(dev);
	rtnl_unlock();
}

/*
 * Unregister @dev. The caller must already hold the RTNL mutex
 * (checked by ASSERT_RTNL()).
 */
void unregister_netdevice(struct net_device *dev)
{
	ASSERT_RTNL();
	// Undo the work done at registration time
	rollback_registered(dev);
	// Queue the device on the global todo list; the list is processed by
	// netdev_run_todo(), which rtnl_unlock() calls
	net_set_todo(dev);
}

// The global net_todo_list holds unregistered devices whose final cleanup
// is deferred to netdev_run_todo(); net_todo_list_lock guards insertions.
static DEFINE_SPINLOCK(net_todo_list_lock);
static LIST_HEAD(net_todo_list);

/* Append @dev to the tail of net_todo_list under net_todo_list_lock. */
static void net_set_todo(struct net_device *dev)
{
	spin_lock(&net_todo_list_lock);
	list_add_tail(&dev->todo_list, &net_todo_list);
	spin_unlock(&net_todo_list_lock);
}

回滚注册:rollback_registered()

核心的网络设备对象注销工作均由rollback_registered()完成。

/*
 * Undo the effects of register_netdevice() on @dev and move it from
 * NETREG_REGISTERED to NETREG_UNREGISTERING. Must be called with the RTNL
 * mutex held. A device that was never registered triggers a warning and
 * is left untouched.
 */
static void rollback_registered(struct net_device *dev)
{
	// Preconditions: 1) the device interface layer is past its boot phase;
	// 2) the RTNL mutex is held
	BUG_ON(dev_boot_phase);
	ASSERT_RTNL();

	// A device that is still UNINITIALIZED (per the original note: only
	// alloc_netdev() has been run on it) cannot be unregistered
	if (dev->reg_state == NETREG_UNINITIALIZED) {
		printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
				  "was registered\n", dev->name, dev);
		WARN_ON(1);
		return;
	}
	BUG_ON(dev->reg_state != NETREG_REGISTERED);

	// The device may still be UP; close it first
	dev_close(dev);

	// Remove the device from the system-wide tables (per the original
	// note: the name table, the index table and the device list)
	unlist_netdevice(dev);
	// Mark the device as UNREGISTERING, i.e. unregistration in progress
	dev->reg_state = NETREG_UNREGISTERING;
	// Synchronize with other CPUs before tearing the device down further
	synchronize_net();
	// Shut down the transmit queues
	dev_shutdown(dev);

	// Send the NETDEV_UNREGISTER notification to other kernel subsystems
	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);

	// Flush the unicast and multicast chains
	dev_addr_discard(dev);

	// Call the driver's optional ndo_uninit() hook
	if (dev->netdev_ops->ndo_uninit)
		dev->netdev_ops->ndo_uninit(dev);

	/* Notifier chain MUST detach us from master device. */
	WARN_ON(dev->master);

	// Remove the object from the unified device model
	netdev_unregister_kobject(dev);
	synchronize_net();

	// Drop the reference taken by dev_hold() in register_netdevice()
	dev_put(dev);
}

清理net_todo_list: netdev_run_todo()

unregister_netdevice()末尾会调用net_set_todo(),将待去注册的设备对象添加到全局的net_todo_list中;对该链表的处理是在rtnl_unlock()中通过调用netdev_run_todo()完成的。

/* The sequence is:
 *
 *	rtnl_lock();
 *	...
 *	register_netdevice(x1);
 *	register_netdevice(x2);
 *	...
 *	unregister_netdevice(y1);
 *	unregister_netdevice(y2);
 *      ...
 *	rtnl_unlock();
 *	free_netdev(y1);
 *	free_netdev(y2);
 *
 * We are invoked by rtnl_unlock().
 * This allows us to deal with problems:
 * 1) We can delete sysfs objects which invoke hotplug
 *    without deadlocking with linkwatch via keventd.
 * 2) Since we run with the RTNL semaphore not held, we can sleep
 *    safely in order to wait for the netdev refcnt to drop to zero.
 *
 * We must not return until all unregister events added during
 * the interval the lock was held have been completed.
 */
void netdev_run_todo(void)
{
	struct list_head list;

	// Move the pending entries from net_todo_list onto the local list,
	// leaving net_todo_list empty. This keeps the time the mutex is held
	// short: the slow work below runs on the snapshot.
	list_replace_init(&net_todo_list, &list);

	// The snapshot is taken, so the global mutex can be released now
	__rtnl_unlock();

	// Walk the snapshot and finish the cleanup of each device
	while (!list_empty(&list)) {
		struct net_device *dev = list_entry(list.next, struct net_device, todo_list);
		list_del(&dev->todo_list);
		// A device on this list is expected to be in the UNREGISTERING
		// state; anything else is reported and skipped
		if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
			printk(KERN_ERR "network todo '%s' but state %d\n",
			       dev->name, dev->reg_state);
			dump_stack();
			continue;
		}
		// Mark unregistration as complete
		dev->reg_state = NETREG_UNREGISTERED;
		// Run flush_backlog for this device on every CPU
		on_each_cpu(flush_backlog, dev, 1);
		// Wait until every reference to this device has been released
		netdev_wait_allrefs(dev);

		/* paranoia */
		BUG_ON(atomic_read(&dev->refcnt));
		WARN_ON(dev->ip_ptr);
		WARN_ON(dev->ip6_ptr);
		WARN_ON(dev->dn_ptr);

		// Call the optional destructor() callback set on the device
		if (dev->destructor)
			dev->destructor(dev);
		// Drop the device-model reference on the embedded kobject
		kobject_put(&dev->dev.kobj);
	}
}

等待引用计数清零: netdev_wait_allrefs()

去注册网络设备对象时,设备接口层框架必须负责通知到所有引用了该网络设备对象的模块,为了实现这一目标,需要设备接口层代码和引用该网络设备对象的模块互相配合:

  1. 外部模块监听设备接口层的网络设备通知事件,在设备注册时用dev_hold()持有引用,在设备去注册时用dev_put()释放引用;
  2. 设备接口层代码在去注册过程中使用netdev_wait_allrefs()等待所有外部模块释放引用计数。
/*
 * netdev_wait_allrefs - wait until all references are gone.
 *
 * This is called when unregistering network devices.
 *
 * Any protocol or device that holds a reference should register
 * for netdevice notification, and cleanup and put back the
 * reference if they receive an UNREGISTER event.
 * We can get stuck here if buggy protocols don't correctly
 * call dev_put.
 */
static void netdev_wait_allrefs(struct net_device *dev)
{
	unsigned long rebroadcast_time, warning_time;

	rebroadcast_time = warning_time = jiffies;
	// Loop until the device's reference count has dropped to 0
	while (atomic_read(&dev->refcnt) != 0) {
		// Once more than 1s has passed since the last broadcast, resend
		// NETDEV_UNREGISTER to prompt other subsystems to drop their
		// references to the device
		if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
			rtnl_lock();
			/* Rebroadcast unregister notification */
			call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
			// Link-state (linkwatch) handling is covered in a separate note
			if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
				     &dev->state)) {
				/* We must not have linkwatch events
				 * pending on unregister. If this
				 * happens, we simply run the queue
				 * unscheduled, resulting in a noop
				 * for this device.
				 */
				linkwatch_run_queue();
			}
			__rtnl_unlock();
			rebroadcast_time = jiffies;
		}
		// Sleep for 250ms before checking the count again
    	msleep(250);
		// Once more than 10s has passed since the last warning, log
		// another one with the current usage count
		if (time_after(jiffies, warning_time + 10 * HZ)) {
			printk(KERN_EMERG "unregister_netdevice: "
			       "waiting for %s to become free. Usage "
			       "count = %d\n",
			       dev->name, atomic_read(&dev->refcnt));
			warning_time = jiffies;
		}
	}
}

由于netdev_wait_allrefs()会休眠等待,去注册过程可能会阻塞一段时间,因此禁止在原子上下文中调用unregister_netdev()或rtnl_unlock()。

网络设备注册状态的管理

从前面的代码分析中可以看出,在整个网络设备对象的分配、注册、去注册、释放过程中,dev->reg_state的取值非常关键,它控制了网络设备对象的整个生命周期流程,定义的注册状态有如下几种:

/* register/unregister state machine */
/* reg_state only moves forward through the values below, in this order. */
enum { NETREG_UNINITIALIZED=0,	/* required on entry to register_netdevice */
	   NETREG_REGISTERED,	/* completed register_netdevice */
	   NETREG_UNREGISTERING,/* called unregister_netdevice */
	   NETREG_UNREGISTERED,	/* completed unregister todo */
	   NETREG_RELEASED,		/* called free_netdev */
} reg_state;

在整个流程中,注册状态的变迁关系见下图:

(开始) --分配, setup()--> UNINITIALIZED
UNINITIALIZED --注册,ndo_init()--> REGISTERED
REGISTERED --去注册,ndo_uninit()--> UNREGISTERING
UNREGISTERING --destructor()--> UNREGISTERED
UNREGISTERED --释放--> RELEASED
  • 0
    点赞
  • 0
    评论
  • 4
    收藏
  • 打赏
    打赏
  • 扫一扫,分享海报

参与评论 您还未登录,请先 登录 后发表或查看评论
©️2022 CSDN 皮肤主题:创作都市 设计师:CSDN官方博客 返回首页

打赏作者

fanxiaoyu321

你的鼓励将是我创作的最大动力

¥2 ¥4 ¥6 ¥10 ¥20
输入1-500的整数
余额支付 (余额:-- )
扫码支付
扫码支付:¥2
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、C币套餐、付费专栏及课程。

余额充值