Linux内核bridge结构体相关

STP相关先不管,以后有兴趣了再专门研究。

一、net_bridge、net_bridge_port、net_bridge_fdb_entry数据结构

1. 网桥设备net_bridge数据结构

/*
 * Private data of a bridge device. br_add_bridge() allocates it as the
 * net_device private area and br_dev_setup() reaches it via netdev_priv().
 */
struct net_bridge {	
	/* Bridge spinlock */
	spinlock_t			lock;
	
	/* Lock for the hash[] forwarding table below */
	spinlock_t			hash_lock;
	
	/* List of bridge ports (net_bridge_port entries, linked via ->list) */
	struct list_head		port_list;
	
	/* The bridge's own net_device */
	struct net_device		*dev;
	
	struct pcpu_sw_netstats		__percpu *stats;
	/* These fields are accessed on each packet */
#ifdef CONFIG_BRIDGE_VLAN_FILTERING
	u8				vlan_enabled;
	u8				vlan_stats_enabled;
	__be16				vlan_proto;
	u16				default_pvid;
	struct net_bridge_vlan_group	__rcu *vlgrp;
#endif
	
	/*
	 * Hash table of FDB entries: each bucket heads a list of
	 * net_bridge_fdb_entry (linked through its hlist member).
	 * This is the CAM / forwarding table.
	 */
	struct hlist_head		hash[BR_HASH_SIZE];
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
	union {
		struct rtable		fake_rtable;
		struct rt6_info		fake_rt6_info;
	};
	bool				nf_call_iptables;
	bool				nf_call_ip6tables;
	bool				nf_call_arptables;
#endif
	u16				group_fwd_mask;
	u16				group_fwd_mask_required;

	/* STP */
	bridge_id			designated_root;
	bridge_id			bridge_id;
	u32				root_path_cost;
	unsigned char			topology_change;
	unsigned char			topology_change_detected;
	u16				root_port;
	unsigned long			max_age;
	unsigned long			hello_time;
	unsigned long			forward_delay;
	unsigned long			ageing_time;
	unsigned long			bridge_max_age;
	unsigned long			bridge_hello_time;
	unsigned long			bridge_forward_delay;
	unsigned long			bridge_ageing_time;

	u8				group_addr[ETH_ALEN];
	bool				group_addr_set;

	enum {
		BR_NO_STP, 		/* no spanning tree */
		BR_KERNEL_STP,		/* old STP in kernel */
		BR_USER_STP,		/* new RSTP in userspace */
	} stp_enabled;

#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
	unsigned char			multicast_router;

	u8				multicast_disabled:1;
	u8				multicast_querier:1;
	u8				multicast_query_use_ifaddr:1;
	u8				has_ipv6_addr:1;
	u8				multicast_stats_enabled:1;

	u32				hash_elasticity;
	u32				hash_max;

	u32				multicast_last_member_count;
	u32				multicast_startup_query_count;

	u8				multicast_igmp_version;

	unsigned long			multicast_last_member_interval;
	unsigned long			multicast_membership_interval;
	unsigned long			multicast_querier_interval;
	unsigned long			multicast_query_interval;
	unsigned long			multicast_query_response_interval;
	unsigned long			multicast_startup_query_interval;

	spinlock_t			multicast_lock;
	/* mdb points to the bridge's multicast database (multicast forwarding table) */
	struct net_bridge_mdb_htable __rcu *mdb;
	struct hlist_head		router_list;

	struct timer_list		multicast_router_timer;
	struct bridge_mcast_other_query	ip4_other_query;
	struct bridge_mcast_own_query	ip4_own_query;
	struct bridge_mcast_querier	ip4_querier;
	struct bridge_mcast_stats	__percpu *mcast_stats;
#if IS_ENABLED(CONFIG_IPV6)
	struct bridge_mcast_other_query	ip6_other_query;
	struct bridge_mcast_own_query	ip6_own_query;
	struct bridge_mcast_querier	ip6_querier;
	u8				multicast_mld_version;
#endif /* IS_ENABLED(CONFIG_IPV6) */
#endif

	struct timer_list		hello_timer;
	struct timer_list		tcn_timer;
	struct timer_list		topology_change_timer;
	struct delayed_work		gc_work;
	struct kobject			*ifobj;
	u32				auto_cnt;

#ifdef CONFIG_NET_SWITCHDEV
	int offload_fwd_mark;
#endif
	bool				neigh_suppress_enabled;
};

 2. 网桥端口net_bridge_port数据结构

//net/bridge/br_private.h
/* One port of a bridge: ties an enslaved net_device to its owning net_bridge. */
struct net_bridge_port
{
	/* Bridge device this port belongs to */
	struct net_bridge		*br;
	
	/* Device that was added to the bridge */
	struct net_device		*dev;
	
	/* Node in the bridge's port list (net_bridge->port_list) */
	struct list_head		list;

	/* STP */
	u8				priority;/* port priority */
	u8				state;/* port state (BR_STATE_*) */
	u16				port_no;/* port number */
	unsigned char			topology_change_ack;
	unsigned char			config_pending;
	port_id				port_id;/* port ID, built from port priority and port number */
	port_id				designated_port;
	bridge_id			designated_root;
	bridge_id			designated_bridge;
	u32				path_cost;
	u32				designated_cost;
	unsigned long			designated_age;

	struct timer_list		forward_delay_timer;
	struct timer_list		hold_timer;
	struct timer_list		message_age_timer;
	struct kobject			kobj;
	struct rcu_head			rcu;
	/* Per-port feature flags (BR_LEARNING, BR_FLOOD, ...); new_nbp() sets the defaults */
	unsigned long 			flags;

#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
	struct bridge_mcast_own_query	ip4_own_query;
#if IS_ENABLED(CONFIG_IPV6)
	struct bridge_mcast_own_query	ip6_own_query;
#endif /* IS_ENABLED(CONFIG_IPV6) */
	unsigned char			multicast_router;
	struct timer_list		multicast_router_timer;
	struct hlist_head		mglist;
	struct hlist_node		rlist;
#endif

#ifdef CONFIG_SYSFS
	char				sysfs_name[IFNAMSIZ];
#endif

#ifdef CONFIG_NET_POLL_CONTROLLER
	struct netpoll			*np;
#endif
#ifdef CONFIG_BRIDGE_VLAN_FILTERING
	struct net_port_vlans __rcu	*vlan_info;
#endif
};

感觉就前面的三个br,dev,list有用。还有一个flags(老版本没有这个参数)。

(1). 关于flags参数:

参考:https://blog.csdn.net/sinat_20184565/article/details/80852155

网桥添加网络设备时,新接口的flags赋值为BR_LEARNING | BR_FLOOD,即新接口为自动状态接口

自动状态接口(AUTO_PORT):

内核中对自动状态接口定义如下:即设置了学习|单播洪泛的接口。

#define BR_AUTO_MASK        (BR_FLOOD | BR_LEARNING)

#define br_auto_port(p) ((p)->flags & BR_AUTO_MASK)

#define BR_HAIRPIN_MODE		BIT(0)
#define BR_BPDU_GUARD		BIT(1)
#define BR_ROOT_BLOCK		BIT(2)
#define BR_MULTICAST_FAST_LEAVE	BIT(3)
#define BR_ADMIN_COST		BIT(4)
#define BR_LEARNING		BIT(5)
#define BR_FLOOD		BIT(6)
#define BR_AUTO_MASK		(BR_FLOOD | BR_LEARNING)
#define BR_PROMISC		BIT(7)
#define BR_PROXYARP		BIT(8)
#define BR_LEARNING_SYNC	BIT(9)
#define BR_PROXYARP_WIFI	BIT(10)

/* called with RTNL but without bridge lock */
/*
 * Allocate and initialise a bridge port for @dev on bridge @br.
 *
 * Returns the new port, or an ERR_PTR(): the negative value returned by
 * find_portno(), or -ENOMEM when the allocation fails.
 */
static struct net_bridge_port *new_nbp(struct net_bridge *br,
				       struct net_device *dev)
{
	int index;
	struct net_bridge_port *p;

	/* Pick a port number; a negative result is propagated as the error. */
	index = find_portno(br);
	if (index < 0)
		return ERR_PTR(index);

	/* Zeroed allocation, so every field not set below starts at 0. */
	p = kzalloc(sizeof(*p), GFP_KERNEL);
	if (p == NULL)
		return ERR_PTR(-ENOMEM);

	p->br = br;
	/* presumably takes a reference on dev for as long as the port uses it */
	dev_hold(dev);
	p->dev = dev;
	p->path_cost = port_cost(dev);
	p->priority = 0x8000 >> BR_PORT_BITS;
	p->port_no = index;
	/* Default flags: learning + unicast flooding, i.e. an "auto" port. */
	p->flags = BR_LEARNING | BR_FLOOD;
	br_init_port(p);
	/* A new port starts in the disabled state. */
	br_set_state(p, BR_STATE_DISABLED);
	br_stp_port_timer_init(p);
	br_multicast_add_port(p);

	return p;
}

BR_LEARNING标志,在函数br_handle_frame_finish中判断,用来决定是否使用数据包的源MAC地址更新网桥的FDB转发表。

int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
    struct net_bridge_port *p = br_port_get_rcu(skb->dev);
    struct net_bridge *br;
 
    br = p->br;
    if (p->flags & BR_LEARNING)
        br_fdb_update(br, p, eth_hdr(skb)->h_source, vid, false);
}

BR_FLOOD标志用来控制是否在接口上泛洪单播数据包,如果没有设置此标志,不能在此接口泛洪单播包。

void br_flood(struct net_bridge *br, struct sk_buff *skb, ...)
{
    struct net_bridge_port *p;
 
    list_for_each_entry_rcu(p, &br->port_list, list) {
        switch (pkt_type) {
        case BR_PKT_UNICAST:
            if (!(p->flags & BR_FLOOD))
                continue;
            break;
}

(2). 端口状态:

//linux/uapi/linux/if_bridge.h
/* Values of net_bridge_port->state (STP port states). */
#define BR_STATE_DISABLED 0	/* no frames sent or received */
#define BR_STATE_LISTENING 1	/* BPDUs only; no data frames, no learning */
#define BR_STATE_LEARNING 2	/* BPDUs and address learning; no data frames */
#define BR_STATE_FORWARDING 3	/* BPDUs, data frames and address learning */
#define BR_STATE_BLOCKING 4	/* receives BPDUs only; no data frames, no learning */

关闭:不收发任何报文
阻塞:只能接收BPDU,不能发送BPDU,不能收发数据帧,不进行地址学习
监听:可以收发BPDU,不能收发数据帧,不进行地址学习
学习:可以收发BPDU,不收发数据帧,进行地址学习
转发:可以收发BPDU,可以收发数据帧,进行地址学习

记得在br_handle_frame_finish中用到了端口状态:

if (!p || p->state == BR_STATE_DISABLED)
    ...
if (p->state == BR_STATE_LEARNING)
    ...

3. net_bridge_fdb_entry数据结构

fdb:forward database

https://blog.csdn.net/qq_25077833/article/details/52834418

端口-MAC地址表

/* One forwarding-database (FDB) entry: maps a MAC address (and VLAN) to a bridge port. */
struct net_bridge_fdb_entry
{
	/* Node in one of the net_bridge hash[BR_HASH_SIZE] buckets */
	struct hlist_node		hlist;
	
	/* Bridge port this address is reached through */
	struct net_bridge_port		*dst;

	struct rcu_head			rcu;
	unsigned long			updated;
	/*
	 * NOTE(review): the original note called this a reference count; in
	 * upstream br_fdb.c it appears to be a jiffies timestamp of the last
	 * use of the entry. Confirm against the kernel version quoted.
	 */
	unsigned long			used;
	
	/* MAC address */
	mac_addr			addr;
	
	/* 1 if the MAC address is a local address */
	unsigned char			is_local:1,
	
	/* Static entry: the address never ages out; local addresses are always static. */
					is_static:1,
					added_by_user:1, /* configured by the user */
					added_by_external_learn:1; /* learned externally */
	__u16				vlan_id; /* VLAN the MAC address belongs to */
};

这个is_local选项在br_handle_frame_finish()函数中查找fdb转发表后会用到:如果查到的fdb表项的is_local=1,表示数据包的目的MAC是本机地址,属于本地接收,需要送往三层进行进一步处理。

二、网桥相关函数

1.br_init()

网桥初始化函数

桥接程序既可以集成在内核中,也可以编译成单独模块。初始化函数为br_init(),清理函数为br_deinit()函数。定义在/net/bridge/br.c中。就是个模块,br_init()为初始化函数,br_deinit()为exit函数。

/*
 * Module init for the bridge: registers each subsystem in turn and, on any
 * failure, unwinds the ones already registered in reverse order through the
 * err_out* labels. Returns 0 on success or the failing step's error code.
 */
static int __init br_init(void)
{
	int err;

	/* Compile-time check: br_input_skb_cb must fit in sk_buff's cb area. */
	BUILD_BUG_ON(sizeof(struct br_input_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb));
	
	/* Register the STP protocol handler */
	err = stp_proto_register(&br_stp_proto);
	if (err < 0) {
		pr_err("bridge: can't register sap for STP\n");
		return err;
	}
	
	/*
	 * Initialise the FDB subsystem (presumably sets up the allocator for
	 * FDB entries; br_fdb_init() itself is not shown here).
	 */
	err = br_fdb_init();
	if (err)
		goto err_out;
	
	/* Register the bridge as a per-network-namespace subsystem */
	err = register_pernet_subsys(&br_net_ops);
	if (err)
		goto err_out1;
	
	/* Bridge netfilter core initialisation */
	err = br_nf_core_init();
	if (err)
		goto err_out2;
	
	/* Register on the netdevice notifier chain */
	err = register_netdevice_notifier(&br_device_notifier);
	if (err)
		goto err_out3;
	
	/* Register on the netdev switch notifier chain */
	err = register_netdev_switch_notifier(&br_netdev_switch_notifier);
	if (err)
		goto err_out4;
	
	/* Netlink interface initialisation */
	err = br_netlink_init();
	if (err)
		goto err_out5;
	/*
	 * Install the deviceless bridge ioctl handler for user space
	 * (presumably by setting br_ioctl_hook, which sock_ioctl() calls for
	 * SIOCGIFBR/SIOCSIFBR/SIOCBRADDBR/SIOCBRDELBR).
	 */
	brioctl_set(br_ioctl_deviceless_stub);

#if IS_ENABLED(CONFIG_ATM_LANE)
	br_fdb_test_addr_hook = br_fdb_test_addr;
#endif

	pr_info("bridge: automatic filtering via arp/ip/ip6tables has been "
		"deprecated. Update your scripts to load br_netfilter if you "
		"need this.\n");

	return 0;

	/* Error unwinding: each label undoes the step registered just before the failing one. */
err_out5:
	unregister_netdev_switch_notifier(&br_netdev_switch_notifier);
err_out4:
	unregister_netdevice_notifier(&br_device_notifier);
err_out3:
	br_nf_core_fini();
err_out2:
	unregister_pernet_subsys(&br_net_ops);
err_out1:
	br_fdb_fini();
err_out:
	stp_proto_unregister(&br_stp_proto);
	return err;
}

(1). register_netdevice_notifier(&br_device_notifier);

https://www.cnblogs.com/3me-linux/p/6566750.html

注册netdevice通知链,网桥设备是建立在其他设备之上的,那些设备的状态(UP/DOWN),地址改变等消息,会影响网桥设备的内部数据结构,如端口表,FDB等,因此需要关注netdev_chain,这些事件的处理由br_device_event()完成。

(2)brioctl_set(br_ioctl_deviceless_stub);

http://blog.sina.com.cn/s/blog_67cc0c8f0101oh33.html

用户空间程序使用网桥相关的命令来调用ioctl函数时,内核依据命令所属的分类把它分派到compat_sock_ioctl_trans()函数;在compat_sock_ioctl_trans()函数中,再根据cmd类型调用相应函数,例如sock_ioctl()函数、dev_ifsioc()函数。在sock_ioctl()函数里,当ioctl命令为SIOCGIFBR、SIOCSIFBR、SIOCBRADDBR、SIOCBRDELBR时,会通过br_ioctl_hook调用br_ioctl_deviceless_stub()函数进行处理(该处理函数由br_init()中的brioctl_set()安装);在dev_ifsioc()函数中,调用设备的ndo_do_ioctl()函数,网桥对应的就是br_dev_ioctl()函数。

(3). compat_sock_ioctl_trans()函数

static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
			 unsigned int cmd, unsigned long arg)
{
	...
	switch (cmd) {
	case SIOCSIFBR:
	case SIOCGIFBR:
		return old_bridge_ioctl(argp);
	...
	case SIOCBRADDBR:
	case SIOCBRDELBR:
		return sock_ioctl(file, cmd, arg);

	
	case SIOCBRADDIF:
	case SIOCBRDELIF:
	...
		return dev_ifsioc(net, sock, cmd, argp);
	}

}

(4). sock_ioctl()函数

static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
{
	
		switch (cmd) {
		...
		case SIOCGIFBR:
		case SIOCSIFBR:
		case SIOCBRADDBR:
		case SIOCBRDELBR:
			err = -ENOPKG;
			if (!br_ioctl_hook)
				request_module("bridge");

			mutex_lock(&br_ioctl_mutex);
			if (br_ioctl_hook)
				err = br_ioctl_hook(net, cmd, argp);
			mutex_unlock(&br_ioctl_mutex);
			break;
		...
		}
	return err;
}

 (5). dev_ifsioc()函数

/*
 *	Perform the SIOCxIFxxx calls, inside rtnl_lock()
 */
static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
{
	...
	switch (cmd) {
	...
	/*
	 *	Unknown or private ioctl
	 */
	default:
		if ((cmd >= SIOCDEVPRIVATE &&
		    cmd <= SIOCDEVPRIVATE + 15) ||
			...
		    cmd == SIOCBRADDIF ||
		    cmd == SIOCBRDELIF ||
			...
		    cmd == SIOCWANDEV) {
			err = -EOPNOTSUPP;
			if (ops->ndo_do_ioctl) {
				if (netif_device_present(dev))
					err = ops->ndo_do_ioctl(dev, ifr, cmd);
				else
					err = -ENODEV;
			}
		} else
			err = -EINVAL;
	}
	return err;
}

(6). br_ioctl_deviceless_stub()函数


/*
 * Handle bridge ioctls that are not tied to an existing bridge device.
 *
 * @net:  network namespace the request applies to
 * @cmd:  ioctl command
 * @uarg: user-space argument; for SIOCBRADDBR/SIOCBRDELBR a bridge name
 *
 * Returns the result of the dispatched handler, -EPERM without
 * CAP_NET_ADMIN, -EFAULT if the name cannot be copied from user space,
 * or -EOPNOTSUPP for any other command.
 */
int br_ioctl_deviceless_stub(struct net *net, unsigned int cmd, void __user *uarg)
{
	switch (cmd) {
	case SIOCGIFBR:
	case SIOCSIFBR:
		return old_deviceless(net, uarg);

	case SIOCBRADDBR:
	case SIOCBRDELBR:
	{
		char buf[IFNAMSIZ];

		/* Creating/deleting a bridge needs CAP_NET_ADMIN in the namespace's user ns. */
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			return -EPERM;

		if (copy_from_user(buf, uarg, IFNAMSIZ))
			return -EFAULT;

		/* Force NUL termination: user space may pass an unterminated name. */
		buf[IFNAMSIZ-1] = 0;
		if (cmd == SIOCBRADDBR)
			return br_add_bridge(net, buf);

		return br_del_bridge(net, buf);
	}
	}
	return -EOPNOTSUPP;
}

 (7) . br_dev_ioctl()函数

/*
 * ioctl handler of a bridge device (installed as .ndo_do_ioctl in
 * br_netdev_ops).
 *
 * SIOCDEVPRIVATE goes to the legacy handler; SIOCBRADDIF/SIOCBRDELIF add or
 * remove the interface whose index is in rq->ifr_ifindex. Any other command
 * is logged and rejected with -EOPNOTSUPP.
 */
int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
{
	struct net_bridge *br = netdev_priv(dev);

	switch (cmd) {
	case SIOCDEVPRIVATE:
		return old_dev_ioctl(dev, rq, cmd);

	case SIOCBRADDIF:
	case SIOCBRDELIF:
		/* Third argument is true for add, false for delete. */
		return add_del_if(br, rq->ifr_ifindex, cmd == SIOCBRADDIF);

	}

	br_debug(br, "Bridge does not support ioctl 0x%x\n", cmd);
	return -EOPNOTSUPP;
}

2. br_add_bridge()

添加网桥函数。这个函数中调用alloc_netdev()分配网桥设备及私有数据内存,调用register_netdev()注册网桥设备。在alloc_netdev()函数中,调用了br_dev_setup()函数对新建立网桥进行初始化工作。

在alloc_netdev()中创建设备的时候,把参数name传递给dev->name,这个时候没有对name进行检验。

在register_netdev()注册设备的时候,对参数name进行校验,包括name合法性,name是否重复,name的形式是否为"前缀%d"形式,如果是,系统会根据前缀,顺序分配一个名称。

/*
 * Create and register a bridge device called @name in namespace @net.
 *
 * Returns 0 on success, -ENOMEM if the device cannot be allocated, or the
 * error from register_netdev() (in which case the device is freed again).
 */
int br_add_bridge(struct net *net, const char *name)
{
	struct net_device *dev;
	int res;

	/*
	 * Allocate the bridge net_device with struct net_bridge as its private
	 * area; br_dev_setup() is the setup callback passed to alloc_netdev().
	 */
	dev = alloc_netdev(sizeof(struct net_bridge), name, NET_NAME_UNKNOWN,
			   br_dev_setup);

	if (!dev)
		return -ENOMEM;
	/* Set the network namespace the device lives in */
	dev_net_set(dev, net);
	dev->rtnl_link_ops = &br_link_ops;
	
	/* Register the network device */
	res = register_netdev(dev);
	if (res)
		free_netdev(dev);
	return res;
}

(1). alloc_netdev宏定义

alloc_netdev():给网络设备分配空间,alloc_netdev()调用的是alloc_netdev_mqs()函数。

/* Allocate a net_device with a single TX queue and a single RX queue. */
#define alloc_netdev(sizeof_priv, name, name_assign_type, setup) \
	alloc_netdev_mqs(sizeof_priv, name, name_assign_type, setup, 1, 1)

sizeof_priv:网络设备私有数据大小
name:设备名称,或者“设备名前缀%d”形式,相同前缀的设备会进行统一编号,以确保设备名唯一
name_assign_type:设备名来源
setup:网络设备初始化函数,回调函数。

name_assign_type类型:

这几个类型什么意思,还不清楚。

/* interface name assignment types (sysfs name_assign_type attribute) */
#define NET_NAME_UNKNOWN	0	/* unknown origin (not exposed to userspace) */
/* Name was enumerated by the kernel */
#define NET_NAME_ENUM		1	/* enumerated by kernel */
#define NET_NAME_PREDICTABLE	2	/* predictably named by the kernel */
#define NET_NAME_USER		3	/* provided by user-space */
#define NET_NAME_RENAMED	4	/* renamed by user-space */

(2). alloc_netdev_mqs()函数:

alloc_netdev_mqs()函数,分配了net_device和私有数据的内存空间,对net_device进行了基本的初始化,并且分配了设备接收、发送队列空间。

txqs:分配的传输队列的数量
rxqs:分配的接收队列的数量

/**
 *	alloc_netdev_mqs - allocate network device
 *	@sizeof_priv:		size of private data to allocate space for
 *	@name:			device name format string
 *	@name_assign_type: 	origin of device name
 *	@setup:			callback to initialize device
 *	@txqs:			the number of TX subqueues to allocate
 *	@rxqs:			the number of RX subqueues to allocate
 *
 *	Allocates a struct net_device with private data area for driver use
 *	and performs basic initialization.  Also allocates subqueue structs
 *	for each queue on the device.
 */
struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
		unsigned char name_assign_type,
		void (*setup)(struct net_device *),
		unsigned int txqs, unsigned int rxqs)
{
	struct net_device *dev;
	size_t alloc_size;
	struct net_device *p;
	
	/* name must fit in dev->name including the terminating NUL */
	BUG_ON(strlen(name) >= sizeof(dev->name));
	
	/* At least one TX queue is required */
	if (txqs < 1) {
		pr_err("alloc_netdev: Unable to allocate device with zero queues\n");
		return NULL;
	}

#ifdef CONFIG_SYSFS
	if (rxqs < 1) {
		pr_err("alloc_netdev: Unable to allocate device with zero RX queues\n");
		return NULL;
	}
#endif
	
	/*
	 * Total size = sizeof(struct net_device) rounded up to NETDEV_ALIGN
	 * (32 bytes, not bits) plus the private data size.
	 */
	alloc_size = sizeof(struct net_device);
	if (sizeof_priv) {
		/* ensure 32-byte alignment of private area */
		/* Rounding net_device up to 32 bytes makes the private area 32-byte aligned too. */
		alloc_size = ALIGN(alloc_size, NETDEV_ALIGN);
		alloc_size += sizeof_priv;
	}
	/* ensure 32-byte alignment of whole construct */
	/*
	 * Reserve NETDEV_ALIGN - 1 extra bytes so the net_device address can
	 * be moved up to the next 32-byte boundary after allocation.
	 */
	alloc_size += NETDEV_ALIGN - 1;

	/* Try kzalloc first and fall back to vzalloc if it fails. */
	p = kzalloc(alloc_size, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
	if (!p)
		p = vzalloc(alloc_size);
	if (!p)
		return NULL;
	/*
	 * dev is p aligned up to a 32-byte boundary; dev->padded records the
	 * offset between the two so the real start address p can be recovered
	 * when the memory is released.
	 */
	dev = PTR_ALIGN(p, NETDEV_ALIGN);
	dev->padded = (char *)dev - (char *)p;

	dev->pcpu_refcnt = alloc_percpu(int);
	if (!dev->pcpu_refcnt)
		goto free_dev;

	if (dev_addr_init(dev))
		goto free_pcpu;

	dev_mc_init(dev);
	dev_uc_init(dev);

	/* Default namespace is init_net; callers such as br_add_bridge() override it afterwards. */
	dev_net_set(dev, &init_net);

	dev->gso_max_size = GSO_MAX_SIZE;
	dev->gso_max_segs = GSO_MAX_SEGS;
	dev->gso_min_segs = 0;

	INIT_LIST_HEAD(&dev->napi_list);
	INIT_LIST_HEAD(&dev->unreg_list);
	INIT_LIST_HEAD(&dev->close_list);
	INIT_LIST_HEAD(&dev->link_watch_list);
	INIT_LIST_HEAD(&dev->adj_list.upper);
	INIT_LIST_HEAD(&dev->adj_list.lower);
	INIT_LIST_HEAD(&dev->all_adj_list.upper);
	INIT_LIST_HEAD(&dev->all_adj_list.lower);
	INIT_LIST_HEAD(&dev->ptype_all);
	INIT_LIST_HEAD(&dev->ptype_specific);
	dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM;
	/* Device-type specific initialisation callback (e.g. br_dev_setup, ether_setup). */
	setup(dev);

	dev->num_tx_queues = txqs;
	dev->real_num_tx_queues = txqs;
	if (netif_alloc_netdev_queues(dev))
		goto free_all;

#ifdef CONFIG_SYSFS
	dev->num_rx_queues = rxqs;
	dev->real_num_rx_queues = rxqs;
	if (netif_alloc_rx_queues(dev))
		goto free_all;
#endif

	/* Copy name verbatim; it is only validated later, in register_netdevice(). */
	strcpy(dev->name, name);
	dev->name_assign_type = name_assign_type;
	dev->group = INIT_NETDEV_GROUP;
	if (!dev->ethtool_ops)
		dev->ethtool_ops = &default_ethtool_ops;
	return dev;

free_all:
	free_netdev(dev);
	return NULL;

free_pcpu:
	free_percpu(dev->pcpu_refcnt);
free_dev:
	netdev_freemem(dev);
	return NULL;
}

插入一句,最近在看代码的时候,创建设备的时候用的是alloc_etherdev()函数,功能和alloc_netdev()的一样,只是封装了一下:

alloc_etherdev()

/* Ethernet device with one TX and one RX queue. */
#define alloc_etherdev(sizeof_priv) alloc_etherdev_mq(sizeof_priv, 1)
/* Ethernet device with the same number (count) of TX and RX queues. */
#define alloc_etherdev_mq(sizeof_priv, count) alloc_etherdev_mqs(sizeof_priv, count, count)

/**
 * alloc_etherdev_mqs - Allocates and sets up an Ethernet device
 * @sizeof_priv: Size of additional driver-private structure to be allocated
 *	for this Ethernet device
 * @txqs: The number of TX queues this device has.
 * @rxqs: The number of RX queues this device has.
 *
 * Fill in the fields of the device structure with Ethernet-generic
 * values. Basically does everything except registering the device.
 *
 * Constructs a new net device, complete with a private data area of
 * size (sizeof_priv).  A 32-byte (not bit) alignment is enforced for
 * this private data area.
 */

struct net_device *alloc_etherdev_mqs(int sizeof_priv, unsigned int txqs,
				      unsigned int rxqs)
{
	/*
	 * Thin wrapper: the name template is "eth%d" and ether_setup() is the
	 * setup callback; the '%' makes dev_get_valid_name() pick the number.
	 */
	return alloc_netdev_mqs(sizeof_priv, "eth%d", NET_NAME_UNKNOWN,
				ether_setup, txqs, rxqs);
}

(3)register_netdevice 

这里只简单看一下name的赋值过程。


/**
 *	register_netdevice	- register a network device
 *	@dev: device to register
 *
 *	Take a completed network device structure and add it to the kernel
 *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
 *	chain. 0 is returned on success. A negative errno code is returned
 *	on a failure to set up the device, or if the name is a duplicate.
 *
 *	Callers must hold the rtnl semaphore. You may want
 *	register_netdev() instead of this.
 *
 *	BUGS:
 *	The locking appears insufficient to guarantee two parallel registers
 *	will not get the same name.
 */

int register_netdevice(struct net_device *dev)
{
	int ret;
	struct net *net = dev_net(dev);

	BUG_ON(dev_boot_phase);
	ASSERT_RTNL();

	might_sleep();

	/* When net_device's are persistent, this will be fatal. */
	BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
	BUG_ON(!net);

	spin_lock_init(&dev->addr_list_lock);
	netdev_set_addr_lockdep_class(dev);

	/*
	 * Name handling happens here: the name copied in by alloc_netdev_mqs()
	 * is validated, checked for duplicates and, if it contains '%',
	 * expanded to a concrete name.
	 */
	ret = dev_get_valid_name(net, dev, dev->name);
	if (ret < 0)
		goto out;

	/* Init, if this function is available */
	if (dev->netdev_ops->ndo_init) {
		ret = dev->netdev_ops->ndo_init(dev);
		if (ret) {
			/* Normalise a positive return value to a negative errno. */
			if (ret > 0)
				ret = -EIO;
			goto out;
		}
	}

	if (((dev->hw_features | dev->features) &
	     NETIF_F_HW_VLAN_CTAG_FILTER) &&
	    (!dev->netdev_ops->ndo_vlan_rx_add_vid ||
	     !dev->netdev_ops->ndo_vlan_rx_kill_vid)) {
		netdev_WARN(dev, "Buggy VLAN acceleration in driver!\n");
		ret = -EINVAL;
		goto err_uninit;
	}

	/* Allocate an ifindex if none is set; a preset one must not already be in use. */
	ret = -EBUSY;
	if (!dev->ifindex)
		dev->ifindex = dev_new_index(net);
	else if (__dev_get_by_index(net, dev->ifindex))
		goto err_uninit;

	/* Transfer changeable features to wanted_features and enable
	 * software offloads (GSO and GRO).
	 */
	dev->hw_features |= NETIF_F_SOFT_FEATURES;
	dev->features |= NETIF_F_SOFT_FEATURES;
	dev->wanted_features = dev->features & dev->hw_features;

	if (!(dev->flags & IFF_LOOPBACK)) {
		dev->hw_features |= NETIF_F_NOCACHE_COPY;
	}

	/* Make NETIF_F_HIGHDMA inheritable to VLAN devices.
	 */
	dev->vlan_features |= NETIF_F_HIGHDMA;

	/* Make NETIF_F_SG inheritable to tunnel devices.
	 */
	dev->hw_enc_features |= NETIF_F_SG;

	/* Make NETIF_F_SG inheritable to MPLS.
	 */
	dev->mpls_features |= NETIF_F_SG;

	ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
	ret = notifier_to_errno(ret);
	if (ret)
		goto err_uninit;

	ret = netdev_register_kobject(dev);
	if (ret)
		goto err_uninit;
	dev->reg_state = NETREG_REGISTERED;

	__netdev_update_features(dev);

	/*
	 *	Default initial state at registry is that the
	 *	device is present.
	 */

	set_bit(__LINK_STATE_PRESENT, &dev->state);

	linkwatch_init_dev(dev);

	dev_init_scheduler(dev);
	dev_hold(dev);
	list_netdevice(dev);
	add_device_randomness(dev->dev_addr, dev->addr_len);

	/* If the device has permanent device address, driver should
	 * set dev_addr and also addr_assign_type should be set to
	 * NET_ADDR_PERM (default value).
	 */
	if (dev->addr_assign_type == NET_ADDR_PERM)
		memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len);

	/* Notify protocols, that a new device appeared. */
	ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
	ret = notifier_to_errno(ret);
	if (ret) {
		rollback_registered(dev);
		dev->reg_state = NETREG_UNREGISTERED;
	}
	/*
	 *	Prevent userspace races by waiting until the network
	 *	device is fully setup before sending notifications.
	 */
	if (!dev->rtnl_link_ops ||
	    dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
		rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL);

out:
	return ret;

	/* Failure after ndo_init() was attempted: let the driver undo it via ndo_uninit(). */
err_uninit:
	if (dev->netdev_ops->ndo_uninit)
		dev->netdev_ops->ndo_uninit(dev);
	goto out;
}

 (4). dev_get_valid_name


/*
 * Validate @name and store the final device name in dev->name.
 *
 * Returns -EINVAL if dev_valid_name() rejects the name and -EEXIST if a
 * device with that name already exists in @net. A name containing '%' is
 * handed to dev_alloc_name_ns(), whose result is returned. Otherwise 0.
 */
static int dev_get_valid_name(struct net *net,
			      struct net_device *dev,
			      const char *name)
{
	BUG_ON(!net);

	if (!dev_valid_name(name))
		return -EINVAL;

	/* A '%' marks a template such as "eth%d": let the kernel pick the number. */
	if (strchr(name, '%'))
		return dev_alloc_name_ns(net, dev, name);
	else if (__dev_get_by_name(net, name))
		return -EEXIST;
	/* Skip the copy when the caller passed dev->name itself (as register_netdevice() does). */
	else if (dev->name != name)
		strlcpy(dev->name, name, IFNAMSIZ);

	return 0;
}

(5). dev_alloc_name_ns 

由系统根据前缀赋值名称。


/*
 * Resolve the name template @name within @net and, on success, copy the
 * generated name into dev->name. Returns the value of __dev_alloc_name();
 * a negative value means failure and leaves dev->name untouched.
 */
static int dev_alloc_name_ns(struct net *net,
			     struct net_device *dev,
			     const char *name)
{
	char buf[IFNAMSIZ];
	int ret;

	/* The generated name is built in a scratch buffer first. */
	ret = __dev_alloc_name(net, name, buf);
	if (ret >= 0)
		strlcpy(dev->name, buf, IFNAMSIZ);
	return ret;
}

例如我的设备上目前有一个br0,我想再创建一个br1:

3. br_dev_setup()函数

网桥初始化函数,网桥设备的net_device相应字段,网桥私有数据net_bridge字段设置。

/*
 * Setup callback passed to alloc_netdev() by br_add_bridge(): fills in the
 * bridge's net_device fields and initialises its private struct net_bridge.
 */
void br_dev_setup(struct net_device *dev)
{
	/* netdev_priv() returns the start of the device's private data, i.e. the net_bridge */
	struct net_bridge *br = netdev_priv(dev);
	
	/* Give the bridge a randomly generated MAC address */
	eth_hw_addr_random(dev);
	
	/* Ethernet defaults: fill dev with the generic Ethernet parameters */
	ether_setup(dev);
	
	/* Operations table of the bridge device */
	dev->netdev_ops = &br_netdev_ops;
	
	/* Destructor is br_dev_free */
	dev->destructor = br_dev_free;
	
	/* ethtool operations of the bridge device */
	dev->ethtool_ops = &br_ethtool_ops;
	
	/* Set the sysfs device type (br_type) */
	SET_NETDEV_DEVTYPE(dev, &br_type);
	
	/* TX queue length 0; overrides the 1000 set by ether_setup() above */
	dev->tx_queue_len = 0;
	
	/*
	 * Mark the device as a bridge. This is a plain assignment, so it
	 * replaces any priv_flags set earlier (e.g. by ether_setup()).
	 */
	dev->priv_flags = IFF_EBRIDGE;

	dev->features = COMMON_FEATURES | NETIF_F_LLTX | NETIF_F_NETNS_LOCAL |
			NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
	dev->hw_features = COMMON_FEATURES | NETIF_F_HW_VLAN_CTAG_TX |
			   NETIF_F_HW_VLAN_STAG_TX;
	dev->vlan_features = COMMON_FEATURES;
	
	/* From here on: initialisation of the bridge private data */
	/* Back-pointer to the net_device this net_bridge belongs to */
	br->dev = dev;
	
	/* Initialise the bridge lock */
	spin_lock_init(&br->lock);
	
	/* Initialise the (empty) port list */
	INIT_LIST_HEAD(&br->port_list);
	
	/* Initialise the lock of the forwarding (CAM) table */
	spin_lock_init(&br->hash_lock);
	
	/* Default bridge priority 0x8000, i.e. 32768 */
	br->bridge_id.prio[0] = 0x80;
	br->bridge_id.prio[1] = 0x00;
	
	/* STP related initialisation */
	/* 802.1D (STP) group address 01:80:C2:00:00:00 */
	ether_addr_copy(br->group_addr, eth_reserved_addr_base);
	
	/* STP is disabled by default; group forwarding masks take their defaults */
	br->stp_enabled = BR_NO_STP;
	br->group_fwd_mask = BR_GROUPFWD_DEFAULT;
	br->group_fwd_mask_required = BR_GROUPFWD_DEFAULT;

	/* A new bridge starts out considering itself the root. */
	br->designated_root = br->bridge_id;
	
	/* 20 s: BPDU maximum age */
	br->bridge_max_age = br->max_age = 20 * HZ;
	
	/* 2 s: hello timer interval */
	br->bridge_hello_time = br->hello_time = 2 * HZ;
	
	/* 15 s: forward delay used for the STP state transitions towards forwarding */
	br->bridge_forward_delay = br->forward_delay = 15 * HZ;
	
	/* Ageing time of MAC addresses stored in the FDB: 5 minutes */
	br->ageing_time = 300 * HZ;
	
	/* Bridge netfilter initialisation */
	br_netfilter_rtable_init(br);
	br_stp_timer_init(br);
	
	/* Multicast initialisation */
	br_multicast_init(br);
}

(1). netdev_priv()函数

这个函数很简短,但是很重要,功能就一个。取出网桥设备私有数据起始地址,就是net_bridge结构地址。它也是32字节对齐的。

/**
 *	netdev_priv - access network device private data
 *	@dev: network device
 *
 * Get network device private data
 */
static inline void *netdev_priv(const struct net_device *dev)
{
	/*
	 * The private area starts right after struct net_device, rounded up
	 * to NETDEV_ALIGN; this mirrors the layout alloc_netdev_mqs() sizes.
	 */
	return (char *)dev + ALIGN(sizeof(struct net_device), NETDEV_ALIGN);
}

(2). 宏SET_NETDEV_DEVTYPE(dev, &br_type);

/* Set the sysfs device type for the network logical device to allow
 * fine-grained identification of different network device types. For
 * example Ethernet, Wireless LAN, Bluetooth, WiMAX etc.
 */
#define SET_NETDEV_DEVTYPE(net, devtype)	((net)->dev.type = (devtype))

相当于dev->dev.type = &br_type

br_type结构体定义为:

/* Device type that br_dev_setup() assigns to bridge devices via SET_NETDEV_DEVTYPE(). */
static struct device_type br_type = {
	.name	= "bridge",
};

net_device数据结构中有一个成员struct device dev;struct device的定义如下,其中有一个type成员。

struct device 
{
	struct device		*parent;

	struct device_private	*p;

	struct kobject kobj;
	const char		*init_name; /* initial name of the device */
	const struct device_type *type;
        ...
}

(3). ether_setup()

以太网设备初始化,使用以太网通用参数初始化net_device中以太网相关参数。包括L2头长度为14,MAC地址长度为6,MTU为1500(ETH_DATA_LEN)等。

/**
 * ether_setup - setup Ethernet network device
 * @dev: network device
 *
 * Fill in the fields of the device structure with Ethernet-generic values.
 */
void ether_setup(struct net_device *dev)
{
	/*
	 * L2 header operations (create, parse, cache, ...). Where
	 * eth_header_ops is defined is not shown in these notes.
	 */
	dev->header_ops		= &eth_header_ops;
	
	/* ARP hardware type identifier; ARPHRD_ETHER means standard Ethernet */
	dev->type		= ARPHRD_ETHER;
	
	/* L2 header length (ETH_HLEN, 14 per the original note) */
	dev->hard_header_len 	= ETH_HLEN;
	
	/* MTU: maximum payload length (ETH_DATA_LEN, 1500 per the original note) */
	dev->mtu		= ETH_DATA_LEN;
	
	/* MAC address length (ETH_ALEN, 6 per the original note) */
	dev->addr_len		= ETH_ALEN;
	
	/* TX queue length 1000 */
	dev->tx_queue_len	= 1000;	/* Ethernet wants good queues */
	
	/* Interface flags: broadcast and multicast */
	dev->flags		= IFF_BROADCAST|IFF_MULTICAST;
	
	/*Like 'flags' but invisible to userspace,see if.h for the definitions*/
	dev->priv_flags		|= IFF_TX_SKB_SHARING;
	
	/* Set the broadcast address (all ones per the original note) */
	eth_broadcast_addr(dev->broadcast);

}

关于dev->type = ARPHRD_ETHER;

设置arp协议硬件标识符, ARPHRD_ETHER表示标准以太网。

/* ARP protocol HARDWARE identifiers. */
#define ARPHRD_NETROM	0		/* from KA9Q: NET/ROM pseudo	*/
/* ARPHRD_ETHER is the value ether_setup() stores in dev->type. */
#define ARPHRD_ETHER 	1		/* Ethernet 10Mbps		*/
#define	ARPHRD_EETHER	2		/* Experimental Ethernet	*/
#define	ARPHRD_AX25	3		/* AX.25 Level 2		*/
#define	ARPHRD_PRONET	4		/* PROnet token ring		*/
#define	ARPHRD_CHAOS	5		/* Chaosnet			*/
#define	ARPHRD_IEEE802	6		/* IEEE 802.2 Ethernet/TR/TB	*/
#define	ARPHRD_ARCNET	7		/* ARCnet			*/
#define	ARPHRD_APPLETLK	8		/* APPLEtalk			*/
#define ARPHRD_DLCI	15		/* Frame Relay DLCI		*/
#define ARPHRD_ATM	19		/* ATM 				*/
#define ARPHRD_METRICOM	23		/* Metricom STRIP (new IANA id)	*/
#define	ARPHRD_IEEE1394	24		/* IEEE 1394 IPv4 - RFC 2734	*/
#define ARPHRD_EUI64	27		/* EUI-64                       */
#define ARPHRD_INFINIBAND 32		/* InfiniBand			*/

(4). br_netdev_ops操作函数

/* net_device operations of a bridge device; installed by br_dev_setup(). */
static const struct net_device_ops br_netdev_ops = {
	/* Called when the bridge is opened */
	.ndo_open		 = br_dev_open,
	.ndo_stop		 = br_dev_stop,
	.ndo_init		 = br_dev_init,
	
	/* Bridge transmit function */
	.ndo_start_xmit		 = br_dev_xmit,
	.ndo_get_stats64	 = br_get_stats64,
	
	/* Set the bridge MAC address */
	.ndo_set_mac_address	 = br_set_mac_address,
	.ndo_set_rx_mode	 = br_dev_set_multicast_list,
	.ndo_change_rx_flags	 = br_dev_change_rx_flags,
	
	/*
	 * Change the bridge MTU. Per the original note the new MTU must not
	 * exceed the smallest MTU among the enslaved devices
	 * (br_change_mtu() is not shown here).
	 */
	.ndo_change_mtu		 = br_change_mtu,
	
	/* Handler for ioctl commands issued on the bridge device */
	.ndo_do_ioctl		 = br_dev_ioctl,
#ifdef CONFIG_NET_POLL_CONTROLLER
	.ndo_netpoll_setup	 = br_netpoll_setup,
	.ndo_netpoll_cleanup	 = br_netpoll_cleanup,
	.ndo_poll_controller	 = br_poll_controller,
#endif
	/* Add / remove a bridge port */
	.ndo_add_slave		 = br_add_slave,
	.ndo_del_slave		 = br_del_slave,
	.ndo_fix_features        = br_fix_features,
	/* Add / delete / dump FDB (forwarding table) entries */
	.ndo_fdb_add		 = br_fdb_add,
	.ndo_fdb_del		 = br_fdb_delete,
	.ndo_fdb_dump		 = br_fdb_dump,
	.ndo_bridge_getlink	 = br_getlink,
	.ndo_bridge_setlink	 = br_setlink,
	.ndo_bridge_dellink	 = br_dellink,
};

看看定义是哪样的:


/*
 * This structure defines the management hooks for network devices.
 * The following hooks can be defined; unless noted otherwise, they are
 * optional and can be filled with a null pointer.
 *
 * int (*ndo_init)(struct net_device *dev);
 *     This function is called once when network device is registered.
 *     The network device can use this for any late stage initialization
 *     or semantic validation. It can fail with an error code which will
 *     be propagated back to register_netdev.
 *
 * void (*ndo_uninit)(struct net_device *dev);
 *     This function is called when device is unregistered or when registration
 *     fails. It is not called if init fails.
 *
 * int (*ndo_open)(struct net_device *dev);
 *     This function is called when network device transitions to the up
 *     state.
 *
 * int (*ndo_stop)(struct net_device *dev);
 *     This function is called when network device transitions to the down
 *     state.
 *
 * netdev_tx_t (*ndo_start_xmit)(struct sk_buff *skb,
 *                               struct net_device *dev);
 *	Called when a packet needs to be transmitted.
 *	Returns NETDEV_TX_OK.  Can return NETDEV_TX_BUSY, but you should stop
 *	the queue before that can happen; it's for obsolete devices and weird
 *	corner cases, but the stack really does a non-trivial amount
 *	of useless work if you return NETDEV_TX_BUSY.
 *        (can also return NETDEV_TX_LOCKED iff NETIF_F_LLTX)
 *	Required can not be NULL.
 *
 * u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb,
 *                         void *accel_priv, select_queue_fallback_t fallback);
 *	Called to decide which queue to use when device supports multiple
 *	transmit queues.
 *
 * void (*ndo_change_rx_flags)(struct net_device *dev, int flags);
 *	This function is called to allow device receiver to make
 *	changes to configuration when multicast or promiscuous is enabled.
 *
 * void (*ndo_set_rx_mode)(struct net_device *dev);
 *	This function is called when the device changes address list filtering.
 *	If driver handles unicast address filtering, it should set
 *	IFF_UNICAST_FLT to its priv_flags.
 *
 * int (*ndo_set_mac_address)(struct net_device *dev, void *addr);
 *	This function  is called when the Media Access Control address
 *	needs to be changed. If this interface is not defined, the
 *	mac address can not be changed.
 *
 * int (*ndo_validate_addr)(struct net_device *dev);
 *	Test if Media Access Control address is valid for the device.
 *
 * int (*ndo_do_ioctl)(struct net_device *dev, struct ifreq *ifr, int cmd);
 *	Called when a user request an ioctl which can't be handled by
 *	the generic interface code. If not defined ioctl's return
 *	not supported error code.
 *
 * int (*ndo_set_config)(struct net_device *dev, struct ifmap *map);
 *	Used to set network devices bus interface parameters. This interface
 *	is retained for legacy reason, new devices should use the bus
 *	interface (PCI) for low level management.
 *
 * int (*ndo_change_mtu)(struct net_device *dev, int new_mtu);
 *	Called when a user wants to change the Maximum Transfer Unit
 *	of a device. If not defined, any request to change MTU
 *	will return an error.
 *
 * void (*ndo_tx_timeout)(struct net_device *dev);
 *	Callback used when the transmitter has not made any progress
 *	for dev->watchdog ticks.
 *
 * struct rtnl_link_stats64* (*ndo_get_stats64)(struct net_device *dev,
 *                      struct rtnl_link_stats64 *storage);
 * struct net_device_stats* (*ndo_get_stats)(struct net_device *dev);
 *	Called when a user wants to get the network device usage
 *	statistics. Drivers must do one of the following:
 *	1. Define @ndo_get_stats64 to fill in a zero-initialised
 *	   rtnl_link_stats64 structure passed by the caller.
 *	2. Define @ndo_get_stats to update a net_device_stats structure
 *	   (which should normally be dev->stats) and return a pointer to
 *	   it. The structure may be changed asynchronously only if each
 *	   field is written atomically.
 *	3. Update dev->stats asynchronously and atomically, and define
 *	   neither operation.
 *
 * int (*ndo_vlan_rx_add_vid)(struct net_device *dev, __be16 proto, u16 vid);
 *	If device support VLAN filtering this function is called when a
 *	VLAN id is registered.
 *
 * int (*ndo_vlan_rx_kill_vid)(struct net_device *dev, __be16 proto, u16 vid);
 *	If device support VLAN filtering this function is called when a
 *	VLAN id is unregistered.
 *
 * void (*ndo_poll_controller)(struct net_device *dev);
 *
 *	SR-IOV management functions.
 * int (*ndo_set_vf_mac)(struct net_device *dev, int vf, u8* mac);
 * int (*ndo_set_vf_vlan)(struct net_device *dev, int vf, u16 vlan, u8 qos);
 * int (*ndo_set_vf_rate)(struct net_device *dev, int vf, int min_tx_rate,
 *			  int max_tx_rate);
 * int (*ndo_set_vf_spoofchk)(struct net_device *dev, int vf, bool setting);
 * int (*ndo_get_vf_config)(struct net_device *dev,
 *			    int vf, struct ifla_vf_info *ivf);
 * int (*ndo_set_vf_link_state)(struct net_device *dev, int vf, int link_state);
 * int (*ndo_set_vf_port)(struct net_device *dev, int vf,
 *			  struct nlattr *port[]);
 *
 *      Enable or disable the VF ability to query its RSS Redirection Table and
 *      Hash Key. This is needed since on some devices VF share this information
 *      with PF and querying it may adduce a theoretical security risk.
 * int (*ndo_set_vf_rss_query_en)(struct net_device *dev, int vf, bool setting);
 * int (*ndo_get_vf_port)(struct net_device *dev, int vf, struct sk_buff *skb);
 * int (*ndo_setup_tc)(struct net_device *dev, u8 tc)
 * 	Called to setup 'tc' number of traffic classes in the net device. This
 * 	is always called from the stack with the rtnl lock held and netif tx
 * 	queues stopped. This allows the netdevice to perform queue management
 * 	safely.
 *
 *	Fiber Channel over Ethernet (FCoE) offload functions.
 * int (*ndo_fcoe_enable)(struct net_device *dev);
 *	Called when the FCoE protocol stack wants to start using LLD for FCoE
 *	so the underlying device can perform whatever needed configuration or
 *	initialization to support acceleration of FCoE traffic.
 *
 * int (*ndo_fcoe_disable)(struct net_device *dev);
 *	Called when the FCoE protocol stack wants to stop using LLD for FCoE
 *	so the underlying device can perform whatever needed clean-ups to
 *	stop supporting acceleration of FCoE traffic.
 *
 * int (*ndo_fcoe_ddp_setup)(struct net_device *dev, u16 xid,
 *			     struct scatterlist *sgl, unsigned int sgc);
 *	Called when the FCoE Initiator wants to initialize an I/O that
 *	is a possible candidate for Direct Data Placement (DDP). The LLD can
 *	perform necessary setup and returns 1 to indicate the device is set up
 *	successfully to perform DDP on this I/O, otherwise this returns 0.
 *
 * int (*ndo_fcoe_ddp_done)(struct net_device *dev,  u16 xid);
 *	Called when the FCoE Initiator/Target is done with the DDPed I/O as
 *	indicated by the FC exchange id 'xid', so the underlying device can
 *	clean up and reuse resources for later DDP requests.
 *
 * int (*ndo_fcoe_ddp_target)(struct net_device *dev, u16 xid,
 *			      struct scatterlist *sgl, unsigned int sgc);
 *	Called when the FCoE Target wants to initialize an I/O that
 *	is a possible candidate for Direct Data Placement (DDP). The LLD can
 *	perform necessary setup and returns 1 to indicate the device is set up
 *	successfully to perform DDP on this I/O, otherwise this returns 0.
 *
 * int (*ndo_fcoe_get_hbainfo)(struct net_device *dev,
 *			       struct netdev_fcoe_hbainfo *hbainfo);
 *	Called when the FCoE Protocol stack wants information on the underlying
 *	device. This information is utilized by the FCoE protocol stack to
 *	register attributes with Fiber Channel management service as per the
 *	FC-GS Fabric Device Management Information(FDMI) specification.
 *
 * int (*ndo_fcoe_get_wwn)(struct net_device *dev, u64 *wwn, int type);
 *	Called when the underlying device wants to override default World Wide
 *	Name (WWN) generation mechanism in FCoE protocol stack to pass its own
 *	World Wide Port Name (WWPN) or World Wide Node Name (WWNN) to the FCoE
 *	protocol stack to use.
 *
 *	RFS acceleration.
 * int (*ndo_rx_flow_steer)(struct net_device *dev, const struct sk_buff *skb,
 *			    u16 rxq_index, u32 flow_id);
 *	Set hardware filter for RFS.  rxq_index is the target queue index;
 *	flow_id is a flow ID to be passed to rps_may_expire_flow() later.
 *	Return the filter ID on success, or a negative error code.
 *
 *	Slave management functions (for bridge, bonding, etc).
 * int (*ndo_add_slave)(struct net_device *dev, struct net_device *slave_dev);
 *	Called to make another netdev an underling.
 *
 * int (*ndo_del_slave)(struct net_device *dev, struct net_device *slave_dev);
 *	Called to release previously enslaved netdev.
 *
 *      Feature/offload setting functions.
 * netdev_features_t (*ndo_fix_features)(struct net_device *dev,
 *		netdev_features_t features);
 *	Adjusts the requested feature flags according to device-specific
 *	constraints, and returns the resulting flags. Must not modify
 *	the device state.
 *
 * int (*ndo_set_features)(struct net_device *dev, netdev_features_t features);
 *	Called to update device configuration to new features. Passed
 *	feature set might be less than what was returned by ndo_fix_features()).
 *	Must return >0 or -errno if it changed dev->features itself.
 *
 * int (*ndo_fdb_add)(struct ndmsg *ndm, struct nlattr *tb[],
 *		      struct net_device *dev,
 *		      const unsigned char *addr, u16 vid, u16 flags)
 *	Adds an FDB entry to dev for addr.
 * int (*ndo_fdb_del)(struct ndmsg *ndm, struct nlattr *tb[],
 *		      struct net_device *dev,
 *		      const unsigned char *addr, u16 vid)
 *	Deletes the FDB entry from dev corresponding to addr.
 * int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb,
 *		       struct net_device *dev, struct net_device *filter_dev,
 *		       int idx)
 *	Used to add FDB entries to dump requests. Implementers should add
 *	entries to skb and update idx with the number of entries.
 *
 * int (*ndo_bridge_setlink)(struct net_device *dev, struct nlmsghdr *nlh,
 *			     u16 flags)
 * int (*ndo_bridge_getlink)(struct sk_buff *skb, u32 pid, u32 seq,
 *			     struct net_device *dev, u32 filter_mask,
 *			     int nlflags)
 * int (*ndo_bridge_dellink)(struct net_device *dev, struct nlmsghdr *nlh,
 *			     u16 flags);
 *
 * int (*ndo_change_carrier)(struct net_device *dev, bool new_carrier);
 *	Called to change device carrier. Soft-devices (like dummy, team, etc)
 *	which do not represent real hardware may define this to allow their
 *	userspace components to manage their virtual carrier state. Devices
 *	that determine carrier state from physical hardware properties (eg
 *	network cables) or protocol-dependent mechanisms (eg
 *	USB_CDC_NOTIFY_NETWORK_CONNECTION) should NOT implement this function.
 *
 * int (*ndo_get_phys_port_id)(struct net_device *dev,
 *			       struct netdev_phys_item_id *ppid);
 *	Called to get ID of physical port of this device. If driver does
 *	not implement this, it is assumed that the hw is not able to have
 *	multiple net devices on single physical port.
 *
 * void (*ndo_add_vxlan_port)(struct  net_device *dev,
 *			      sa_family_t sa_family, __be16 port);
 *	Called by vxlan to notify a driver about the UDP port and socket
 *	address family that vxlan is listening to. It is called only when
 *	a new port starts listening. The operation is protected by the
 *	vxlan_net->sock_lock.
 *
 * void (*ndo_del_vxlan_port)(struct  net_device *dev,
 *			      sa_family_t sa_family, __be16 port);
 *	Called by vxlan to notify the driver about a UDP port and socket
 *	address family that vxlan is not listening to anymore. The operation
 *	is protected by the vxlan_net->sock_lock.
 *
 * void* (*ndo_dfwd_add_station)(struct net_device *pdev,
 *				 struct net_device *dev)
 *	Called by upper layer devices to accelerate switching or other
 *	station functionality into hardware. 'pdev is the lowerdev
 *	to use for the offload and 'dev' is the net device that will
 *	back the offload. Returns a pointer to the private structure
 *	the upper layer will maintain.
 * void (*ndo_dfwd_del_station)(struct net_device *pdev, void *priv)
 *	Called by upper layer device to delete the station created
 *	by 'ndo_dfwd_add_station'. 'pdev' is the net device backing
 *	the station and priv is the structure returned by the add
 *	operation.
 * netdev_tx_t (*ndo_dfwd_start_xmit)(struct sk_buff *skb,
 *				      struct net_device *dev,
 *				      void *priv);
 *	Callback to use for xmit over the accelerated station. This
 *	is used in place of ndo_start_xmit on accelerated net
 *	devices.
 * netdev_features_t (*ndo_features_check) (struct sk_buff *skb,
 *					    struct net_device *dev,
 *					    netdev_features_t features);
 *	Called by core transmit path to determine if device is capable of
 *	performing offload operations on a given packet. This is to give
 *	the device an opportunity to implement any restrictions that cannot
 *	be otherwise expressed by feature flags. The check is called with
 *	the set of features that the stack has calculated and it returns
 *	those the driver believes to be appropriate.
 * int (*ndo_set_tx_maxrate)(struct net_device *dev,
 *			     int queue_index, u32 maxrate);
 *	Called when a user wants to set a max-rate limitation of specific
 *	TX queue.
 * int (*ndo_get_iflink)(const struct net_device *dev);
 *	Called to get the iflink value of this device.
 */
/*
 * Table of management callbacks ("ndo_*" hooks) for a network device.
 * Members inside #ifdef / #if IS_ENABLED() sections only exist when the
 * corresponding kernel configuration option is enabled.
 */
struct net_device_ops {
	int			(*ndo_init)(struct net_device *dev);
	void			(*ndo_uninit)(struct net_device *dev);
	int			(*ndo_open)(struct net_device *dev);
	int			(*ndo_stop)(struct net_device *dev);
	/* Transmit hook; the hook documentation marks it as required (not NULL) */
	netdev_tx_t		(*ndo_start_xmit) (struct sk_buff *skb,
						   struct net_device *dev);
	u16			(*ndo_select_queue)(struct net_device *dev,
						    struct sk_buff *skb,
						    void *accel_priv,
						    select_queue_fallback_t fallback);
	void			(*ndo_change_rx_flags)(struct net_device *dev,
						       int flags);
	void			(*ndo_set_rx_mode)(struct net_device *dev);
	int			(*ndo_set_mac_address)(struct net_device *dev,
						       void *addr);
	int			(*ndo_validate_addr)(struct net_device *dev);
	int			(*ndo_do_ioctl)(struct net_device *dev,
					        struct ifreq *ifr, int cmd);
	int			(*ndo_set_config)(struct net_device *dev,
					          struct ifmap *map);
	int			(*ndo_change_mtu)(struct net_device *dev,
						  int new_mtu);
	/* NOTE(review): not described in the hook documentation of this struct */
	int			(*ndo_neigh_setup)(struct net_device *dev,
						   struct neigh_parms *);
	void			(*ndo_tx_timeout) (struct net_device *dev);

	/* Statistics hooks: a driver defines at most one of the two */
	struct rtnl_link_stats64* (*ndo_get_stats64)(struct net_device *dev,
						     struct rtnl_link_stats64 *storage);
	struct net_device_stats* (*ndo_get_stats)(struct net_device *dev);

	int			(*ndo_vlan_rx_add_vid)(struct net_device *dev,
						       __be16 proto, u16 vid);
	int			(*ndo_vlan_rx_kill_vid)(struct net_device *dev,
						        __be16 proto, u16 vid);
#ifdef CONFIG_NET_POLL_CONTROLLER
	void                    (*ndo_poll_controller)(struct net_device *dev);
	int			(*ndo_netpoll_setup)(struct net_device *dev,
						     struct netpoll_info *info);
	void			(*ndo_netpoll_cleanup)(struct net_device *dev);
#endif
#ifdef CONFIG_NET_RX_BUSY_POLL
	/* NOTE(review): not described in the hook documentation of this struct */
	int			(*ndo_busy_poll)(struct napi_struct *dev);
#endif
	/* SR-IOV (virtual function) management hooks */
	int			(*ndo_set_vf_mac)(struct net_device *dev,
						  int queue, u8 *mac);
	int			(*ndo_set_vf_vlan)(struct net_device *dev,
						   int queue, u16 vlan, u8 qos);
	int			(*ndo_set_vf_rate)(struct net_device *dev,
						   int vf, int min_tx_rate,
						   int max_tx_rate);
	int			(*ndo_set_vf_spoofchk)(struct net_device *dev,
						       int vf, bool setting);
	int			(*ndo_get_vf_config)(struct net_device *dev,
						     int vf,
						     struct ifla_vf_info *ivf);
	int			(*ndo_set_vf_link_state)(struct net_device *dev,
							 int vf, int link_state);
	int			(*ndo_set_vf_port)(struct net_device *dev,
						   int vf,
						   struct nlattr *port[]);
	int			(*ndo_get_vf_port)(struct net_device *dev,
						   int vf, struct sk_buff *skb);
	int			(*ndo_set_vf_rss_query_en)(
						   struct net_device *dev,
						   int vf, bool setting);
	int			(*ndo_setup_tc)(struct net_device *dev, u8 tc);
#if IS_ENABLED(CONFIG_FCOE)
	/* Fiber Channel over Ethernet (FCoE) offload hooks */
	int			(*ndo_fcoe_enable)(struct net_device *dev);
	int			(*ndo_fcoe_disable)(struct net_device *dev);
	int			(*ndo_fcoe_ddp_setup)(struct net_device *dev,
						      u16 xid,
						      struct scatterlist *sgl,
						      unsigned int sgc);
	int			(*ndo_fcoe_ddp_done)(struct net_device *dev,
						     u16 xid);
	int			(*ndo_fcoe_ddp_target)(struct net_device *dev,
						       u16 xid,
						       struct scatterlist *sgl,
						       unsigned int sgc);
	int			(*ndo_fcoe_get_hbainfo)(struct net_device *dev,
							struct netdev_fcoe_hbainfo *hbainfo);
#endif

#if IS_ENABLED(CONFIG_LIBFCOE)
#define NETDEV_FCOE_WWNN 0
#define NETDEV_FCOE_WWPN 1
	int			(*ndo_fcoe_get_wwn)(struct net_device *dev,
						    u64 *wwn, int type);
#endif

#ifdef CONFIG_RFS_ACCEL
	int			(*ndo_rx_flow_steer)(struct net_device *dev,
						     const struct sk_buff *skb,
						     u16 rxq_index,
						     u32 flow_id);
#endif
	/* Slave management hooks (used by bridge, bonding, etc.) */
	int			(*ndo_add_slave)(struct net_device *dev,
						 struct net_device *slave_dev);
	int			(*ndo_del_slave)(struct net_device *dev,
						 struct net_device *slave_dev);
	netdev_features_t	(*ndo_fix_features)(struct net_device *dev,
						    netdev_features_t features);
	int			(*ndo_set_features)(struct net_device *dev,
						    netdev_features_t features);
	/* NOTE(review): the two neigh hooks below are not described in the
	 * hook documentation of this struct
	 */
	int			(*ndo_neigh_construct)(struct neighbour *n);
	void			(*ndo_neigh_destroy)(struct neighbour *n);

	/* FDB (forwarding database) hooks */
	int			(*ndo_fdb_add)(struct ndmsg *ndm,
					       struct nlattr *tb[],
					       struct net_device *dev,
					       const unsigned char *addr,
					       u16 vid,
					       u16 flags);
	int			(*ndo_fdb_del)(struct ndmsg *ndm,
					       struct nlattr *tb[],
					       struct net_device *dev,
					       const unsigned char *addr,
					       u16 vid);
	int			(*ndo_fdb_dump)(struct sk_buff *skb,
						struct netlink_callback *cb,
						struct net_device *dev,
						struct net_device *filter_dev,
						int idx);

	int			(*ndo_bridge_setlink)(struct net_device *dev,
						      struct nlmsghdr *nlh,
						      u16 flags);
	int			(*ndo_bridge_getlink)(struct sk_buff *skb,
						      u32 pid, u32 seq,
						      struct net_device *dev,
						      u32 filter_mask,
						      int nlflags);
	int			(*ndo_bridge_dellink)(struct net_device *dev,
						      struct nlmsghdr *nlh,
						      u16 flags);
	int			(*ndo_change_carrier)(struct net_device *dev,
						      bool new_carrier);
	int			(*ndo_get_phys_port_id)(struct net_device *dev,
							struct netdev_phys_item_id *ppid);
	/* NOTE(review): not described in the hook documentation of this struct */
	int			(*ndo_get_phys_port_name)(struct net_device *dev,
							  char *name, size_t len);
	void			(*ndo_add_vxlan_port)(struct  net_device *dev,
						      sa_family_t sa_family,
						      __be16 port);
	void			(*ndo_del_vxlan_port)(struct  net_device *dev,
						      sa_family_t sa_family,
						      __be16 port);

	void*			(*ndo_dfwd_add_station)(struct net_device *pdev,
							struct net_device *dev);
	void			(*ndo_dfwd_del_station)(struct net_device *pdev,
							void *priv);

	netdev_tx_t		(*ndo_dfwd_start_xmit) (struct sk_buff *skb,
							struct net_device *dev,
							void *priv);
	/* NOTE(review): not described in the hook documentation of this struct */
	int			(*ndo_get_lock_subclass)(struct net_device *dev);
	netdev_features_t	(*ndo_features_check) (struct sk_buff *skb,
						       struct net_device *dev,
						       netdev_features_t features);
	int			(*ndo_set_tx_maxrate)(struct net_device *dev,
						      int queue_index,
						      u32 maxrate);
	int			(*ndo_get_iflink)(const struct net_device *dev);
};

(5). br_ethtool_ops操作函数

/*
 * ethtool callbacks of the bridge device: driver info comes from the
 * bridge's own br_getinfo, link state from the ethtool_op_get_link helper.
 */
static const struct ethtool_ops br_ethtool_ops = {
	.get_drvinfo    = br_getinfo,
	.get_link	= ethtool_op_get_link,
};

ethtool_ops结构体定义,...以后需要再单独分析。

4. 删除网桥br_del_bridge()函数

先根据name找到网桥设备,调用br_dev_delete()函数删除网桥。

/*
 * br_del_bridge - delete the bridge device called @name in namespace @net
 *
 * Returns 0 on success, -ENXIO if no device with that name exists, -EPERM
 * if the device is not a bridge and -EBUSY if the bridge is still up.
 * The RTNL lock is held across the lookup and the deletion.
 */
int br_del_bridge(struct net *net, const char *name)
{
	struct net_device *bridge_dev;
	int rc;

	rtnl_lock();

	/* Look the device up by name inside this network namespace. */
	bridge_dev = __dev_get_by_name(net, name);
	if (!bridge_dev) {
		/* Could not find device */
		rc = -ENXIO;
		goto out_unlock;
	}

	/* Attempt to delete non bridge device: IFF_EBRIDGE is not set. */
	if ((bridge_dev->priv_flags & IFF_EBRIDGE) == 0) {
		rc = -EPERM;
		goto out_unlock;
	}

	/* Not shutdown yet: the bridge must be down before it is deleted. */
	if (bridge_dev->flags & IFF_UP) {
		rc = -EBUSY;
		goto out_unlock;
	}

	br_dev_delete(bridge_dev, NULL);
	rc = 0;

out_unlock:
	rtnl_unlock();
	return rc;
}

(1). 关于priv_flags:表示设备类型

/**
 * enum netdev_priv_flags - &struct net_device priv_flags
 *
 * These are the &struct net_device private flags; they are only set
 * internally by drivers and used in the kernel. These flags are invisible
 * to userspace, this means that the order of these flags can change
 * during any kernel release.
 *
 * You should have a pretty good reason to be extending these flags.
 *
 * @IFF_802_1Q_VLAN: 802.1Q VLAN device
 * @IFF_EBRIDGE: Ethernet bridging device
 * @IFF_SLAVE_INACTIVE: bonding slave not the curr. active
 * @IFF_MASTER_8023AD: bonding master, 802.3ad
 * @IFF_MASTER_ALB: bonding master, balance-alb
 * @IFF_BONDING: bonding master or slave
 * @IFF_SLAVE_NEEDARP: need ARPs for validation
 * @IFF_ISATAP: ISATAP interface (RFC4214)
 * @IFF_MASTER_ARPMON: bonding master, ARP mon in use
 * @IFF_WAN_HDLC: WAN HDLC device
 * @IFF_XMIT_DST_RELEASE: dev_hard_start_xmit() is allowed to
 *	release skb->dst
 * @IFF_DONT_BRIDGE: disallow bridging this ether dev
 * @IFF_DISABLE_NETPOLL: disable netpoll at run-time
 * @IFF_MACVLAN_PORT: device used as macvlan port
 * @IFF_BRIDGE_PORT: device used as bridge port
 * @IFF_OVS_DATAPATH: device used as Open vSwitch datapath port
 * @IFF_TX_SKB_SHARING: The interface supports sharing skbs on transmit
 * @IFF_UNICAST_FLT: Supports unicast filtering
 * @IFF_TEAM_PORT: device used as team port
 * @IFF_SUPP_NOFCS: device supports sending custom FCS
 * @IFF_LIVE_ADDR_CHANGE: device supports hardware address
 *	change when it's running
 * @IFF_MACVLAN: Macvlan device
 * @IFF_XMIT_DST_RELEASE_PERM: IFF_XMIT_DST_RELEASE not taking into account
 *	underlying stacked devices (wording from later upstream kernel-doc;
 *	TODO confirm for this kernel version)
 * @IFF_IPVLAN_MASTER: IPvlan master device
 * @IFF_IPVLAN_SLAVE: IPvlan slave device
 */
enum netdev_priv_flags {
	IFF_802_1Q_VLAN			= 1<<0,
	IFF_EBRIDGE			= 1<<1,
	IFF_SLAVE_INACTIVE		= 1<<2,
	IFF_MASTER_8023AD		= 1<<3,
	IFF_MASTER_ALB			= 1<<4,
	IFF_BONDING			= 1<<5,
	IFF_SLAVE_NEEDARP		= 1<<6,
	IFF_ISATAP			= 1<<7,
	IFF_MASTER_ARPMON		= 1<<8,
	IFF_WAN_HDLC			= 1<<9,
	IFF_XMIT_DST_RELEASE		= 1<<10,
	IFF_DONT_BRIDGE			= 1<<11,
	IFF_DISABLE_NETPOLL		= 1<<12,
	IFF_MACVLAN_PORT		= 1<<13,
	IFF_BRIDGE_PORT			= 1<<14,
	IFF_OVS_DATAPATH		= 1<<15,
	IFF_TX_SKB_SHARING		= 1<<16,
	IFF_UNICAST_FLT			= 1<<17,
	IFF_TEAM_PORT			= 1<<18,
	IFF_SUPP_NOFCS			= 1<<19,
	IFF_LIVE_ADDR_CHANGE		= 1<<20,
	IFF_MACVLAN			= 1<<21,
	IFF_XMIT_DST_RELEASE_PERM	= 1<<22,
	IFF_IPVLAN_MASTER		= 1<<23,
	IFF_IPVLAN_SLAVE		= 1<<24,
};

 (2). br_fdb_delete_by_port()函数

删除网桥br上目的端口为p的所有fdb项

/*
 * Flush the FDB entries of bridge @br whose destination port is @p.
 * Static entries are only flushed when @do_all is non-zero.
 * br->hash_lock is held (bottom halves disabled) for the whole walk.
 */
void br_fdb_delete_by_port(struct net_bridge *br,
			   const struct net_bridge_port *p,
			   int do_all)
{
	int bucket;

	spin_lock_bh(&br->hash_lock);

	/* Visit every bucket of the hash table, then every node in its chain. */
	for (bucket = 0; bucket < BR_HASH_SIZE; bucket++) {
		struct hlist_node *pos, *next;

		/* _safe iteration: the loop body deletes the current entry. */
		hlist_for_each_safe(pos, next, &br->hash[bucket]) {
			struct net_bridge_fdb_entry *fdb;

			fdb = hlist_entry(pos, struct net_bridge_fdb_entry, hlist);

			/* Keep entries of other ports, and static ones unless do_all. */
			if (fdb->dst != p || (fdb->is_static && !do_all))
				continue;

			/* Local-address entries take their own delete path. */
			if (!fdb->is_local)
				fdb_delete(br, fdb);
			else
				fdb_delete_local(br, p, fdb);
		}
	}

	spin_unlock_bh(&br->hash_lock);
}

(3). br_dev_delete()函数

删除网桥所有端口,删除网桥所有fdb项等。

/* Delete bridge device */
void br_dev_delete(struct net_device *dev, struct list_head *head)
{
	struct net_bridge *br = netdev_priv(dev);
	struct net_bridge_port *p, *n;
	
	/*遍历删除网桥所有端口*/
	list_for_each_entry_safe(p, n, &br->port_list, list) {
		del_nbp(p);
	}
	
	/*删除目的地址为指定端口的fdb项,这里端口为NULL,表示删除网桥br的所有fdb项*/
	br_fdb_delete_by_port(br, NULL, 1);

	br_vlan_flush(br);
	del_timer_sync(&br->gc_timer);

	br_sysfs_delbr(br->dev);
	
	/*从内核中删除设备*/
	unregister_netdevice_queue(br->dev, head);
}

这个函数中有很多函数还没有分析,以后再分析,实在是太多了,分析不过来了。

5. br_add_if()

给网桥添加端口

对待添加的设备进行合法性检查,然后创建网桥端口,进行相关参数初始化,将设备的rx_handler设置为br_handle_frame()、插入本地fdb项等。

/*
 * br_add_if - add net_device @dev as a port of bridge @br
 *
 * Validates the device, creates the net_bridge_port, registers
 * br_handle_frame as the device's rx_handler, links the port into
 * br->port_list and inserts an FDB entry for the device's own address.
 * Returns 0 on success or a negative errno; on failure the steps already
 * taken are undone through the err5..put_back labels.
 *
 * called with RTNL
 */
int br_add_if(struct net_bridge *br, struct net_device *dev)
{
	struct net_bridge_port *p;
	int err = 0;
	bool changed_addr;

	/* Don't allow bridging non-ethernet like devices, or DSA-enabled
	 * master network devices since the bridge layer rx_handler prevents
	 * the DSA fake ethertype handler to be invoked, so we do not strip off
	 * the DSA switch tag protocol header and the bridge layer just return
	 * RX_HANDLER_CONSUMED, stopping RX processing for these frames.
	 */
	/* Rejected here: loopback devices, non-Ethernet types, address length
	 * other than ETH_ALEN, invalid Ethernet addresses and DSA users.
	 */
	if ((dev->flags & IFF_LOOPBACK) ||
	    dev->type != ARPHRD_ETHER || dev->addr_len != ETH_ALEN ||
	    !is_valid_ether_addr(dev->dev_addr) ||
	    netdev_uses_dsa(dev))
		return -EINVAL;

	/* No bridging of bridges */
	/* A device whose xmit hook is br_dev_xmit is itself a bridge */
	if (dev->netdev_ops->ndo_start_xmit == br_dev_xmit)
		return -ELOOP;

	/* Device is already being bridged */
	/* A device can be a port of only one bridge at a time */
	if (br_port_exists(dev))
		return -EBUSY;

	/* No bridging devices that dislike that (e.g. wireless) */
	/* Devices flagged IFF_DONT_BRIDGE refuse to be enslaved */
	if (dev->priv_flags & IFF_DONT_BRIDGE)
		return -EOPNOTSUPP;

	/* Allocate and initialise the net_bridge_port for this device */
	p = new_nbp(br, dev);
	if (IS_ERR(p))
		return PTR_ERR(p);

	call_netdevice_notifiers(NETDEV_JOIN, dev);

	err = dev_set_allmulti(dev, 1);
	if (err)
		goto put_back;

	err = kobject_init_and_add(&p->kobj, &brport_ktype, &(dev->dev.kobj),
				   SYSFS_BRIDGE_PORT_ATTR);
	if (err)
		goto err1;

	err = br_sysfs_addif(p);
	if (err)
		goto err2;

	err = br_netpoll_enable(p);
	if (err)
		goto err3;

	/* Key step: install br_handle_frame as the device's rx_handler, with
	 * the port as handler data. Per the netdev_rx_handler_register()
	 * documentation it is then called from __netif_receive_skb.
	 */
	err = netdev_rx_handler_register(dev, br_handle_frame, p);
	if (err)
		goto err4;

	/* Mark the device as a bridge port in its private flags */
	dev->priv_flags |= IFF_BRIDGE_PORT;

	err = netdev_master_upper_dev_link(dev, br->dev);
	if (err)
		goto err5;
	
	/* Turn off LRO (large receive offload) on the port device */
	dev_disable_lro(dev);
	
	/* Link the new port into the bridge's port_list */
	list_add_rcu(&p->list, &br->port_list);

	nbp_update_port_count(br);
	
	/* Re-evaluate the feature flags of the bridge device */
	netdev_update_features(br->dev);

	if (br->dev->needed_headroom < dev->needed_headroom)
		br->dev->needed_headroom = dev->needed_headroom;

	/* Insert an FDB entry mapping the port device's own MAC to this port
	 * (per the original annotation such entries have is_local and
	 * is_static set - br_fdb_insert is not shown here, TODO confirm).
	 */
	if (br_fdb_insert(br, p, dev->dev_addr, 0))
		netdev_err(dev, "failed insert local address bridge forwarding table\n");
	
	/* Initialise the port's VLAN configuration (per the original
	 * annotation: the bridge's default PVID, if any, becomes the port's
	 * untagged PVID - nbp_vlan_init is not shown here, TODO confirm).
	 */
	if (nbp_vlan_init(p))
		netdev_err(dev, "failed to initialize vlan filtering on this port\n");

	spin_lock_bh(&br->lock);
	
	/* Recompute the bridge MAC address and bridge ID now that the set of
	 * ports changed; returns true when the address actually changed.
	 */
	changed_addr = br_stp_recalculate_bridge_id(br);

	/* If the port device and the bridge are both up, enable the port
	 * on the STP side.
	 */
	if (netif_running(dev) && netif_oper_up(dev) &&
	    (br->dev->flags & IFF_UP))
		br_stp_enable_port(p);
	spin_unlock_bh(&br->lock);

	br_ifinfo_notify(RTM_NEWLINK, p);

	/* Announce the new bridge MAC address to notifier subscribers */
	if (changed_addr)
		call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev);

	/* Set the bridge MTU to the value returned by br_min_mtu() (per the
	 * original annotation: the minimum MTU over all ports).
	 */
	dev_set_mtu(br->dev, br_min_mtu(br));

	kobject_uevent(&p->kobj, KOBJ_ADD);

	return 0;

	/* Error unwinding: each label undoes the step preceding the failure
	 * and falls through to the labels below it.
	 */
err5:
	dev->priv_flags &= ~IFF_BRIDGE_PORT;
	netdev_rx_handler_unregister(dev);
err4:
	br_netpoll_disable(p);
err3:
	sysfs_remove_link(br->ifobj, p->dev->name);
err2:
	kobject_put(&p->kobj);
	p = NULL; /* kobject_put frees */
err1:
	/* NOTE(review): when kobject_init_and_add() fails we arrive here
	 * without kobject_put(), and p is released by the kfree() below -
	 * confirm against the kobject documentation whether kobject_put()
	 * is required on that path.
	 */
	dev_set_allmulti(dev, -1);
put_back:
	dev_put(dev);
	kfree(p);
	return err;
}

(1). new_nbp()函数

创建网桥端口函数,net_bridge_port结构初始化。

/*
 * Allocate and initialise a net_bridge_port binding @dev to bridge @br.
 * Returns the new port, or an ERR_PTR() carrying the error from
 * find_portno() or -ENOMEM when the allocation fails.
 *
 * called with RTNL but without bridge lock
 */
static struct net_bridge_port *new_nbp(struct net_bridge *br,
				       struct net_device *dev)
{
	struct net_bridge_port *port;
	int portno = find_portno(br);

	/* find_portno() reports failure as a negative value. */
	if (portno < 0)
		return ERR_PTR(portno);

	port = kzalloc(sizeof(*port), GFP_KERNEL);
	if (!port)
		return ERR_PTR(-ENOMEM);

	/* The port keeps a reference on the underlying device. */
	dev_hold(dev);
	port->dev = dev;
	port->br = br;
	port->port_no = portno;
	port->path_cost = port_cost(dev);
	port->priority = 0x8000 >> BR_PORT_BITS;
	port->flags = BR_LEARNING | BR_FLOOD;

	/* New ports start in the disabled state. */
	br_init_port(port);
	br_set_state(port, BR_STATE_DISABLED);
	br_stp_port_timer_init(port);
	br_multicast_add_port(port);

	return port;
}

(2). netdev_rx_handler_register()

注册设备的rx_handler。

/**
 *	netdev_rx_handler_register - register receive handler
 *	@dev: device to register a handler for
 *	@rx_handler: receive handler to register
 *	@rx_handler_data: data pointer that is used by rx handler
 *
 *	Install @rx_handler, together with its private data, on @dev. The
 *	handler is subsequently invoked from __netif_receive_skb.
 *
 *	Returns 0 on success, or -EBUSY when @dev already has a receive
 *	handler.
 *
 *	The caller must hold the rtnl_mutex.
 *
 *	For a general description of rx_handler, see enum rx_handler_result.
 */
int netdev_rx_handler_register(struct net_device *dev,
			       rx_handler_func_t *rx_handler,
			       void *rx_handler_data)
{
	int rc = -EBUSY;

	ASSERT_RTNL();

	if (!dev->rx_handler) {
		/* Note: rx_handler_data must be set before rx_handler */
		rcu_assign_pointer(dev->rx_handler_data, rx_handler_data);
		rcu_assign_pointer(dev->rx_handler, rx_handler);
		rc = 0;
	}

	return rc;
}

(3). 关于br_stp_recalculate_bridge_id(br)

https://www.cnblogs.com/3me-linux/p/6566750.html

重新计算网桥MAC,Bridge ID

当一个网桥设备(不是端口设备)刚刚创建的时候,其MAC地址是随机的(见 br_dev_setup,旧实现是空MAC),这也会影响网桥ID(Prio+MAC),没有端口时网桥ID的MAC部分为0。当有设备被添加为其端口后,是个合适的机会重新为网桥选一个MAC,并重新计算网桥ID:遍历网桥的所有端口,取其中数值最小的MAC地址作为网桥MAC(STP中ID越小越好,因为其他因素一样的情况下MAC越小ID越小,优先级就越高)。端口MAC的合法性(例如不是0、长度是48Bits)已经在br_add_if()中检查过;如果用户已经手动设置过网桥MAC(addr_assign_type为NET_ADDR_SET),则保持不变。

/*
 * Re-derive the bridge MAC address / bridge ID from the current ports.
 * Returns true when the bridge ID was changed, false otherwise.
 *
 * called under bridge lock
 */
bool br_stp_recalculate_bridge_id(struct net_bridge *br)
{
	const unsigned char *zero_mac =
			(const unsigned char *)br_mac_zero_aligned;
	const unsigned char *lowest = zero_mac;
	struct net_bridge_port *port;

	/* user has chosen a value so keep it */
	if (br->dev->addr_assign_type == NET_ADDR_SET)
		return false;

	/* Pick the numerically smallest port MAC; the first port always
	 * replaces the all-zero placeholder.
	 */
	list_for_each_entry(port, &br->port_list, list) {
		const unsigned char *candidate = port->dev->dev_addr;

		if (lowest != zero_mac &&
		    memcmp(candidate, lowest, ETH_ALEN) >= 0)
			continue;

		lowest = candidate;
	}

	/* Only touch the bridge ID when the address really differs. */
	if (!ether_addr_equal(br->bridge_id.addr, lowest)) {
		br_stp_change_bridge_id(br, lowest);
		return true;
	}

	return false;	/* no change */
}

6. br_del_if()

删除网桥端口,重新设置网桥MTU,并在需要时更新网桥MAC和网桥ID。

/*
 * br_del_if - remove net_device @dev from bridge @br
 *
 * Returns 0 on success, or -EINVAL when @dev is not a port of @br.
 *
 * called with RTNL
 */
int br_del_if(struct net_bridge *br, struct net_device *dev)
{
	struct net_bridge_port *port = br_port_get_rtnl(dev);
	bool addr_changed;

	/* Not a bridge port at all, or a port of some other bridge. */
	if (port == NULL || port->br != br)
		return -EINVAL;

	/* Since more than one interface can be attached to a bridge,
	 * there still maybe an alternate path for netconsole to use;
	 * therefore there is no reason for a NETDEV_RELEASE event.
	 */
	del_nbp(port);

	/* The set of ports changed, so refresh the bridge MTU. */
	dev_set_mtu(br->dev, br_min_mtu(br));

	/* Recompute bridge MAC / bridge ID under the bridge lock. */
	spin_lock_bh(&br->lock);
	addr_changed = br_stp_recalculate_bridge_id(br);
	spin_unlock_bh(&br->lock);

	/* Tell notifier subscribers about a changed bridge address. */
	if (addr_changed)
		call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev);

	/* Re-evaluate the feature flags of the bridge device. */
	netdev_update_features(br->dev);

	return 0;
}

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值