深入理解Linux网络技术内幕 第16章 桥接-Linux实现

网桥设备抽象

网桥对Linux来说是虚拟设备,我们需要把一个或者多个真实设备绑定到网桥设备上,否则无法接收和传输报文。
当创建一个网桥时,必须告诉内核这个网桥绑定了那些接口。比如建立一个网桥br0,然后把eth0和eth1指派给br0,。此时eth0和eth1是网桥接口,它们不需要配置IP地址,可以把IP信息指定给网桥设备。
前面章节提到设备上传输报文要调用dev_queue_xmit执行,dev_queue_xmit函数会调用驱动程序的hard_start_xmit函数,网桥驱动程序使用这个函数查询要转发的数据库,选出正确的设备,如果查找失败就在网桥绑定的网卡上扩散报文。

桥接程序的初始化

桥接功能在br_init函数中初始化。初始化主要做以下事情:

  • 函数首先调用stp_proto_register注册生成树协议。
  • 初始化转发数据缓存结构net_bridge_fdb_entry。
  • 注册netdeivce通知链
  • 调用register_pernet_subsys函数注册网络子空间模块
  • 调用br_netlink_init函数初始化NETLINK相关功能。

static int __init br_init(void)
{
	int err;

	BUILD_BUG_ON(sizeof(struct br_input_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb));

	err = stp_proto_register(&br_stp_proto);
	if (err < 0) {
		pr_err("bridge: can't register sap for STP\n");
		return err;
	}

	err = br_fdb_init();
	if (err)
		goto err_out;

	err = register_pernet_subsys(&br_net_ops);
	if (err)
		goto err_out1;

	err = br_nf_core_init();
	if (err)
		goto err_out2;

	err = register_netdevice_notifier(&br_device_notifier);
	if (err)
		goto err_out3;

	err = register_switchdev_notifier(&br_switchdev_notifier);
	if (err)
		goto err_out4;

	err = br_netlink_init();
	if (err)
		goto err_out5;

	brioctl_set(br_ioctl_deviceless_stub);

#if IS_ENABLED(CONFIG_ATM_LANE)
	br_fdb_test_addr_hook = br_fdb_test_addr;
#endif

#if IS_MODULE(CONFIG_BRIDGE_NETFILTER)
	pr_info("bridge: filtering via arp/ip/ip6tables is no longer available "
		"by default. Update your scripts to load br_netfilter if you "
		"need this.\n");
#endif

	return 0;

err_out5:
	unregister_switchdev_notifier(&br_switchdev_notifier);
err_out4:
	unregister_netdevice_notifier(&br_device_notifier);
err_out3:
	br_nf_core_fini();
err_out2:
	unregister_pernet_subsys(&br_net_ops);
err_out1:
	br_fdb_fini();
err_out:
	stp_proto_unregister(&br_stp_proto);
	return err;
}

建立一个新网桥设备

br_add_bridge函数负责建立一个网桥设备。
br_del_bridge函数负责删除指定网桥设备。
br_add_if函数负责为网桥设备添加端口。
br_del_if函数负责为网桥设备删除端口。

网桥设备是虚拟设备,初始化会多一些额外的初始化工作。下面分析br_add_bridge函数代码:

  • 使用alloc_netdev分配网络设备描述符,分配该结构时传入参数私有参数的长度是sizeof(struct net_bridge),同时传入专有的br_dev_setup函数对net_device相关字段初始化。
  • 使用register_netdev函数向内核注册网络设备。

int br_add_bridge(struct net *net, const char *name)
{
	struct net_device *dev;
	int res;

	dev = alloc_netdev(sizeof(struct net_bridge), name, NET_NAME_UNKNOWN,
			   br_dev_setup);

	if (!dev)
		return -ENOMEM;

	dev_net_set(dev, net);
	dev->rtnl_link_ops = &br_link_ops;

	res = register_netdev(dev);
	if (res)
		free_netdev(dev);
	return res;
}

注册后网桥设备核心数据结构组织成下图所示结构:
在这里插入图片描述
前边提到使用网桥设备在注册时使用br_dev_setup函数作为参数传递给alloc_netdev函数,所以br_dev_setup函数在申请netdevice_dev时被调用。
br_dev_setup函数调用ether_setup函数初始化相关字段。
将网卡的操作函数初始化为br_netdev_ops,包括启动关闭设备,报文发送和接收等。
设置网桥设备标记IFF_EBRIDGE | IFF_NO_QUEUE,即网桥默认没有队列机制。

void br_dev_setup(struct net_device *dev)
{
	struct net_bridge *br = netdev_priv(dev);

	eth_hw_addr_random(dev);
	ether_setup(dev);

	dev->netdev_ops = &br_netdev_ops;
	dev->needs_free_netdev = true;
	dev->ethtool_ops = &br_ethtool_ops;
	SET_NETDEV_DEVTYPE(dev, &br_type);
	dev->priv_flags = IFF_EBRIDGE | IFF_NO_QUEUE;

接下来初始化端口列表和转发数据库,并设置网桥ID。

	br->dev = dev;
	spin_lock_init(&br->lock);
	INIT_LIST_HEAD(&br->port_list);
	INIT_HLIST_HEAD(&br->fdb_list);
	spin_lock_init(&br->hash_lock);

	br->bridge_id.prio[0] = 0x80;
	br->bridge_id.prio[1] = 0x00;

最后初始化br的MAC地址。并设置网桥的一些超时参数。
调用br_stp_timer_init初始化生成树协议定时器。
为br_fdb_cleanup函数初始化tasklet,用于清理转发数据库过期数据。

	ether_addr_copy(br->group_addr, eth_stp_addr);

	br->stp_enabled = BR_NO_STP;
	br->group_fwd_mask = BR_GROUPFWD_DEFAULT;
	br->group_fwd_mask_required = BR_GROUPFWD_DEFAULT;

	br->designated_root = br->bridge_id;
	br->bridge_max_age = br->max_age = 20 * HZ;
	br->bridge_hello_time = br->hello_time = 2 * HZ;
	br->bridge_forward_delay = br->forward_delay = 15 * HZ;
	br->bridge_ageing_time = br->ageing_time = BR_DEFAULT_AGEING_TIME;
	dev->max_mtu = ETH_MAX_MTU;

	br_netfilter_rtable_init(br);
	br_stp_timer_init(br);
	br_multicast_init(br);
	INIT_DELAYED_WORK(&br->gc_work, br_fdb_cleanup);

给网桥添加端口

给网桥添加端口使用br_add_if函数实现,这个函数首先对要添加到网桥的网口设备进行合法性检查。

/* called with RTNL */
int br_add_if(struct net_bridge *br, struct net_device *dev,
	      struct netlink_ext_ack *extack)
{
	struct net_bridge_port *p;
	int err = 0;
	unsigned br_hr, dev_hr;
	bool changed_addr;

	/* Don't allow bridging non-ethernet like devices, or DSA-enabled
	 * master network devices since the bridge layer rx_handler prevents
	 * the DSA fake ethertype handler to be invoked, so we do not strip off
	 * the DSA switch tag protocol header and the bridge layer just return
	 * RX_HANDLER_CONSUMED, stopping RX processing for these frames.
	 */
	if ((dev->flags & IFF_LOOPBACK) ||
	    dev->type != ARPHRD_ETHER || dev->addr_len != ETH_ALEN ||
	    !is_valid_ether_addr(dev->dev_addr) ||
	    netdev_uses_dsa(dev))
		return -EINVAL;

	/* No bridging of bridges */
	if (dev->netdev_ops->ndo_start_xmit == br_dev_xmit) {
		NL_SET_ERR_MSG(extack,
			       "Can not enslave a bridge to a bridge");
		return -ELOOP;
	}

	/* Device has master upper dev */
	if (netdev_master_upper_dev_get(dev))
		return -EBUSY;

	/* No bridging devices that dislike that (e.g. wireless) */
	if (dev->priv_flags & IFF_DONT_BRIDGE) {
		NL_SET_ERR_MSG(extack,
			       "Device does not allow enslaving to a bridge");
		return -EOPNOTSUPP;
	}

接下来使用new_nbp函数为网桥分配新端口描述符,并进行部分初始化。
调用call_netdevice_notifiers向netdevice_chain发送消息。

	p = new_nbp(br, dev);
	if (IS_ERR(p))
		return PTR_ERR(p);

	call_netdevice_notifiers(NETDEV_JOIN, dev);

该函数接下来的部分主要是注册sysfs和procfs接口。
和新端口相关设备的MAC会被br_fdb_insert加入到转发数据库中。

启动网桥设备

br_dev_open函数负责开启网桥设备,该函数主要负责:

  • 重新计算设备的特性。
  • netif_start_queue函数负责开启网卡传输队列启动数据传输。
  • br_stp_enable_bridge函数启动网桥设备。在该函数中会调用br_stp_enable_port函数启动之前绑定到这个设备上的端口。
void br_stp_enable_bridge(struct net_bridge *br)
{
	struct net_bridge_port *p;

	spin_lock_bh(&br->lock);
	if (br->stp_enabled == BR_KERNEL_STP)
		mod_timer(&br->hello_timer, jiffies + br->hello_time);
	mod_delayed_work(system_long_wq, &br->gc_work, HZ / 10);

	br_config_bpdu_generation(br);

	list_for_each_entry(p, &br->port_list, list) {
		if (netif_running(p->dev) && netif_oper_up(p->dev))
			br_stp_enable_port(p);

	}
	spin_unlock_bh(&br->lock);
}
static int br_dev_open(struct net_device *dev)
{
	struct net_bridge *br = netdev_priv(dev);

	netdev_update_features(dev);
	netif_start_queue(dev);
	br_stp_enable_bridge(br);
	br_multicast_open(br);

	return 0;
}

启动网桥端口

启用网桥端口需要满足下列条件:

  • 被绑定的设备已经能启动。
  • 被绑定的设备有检测到载波信号。
  • 相关网桥设备已经启动。

从上文中的网桥端口需要满足条件可以得出启动网桥端口动作会在上述三个事件发生时调用。
br_stp_enable_port函数用来启动网桥端口。该函数负责以下事情:

  • 调用br_init_port初始化端口函数,该函数会计算端口ID、分配指定角色、初始化端口定时器。
  • 调用br_port_state_selection函数更新所有端口的状态。
/* called under bridge lock */
void br_stp_enable_port(struct net_bridge_port *p)
{
	br_init_port(p);
	br_port_state_selection(p->br);
	br_ifinfo_notify(RTM_NEWLINK, NULL, p);
}

当一个端口启动时,首先做初始化,然后用br_port_state_selection函数为该端口指定合适的状态。该函数遍历所有网桥端口,把适当状态应用到每个端口上。对于没有运行STP的网桥,该函数把新端口置位BR_STATE_FORWARDING状态。


/* called under bridge lock */
void br_port_state_selection(struct net_bridge *br)
{
	struct net_bridge_port *p;
	unsigned int liveports = 0;

	list_for_each_entry(p, &br->port_list, list) {
		if (p->state == BR_STATE_DISABLED)
			continue;

		/* Don't change port states if userspace is handling STP */
		if (br->stp_enabled != BR_USER_STP) {
			if (p->port_no == br->root_port) {
				p->config_pending = 0;
				p->topology_change_ack = 0;
				br_make_forwarding(p);
			} else if (br_is_designated_port(p)) {
				del_timer(&p->message_age_timer);
				br_make_forwarding(p);
			} else {
				p->config_pending = 0;
				p->topology_change_ack = 0;
				br_make_blocking(p);
			}
		}

		if (p->state != BR_STATE_BLOCKING)
			br_multicast_enable_port(p);
		/* Multicast is not disabled for the port when it goes in
		 * blocking state because the timers will expire and stop by
		 * themselves without sending more queries.
		 */
		if (p->state == BR_STATE_FORWARDING)
			++liveports;
	}

	if (liveports == 0)
		netif_carrier_off(br->dev);
	else
		netif_carrier_on(br->dev);
}

改变端口状态

网桥端口从BLOCK状态到Forward状态需要经过中间状态。br_make_forwarding函数负责该状态转换,可以看到该函数中如果启用了STP协议不会直接将端口状态设置为BR_STATE_FORWARDING,而是先将接口设置为BR_STATE_LISTENING,之后设置为BR_STATE_LEARNING,最后设置为BR_STATE_FORWARDING状态。如果没有开启STP协议,就将接口直接设置为BR_STATE_FORWARDING状态。

static void br_make_forwarding(struct net_bridge_port *p)
{
	struct net_bridge *br = p->br;

	if (p->state != BR_STATE_BLOCKING)
		return;

	if (br->stp_enabled == BR_NO_STP || br->forward_delay == 0) {
		br_set_state(p, BR_STATE_FORWARDING);
		br_topology_change_detection(br);
		del_timer(&p->forward_delay_timer);
	} else if (br->stp_enabled == BR_KERNEL_STP)
		br_set_state(p, BR_STATE_LISTENING);
	else
		br_set_state(p, BR_STATE_LEARNING);

	br_ifinfo_notify(RTM_NEWLINK, NULL, p);

	if (br->forward_delay != 0)
		mod_timer(&p->forward_delay_timer, jiffies + br->forward_delay);
}

转发数据库

每个网桥都有自己的转发数据库,无论STP是否开启都会用到。
转发数据库数据结构在net_bridge数据结构的fdb_hash_tbl成员,被组织成一个hash表。对于网桥任何端口上学习到的MAC地址都会在该数据库中插入一个net_bridge_fdb_entry结构。

初始化

转发数据库的初始化是创建net_bridge_fdb_entry缓存。

int __init br_fdb_init(void)
{
	br_fdb_cache = kmem_cache_create("bridge_fdb_cache",
					 sizeof(struct net_bridge_fdb_entry),
					 0,
					 SLAB_HWCACHE_ALIGN, NULL);
	if (!br_fdb_cache)
		return -ENOMEM;

	return 0;
}

分配

net_bridge_fdb_entry结构的分配是通过fdb_create函数完成的,这个函数会使用kmem_cache_alloc函数申请一个net_bridge_fdb_entry结构并初始化一些字段。

static struct net_bridge_fdb_entry *fdb_create(struct net_bridge *br,
					       struct net_bridge_port *source,
					       const unsigned char *addr,
					       __u16 vid,
					       unsigned char is_local,
					       unsigned char is_static)
{
	struct net_bridge_fdb_entry *fdb;

	fdb = kmem_cache_alloc(br_fdb_cache, GFP_ATOMIC);
	if (fdb) {
		memcpy(fdb->key.addr.addr, addr, ETH_ALEN);
		fdb->dst = source;
		fdb->key.vlan_id = vid;
		fdb->is_local = is_local;
		fdb->is_static = is_static;
		fdb->added_by_user = 0;
		fdb->added_by_external_learn = 0;
		fdb->offloaded = 0;
		fdb->updated = fdb->used = jiffies;
		if (rhashtable_lookup_insert_fast(&br->fdb_hash_tbl,
						  &fdb->rhnode,
						  br_fdb_rht_params)) {
			kmem_cache_free(br_fdb_cache, fdb);
			fdb = NULL;
		} else {
			hlist_add_head_rcu(&fdb->fdb_node, &br->fdb_list);
		}
	}
	return fdb;
}

查询

查询就是通过报文的源MAC地址进行查询。br_fdb_find函数实现这个功能。

static struct net_bridge_fdb_entry *fdb_find_rcu(struct rhashtable *tbl,
						 const unsigned char *addr,
						 __u16 vid)
{
	struct net_bridge_fdb_key key;

	WARN_ON_ONCE(!rcu_read_lock_held());

	key.vlan_id = vid;
	memcpy(key.addr.addr, addr, sizeof(key.addr.addr));

	return rhashtable_lookup(tbl, &key, br_fdb_rht_params);
}
static struct net_bridge_fdb_entry *br_fdb_find(struct net_bridge *br,
						const unsigned char *addr,
						__u16 vid)
{
	struct net_bridge_fdb_entry *fdb;

	lockdep_assert_held_once(&br->hash_lock);

	rcu_read_lock();
	fdb = fdb_find_rcu(&br->fdb_hash_tbl, addr, vid);
	rcu_read_unlock();

	return fdb;
}

老化

每个网桥设备都有都有一个垃圾回收定时器定期扫描转发数据库,然后把过期数据删除掉。处理过期数据任务由br_fdb_cleanup完成。

void br_fdb_cleanup(struct work_struct *work)
{
	struct net_bridge *br = container_of(work, struct net_bridge,
					     gc_work.work);
	struct net_bridge_fdb_entry *f = NULL;
	unsigned long delay = hold_time(br);
	unsigned long work_delay = delay;
	unsigned long now = jiffies;

	/* this part is tricky, in order to avoid blocking learning and
	 * consequently forwarding, we rely on rcu to delete objects with
	 * delayed freeing allowing us to continue traversing
	 */
	rcu_read_lock();
	hlist_for_each_entry_rcu(f, &br->fdb_list, fdb_node) {
		unsigned long this_timer;

		if (f->is_static || f->added_by_external_learn)
			continue;
		this_timer = f->updated + delay;
		if (time_after(this_timer, now)) {
			work_delay = min(work_delay, this_timer - now);
		} else {
			spin_lock_bh(&br->hash_lock);
			if (!hlist_unhashed(&f->fdb_node))
				fdb_delete(br, f, true);
			spin_unlock_bh(&br->hash_lock);
		}
	}
	rcu_read_unlock();

	/* Cleanup minimum 10 milliseconds apart */
	work_delay = max_t(unsigned long, work_delay, msecs_to_jiffies(10));
	mod_delayed_work(system_long_wq, &br->gc_work, work_delay);
}

处理入口流量

前面提到的网桥设备添加接口使用br_add_if函数,在这个函数中调用netdev_rx_handler_register函数中设置net_device结构的rx_handler成员,如代码所示:

int netdev_rx_handler_register(struct net_device *dev,
			       rx_handler_func_t *rx_handler,
			       void *rx_handler_data)
{
	if (netdev_is_rx_handler_busy(dev))
		return -EBUSY;

	if (dev->priv_flags & IFF_NO_RX_HANDLER)
		return -EINVAL;

	/* Note: rx_handler_data must be set before rx_handler */
	rcu_assign_pointer(dev->rx_handler_data, rx_handler_data);
	rcu_assign_pointer(dev->rx_handler, rx_handler);

	return 0;
}
int br_add_if(struct net_bridge *br, struct net_device *dev,
	      struct netlink_ext_ack *extack)
{
	...
	err = netdev_rx_handler_register(dev, br_handle_frame, p);
	if (err)
		goto err4;
```...
}

前边章节提到在网卡接收到报文后调用__netif_receive_skb_core函数,在这个函数中判断rx_handler是否不为空,不为空的话就调用这个函数将报文传递过去,相当于调用br_handle_frame函数。
```c
static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc,
				    struct packet_type **ppt_prev)
{
	...
	rx_handler = rcu_dereference(skb->dev->rx_handler);
	if (rx_handler) {
		if (pt_prev) {
			ret = deliver_skb(skb, pt_prev, orig_dev);
			pt_prev = NULL;
		}
		switch (rx_handler(&skb)) {
		case RX_HANDLER_CONSUMED:
			ret = NET_RX_SUCCESS;
			goto out;
		case RX_HANDLER_ANOTHER:
			goto another_round;
		case RX_HANDLER_EXACT:
			deliver_exact = true;
		case RX_HANDLER_PASS:
			break;
		default:
			BUG();
		}
	}
	...
}

数据报文处理

br_handle_frame函数对报文进行进行检验,通过后调用br_handle_frame_finish函数处理报文。
该函数将报文的源MAC地址会调用br_fdb_update函数更新转发数据库。
如果该网口处于学习学习状态就将报文丢掉。
否则根据是否可以在数据库中查到信息,决定使用br_forward将报文转发到正确端口,或者使用br_flood函数将报文扩散到所有转发端口。

br_handle_frame_finish函数在下列条件下使用br_pass_frame_up函数提交到本地:

  • 网桥接口处于混杂模式。
  • 扩散帧
  • 目的MAC属于本地的一个接口。
    在br_pass_frame_up函数中将sbk->dev字段设置为报文进入网卡的net_device,然后再次调用br_netif_receive_skb函数将报文再次送回网络协议栈。
static int br_pass_frame_up(struct sk_buff *skb)
{
	struct net_device *indev, *brdev = BR_INPUT_SKB_CB(skb)->brdev;
	struct net_bridge *br = netdev_priv(brdev);
	struct net_bridge_vlan_group *vg;
	struct pcpu_sw_netstats *brstats = this_cpu_ptr(br->stats);

	u64_stats_update_begin(&brstats->syncp);
	brstats->rx_packets++;
	brstats->rx_bytes += skb->len;
	u64_stats_update_end(&brstats->syncp);

	vg = br_vlan_group_rcu(br);
	/* Bridge is just like any other port.  Make sure the
	 * packet is allowed except in promisc modue when someone
	 * may be running packet capture.
	 */
	if (!(brdev->flags & IFF_PROMISC) &&
	    !br_allowed_egress(vg, skb)) {
		kfree_skb(skb);
		return NET_RX_DROP;
	}

	indev = skb->dev;
	skb->dev = brdev;
	skb = br_handle_vlan(br, NULL, vg, skb);
	if (!skb)
		return NET_RX_DROP;
	/* update the multicast stats if the packet is IGMP/MLD */
	br_multicast_count(br, NULL, skb, br_multicast_igmp_type(skb),
			   BR_MCAST_DIR_TX);

	return NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN,
		       dev_net(indev), NULL, skb, indev, NULL,
		       br_netif_receive_skb);
}

网桥设备传输

网桥设备的ndo_start_xmit函数初始化为br_dev_xmit函数,这个函数负责将报文发送出去,在这个函数中实现了网桥传输逻辑。转发数据库查询到目的MAC地址信息时,就将报文从查出的端口发送出去。如果没有查询到信息就或者目的地址时广播多播MAC地址就扩散该报文。


/* net device transmit always called with BH disabled */
netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct net_bridge *br = netdev_priv(dev);
	struct net_bridge_fdb_entry *dst;
	struct net_bridge_mdb_entry *mdst;
	struct pcpu_sw_netstats *brstats = this_cpu_ptr(br->stats);
	const struct nf_br_ops *nf_ops;
	const unsigned char *dest;
	struct ethhdr *eth;
	u16 vid = 0;

	rcu_read_lock();
	nf_ops = rcu_dereference(nf_br_ops);
	if (nf_ops && nf_ops->br_dev_xmit_hook(skb)) {
		rcu_read_unlock();
		return NETDEV_TX_OK;
	}

	u64_stats_update_begin(&brstats->syncp);
	brstats->tx_packets++;
	brstats->tx_bytes += skb->len;
	u64_stats_update_end(&brstats->syncp);

	br_switchdev_frame_unmark(skb);
	BR_INPUT_SKB_CB(skb)->brdev = dev;

	skb_reset_mac_header(skb);
	eth = eth_hdr(skb);
	skb_pull(skb, ETH_HLEN);

	if (!br_allowed_ingress(br, br_vlan_group_rcu(br), skb, &vid))
		goto out;

	if (IS_ENABLED(CONFIG_INET) &&
	    (eth->h_proto == htons(ETH_P_ARP) ||
	     eth->h_proto == htons(ETH_P_RARP)) &&
	    br->neigh_suppress_enabled) {
		br_do_proxy_suppress_arp(skb, br, vid, NULL);
	} else if (IS_ENABLED(CONFIG_IPV6) &&
		   skb->protocol == htons(ETH_P_IPV6) &&
		   br->neigh_suppress_enabled &&
		   pskb_may_pull(skb, sizeof(struct ipv6hdr) +
				 sizeof(struct nd_msg)) &&
		   ipv6_hdr(skb)->nexthdr == IPPROTO_ICMPV6) {
			struct nd_msg *msg, _msg;

			msg = br_is_nd_neigh_msg(skb, &_msg);
			if (msg)
				br_do_suppress_nd(skb, br, vid, NULL, msg);
	}

	dest = eth_hdr(skb)->h_dest;
	if (is_broadcast_ether_addr(dest)) {
		br_flood(br, skb, BR_PKT_BROADCAST, false, true);
	} else if (is_multicast_ether_addr(dest)) {
		if (unlikely(netpoll_tx_running(dev))) {
			br_flood(br, skb, BR_PKT_MULTICAST, false, true);
			goto out;
		}
		if (br_multicast_rcv(br, NULL, skb, vid)) {
			kfree_skb(skb);
			goto out;
		}

		mdst = br_mdb_get(br, skb, vid);
		if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) &&
		    br_multicast_querier_exists(br, eth_hdr(skb)))
			br_multicast_flood(mdst, skb, false, true);
		else
			br_flood(br, skb, BR_PKT_MULTICAST, false, true);
	} else if ((dst = br_fdb_find_rcu(br, dest, vid)) != NULL) {
		br_forward(dst->dst, skb, false, true);
	} else {
		br_flood(br, skb, BR_PKT_UNICAST, false, true);
	}
out:
	rcu_read_unlock();
	return NETDEV_TX_OK;
}

处理入口BPDU

br_stp_rcv函数在br_init初始化网桥模块时被注册到内核用于处理STP报文。
该函数中根据BPDU报文类型信息调用br_received_tcn_bpdu函数或者br_received_config_bpdu函数。

/*
 * Called from llc.
 *
 * NO locks, but rcu_read_lock
 */
void br_stp_rcv(const struct stp_proto *proto, struct sk_buff *skb,
		struct net_device *dev)
{
	struct net_bridge_port *p;
	struct net_bridge *br;
	const unsigned char *buf;

	if (!pskb_may_pull(skb, 4))
		goto err;

	/* compare of protocol id and version */
	buf = skb->data;
	if (buf[0] != 0 || buf[1] != 0 || buf[2] != 0)
		goto err;

	p = br_port_get_check_rcu(dev);
	if (!p)
		goto err;

	br = p->br;
	spin_lock(&br->lock);

	if (br->stp_enabled != BR_KERNEL_STP)
		goto out;

	if (!(br->dev->flags & IFF_UP))
		goto out;

	if (p->state == BR_STATE_DISABLED)
		goto out;

	if (!ether_addr_equal(eth_hdr(skb)->h_dest, br->group_addr))
		goto out;

	if (p->flags & BR_BPDU_GUARD) {
		br_notice(br, "BPDU received on blocked port %u(%s)\n",
			  (unsigned int) p->port_no, p->dev->name);
		br_stp_disable_port(p);
		goto out;
	}

	buf = skb_pull(skb, 3);

	if (buf[0] == BPDU_TYPE_CONFIG) {
		struct br_config_bpdu bpdu;

		if (!pskb_may_pull(skb, 32))
			goto out;

		buf = skb->data;
		bpdu.topology_change = (buf[1] & 0x01) ? 1 : 0;
		bpdu.topology_change_ack = (buf[1] & 0x80) ? 1 : 0;
		...
		...
		bpdu.message_age = br_get_ticks(buf+24);
		bpdu.max_age = br_get_ticks(buf+26);
		bpdu.hello_time = br_get_ticks(buf+28);
		bpdu.forward_delay = br_get_ticks(buf+30);

		if (bpdu.message_age > bpdu.max_age) {
			if (net_ratelimit())
				br_notice(p->br,
					  "port %u config from %pM"
					  " (message_age %ul > max_age %ul)\n",
					  p->port_no,
					  eth_hdr(skb)->h_source,
					  bpdu.message_age, bpdu.max_age);
			goto out;
		}

		br_received_config_bpdu(p, &bpdu);
	} else if (buf[0] == BPDU_TYPE_TCN) {
		br_received_tcn_bpdu(p);
	}
 out:
	spin_unlock(&br->lock);
 err:
	kfree_skb(skb);
}

传输BPDU

  • void br_transmit_config(struct net_bridge_port *p)
    传输配置BPDU
  • void br_transmit_tcn(struct net_bridge *br)
    传输一个TCN BPDU
  • br_reply
    用一个配置BPDU应答一个BPDU。
  • 1
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值