前面已经分析了,将接口添进网桥时,用户空间调用ioctl(br_socket_fd, SIOCBRADDIF, &ifr)。注意到在void br_dev_setup(struct net_device *dev)中已经对dev->do_ioctl进行了赋值,即: dev->do_ioctl = br_dev_ioctl。通过ioctl进行访问的时候,进入到br_dev_ioctl: (net/bridge/br_ioctl.c) int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) { struct net_bridge *br = netdev_priv(dev); switch(cmd) { case SIOCDEVPRIVATE: return old_dev_ioctl(dev, rq, cmd); //添加一个接口 case SIOCBRADDIF: //删除一个接口 case SIOCBRDELIF: return add_del_if(br, rq->ifr_ifindex, cmd == SIOCBRADDIF); } pr_debug("Bridge does not support ioctl 0x%x\n", cmd); return -EOPNOTSUPP; } 我们在用户空间使用的标志是SIOCBRADDIF。所以流程进入add_del_if() static int add_del_if(struct net_bridge *br, int ifindex, int isadd) { struct net_device *dev; int ret; if (!capable(CAP_NET_ADMIN)) return -EPERM; dev = dev_get_by_index(ifindex); if (dev == NULL) return -EINVAL; if (isadd) ret = br_add_if(br, dev); else ret = br_del_if(br, dev); dev_put(dev); return ret; } 因为cmd == SIOCBRADDIF为真,所以调用br_add_if(): int br_add_if(struct net_bridge *br, struct net_device *dev) (net/bridge/br_if.c) { struct net_bridge_port *p; int err = 0; //回环,或者非以太网接口 if (dev->flags & IFF_LOOPBACK || dev->type != ARPHRD_ETHER) return -EINVAL; //发送函数为网桥的发送函数,即该设备本身就是网桥 if (dev->hard_start_xmit == br_dev_xmit) return -ELOOP; //此接口已经存在于网桥 if (dev->br_port != NULL) return -EBUSY; //为dev 创建网桥接口.dev->br_port。指向所属网桥端口 //dev->br_port->br:指向它所属的网桥 //为该接口创建net_bridge_port if (IS_ERR(p = new_nbp(br, dev, br_initial_port_cost(dev)))) return PTR_ERR(p); //更新port->MAC对应表 if ((err = br_fdb_insert(br, p, dev->dev_addr, 1))) destroy_nbp(p); else if ((err = br_sysfs_addif(p))) del_nbp(p); else { //设置接口为混杂模式 dev_set_promiscuity(dev, 1); //将p->list更新至br->port_list中 list_add_rcu(&p->list, &br->port_list); spin_lock_bh(&br->lock); br_stp_recalculate_bridge_id(br); if ((br->dev->flags & IFF_UP) && (dev->flags & IFF_UP) && netif_carrier_ok(dev)) br_stp_enable_port(p); spin_unlock_bh(&br->lock); dev_set_mtu(br->dev, br_min_mtu(br)); } return err; } 
为接口创建net_bridge_port的函数为new_nbp。这个函数比较简单: static struct net_bridge_port *new_nbp(struct net_bridge *br, struct net_device *dev, unsigned long cost) { int index; struct net_bridge_port *p; index = find_portno(br); if (index < 0) return ERR_PTR(index); p = kmalloc(sizeof(*p), GFP_KERNEL); if (p == NULL) return ERR_PTR(-ENOMEM); memset(p, 0, sizeof(*p)); p->br = br; dev_hold(dev); p->dev = dev; p->path_cost = cost; p->priority = 0x8000 >> BR_PORT_BITS; dev->br_port = p; p->port_no = index; br_init_port(p); p->state = BR_STATE_DISABLED; kobject_init(&p->kobj); return p; } 之后,把要加入的接口对应的mac与接口作为本机静态项加入到port-mac对应表。这是在br_fdb_insert()中实现的 int br_fdb_insert(struct net_bridge *br, struct net_bridge_port *source, const unsigned char *addr, int is_local) { int ret; spin_lock_bh(&br->hash_lock); ret = fdb_insert(br, source, addr, is_local); spin_unlock_bh(&br->hash_lock); return ret; } 操作存在异步性,在插入之前加锁。具体的插入在fdb_insert中实现 static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, const unsigned char *addr, int is_local) { struct hlist_node *h; struct net_bridge_fdb_entry *fdb; int hash = br_mac_hash(addr); //判断是否为有效的mac 地址 if (!is_valid_ether_addr(addr)) return -EADDRNOTAVAIL; hlist_for_each_entry(fdb, h, &br->hash[hash], hlist) { //如果表中已经包含了此项 if (!memcmp(fdb->addr.addr, addr, ETH_ALEN)) { //如果为本机MAC /* attempt to update an entry for a local interface */ if (fdb->is_local) { /* it is okay to have multiple ports with same * address, just don't allow to be spoofed. 
*/ if (is_local) return 0; if (net_ratelimit()) printk(KERN_WARNING "%s: received packet with " " own address as source address\n", source->dev->name); return -EEXIST; } //如果添加的是本机MAC if (is_local) { printk(KERN_WARNING "%s adding interface with same address " "as a received packet\n", source->dev->name); goto update; } //如果添加的是静态MAC //则不更新相关的信息 if (fdb->is_static) return 0; /* move to end of age list */ list_del(&fdb->u.age_list); goto update; } } fdb = kmem_cache_alloc(br_fdb_cache, GFP_ATOMIC); if (!fdb) return ENOMEM; memcpy(fdb->addr.addr, addr, ETH_ALEN); atomic_set(&fdb->use_count, 1); hlist_add_head_rcu(&fdb->hlist, &br->hash[hash]); if (!timer_pending(&br->gc_timer)) { br->gc_timer.expires = jiffies + hold_time(br); add_timer(&br->gc_timer); } update: fdb->dst = source; fdb->is_local = is_local; fdb->is_static = is_local; fdb->ageing_timer = jiffies; if (!is_local) list_add_tail(&fdb->u.age_list, &br->age_list); return 0; } 此函数先判断要插入项是否存在,若是已存在,且不为静态项,则更新对应项。若不存在该项,则分配一个net_bridge_fdb_entry,插入到CAM表 先来分析一下net_bridge_port的结构: struct net_bridge_port { //当前端口所在的bridge struct net_bridge *br; //此端口对应的物理端口 struct net_device *dev; //同一桥内的端口链表? 
struct list_head list; /* STP */ u8 priority; u8 state; u16 port_no; unsigned char topology_change_ack; unsigned char config_pending; port_id port_id; port_id designated_port; bridge_id designated_root; bridge_id designated_bridge; u32 path_cost; u32 designated_cost; struct timer_list forward_delay_timer; struct timer_list hold_timer; struct timer_list message_age_timer; struct kobject kobj; struct rcu_head rcu; }; 对应的net_bridge_fdb_entry结构: //CAM表中对应的数据结构 struct net_bridge_fdb_entry { //用于CAM表连接的链表指针 struct hlist_node hlist; //此项对应的物理出口 struct net_bridge_port *dst; union { struct list_head age_list; struct rcu_head rcu; } u; //此项的当前的引用计数 atomic_t use_count; //超时时间 unsigned long ageing_timer; //MAC地址 mac_addr addr; //是否为主机地址 unsigned char is_local; //是否为静态地址 unsigned char is_static; }; struct net_bridge_port { //当前端口所在的bridge struct net_bridge *br; //此端口对应的物理端口 struct net_device *dev; //同一桥内的端口链表? struct list_head list; /* STP */ u8 priority; u8 state; u16 port_no; unsigned char topology_change_ack; unsigned char config_pending; port_id port_id; port_id designated_port; bridge_id designated_root; bridge_id designated_bridge; u32 path_cost; u32 designated_cost; struct timer_list forward_delay_timer; struct timer_list hold_timer; struct timer_list message_age_timer; struct kobject kobj; struct rcu_head rcu; }; 这样,就往桥中添加了一个接口,从上图中可以反映出接口与桥之间的关系。我们可以用brctl show指令看到当前所有的桥,以及桥里相应的接口。用ifconfig br0可以看当前桥的状态,如果细心一点可以看到,br0已经有了对应的MAC。这是怎么来的呢? 
桥MAC地址的更新: 注意到在br_add_if中调用了函数br_stp_recalculate_bridge_id() 在上面的代码分析中,为了简化分析,把stp的相关流程忽略掉了,现在我们看下这个函数做了些什么 /* called under bridge lock */ void br_stp_recalculate_bridge_id(struct net_bridge *br) { const unsigned char *addr = br_mac_zero; struct net_bridge_port *p; //遍历桥中所有的端口 list_for_each_entry(p, &br->port_list, list) { //取所有接口中MAC的最小值 if (addr == br_mac_zero || memcmp(p->dev->dev_addr, addr, ETH_ALEN) < 0) addr = p->dev->dev_addr; } //如果不与现在桥的MAC相同 if (memcmp(br->bridge_id.addr, addr, ETH_ALEN)) br_stp_change_bridge_id(br, addr); } 这个函数比较简单,它就是遍历桥对应的所有接口,然后取最小的MAC。然后判断最小MAC跟现在的MAC是否相同 继续跟踪br_stp_change_bridge_id static void br_stp_change_bridge_id(struct net_bridge *br, const unsigned char *addr) { unsigned char oldaddr[6]; struct net_bridge_port *p; int wasroot; wasroot = br_is_root_bridge(br); memcpy(oldaddr, br->bridge_id.addr, ETH_ALEN); memcpy(br->bridge_id.addr, addr, ETH_ALEN); //注意到这里,呵呵,桥的MAC更新了 memcpy(br->dev->dev_addr, addr, ETH_ALEN); list_for_each_entry(p, &br->port_list, list) { if (!memcmp(p->designated_bridge.addr, oldaddr, ETH_ALEN)) memcpy(p->designated_bridge.addr, addr, ETH_ALEN); if (!memcmp(p->designated_root.addr, oldaddr, ETH_ALEN)) memcpy(p->designated_root.addr, addr, ETH_ALEN); } br_configuration_update(br); br_port_state_selection(br); if (br_is_root_bridge(br) && !wasroot) br_become_root_bridge(br); } 看到上面的注释了吧,桥的MAC就是在这里得到更新的,所以,桥的MAC地址取的是所有接口中的最小值 网桥对接收数据的处理: 回到本章的开始的handle_bridge函数,会调用br_handle_frame_hook进行接收数据的处理 在网桥的初始化代码中,把br_handle_frame_hook赋值为了br_handle_frame 没错,这就是网桥的处理函数。跟进这个函数 int br_handle_frame(struct net_bridge_port *p, struct sk_buff **pskb) { struct sk_buff *skb = *pskb; //目的mac地址 const unsigned char *dest = eth_hdr(skb)->h_dest; //端口禁用 if (p->state == BR_STATE_DISABLED) goto err; //源mac 为多播或者广播,丢弃 //即首字节最低位为1的地址 if (eth_hdr(skb)->h_source[0] & 1) goto err; //如果状态为学习或者转发,则学习源mac 更新CAM 表 if (p->state == BR_STATE_LEARNING || p->state == BR_STATE_FORWARDING) // br_fdb_insert函数我们在前面已经分析过了 br_fdb_insert(p->br, p, 
eth_hdr(skb)->h_source, 0); //stp 的处理,stp-enabled 是否启用stp 协议 //bridge_ula stp使用的多播mac地址 if (p->br->stp_enabled && !memcmp(dest, bridge_ula, 5) && !(dest[5] & 0xF0)) { if (!dest[5]) { NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev, NULL, br_stp_handle_bpdu); return 1; } } else if (p->state == BR_STATE_FORWARDING) { //在初始化中,并未对br_should_route_hook进行赋值 //所以br_should_route_hook为假 if (br_should_route_hook) { if (br_should_route_hook(pskb)) return 0; skb = *pskb; dest = eth_hdr(skb)->h_dest; } //目的地址与桥地址相同。则传与上层处理 //置skb->pkt_type = PACKET_HOST if (!memcmp(p->br->dev->dev_addr, dest, ETH_ALEN)) skb->pkt_type = PACKET_HOST; //网桥在NF_BR_PRE_ROUTING点上的netfilter处理 NF_HOOK(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL, br_handle_frame_finish); return 1; } err: kfree_skb(skb); return 1; } 在这个函数里,进行相关的入口判断之后,会把当前数据包的源MAC与接口对应更新到CAM表中,更新函数br_fdb_insert()在前面已经分析过了,不太明白的可以倒过去看下,不过注意了,这里不是作为静态项插入的(is_local参数为0)。 接着判断包是不是传给本机的,如果是,则置包的pkt_type为PACKET_HOST 关于NF_HOOK()宏,我们在以后的netfilter中有专题分析。这里我们只要知道,正常的数据包会流进br_handle_frame_finish()进行处理 /* note: already called with rcu_read_lock (preempt_disabled) */ int br_handle_frame_finish(struct sk_buff *skb) { //取得目的MAC地址 const unsigned char *dest = eth_hdr(skb)->h_dest; struct net_bridge_port *p = skb->dev->br_port; struct net_bridge *br = p->br; struct net_bridge_fdb_entry *dst; int passedup = 0; //混杂模式 /*如果网桥的虚拟网卡处于混杂模式,那么每个接收到的数据包都需要克隆一份 送到AF_PACKET协议处理体(网络软中断函数net_rx_action中ptype_all链的处理)。*/ if (br->dev->flags & IFF_PROMISC) { struct sk_buff *skb2; skb2 = skb_clone(skb, GFP_ATOMIC); if (skb2 != NULL) { passedup = 1; br_pass_frame_up(br, skb2); } } //目的mac 为多播或者广播,则需要传至上层进行处理 //passedup为传送标志,为1 时表示已经上传过了 if (dest[0] & 1) { br_flood_forward(br, skb, !passedup); if (!passedup) br_pass_frame_up(br, skb); goto out; } //查询CAM 表 dst = __br_fdb_get(br, dest); //到本机的? 
传至上层协议处理 if (dst != NULL && dst->is_local) { if (!passedup) br_pass_frame_up(br, skb); else kfree_skb(skb); goto out; } //不是本机的数据,则转发 if (dst != NULL) { br_forward(dst->dst, skb); goto out; } //如果查询不到,在其它端口上都发送此包 br_flood_forward(br, skb, 0); out: return 0; } 在这个函数里,通过查找CAM表,取得发送端口,如果当前CAM表里没有到目的MAC的端口,则在其它端口上都发送此数据包。 在这个函数里,我们看到,查询CAM表的函数为:__br_fdb_get() 接着分析一下此函数 struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br, const unsigned char *addr) { struct hlist_node *h; struct net_bridge_fdb_entry *fdb; //遍历对应MAC哈希项中的fdb hlist_for_each_entry_rcu(fdb, h, &br->hash[br_mac_hash(addr)], hlist) { if (!memcmp(fdb->addr.addr, addr, ETH_ALEN)) { if (unlikely(has_expired(br, fdb))) break; return fdb; } } return NULL; } 这个函数非常容易,首先取得目的MAC对应的哈希项。然后再遍历里面的数据,查看是否含有目的地址的项。如果是送给本机的数据包,则传至上层协议,如不是,则需要转发。关于上层怎么处理,以及如何转发。 |
linux协议栈之网桥实现之二
最新推荐文章于 2021-05-13 11:18:04 发布