桥接实现

绑定是指把真实设备绑定到(虚拟的)网桥设备的过程。

桥接设备抽象

image 桥接设备关键数据结构:

   1:  struct bridge_id
   2:  {
   3:      unsigned char    prio[2];
   4:      unsigned char    addr[6];
   5:  };

网桥ID,各个字段分别为优先级和MAC地址

   1:  struct net_bridge_port
   2:  {
   3:      struct net_bridge        *br;
   4:      struct net_device        *dev;
   5:      struct list_head        list;
   6:   
   7:      /* STP */
   8:      u8                priority;
   9:      u8                state;
  10:      u16                port_no;
  11:      unsigned char            topology_change_ack;
  12:      unsigned char            config_pending;
  13:      port_id                port_id;
  14:      port_id                designated_port;
  15:      bridge_id            designated_root;
  16:      bridge_id            designated_bridge;
  17:      u32                path_cost;
  18:      u32                designated_cost;
  19:      unsigned long            designated_age;
  20:   
  21:      struct timer_list        forward_delay_timer;
  22:      struct timer_list        hold_timer;
  23:      struct timer_list        message_age_timer;
  24:      struct kobject            kobj;
  25:      struct rcu_head            rcu;
  26:   
  27:      unsigned long             flags;
  28:  #define BR_HAIRPIN_MODE        0x00000001
  29:   
  30:  #ifdef CONFIG_BRIDGE_IGMP_SNOOPING
  31:      u32                multicast_startup_queries_sent;
  32:      unsigned char            multicast_router;
  33:      struct timer_list        multicast_router_timer;
  34:      struct timer_list        multicast_query_timer;
  35:      struct hlist_head        mglist;
  36:      struct hlist_node        rlist;
  37:  #endif
  38:   
  39:  #ifdef CONFIG_SYSFS
  40:      char                sysfs_name[IFNAMSIZ];
  41:  #endif
  42:   
  43:  #ifdef CONFIG_NET_POLL_CONTROLLER
  44:      struct netpoll            *np;
  45:  #endif
  46:  };

网桥设备:

   1:  struct net_bridge
   2:  {
   3:      spinlock_t            lock;
   4:      struct list_head        port_list;//网桥设备的所有端口
   5:      struct net_device        *dev;//实际的网络设备,虚拟的
   6:   
   7:      struct br_cpu_netstats __percpu *stats;//网桥的统计信息
   8:      spinlock_t            hash_lock;
   9:      struct hlist_head        hash[BR_HASH_SIZE];
  10:  #ifdef CONFIG_BRIDGE_NETFILTER
  11:      struct rtable             fake_rtable;
  12:      bool                nf_call_iptables;
  13:      bool                nf_call_ip6tables;
  14:      bool                nf_call_arptables;
  15:  #endif
  16:      unsigned long            flags;
  17:  #define BR_SET_MAC_ADDR        0x00000001
  18:   
  19:      u16                group_fwd_mask;
  20:   
  21:      /* STP */
  22:      bridge_id            designated_root;//指定根
  23:      bridge_id            bridge_id;//网桥id
  24:      u32                root_path_cost;//根路径代价
  25:      unsigned long            max_age;//最大生存时间
  26:      unsigned long            hello_time;//hello 时间
  27:      unsigned long            forward_delay;//转发延时
  28:      unsigned long            bridge_max_age;
  29:      unsigned long            ageing_time;
  30:      unsigned long            bridge_hello_time;
  31:      unsigned long            bridge_forward_delay;
  32:   
  33:      u8                group_addr[ETH_ALEN];
  34:      u16                root_port;
  35:   
  36:      enum {
  37:          BR_NO_STP,         /* no spanning tree */
  38:          BR_KERNEL_STP,        /* old STP in kernel */
  39:          BR_USER_STP,        /* new RSTP in userspace */
  40:      } stp_enabled;
  41:   
  42:      unsigned char            topology_change;
  43:      unsigned char            topology_change_detected;
  44:   
  45:  #ifdef CONFIG_BRIDGE_IGMP_SNOOPING
  46:      unsigned char            multicast_router;
  47:   
  48:      u8                multicast_disabled:1;
  49:   
  50:      u32                hash_elasticity;
  51:      u32                hash_max;
  52:   
  53:      u32                multicast_last_member_count;
  54:      u32                multicast_startup_queries_sent;
  55:      u32                multicast_startup_query_count;
  56:   
  57:      unsigned long            multicast_last_member_interval;
  58:      unsigned long            multicast_membership_interval;
  59:      unsigned long            multicast_querier_interval;
  60:      unsigned long            multicast_query_interval;
  61:      unsigned long            multicast_query_response_interval;
  62:      unsigned long            multicast_startup_query_interval;
  63:   
  64:      spinlock_t            multicast_lock;
  65:      struct net_bridge_mdb_htable __rcu *mdb;
  66:      struct hlist_head        router_list;
  67:   
  68:      struct timer_list        multicast_router_timer;//各种定时器
  69:      struct timer_list        multicast_querier_timer;
  70:      struct timer_list        multicast_query_timer;
  71:  #endif
  72:   
  73:      struct timer_list        hello_timer;
  74:      struct timer_list        tcn_timer;
  75:      struct timer_list        topology_change_timer;
  76:      struct timer_list        gc_timer;
  77:      struct kobject            *ifobj;
  78:  };

网桥端口:

   1:  struct net_bridge_port
   2:  {
   3:      struct net_bridge        *br;//端口所属的网桥设备
   4:      struct net_device        *dev;//端口所指代的网卡设备
   5:      struct list_head        list;//同一网桥的端口,在同一条链上
   6:   
   7:      /* STP */
   8:      u8                priority;
   9:      u8                state;
  10:      u16                port_no;
  11:      unsigned char            topology_change_ack;
  12:      unsigned char            config_pending;
  13:      port_id                port_id;//端口ID
  14:      port_id                designated_port;//指定端口
  15:      bridge_id            designated_root;//根网桥
  16:      bridge_id            designated_bridge;//指定网桥
  17:      u32                path_cost;
  18:      u32                designated_cost;
  19:      unsigned long            designated_age;
  20:   
  21:      struct timer_list        forward_delay_timer;//计时器
  22:      struct timer_list        hold_timer;
  23:      struct timer_list        message_age_timer;
  24:      struct kobject            kobj;
  25:      struct rcu_head            rcu;
  26:   
  27:      unsigned long             flags;
  28:  #define BR_HAIRPIN_MODE        0x00000001
  29:   
  30:  #ifdef CONFIG_BRIDGE_IGMP_SNOOPING
  31:      u32                multicast_startup_queries_sent;
  32:      unsigned char            multicast_router;
  33:      struct timer_list        multicast_router_timer;
  34:      struct timer_list        multicast_query_timer;
  35:      struct hlist_head        mglist;
  36:      struct hlist_node        rlist;
  37:  #endif
  38:   
  39:  #ifdef CONFIG_SYSFS
  40:      char                sysfs_name[IFNAMSIZ];
  41:  #endif
  42:   
  43:  #ifdef CONFIG_NET_POLL_CONTROLLER
  44:      struct netpoll            *np;
  45:  #endif
  46:  };
 
   1:  struct net_bridge_fdb_entry
   2:  {
   3:      struct hlist_node        hlist;
   4:      struct net_bridge_port        *dst;
   5:   
   6:      struct rcu_head            rcu;
   7:      unsigned long            updated;
   8:      unsigned long            used;
   9:      mac_addr            addr;
  10:      unsigned char            is_local;
  11:      unsigned char            is_static;
  12:  };

转发数据库的数据项。

   1:  struct br_config_bpdu
   2:  {
   3:      unsigned    topology_change:1;
   4:      unsigned    topology_change_ack:1;
   5:      bridge_id    root;
   6:      int        root_path_cost;
   7:      bridge_id    bridge_id;
   8:      port_id        port_id;
   9:      int        message_age;
  10:      int        max_age;
  11:      int        hello_time;
  12:      int        forward_delay;
  13:  };

入口配置BPDU中的一些关键字段会被复制到该数据结构中;传递给配置BPDU处理函数的是这个结构,而不是原始的BPDU。

所有数据结构的组织:

image

桥接程序的初始化:

   1:  static int __init br_init(void)
   2:  {
   3:      int err;
   4:   
   5:      err = stp_proto_register(&br_stp_proto);
   6:      if (err < 0) {
   7:          pr_err("bridge: can't register sap for STP\n");
   8:          return err;
   9:      }
  10:   
  11:      err = br_fdb_init();
  12:      if (err)
  13:          goto err_out;
  14:   
  15:      err = register_pernet_subsys(&br_net_ops);
  16:      if (err)
  17:          goto err_out1;
  18:   
  19:      err = br_netfilter_init();
  20:      if (err)
  21:          goto err_out2;
  22:   
  23:      err = register_netdevice_notifier(&br_device_notifier);
  24:      if (err)
  25:          goto err_out3;
  26:   
  27:      err = br_netlink_init();
  28:      if (err)
  29:          goto err_out4;
  30:   
  31:      brioctl_set(br_ioctl_deviceless_stub);
  32:   
  33:  #if IS_ENABLED(CONFIG_ATM_LANE)
  34:      br_fdb_test_addr_hook = br_fdb_test_addr;
  35:  #endif
  36:   
  37:      return 0;
  38:  err_out4:
  39:      unregister_netdevice_notifier(&br_device_notifier);
  40:  err_out3:
  41:      br_netfilter_fini();
  42:  err_out2:
  43:      unregister_pernet_subsys(&br_net_ops);
  44:  err_out1:
  45:      br_fdb_fini();
  46:  err_out:
  47:      stp_proto_unregister(&br_stp_proto);
  48:      return err;

49: }

 

桥接函数的退出函数,将init函数所注册的内容撤销

1: static void __exit br_deinit(void)

   2:  {
   3:      stp_proto_unregister(&br_stp_proto);
   4:   
   5:      br_netlink_fini();
   6:      unregister_netdevice_notifier(&br_device_notifier);
   7:      brioctl_set(NULL);
   8:   
   9:      unregister_pernet_subsys(&br_net_ops);
  10:   
  11:      rcu_barrier(); /* Wait for completion of call_rcu()'s */
  12:   
  13:      br_netfilter_fini();
  14:  #if IS_ENABLED(CONFIG_ATM_LANE)
  15:      br_fdb_test_addr_hook = NULL;
  16:  #endif
  17:   
  18:      br_fdb_fini();
  19:  }

 

其中,br_fdb_init是建立转发数据库,就是在内存中建立一块slab cache,以存放net_bridge_fdb_entry结构。

br_init还通过register_netdevice_notifier向netdev_chain通知链注册了一个回调(br_device_notifier)。

向网络命名空间添加网桥设备

   1:  int br_add_bridge(struct net *net, const char *name)
   2:  {
   3:      struct net_device *dev;
   4:      int res;
   5:   
   6:      dev = alloc_netdev(sizeof(struct net_bridge), name,
   7:                 br_dev_setup);
   8:   
   9:      if (!dev)
  10:          return -ENOMEM;
  11:   
  12:      dev_net_set(dev, net);
  13:   
  14:      res = register_netdev(dev);
  15:      if (res)
  16:          free_netdev(dev);
  17:      return res;
  18:  }

基本流程与添加其他真实网络设备相似,其中netdevice私有数据(priv)的初始化由br_dev_setup函数完成,STP协议相关参数的默认值也是在此处设置的。

   1:  void br_dev_setup(struct net_device *dev)
   2:  {
   3:      struct net_bridge *br = netdev_priv(dev);
   4:   
   5:      random_ether_addr(dev->dev_addr);
   6:      ether_setup(dev);
   7:   
   8:      dev->netdev_ops = &br_netdev_ops;
   9:      dev->destructor = br_dev_free;
  10:      SET_ETHTOOL_OPS(dev, &br_ethtool_ops);
  11:      SET_NETDEV_DEVTYPE(dev, &br_type);
  12:      dev->tx_queue_len = 0;
  13:      dev->priv_flags = IFF_EBRIDGE;
  14:   
  15:      dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA |
  16:              NETIF_F_GSO_MASK | NETIF_F_HW_CSUM | NETIF_F_LLTX |
  17:              NETIF_F_NETNS_LOCAL | NETIF_F_HW_VLAN_TX;
  18:      dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA |
  19:                 NETIF_F_GSO_MASK | NETIF_F_HW_CSUM |
  20:                 NETIF_F_HW_VLAN_TX;
  21:   
  22:      br->dev = dev;
  23:      spin_lock_init(&br->lock);
  24:      INIT_LIST_HEAD(&br->port_list);
  25:      spin_lock_init(&br->hash_lock);
  26:   
  27:      br->bridge_id.prio[0] = 0x80;
  28:      br->bridge_id.prio[1] = 0x00;//网桥默认优先级值 32768
  29:   
  30:      memcpy(br->group_addr, br_group_address, ETH_ALEN);
  31:   
  32:      br->stp_enabled = BR_NO_STP;
  33:      br->group_fwd_mask = BR_GROUPFWD_DEFAULT;
  34:   
  35:      br->designated_root = br->bridge_id;//第一次加电时,认为自己就是根网桥
  36:      br->bridge_max_age = br->max_age = 20 * HZ;
  37:      br->bridge_hello_time = br->hello_time = 2 * HZ;
  38:      br->bridge_forward_delay = br->forward_delay = 15 * HZ;
  39:      br->ageing_time = 300 * HZ;//老化时间
  40:   
  41:      br_netfilter_rtable_init(br);
  42:      br_stp_timer_init(br);//初始化每个网桥的定时器
  43:      br_multicast_init(br);
  44:  }

网桥设备支持的操作:

   1:  static const struct net_device_ops br_netdev_ops = {
   2:      .ndo_open         = br_dev_open,
   3:      .ndo_stop         = br_dev_stop,
   4:      .ndo_init         = br_dev_init,
   5:      .ndo_start_xmit         = br_dev_xmit,
   6:      .ndo_get_stats64     = br_get_stats64,
   7:      .ndo_set_mac_address     = br_set_mac_address,
   8:      .ndo_set_rx_mode     = br_dev_set_multicast_list,
   9:      .ndo_change_mtu         = br_change_mtu,
  10:      .ndo_do_ioctl         = br_dev_ioctl,
  11:  #ifdef CONFIG_NET_POLL_CONTROLLER
  12:      .ndo_netpoll_setup     = br_netpoll_setup,
  13:      .ndo_netpoll_cleanup     = br_netpoll_cleanup,
  14:      .ndo_poll_controller     = br_poll_controller,
  15:  #endif
  16:      .ndo_add_slave         = br_add_slave,
  17:      .ndo_del_slave         = br_del_slave,
  18:      .ndo_fix_features        = br_fix_features,
  19:  };

网桥设备默认是没有实现队列机制,而是让被绑定的设备负责实现,所以tx_queue_len值被设定为0.

当网桥设备上的MTU改变时,内核必须确保新MTU值不会大于那些被绑定的设备中最小的MTU值。这一点由br_change_mtu函数来确保。

   1:  static int br_change_mtu(struct net_device *dev, int new_mtu)
   2:  {
   3:      struct net_bridge *br = netdev_priv(dev);
   4:      if (new_mtu < 68 || new_mtu > br_min_mtu(br))
   5:          return -EINVAL;
   6:   
   7:      dev->mtu = new_mtu;
   8:   
   9:  #ifdef CONFIG_BRIDGE_NETFILTER
  10:      /* remember the MTU in the rtable for PMTU */
  11:      dst_metric_set(&br->fake_rtable.dst, RTAX_MTU, new_mtu);
  12:  #endif
  13:   
  14:      return 0;
  15:  }
当网桥设备因管理手段被开启或关闭时,内核会分别通过dev_open和dev_close调用br_dev_open和br_dev_stop来启动和关闭网桥。

删除网桥:

   1:  int br_del_bridge(struct net *net, const char *name)
   2:  {
   3:      struct net_device *dev;
   4:      int ret = 0;
   5:   
   6:      rtnl_lock();
   7:      dev = __dev_get_by_name(net, name);
   8:      if (dev == NULL)
   9:          ret =  -ENXIO;     /* Could not find device */
  10:   
  11:      else if (!(dev->priv_flags & IFF_EBRIDGE)) {
  12:          /* Attempt to delete non bridge device! */
  13:          ret = -EPERM;
  14:      }
  15:   
  16:      else if (dev->flags & IFF_UP) {
  17:          /* Not shutdown yet. */
  18:          ret = -EBUSY;
  19:      }
  20:   
  21:      else
  22:          br_dev_delete(dev, NULL);
  23:   
  24:      rtnl_unlock();
  25:      return ret;
  26:  }

删除网桥设备时,首先判断设备是否存在,然后判断是否为网桥设备,接着判断该网桥是否已经关闭,最后调用br_dev_delete删除。

   1:  /* Delete bridge device */
   2:  void br_dev_delete(struct net_device *dev, struct list_head *head)
   3:  {
   4:      struct net_bridge *br = netdev_priv(dev);
   5:      struct net_bridge_port *p, *n;
   6:   
   7:      list_for_each_entry_safe(p, n, &br->port_list, list) {
   8:          del_nbp(p);
   9:      }
  10:   
  11:      del_timer_sync(&br->gc_timer);
  12:   
  13:      br_sysfs_delbr(br->dev);
  14:      unregister_netdevice_queue(br->dev, head);
  15:  }

该函数首先会删除网桥的全部端口。对于每个网桥端口来说,也会删除/sys目录中相关的链接(这些链接是目录),然后停止垃圾收集定时器。br_sysfs_delbr删除/sys/class/net目录中的网桥设备子目录。最后调用unregister_netdevice_queue从内核注销该设备。

给网桥添加端口

   1:  /* called with RTNL */
   2:  int br_add_if(struct net_bridge *br, struct net_device *dev)
   3:  {
   4:      struct net_bridge_port *p;
   5:      int err = 0;
   6:      bool changed_addr;
   7:   
   8:      /* Don't allow bridging non-ethernet like devices */
   9:      if ((dev->flags & IFF_LOOPBACK) ||
  10:          dev->type != ARPHRD_ETHER || dev->addr_len != ETH_ALEN ||
  11:          !is_valid_ether_addr(dev->dev_addr))
  12:          return -EINVAL;
  13:   
  14:      /* No bridging of bridges */
  15:      if (dev->netdev_ops->ndo_start_xmit == br_dev_xmit)
  16:          return -ELOOP;
  17:   
  18:      /* Device is already being bridged */
  19:      if (br_port_exists(dev))
  20:          return -EBUSY;
  21:   
  22:      /* No bridging devices that dislike that (e.g. wireless) */
  23:      if (dev->priv_flags & IFF_DONT_BRIDGE)
  24:          return -EOPNOTSUPP;
  25:   
  26:      p = new_nbp(br, dev);
  27:      if (IS_ERR(p))
  28:          return PTR_ERR(p);
  29:   
  30:      call_netdevice_notifiers(NETDEV_JOIN, dev);
  31:   
  32:      err = dev_set_promiscuity(dev, 1);
  33:      if (err)
  34:          goto put_back;
  35:   
  36:      err = kobject_init_and_add(&p->kobj, &brport_ktype, &(dev->dev.kobj),
  37:                     SYSFS_BRIDGE_PORT_ATTR);
  38:      if (err)
  39:          goto err1;
  40:   
  41:      err = br_sysfs_addif(p);
  42:      if (err)
  43:          goto err2;
  44:   
  45:      if (br_netpoll_info(br) && ((err = br_netpoll_enable(p))))
  46:          goto err3;
  47:   
  48:      err = netdev_set_master(dev, br->dev);
  49:      if (err)
  50:          goto err3;
  51:   
  52:      err = netdev_rx_handler_register(dev, br_handle_frame, p);
  53:      if (err)
  54:          goto err4;
  55:   
  56:      dev->priv_flags |= IFF_BRIDGE_PORT;
  57:   
  58:      dev_disable_lro(dev);
  59:   
  60:      list_add_rcu(&p->list, &br->port_list);
  61:   
  62:      netdev_update_features(br->dev);
  63:   
  64:      spin_lock_bh(&br->lock);
  65:      changed_addr = br_stp_recalculate_bridge_id(br);
  66:   
  67:      if ((dev->flags & IFF_UP) && netif_carrier_ok(dev) &&
  68:          (br->dev->flags & IFF_UP))
  69:          br_stp_enable_port(p);
  70:      spin_unlock_bh(&br->lock);
  71:   
  72:      br_ifinfo_notify(RTM_NEWLINK, p);
  73:   
  74:      if (changed_addr)
  75:          call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev);
  76:   
  77:      dev_set_mtu(br->dev, br_min_mtu(br));
  78:   
  79:      if (br_fdb_insert(br, p, dev->dev_addr))
  80:          netdev_err(dev, "failed insert local address bridge forwarding table\n");
  81:   
  82:      kobject_uevent(&p->kobj, KOBJ_ADD);
  83:   
  84:      return 0;
  85:   
  86:  err4:
  87:      netdev_set_master(dev, NULL);
  88:  err3:
  89:      sysfs_remove_link(br->ifobj, p->dev->name);
  90:  err2:
  91:      kobject_put(&p->kobj);
  92:      p = NULL; /* kobject_put frees */
  93:  err1:
  94:      dev_set_promiscuity(dev, -1);
  95:  put_back:
  96:      dev_put(dev);
  97:      kfree(p);
  98:      return err;
  99:  }

以下几类设备无法添加为网桥端口:不是ethernet设备(包括回环设备)的设备、本身就是网桥的设备、已经被绑定为某个网桥端口的设备,以及设置了IFF_DONT_BRIDGE标志、不允许被桥接的设备(例如无线设备)。

当这些检查通过之后,就可以分配新网桥端口,并用new_nbp做部分初始化。

   1:  static struct net_bridge_port *new_nbp(struct net_bridge *br,
   2:                         struct net_device *dev)
   3:  {
   4:      int index;
   5:      struct net_bridge_port *p;
   6:   
   7:      index = find_portno(br);
   8:      if (index < 0)
   9:          return ERR_PTR(index);
  10:   
  11:      p = kzalloc(sizeof(*p), GFP_KERNEL);
  12:      if (p == NULL)
  13:          return ERR_PTR(-ENOMEM);
  14:   
  15:      p->br = br;
  16:      dev_hold(dev);
  17:      p->dev = dev;
  18:      p->path_cost = port_cost(dev);
  19:      p->priority = 0x8000 >> BR_PORT_BITS;
  20:      p->port_no = index;
  21:      p->flags = 0;
  22:      br_init_port(p);
  23:      p->state = BR_STATE_DISABLED;
  24:      br_stp_port_timer_init(p);
  25:      br_multicast_add_port(p);
  26:   
  27:      return p;
  28:  }

其中为端口生成一个id的实现:

   1:  static int find_portno(struct net_bridge *br)
   2:  {
   3:      int index;
   4:      struct net_bridge_port *p;
   5:      unsigned long *inuse;
   6:   
   7:      inuse = kcalloc(BITS_TO_LONGS(BR_MAX_PORTS), sizeof(unsigned long),
   8:              GFP_KERNEL);
   9:      if (!inuse)
  10:          return -ENOMEM;
  11:   
  12:      set_bit(0, inuse);    /* zero is reserved */
  13:      list_for_each_entry(p, &br->port_list, list) {
  14:          set_bit(p->port_no, inuse);
  15:      }
  16:      index = find_first_zero_bit(inuse, BR_MAX_PORTS);
  17:      kfree(inuse);
  18:   
  19:      return (index >= BR_MAX_PORTS) ? -EXFULL : index;
  20:  }

该函数的实现使用了bitmap机制:首先分配一个相应的bitmap向量,把第0位(保留)置位,然后遍历网桥设备的所有端口,将已在使用的端口号对应的位置1,最后找出第一个为0的位,即第一个未使用的端口号。回到new_nbp,接下来设置端口的默认优先级,br_make_port_id结合端口号和优先级计算端口ID。端口的默认开销值是根据绑定设备的传输速率设置的。最后初始化计时器。与该网桥端口相关的NIC会通过dev_set_promiscuity函数进入混杂模式。混杂模式可以捕获所有LAN网络数据,网桥转发帧时需要这一点。对每个被绑定的设备,都用一个计数器而不是一个布尔标识来保存该模式,这样内核就能够处理进入混杂模式的嵌套请求(不断进入混杂模式)。当一个网桥端口进入混杂模式时(由dev_set_promiscuity实现),相关绑定设备上的计数器就会增加;当混杂模式关闭时,该计数器就会递减。最后这个新的端口就会被加入到网桥的端口列表中。

删除网桥接口

   1:  /* called with RTNL */
   2:  int br_del_if(struct net_bridge *br, struct net_device *dev)
   3:  {
   4:      struct net_bridge_port *p;
   5:      bool changed_addr;
   6:   
   7:      p = br_port_get_rtnl(dev);
   8:      if (!p || p->br != br)
   9:          return -EINVAL;
  10:   
  11:      del_nbp(p);
  12:   
  13:      spin_lock_bh(&br->lock);
  14:      changed_addr = br_stp_recalculate_bridge_id(br);
  15:      spin_unlock_bh(&br->lock);
  16:   
  17:      if (changed_addr)
  18:          call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev);
  19:   
  20:      netdev_update_features(br->dev);
  21:   
  22:      return 0;
  23:  }
   1:  /* Delete port(interface) from bridge is done in two steps.
   2:   * via RCU. First step, marks device as down. That deletes
   3:   * all the timers and stops new packets from flowing through.
   4:   *
   5:   * Final cleanup doesn't occur until after all CPU's finished
   6:   * processing packets.
   7:   *
   8:   * Protected from multiple admin operations by RTNL mutex
   9:   */
  10:  static void del_nbp(struct net_bridge_port *p)
  11:  {
  12:      struct net_bridge *br = p->br;
  13:      struct net_device *dev = p->dev;
  14:   
  15:      sysfs_remove_link(br->ifobj, p->dev->name);
  16:   
  17:      dev_set_promiscuity(dev, -1);
  18:   
  19:      spin_lock_bh(&br->lock);
  20:      br_stp_disable_port(p);
  21:      spin_unlock_bh(&br->lock);
  22:   
  23:      br_ifinfo_notify(RTM_DELLINK, p);
  24:   
  25:      br_fdb_delete_by_port(br, p, 1);
  26:   
  27:      list_del_rcu(&p->list);
  28:   
  29:      dev->priv_flags &= ~IFF_BRIDGE_PORT;
  30:   
  31:      netdev_rx_handler_unregister(dev);
  32:      synchronize_net();
  33:   
  34:      netdev_set_master(dev, NULL);
  35:   
  36:      br_multicast_del_port(p);
  37:   
  38:      kobject_uevent(&p->kobj, KOBJ_REMOVE);
  39:      kobject_del(&p->kobj);
  40:   
  41:      br_netpoll_disable(p);
  42:   
  43:      call_rcu(&p->rcu, destroy_nbp_rcu);
  44:  }

启动和关闭网桥设备

   1:  static int br_dev_open(struct net_device *dev)
   2:  {
   3:      struct net_bridge *br = netdev_priv(dev);
   4:   
   5:      netdev_update_features(dev);
   6:      netif_start_queue(dev);
   7:      br_stp_enable_bridge(br);
   8:      br_multicast_open(br);
   9:   
  10:      return 0;
  11:  }

网桥启动函数的执行步骤如下:

netdev_update_features将网络设备的基本特征初始化为其绑定的设备所支持的功能的最小常用子集。

用netif_start_queue函数启动设备进行数据传输

用br_stp_enable_bridge函数启动网桥设备

启动和关闭网桥端口

要启动网桥端口,必须满足下列所有条件:

  • 被绑定的相关设备已用管理手段启动
  • 被绑定的相关设备有载波状态
  • 相关的网桥设备已用管理手段启动。网桥设备上没有载波状态,因为网桥是虚拟设备。

当网桥端口是以用户空间命令建立并且先前三项条件都满足时,该网桥端口就可立即启用了。假设当端口建立时,由于上述三项必要条件中至少有一项不满足,因而无法启用端口。下面是每项条件最终满足时启用端口的场合:

  • 当被关闭的网桥设备启动时,其所有关闭的端口就会启动
  • 当被绑定的设备检测到载波状态时,桥接程序会接收到NETDEV_CHANGE通知消息。
  • 当被关掉的绑定设备重新启动时,桥接程序会收到NETDEV_UP通知消息。

当开头三项条件之一不再满足时,网桥端口就会被关闭。

   1:  void br_stp_enable_port(struct net_bridge_port *p)
   2:  {
   3:      br_init_port(p);
   4:      br_port_state_selection(p->br);
   5:      br_log_state(p);
   6:      br_ifinfo_notify(RTM_NEWLINK, p);
   7:  }
   1:  void br_stp_disable_port(struct net_bridge_port *p)
   2:  {
   3:      struct net_bridge *br = p->br;
   4:      int wasroot;
   5:   
   6:      br_log_state(p);
   7:   
   8:      wasroot = br_is_root_bridge(br);
   9:      br_become_designated_port(p);
  10:      p->state = BR_STATE_DISABLED;
  11:      p->topology_change_ack = 0;
  12:      p->config_pending = 0;
  13:   
  14:      br_ifinfo_notify(RTM_NEWLINK, p);
  15:   
  16:      del_timer(&p->message_age_timer);
  17:      del_timer(&p->forward_delay_timer);
  18:      del_timer(&p->hold_timer);
  19:   
  20:      br_fdb_delete_by_port(br, p, 0);
  21:      br_multicast_disable_port(p);
  22:   
  23:      br_configuration_update(br);
  24:   
  25:      br_port_state_selection(br);
  26:   
  27:      if (br_is_root_bridge(br) && !wasroot)
  28:          br_become_root_bridge(br);
  29:  }

改变网桥端口状态

网桥端口不是处于活跃就是处于不活跃,相关的状态是BR_STATE_FORWARDING或BR_STATE_BLOCKING。不过,虽然BR_STATE_BLOCKING态可以被立刻指派给端口,但是,只有在经历了中间状态之后,才会转变为BR_STATE_FORWARDING状态。这两个状态的设置,是通过br_make_forwarding和br_make_blocking函数进行的。使用这两个函数时不用管端口所属的网桥设备是否运行STP。

   1:  static void br_make_blocking(struct net_bridge_port *p)
   2:  {
   3:      if (p->state != BR_STATE_DISABLED &&
   4:          p->state != BR_STATE_BLOCKING) {
   5:          if (p->state == BR_STATE_FORWARDING ||
   6:              p->state == BR_STATE_LEARNING)
   7:              br_topology_change_detection(p->br);
   8:   
   9:          p->state = BR_STATE_BLOCKING;
  10:          br_log_state(p);
  11:          br_ifinfo_notify(RTM_NEWLINK, p);
  12:   
  13:          del_timer(&p->forward_delay_timer);
  14:      }
  15:  }
   1:  static void br_make_forwarding(struct net_bridge_port *p)
   2:  {
   3:      struct net_bridge *br = p->br;
   4:   
   5:      if (p->state != BR_STATE_BLOCKING)
   6:          return;
   7:   
   8:      if (br->stp_enabled == BR_NO_STP || br->forward_delay == 0) {
   9:          p->state = BR_STATE_FORWARDING;
  10:          br_topology_change_detection(br);
  11:          del_timer(&p->forward_delay_timer);
  12:      } else if (br->stp_enabled == BR_KERNEL_STP)
  13:          p->state = BR_STATE_LISTENING;
  14:      else
  15:          p->state = BR_STATE_LEARNING;
  16:   
  17:      br_multicast_enable_port(p);
  18:      br_log_state(p);
  19:      br_ifinfo_notify(RTM_NEWLINK, p);
  20:   
  21:      if (br->forward_delay != 0)
  22:          mod_timer(&p->forward_delay_timer, jiffies + br->forward_delay);
  23:  }

不能把br_state_blocking和br_state_forwarding间的任何中间状态指派给端口,这也是为什么如果要将不是处于br_state_blocking态的端口变成为br_state_forwarding态时,br_make_forwarding函数就不执行任何动作的原因。中间状态就预示着该端口已在前往br_state_forwarding态的路上,且在适当的定时器到期时就会到达那个状态。

转发数据库

每个网桥实例都有自己的转发数据库,无论STP是开启还是关闭都会用到。

   1:  int __init br_fdb_init(void)
   2:  {
   3:      br_fdb_cache = kmem_cache_create("bridge_fdb_cache",
   4:                       sizeof(struct net_bridge_fdb_entry),
   5:                       0,
   6:                       SLAB_HWCACHE_ALIGN, NULL);
   7:      if (!br_fdb_cache)
   8:          return -ENOMEM;
   9:   
  10:      get_random_bytes(&fdb_salt, sizeof(fdb_salt));
  11:      return 0;
  12:  }

网桥数据库子系统是由该函数初始化的,它简单的创建br_fdb_cache缓存,用于分配net_bridge_fdb_entry实例。

   1:  static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head,
   2:                             struct net_bridge_port *source,
   3:                             const unsigned char *addr)
   4:  {
   5:      struct net_bridge_fdb_entry *fdb;
   6:   
   7:      fdb = kmem_cache_alloc(br_fdb_cache, GFP_ATOMIC);
   8:      if (fdb) {
   9:          memcpy(fdb->addr.addr, addr, ETH_ALEN);
  10:          fdb->dst = source;
  11:          fdb->is_local = 0;
  12:          fdb->is_static = 0;
  13:          fdb->updated = fdb->used = jiffies;
  14:          hlist_add_head_rcu(&fdb->hlist, head);
  15:      }
  16:      return fdb;
  17:  }

每个具体的entry是由fdb_create负责分配的,同时初始化一些相应的字段。

转发数据库中的记录是通过MAC地址来标识的。查询这个表包括用br_mac_hash选出正确的hash表bucket,以及浏览bucket中的net_bridge_fdb_entry实例列表,找出一个和指定MAC地址相匹配的数据项。

   1:  static struct net_bridge_fdb_entry *fdb_find(struct hlist_head *head,
   2:                           const unsigned char *addr)
   3:  {
   4:      struct hlist_node *h;
   5:      struct net_bridge_fdb_entry *fdb;
   6:   
   7:      hlist_for_each_entry(fdb, h, head, hlist) {
   8:          if (!compare_ether_addr(fdb->addr.addr, addr))
   9:              return fdb;
  10:      }
  11:      return NULL;
  12:  }

该函数就是对给定的MAC地址简单搜索net_bridge_fdb_entry,且它不能用于转发数据流量。它主要是被桥接管理函数使用。

其中compare_ether_addr函数使用如下方式实现:

   1:  static inline unsigned compare_ether_addr(const u8 *addr1, const u8 *addr2)
   2:  {
   3:      const u16 *a = (const u16 *) addr1;
   4:      const u16 *b = (const u16 *) addr2;
   5:   
   6:      BUILD_BUG_ON(ETH_ALEN != 6);
   7:      return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2])) != 0;
   8:  }
   1:  struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br,
   2:                        const unsigned char *addr)
   3:  {
   4:      struct hlist_node *h;
   5:      struct net_bridge_fdb_entry *fdb;
   6:   
   7:      hlist_for_each_entry_rcu(fdb, h, &br->hash[br_mac_hash(addr)], hlist) {
   8:          if (!compare_ether_addr(fdb->addr.addr, addr)) {
   9:              if (unlikely(has_expired(br, fdb)))
  10:                  break;
  11:              return fdb;
  12:          }
  13:      }
  14:   
  15:      return NULL;
  16:  }

与fdb_find类似,由桥接程序调用来转发流量。它不考虑过期的数据项。对于此两个函数来说,必须在适当的时候考虑上锁。

添加、更新及删除数据项

   1:  static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
   2:            const unsigned char *addr)
   3:  {
   4:      struct hlist_head *head = &br->hash[br_mac_hash(addr)];
   5:      struct net_bridge_fdb_entry *fdb;
   6:   
   7:      if (!is_valid_ether_addr(addr))
   8:          return -EINVAL;
   9:   
  10:      fdb = fdb_find(head, addr);
  11:      if (fdb) {
  12:          /* it is okay to have multiple ports with same
  13:           * address, just use the first one.
  14:           */
  15:          if (fdb->is_local)
  16:              return 0;
  17:          br_warn(br, "adding interface %s with same address "
  18:                 "as a received packet\n",
  19:                 source->dev->name);
  20:          fdb_delete(br, fdb);
  21:      }
  22:   
  23:      fdb = fdb_create(head, source, addr);
  24:      if (!fdb)
  25:          return -ENOMEM;
  26:   
  27:      fdb->is_local = fdb->is_static = 1;
  28:      fdb_notify(br, fdb, RTM_NEWNEIGH);
  29:      return 0;
  30:  }

在创建一个网桥端口时,br_add_if通过br_fdb_insert函数把绑定的设备的MAC地址添加到转发数据库。br_fdb_insert会忽略不应加到转发数据库的MAC地址。当新地址碰巧已在数据库中时,除非它对应于另一个本地接口(此时不做任何更新),否则就会被替换掉。转发数据库中的本地MAC地址允许桥接程序把要提交给本地接口的入口帧传至本地,因此桥接程序并不关心本地MAC和哪个接口相匹配。重要的是数据库中至少有一个数据项可告知桥接程序哪些网络流量需要提交至本地。对于可以添加到转发数据库的数据项的数目并没有硬性限制,因此,这可能会让系统遭遇DOS攻击。

通过入口帧学习到的MAC地址会由br_fdb_update添加到数据库。当该地址已在数据库中时,如果需要则更新对入口端口(dst)的引用,并且更新最后一次更新的时间戳。

   1:  void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
   2:             const unsigned char *addr)
   3:  {
   4:      struct hlist_head *head = &br->hash[br_mac_hash(addr)];
   5:      struct net_bridge_fdb_entry *fdb;
   6:   
   7:      /* some users want to always flood. */
   8:      if (hold_time(br) == 0)
   9:          return;
  10:   
  11:      /* ignore packets unless we are using this port */
  12:      if (!(source->state == BR_STATE_LEARNING ||
  13:            source->state == BR_STATE_FORWARDING))
  14:          return;
  15:   
  16:      fdb = fdb_find_rcu(head, addr);
  17:      if (likely(fdb)) {
  18:          /* attempt to update an entry for a local interface */
  19:          if (unlikely(fdb->is_local)) {
  20:              if (net_ratelimit())
  21:                  br_warn(br, "received packet on %s with "
  22:                      "own address as source address\n",
  23:                      source->dev->name);
  24:          } else {
  25:              /* fastpath: update of existing entry */
  26:              fdb->dst = source;
  27:              fdb->updated = jiffies;
  28:          }
  29:      } else {
  30:          spin_lock(&br->hash_lock);
  31:          if (likely(!fdb_find(head, addr))) {
  32:              fdb = fdb_create(head, source, addr);
  33:              if (fdb)
  34:                  fdb_notify(br, fdb, RTM_NEWNEIGH);
  35:          }
  36:          /* else  we lose race and someone else inserts
  37:           * it first, don't bother updating
  38:           */
  39:          spin_unlock(&br->hash_lock);
  40:      }
  41:  }

老化

对于每个网桥实例来说,都有一个垃圾收集定时器(gc_timer)来定期扫描转发数据库,然后把过期的数据项删除。在网桥实例初始化时,该定时器会在br_stp_timer_init中被初始化,之后br_stp_enable_bridge启动网桥时,该定时器也就会启动。定时器到期时调用br_fdb_cleanup做清理工作:该函数扫描数据库,用fdb_delete删除过期的非静态数据项。需要注意,在下面列出的实现中,定时器并不是以固定周期触发(每十分之一秒到期一次是较早内核版本的行为),而是被重新设定到剩余数据项中最早的过期时刻,且最迟不超过ageing_time之后。

   1:  void br_fdb_cleanup(unsigned long _data)
   2:  {
   3:      struct net_bridge *br = (struct net_bridge *)_data;
   4:      unsigned long delay = hold_time(br);
   5:      unsigned long next_timer = jiffies + br->ageing_time;
   6:      int i;
   7:   
   8:      spin_lock(&br->hash_lock);
   9:      for (i = 0; i < BR_HASH_SIZE; i++) {
  10:          struct net_bridge_fdb_entry *f;
  11:          struct hlist_node *h, *n;
  12:   
  13:          hlist_for_each_entry_safe(f, h, n, &br->hash[i], hlist) {
  14:              unsigned long this_timer;
  15:              if (f->is_static)
  16:                  continue;
  17:              this_timer = f->updated + delay;
  18:              if (time_before_eq(this_timer, jiffies))
  19:                  fdb_delete(br, f);
  20:              else if (time_before(this_timer, next_timer))
  21:                  next_timer = this_timer;
  22:          }
  23:      }
  24:      spin_unlock(&br->hash_lock);
  25:   
  26:      mod_timer(&br->gc_timer, round_jiffies_up(next_timer));
  27:  }

转载于:https://my.oschina.net/longscu/blog/58971

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值