Linux内核 bridge转发逻辑

netdev_rx_handler_register

在分析之前首先要介绍一个重要函数:netdev_rx_handler_register,该函数在net/core/dev.c文件中定义:

这个函数可以给设备(net_device)注册接收函数,然后在__netif_receive_skb函数中根据接收skb的设备接口,再调用这个被注册的接收函数。比如为网桥下的接口注册br_handle_frame函数,为bonding接口注册bond_handle_frame函数。这相对于老式的网桥处理更灵活,有了这个机制也可以在模块中自行注册处理函数。比如3.10中的openvswitch(OpenvSwitch在3.10已经合入了内核)创建netdev vport的函数netdev_create。代码中使用如下:

上图中标红的函数,桥成员口在br_add_if函数中调用netdev_rx_handler_register函数指定成员口的收包函数为br_handle_frame。

Bridge转发逻辑分析

netif_receive_skb

还是先从netif_receive_skb函数分析,这个函数算是进入协议栈的入口。

net/core/dev.c

netif_receive_skb函数是由softirq,即软中断调用的。查看了linux的代码,发现都是在网口驱动的接收函数中调用netif_receive_skb()函数。如下在probe函数中

alloc_etherdev()函数返回一个net_device结构,入参传入的结构体被保存在私有结构里。

在我们wifi6网卡的驱动中也调用了,在dhd_rx_frame函数中

drivers/net/wireless/rockchip_wlan/rkwifi/bcmdhd_wifi6/dhd_linux.c:6509

 如果一个dev被添加到一个bridge(作为bridge的一个接口),那么这个接口设备的rx_handler会被设置为br_handle_frame函数。这是在br_add_if函数(net/bridge/br_if.c)中设置的,而br_add_if是在向网桥设备添加接口时被调用的。进入br_handle_frame也就进入了bridge的逻辑代码。

br_add_if

/*
 * br_add_if - excerpt showing only the rx-handler registration.
 *
 * Registers br_handle_frame as the receive handler of @dev via
 * netdev_rx_handler_register(), passing p as the handler data.
 * The rest of the function body is omitted from this excerpt.
 * NOTE(review): the declarations of err and p are not shown here;
 * p is presumably the net_bridge_port created for @dev - confirm
 * against net/bridge/br_if.c.
 */
int br_add_if(struct net_bridge *br, struct net_device *dev)
{
    /* ... earlier code omitted in this excerpt ... */
    err = netdev_rx_handler_register(dev, br_handle_frame, p);
    /* ... later code omitted in this excerpt ... */
}

br_handle_frame

/*
 * br_handle_frame - rx handler entry point into the bridge code.
 *
 * @pskb: in/out pointer to the received skb; it is written back
 *        (*pskb = skb) on the paths that return RX_HANDLER_PASS after
 *        skb_share_check() may have replaced the skb.
 *
 * Returns RX_HANDLER_PASS when the caller should continue processing
 * the skb, or RX_HANDLER_CONSUMED when the skb was handed to a netfilter
 * hook, freed, or could not be unshared.
 */
rx_handler_result_t br_handle_frame(struct sk_buff **pskb)
{
    struct net_bridge_port *p;
    struct sk_buff *skb = *pskb;
    const unsigned char *dest = eth_hdr(skb)->h_dest;
    br_should_route_hook_t *rhook;
    /* Loopback packets are not bridged; let the caller continue. */
    if (unlikely(skb->pkt_type == PACKET_LOOPBACK))
        return RX_HANDLER_PASS;
    /* Frames with an invalid source MAC are dropped. */
    if (!is_valid_ether_addr(eth_hdr(skb)->h_source))
        goto drop;
    skb = skb_share_check(skb, GFP_ATOMIC);
    if (!skb)
        return RX_HANDLER_CONSUMED;
    /* Look up the bridge port that corresponds to the receiving device. */
    p = br_port_get_rcu(skb->dev);
    /* Special handling for link-local destination MAC addresses. */
    if (unlikely(is_link_local_ether_addr(dest))) {
    /*
     * See IEEE 802.1D Table 7-10 Reserved addresses
     *
     * Assignment Value
     * Bridge Group Address 01-80-C2-00-00-00
     * (MAC Control) 802.3 01-80-C2-00-00-01
     * (Link Aggregation) 802.3 01-80-C2-00-00-02
     * 802.1X PAE address 01-80-C2-00-00-03
     *
     * 802.1AB LLDP 01-80-C2-00-00-0E
     *
     * Others reserved for future standardization
     */
        switch (dest[5]) {
            case 0x00: /* Bridge Group Address */
            /* If STP is turned off,then must forward to keep loop detection */
                if (p->br->stp_enabled == BR_NO_STP)
                    goto forward;
            break;
            case 0x01: /* IEEE MAC (Pause) */
                goto drop;
            default:
                /* Allow selective forwarding for most other protocols */
                if (p->br->group_fwd_mask & (1u << dest[5]))
                    goto forward;
        }
        /*
         * NF_BR_LOCAL_IN hook. Passing through this hook does not by itself
         * mean the frame is delivered to the host stack; only frames with a
         * link-local (01-80-C2-...) destination reach this point.
         */
        if (NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev,
            NULL, br_handle_local_finish)) {
                return RX_HANDLER_CONSUMED; /* consumed by filter */
        } else {
            *pskb = skb;
            return RX_HANDLER_PASS; /* continue processing */
        }
    }
/* Forwarding logic, selected by the state of the receiving port. */
forward:
    switch (p->state) {
        case BR_STATE_FORWARDING:
            /* Optional should-route hook: a non-zero result passes the skb up. */
            rhook = rcu_dereference(br_should_route_hook);
            if (rhook) {
                if ((*rhook)(skb)) {
                    *pskb = skb;
                    return RX_HANDLER_PASS;
                }
                /* Re-read the destination address after the hook has run. */
                dest = eth_hdr(skb)->h_dest;
            }
      /* fall through */
       case BR_STATE_LEARNING:
           /* Destination MAC equals the bridge device's MAC: mark the skb as PACKET_HOST. */
           if (ether_addr_equal(p->br->dev->dev_addr, dest))
               skb->pkt_type = PACKET_HOST;
           /* NF_BR_PRE_ROUTING hook; continues in br_handle_frame_finish. */
           NF_HOOK(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL,br_handle_frame_finish);
       break;
default:
/* Any other port state, and every explicit "goto drop", frees the skb. */
drop:
    kfree_skb(skb);
}
return RX_HANDLER_CONSUMED;
}

经过NF_BR_LOCAL_IN hook点会执行br_handle_local_finish函数。

br_handle_local_finish

/*
 * br_handle_local_finish - completion callback passed to the
 * NF_BR_LOCAL_IN hook by br_handle_frame.
 *
 * Reads the VLAN id of @skb (left at 0 if br_vlan_get_tag() does not set
 * it) and updates the bridge forwarding database with the frame's source
 * MAC for that VLAN id on the receiving port.
 *
 * Always returns 0 so that processing continues.
 */
static int br_handle_local_finish(struct sk_buff *skb)
{
    struct net_bridge_port *port;
    u16 vlan_id = 0;

    port = br_port_get_rcu(skb->dev);

    /* Fetch the VLAN id carried by the skb. */
    br_vlan_get_tag(skb, &vlan_id);

    /* The FDB update is keyed by source MAC together with the VLAN id. */
    br_fdb_update(port->br, port, eth_hdr(skb)->h_source, vlan_id);

    return 0; /* process further */
}

经过NF_BR_PRE_ROUTING hook点会执行br_handle_frame_finish函数。

br_handle_frame_finish 

/*
 * br_handle_frame_finish - completion callback passed to the
 * NF_BR_PRE_ROUTING hook by br_handle_frame.
 *
 * Learns the source MAC, then decides where the frame goes:
 *   skb  - the copy to forward out of other ports (NULL: do not forward)
 *   skb2 - the copy to deliver to the local host (NULL: do not deliver)
 *
 * Returns the result of br_pass_frame_up() when a local copy exists,
 * otherwise 0.
 */
int br_handle_frame_finish(struct sk_buff *skb)
{
    const unsigned char *dest = eth_hdr(skb)->h_dest;
    struct net_bridge_port *p = br_port_get_rcu(skb->dev);
    struct net_bridge *br;
    struct net_bridge_fdb_entry *dst;
    struct net_bridge_mdb_entry *mdst;
    struct sk_buff *skb2;
    u16 vid = 0;
    /* No port, or port disabled: drop. */
    if (!p || p->state == BR_STATE_DISABLED)
        goto drop;
        /*
         * VLAN ingress check; vid is filled in by br_allowed_ingress().
         * NOTE(review): this path jumps to "out" and does not free the skb
         * in this function - presumably br_allowed_ingress() takes care of
         * it on failure; confirm.
         */
    if (!br_allowed_ingress(p->br, nbp_get_vlan_info(p), skb, &vid))
        goto out;
    /* insert into forwarding database after filtering to avoid spoofing */
    br = p->br;
    /* Update the forwarding database with the source MAC. */
    br_fdb_update(br, p, eth_hdr(skb)->h_source, vid);
    /* Multicast (non-broadcast) destination: a non-zero br_multicast_rcv() result drops the frame. */
    if (!is_broadcast_ether_addr(dest) && is_multicast_ether_addr(dest) &&
        br_multicast_rcv(br, p, skb))
        goto drop;
    /* A port in LEARNING state only learns addresses; the frame itself is dropped. */
    if (p->state == BR_STATE_LEARNING)
        goto drop;
    /* Remember the bridge device; br_pass_frame_up() reads it back. */
    BR_INPUT_SKB_CB(skb)->brdev = br->dev;
    /* The packet skb2 goes to the local host (NULL to skip). */
    skb2 = NULL;
   /* Bridge device is in promiscuous mode: also deliver locally. */
    if (br->dev->flags & IFF_PROMISC)
        skb2 = skb;
    dst = NULL;
   /* Broadcast destination: deliver locally (and flood below, since dst stays NULL). */
    if (is_broadcast_ether_addr(dest))
        skb2 = skb;
    else if (is_multicast_ether_addr(dest)) { /* multicast */
        mdst = br_mdb_get(br, skb, vid);
    if (mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) {
        if ((mdst && mdst->mglist) ||
        br_multicast_is_router(br))
            skb2 = skb;
        br_multicast_forward(mdst, skb, skb2);
        /* Forwarding was handed to br_multicast_forward(); skip the generic forward below. */
        skb = NULL;
        if (!skb2)
            goto out;
        } else
            skb2 = skb;
        br->dev->stats.multicast++;
    } else if ((dst = __br_fdb_get(br, dest, vid)) && dst->is_local) {/* FDB entry is local: deliver to the host stack only */
            skb2 = skb;
            /* Do not forward the packet since it's local. */
            skb = NULL;
    }
    if (skb) {
        if (dst) {
            dst->used = jiffies;
            br_forward(dst->dst, skb, skb2); // forward to the port recorded in the FDB entry
        } else
            br_flood_forward(br, skb, skb2); // no FDB entry for the destination: flood
    }
    if (skb2)
        return br_pass_frame_up(skb2); // deliver to the local host stack
out:
    return 0;
drop:
    kfree_skb(skb);
    goto out;
}

 我们先看发往本机协议栈的函数br_pass_frame_up。

br_pass_frame_up

/*
 * br_pass_frame_up - deliver a frame received on a bridge port to the
 * local host through the bridge device.
 *
 * Reads the bridge device stored in the skb control block, applies the
 * VLAN egress check and VLAN handling for the bridge itself, rewrites
 * skb->dev to the bridge device, and runs the NF_BR_LOCAL_IN hook with
 * netif_receive_skb as the completion function.
 *
 * Returns NET_RX_DROP when the frame is rejected or br_handle_vlan()
 * returns NULL, otherwise the NF_HOOK() result.
 */
static int br_pass_frame_up(struct sk_buff *skb)
{
    struct net_device *indev, *brdev = BR_INPUT_SKB_CB(skb)->brdev;
    struct net_bridge *br = netdev_priv(brdev);
   // statistics counter update omitted in this excerpt
    /* Bridge is just like any other port. Make sure the
     * packet is allowed except in promisc mode when someone
     * may be running packet capture.
     */
    if (!(brdev->flags & IFF_PROMISC) && !br_allowed_egress(br, br_get_vlan_info(br), skb)) {
        kfree_skb(skb); // not promiscuous and the VLAN egress check failed: drop
        return NET_RX_DROP;
    }
    // VLAN handling; a NULL result means there is nothing left to deliver
    skb = br_handle_vlan(br, br_get_vlan_info(br), skb);
    if (!skb)
        return NET_RX_DROP;
    indev = skb->dev;
    skb->dev = brdev; // key step: skb->dev now points at the bridge device
    // run the NF_BR_LOCAL_IN hook, then hand the skb to netif_receive_skb again
    return NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, skb, indev, NULL,
    netif_receive_skb);
}

 再次进入netif_receive_skb时,由于skb->dev已经被设置成了bridge设备,而bridge设备的rx_handler函数是没有被设置的,所以不会再次进入bridge逻辑,而是直接进入主机上层协议栈。

下面看转发逻辑,转发逻辑主要在br_forward函数中,而br_forward主要调用__br_forward函数。

__br_forward

static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb)
{
    struct net_device *indev;
    //vlan处理
    skb = br_handle_vlan(to->br, nbp_get_vlan_info(to), skb);
    if (!skb)
        return;
    indev = skb->dev;
    skb->dev = to->dev; //skb->dev设置为出口设备dev
    skb_forward_csum(skb);
    //经过NF_BR_FORWARD hook点,调用br_forward_finish
    NF_HOOK(NFPROTO_BRIDGE, NF_BR_FORWARD, skb, indev, skb->dev,
    br_forward_finish);
}

br_forward_finish 

/*
 * br_forward_finish - completion callback passed to the NF_BR_FORWARD hook.
 *
 * Runs the NF_BR_POST_ROUTING hook with no input device and skb->dev as
 * the output device; br_dev_queue_push_xmit is the completion function.
 * Returns the NF_HOOK() result.
 */
int br_forward_finish(struct sk_buff *skb)
{
    struct net_device *egress_dev = skb->dev;

    return NF_HOOK(NFPROTO_BRIDGE, NF_BR_POST_ROUTING, skb,
                   NULL, egress_dev, br_dev_queue_push_xmit);
}

br_dev_queue_push_xmit 

/*
 * br_dev_queue_push_xmit - completion callback passed to the
 * NF_BR_POST_ROUTING hook; queues the frame on skb->dev for transmission.
 *
 * The skb is freed instead of transmitted when
 * nf_bridge_maybe_copy_header() returns non-zero, or when the packet is
 * longer than the device MTU and is not a GSO packet.
 *
 * Always returns 0.
 */
int br_dev_queue_push_xmit(struct sk_buff *skb)
{
    /* ip_fragment doesn't copy the MAC header */
    if (nf_bridge_maybe_copy_header(skb))
        goto drop;

    /* Over-MTU frames are only acceptable when they are GSO packets. */
    if (packet_length(skb) > skb->dev->mtu && !skb_is_gso(skb))
        goto drop;

    skb_push(skb, ETH_HLEN);
    br_drop_fake_rtable(skb);
    dev_queue_xmit(skb); /* hand the frame to the device transmit queue */
    return 0;

drop:
    kfree_skb(skb);
    return 0;
}

skb进入dev_queue_xmit后就会调用相应设备驱动的发送函数,也就离开了bridge逻辑。所以整个3.10 kernel的bridge转发逻辑如下图所示:

 

注意,和2.6kernel一样,bridge的OUTPUT hook点在bridge dev的发送函数中,这里不再分析列出。 

Linux内核分析 - 网络:网桥原理分析_在一个接口上发出的包不会再在那个接口上发送这个报-CSDN博客

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值