IPv4路由cache统计信息

内核定义如下的每处理器结构rt_cache_stat记录路由缓存信息。

/*
 * Per-CPU IPv4 route cache counters. Each member backs the column of the
 * same name in /proc/net/stat/rt_cache.
 */
struct rt_cache_stat {
        /* Input routes allocated on the slow path (__mkroute_input() and ip_route_input_slow()). */
        unsigned int in_slow_tot;
        /* Multicast input routes allocated by ip_route_input_mc(). */
        unsigned int in_slow_mc;
        /* Input lookups that reached the no_route label in ip_route_input_slow(). */
        unsigned int in_no_route;
        /* Input packets handled at the brd_input label in ip_route_input_slow(). */
        unsigned int in_brd;
        /* Input packets that reached the martian_destination label. */
        unsigned int in_martian_dst;
        /* Input packets with an invalid source; reportedly bumped in ip_handle_martian_source() (body not shown here). */
        unsigned int in_martian_src;
        /* Output routes allocated by __mkroute_output(). */
        unsigned int out_slow_tot;
        /* Output broadcast/multicast routes with RTCF_LOCAL set on a non-loopback device. */
        unsigned int out_slow_mc;
};

/* One rt_cache_stat instance per CPU, private to this file. */
static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
/* Bump the named rt_cache_stat counter via raw_cpu_inc(); `field` is a member name, e.g. in_slow_tot. */
#define RT_CACHE_STAT_INC(field) raw_cpu_inc(rt_cache_stat.field)

可通过PROC文件/proc/net/stat/rt_cache读取这些统计值,如下为8核心处理器上显示的信息,每一行对应一个处理器。除entries列之外,只有与rt_cache_stat结构成员对应的项有值,其它项固定为零。

# cat /proc/net/stat/rt_cache
entries  in_hit in_slow_tot in_slow_mc in_no_route in_brd in_martian_dst in_martian_src  out_hit out_slow_tot out_slow_mc  gc_total gc_ignored gc_goal_miss gc_dst_overflow in_hlist_search out_hlist_search
01369b26  00000000 00000b97 00000000 00000000 00000161 00000000 00000000  00000000 00000004 00000000 00000000 00000000 00000000 00000000 00000000 00000000 
01369b26  00000000 0000000b 00000000 00000000 00000000 00000000 00000000  00000000 00000001 00000000 00000000 00000000 00000000 00000000 00000000 00000000 
01369b26  00000000 00000000 00000000 00000000 00000000 00000000 00000000  00000000 00000004 00000000 00000000 00000000 00000000 00000000 00000000 00000000 
01369b26  00000000 00000000 00000000 00000000 00000000 00000000 00000000  00000000 00000001 00000000 00000000 00000000 00000000 00000000 00000000 00000000 
01369b26  00000000 013c7f28 00000000 00000000 00000000 00000000 00000000  00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 
01369b26  00000000 00000000 00000000 00000000 00000000 00000000 00000000  00000000 00000004 00000000 00000000 00000000 00000000 00000000 00000000 00000000 
01369b26  00000000 00000000 00000000 00000000 00000000 00000000 00000000  00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 
01369b26  00000000 00000000 00000000 00000000 00000000 00000000 00000000  00000000 00000010 00000000 00000000 00000000 00000000 00000000 00000000 00000000 
...

in_slow_tot项

在函数ip_route_input_slow中递增,统计路由cache分配次数。对于需要转发的报文,函数__mkroute_input生成路由缓存,并递增计数。

static int __mkroute_input(struct sk_buff *skb, const struct fib_result *res,
               struct in_device *in_dev, __be32 daddr, __be32 saddr, u32 tos)
{
    ...
    rth = rt_dst_alloc(out_dev->dev, 0, res->type,
               IN_DEV_CONF_GET(in_dev, NOPOLICY),
               IN_DEV_CONF_GET(out_dev, NOXFRM));
    if (!rth) {
        err = -ENOBUFS;
        goto cleanup;
    }

    rth->rt_is_input = 1;
    RT_CACHE_STAT_INC(in_slow_tot);

对于目的地址为本机的报文,以下部分分配路由缓存,并递增计数。

static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
                   u8 tos, struct net_device *dev, struct fib_result *res)
{

make_route:
    err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos, flkeys);
out:    return err;

    rth = rt_dst_alloc(l3mdev_master_dev_rcu(dev) ? : net->loopback_dev,
               flags | RTCF_LOCAL, res->type,
               IN_DEV_CONF_GET(in_dev, NOPOLICY), false);
    if (!rth)
        goto e_nobufs;

    rth->dst.output= ip_rt_bug;
#ifdef CONFIG_IP_ROUTE_CLASSID
    rth->dst.tclassid = itag;
#endif
    rth->rt_is_input = 1;

    RT_CACHE_STAT_INC(in_slow_tot);

in_slow_mc项

在函数ip_route_input_mc中递增(该函数由ip_route_input_rcu在处理多播目的地址时调用),统计多播路由cache分配次数。

static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
                 u8 tos, struct net_device *dev, int our)
{

    rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST,
               IN_DEV_CONF_GET(in_dev, NOPOLICY), false);
    if (!rth)
        return -ENOBUFS;

#ifdef CONFIG_IP_ROUTE_CLASSID
    rth->dst.tclassid = itag;
#endif
    rth->dst.output = ip_rt_bug;
    rth->rt_is_input= 1;

#ifdef CONFIG_IP_MROUTE
    if (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev))
        rth->dst.input = ip_mr_input;
#endif
    RT_CACHE_STAT_INC(in_slow_mc);

in_no_route项

在函数ip_route_input_slow中递增,统计以下两种情况的次数:fib查询失败;或者fib查询成功,但接收设备(in_dev)的转发功能未启用(如对于ens160设备:/proc/sys/net/ipv4/conf/ens160/forwarding值为0)。

注意,此种情况下,也会分配路由缓存,但是类型为RTN_UNREACHABLE,因此,统计项in_slow_tot也会递增。

static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
                   u8 tos, struct net_device *dev, struct fib_result *res)
{

    err = fib_lookup(net, &fl4, res, 0);
    if (err != 0) {
        if (!IN_DEV_FORWARD(in_dev))
            err = -EHOSTUNREACH;
        goto no_route;
    }

    if (!IN_DEV_FORWARD(in_dev)) {
        err = -EHOSTUNREACH;
        goto no_route;
    }

no_route:
    RT_CACHE_STAT_INC(in_no_route);
    res->type = RTN_UNREACHABLE;
    res->fi = NULL;
    res->table = NULL;
    goto local_input;

in_brd项

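与in_brd相关的情况有三种:报文的目的地址为全F的受限广播地址;或者源地址和目的地址同时为全0;或者fib查询的结果为RTN_BROADCAST类型路由。前两种情况直接跳转到brd_input标签。对于第三种情况,如果接收设备的广播转发开关(bc_forwarding)开启,由函数ip_mkroute_input创建路由缓存,该路径仅递增in_slow_tot计数,并不递增in_brd计数(即被转发的广播报文不计入in_brd,这一点是否符合该统计项的本意存疑);如果接收设备的广播转发开关没有开启,则跳转到brd_input,按照本地接收报文处理,递增in_brd计数,以及in_slow_tot计数。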

static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
                   u8 tos, struct net_device *dev, struct fib_result *res)
{
    res->fi = NULL;
    res->table = NULL;
    if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0))
        goto brd_input;

    err = fib_lookup(net, &fl4, res, 0);
    if (err != 0) {
        if (!IN_DEV_FORWARD(in_dev))
            err = -EHOSTUNREACH;
        goto no_route;
    }

    if (res->type == RTN_BROADCAST) {
        if (IN_DEV_BFORWARD(in_dev))
            goto make_route;
        /* not do cache if bc_forwarding is enabled */
        if (IPV4_DEVCONF_ALL(net, BC_FORWARDING))
            do_cache = false;
        goto brd_input;
    }

brd_input:
    if (skb->protocol != htons(ETH_P_IP))
        goto e_inval;

    if (!ipv4_is_zeronet(saddr)) {
        err = fib_validate_source(skb, saddr, 0, tos, 0, dev,  in_dev, &itag);
        if (err < 0)
            goto martian_source;
    }
    flags |= RTCF_BROADCAST;
    res->type = RTN_BROADCAST;
    RT_CACHE_STAT_INC(in_brd);

in_martian_dst项

以下情况认为报文的目的地址为非法地址,递增in_martian_dst计数:目的地址为0(ipv4_is_zeronet判断为真);或者目的地址为回环地址,但是接收设备未开启route_localnet,即不允许回环地址(可通过PROC文件配置,例如ens34配置文件:/proc/sys/net/ipv4/conf/ens34/route_localnet)。

或者,查询fib表的结果得到的路由类型不等于RTN_BROADCAST、RTN_LOCAL和RTN_UNICAST中的任何一个,认为此报文的目的地址为非法地址。增加in_martian_dst计数。

static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
                   u8 tos, struct net_device *dev, struct fib_result *res)
{

    if (ipv4_is_zeronet(daddr))
        goto martian_destination;

    /* Following code try to avoid calling IN_DEV_NET_ROUTE_LOCALNET(),
     * and call it once if daddr or/and saddr are loopback addresses
     */
    if (ipv4_is_loopback(daddr)) {
        if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
            goto martian_destination;
    }

    err = fib_lookup(net, &fl4, res, 0);
    if (err != 0) {
        if (!IN_DEV_FORWARD(in_dev))
            err = -EHOSTUNREACH;
        goto no_route;
    }

    if (res->type == RTN_BROADCAST) {
        ...
        goto brd_input;
    }
    if (res->type == RTN_LOCAL) {
        ...
        goto local_input;
    }
    if (res->type != RTN_UNICAST)
        goto martian_destination;

    /*
     *  Do not cache martian addresses: they should be logged (RFC1812)
     */
martian_destination:
    RT_CACHE_STAT_INC(in_martian_dst);

in_martian_src项

在查找路由时,以下情况认定报文的源地址为非法地址:源地址为多播地址,或者全F的广播地址;或者源地址为0(目的地址为全F的广播地址,或者源地址和目的地址同时为0的情况除外,这两种情况会先跳转到brd_input);或者目的地址不是回环地址,但是源地址为回环地址,并且接收设备没有开启route_localnet开关。

static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
                   u8 tos, struct net_device *dev, struct fib_result *res)
{

    if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
        goto martian_source;

    if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0))
        goto brd_input;

    /* Accept zero addresses only to limited broadcast;
     * I even do not know to fix it or not. Waiting for complains :-)
     */
    if (ipv4_is_zeronet(saddr))
        goto martian_source;

    /* Following code try to avoid calling IN_DEV_NET_ROUTE_LOCALNET(),
     * and call it once if daddr or/and saddr are loopback addresses
     */
    if (ipv4_is_loopback(daddr)) {
        if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
            goto martian_destination;
    } else if (ipv4_is_loopback(saddr)) {
        if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
            goto martian_source;
    }

另外,对于RTN_LOCAL类型或者RTN_BROADCAST类型的路由表项,如果反向路由查找失败,也认定源地址为非法地址,在函数ip_handle_martian_source中递增in_martian_src计数。

    err = fib_lookup(net, &fl4, res, 0);

    if (res->type == RTN_BROADCAST) {
        if (IN_DEV_BFORWARD(in_dev))
            goto make_route;
        /* not do cache if bc_forwarding is enabled */
        if (IPV4_DEVCONF_ALL(net, BC_FORWARDING))
            do_cache = false;
        goto brd_input;
    }
    if (res->type == RTN_LOCAL) {
        err = fib_validate_source(skb, saddr, daddr, tos, 0, dev, in_dev, &itag);
        if (err < 0)
            goto martian_source;
        goto local_input;
    }

brd_input:
    if (skb->protocol != htons(ETH_P_IP))
        goto e_inval;

    if (!ipv4_is_zeronet(saddr)) {
        err = fib_validate_source(skb, saddr, 0, tos, 0, dev, in_dev, &itag);
        if (err < 0)
            goto martian_source;
    }
    flags |= RTCF_BROADCAST;
    res->type = RTN_BROADCAST;

martian_source:
    ip_handle_martian_source(dev, in_dev, skb, daddr, saddr);
    goto out;

out_hit项

out_hit项固定为零

out_slow_tot项

在出口路由函数ip_route_output_key_hash_rcu中,由以下函数__mkroute_output分配路由缓存,并且递增out_slow_tot计数。

static struct rtable *__mkroute_output(const struct fib_result *res,
                       const struct flowi4 *fl4, int orig_oif,
                       struct net_device *dev_out, unsigned int flags)
{

    rth = rt_dst_alloc(dev_out, flags, type,
               IN_DEV_CONF_GET(in_dev, NOPOLICY),
               IN_DEV_CONF_GET(in_dev, NOXFRM));
    if (!rth)
        return ERR_PTR(-ENOBUFS);

    rth->rt_iif = orig_oif;

    RT_CACHE_STAT_INC(out_slow_tot);

out_slow_mc项

对于出口路由查找,如果查找结果为广播或者多播类型的路由,并且路由出接口不是回环接口,递增out_slow_mc计数。

static struct rtable *__mkroute_output(const struct fib_result *res,
                       const struct flowi4 *fl4, int orig_oif,
                       struct net_device *dev_out, unsigned int flags)
{
    u16 type = res->type;

    if (type == RTN_BROADCAST) {
        flags |= RTCF_BROADCAST | RTCF_LOCAL;
        fi = NULL;
    } else if (type == RTN_MULTICAST) {
        flags |= RTCF_MULTICAST | RTCF_LOCAL;
    ...
    rth = rt_dst_alloc(dev_out, flags, type,
               IN_DEV_CONF_GET(in_dev, NOPOLICY),
               IN_DEV_CONF_GET(in_dev, NOXFRM));
    if (!rth)
        return ERR_PTR(-ENOBUFS);

    rth->rt_iif = orig_oif;

    RT_CACHE_STAT_INC(out_slow_tot);

    if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
        if (flags & RTCF_LOCAL &&
            !(dev_out->flags & IFF_LOOPBACK)) {
            rth->dst.output = ip_mc_output;
            RT_CACHE_STAT_INC(out_slow_mc);
        }

以下显示项固定为零。

  • gc_total
  • gc_ignored
  • gc_goal_miss
  • gc_dst_overflow
  • in_hlist_search
  • out_hlist_search

PROC文件rt_cache

函数ip_rt_do_proc_init注册了两个同名的rt_cache文件:一个位于net->proc_net之下,处理结构为rt_cache_seq_fops;另一个位于net->proc_net_stat之下,即本文讨论的/proc/net/stat/rt_cache,处理结构为rt_cpu_seq_fops。

static int __net_init ip_rt_do_proc_init(struct net *net)
{
    struct proc_dir_entry *pde;

    pde = proc_create("rt_cache", S_IRUGO, net->proc_net,
              &rt_cache_seq_fops);
    if (!pde)
        goto err1;

    pde = proc_create("rt_cache", S_IRUGO,
              net->proc_net_stat, &rt_cpu_seq_fops);
    if (!pde)
        goto err2;

如下rt_cpu_seq_fops的定义。

/*
 * seq_file iterator callbacks used by rt_cpu_seq_open(); rt_cpu_seq_show()
 * formats the rows of the rt_cache statistics file.
 */
static const struct seq_operations rt_cpu_seq_ops = {
    .start  = rt_cpu_seq_start,
    .next   = rt_cpu_seq_next,
    .stop   = rt_cpu_seq_stop,
    .show   = rt_cpu_seq_show,
};


/*
 * open() handler for the rt_cache statistics file: binds rt_cpu_seq_ops to
 * the file through seq_open() and returns seq_open()'s result unchanged.
 * The inode argument is not used.
 */
static int rt_cpu_seq_open(struct inode *inode, struct file *file)
{
    return seq_open(file, &rt_cpu_seq_ops);
}

/*
 * File operations registered for "rt_cache" under net->proc_net_stat.
 * Only open is file-specific; read, llseek and release are the generic
 * seq_file helpers.
 */
static const struct file_operations rt_cpu_seq_fops = {
    .owner   = THIS_MODULE,
    .open    = rt_cpu_seq_open,
    .read    = seq_read,
    .llseek  = seq_lseek,
    .release = seq_release,
};

主要看一下显示函数rt_cpu_seq_show,其中参数v指向某一个处理器的rt_cache_stat结构,因此每一行显示一个处理器的统计值。entries列由dst_entries_get_slow计算,表示所有处理器累计的cache总数,所以每一行显示的值都是相同的,如0x1369b26。

static int rt_cpu_seq_show(struct seq_file *seq, void *v)
{
    struct rt_cache_stat *st = v;

    if (v == SEQ_START_TOKEN) {
        seq_printf(seq, "entries  in_hit in_slow_tot in_slow_mc in_no_route in_brd in_martian_dst in_martian_src  out_hit out_slow_tot out_slow_mc  gc_total gc_ignored gc_goal_miss gc_dst_overflow in_hlist_search out_hlist_search\n");
        return 0;
    }
    seq_printf(seq,"%08x  %08x %08x %08x %08x %08x %08x %08x "
           " %08x %08x %08x %08x %08x %08x %08x %08x %08x \n",
           dst_entries_get_slow(&ipv4_dst_ops),
           0, /* st->in_hit */
           st->in_slow_tot,
           st->in_slow_mc,
           st->in_no_route,
           st->in_brd,
           st->in_martian_dst,
           st->in_martian_src,

           0, /* st->out_hit */
           st->out_slow_tot,
           st->out_slow_mc,

           0, /* st->gc_total */
           0, /* st->gc_ignored */
           0, /* st->gc_goal_miss */
           0, /* st->gc_dst_overflow */
           0, /* st->in_hlist_search */
           0  /* st->out_hlist_search */
        );

函数dst_entries_get_slow将每处理器变量的值进行了累计。

/*
 * Return the summed value of dst->pcpuc_entries, clamped to be non-negative
 * by percpu_counter_sum_positive().
 * NOTE(review): the helper returns s64 while this function returns int, so
 * the result is implicitly narrowed.
 */
static inline int dst_entries_get_slow(struct dst_ops *dst)
{
    return percpu_counter_sum_positive(&dst->pcpuc_entries);
}
/*
 * Return __percpu_counter_sum(fbc), or 0 when that sum is negative.
 */
static inline s64 percpu_counter_sum_positive(struct percpu_counter *fbc)
{
    s64 ret = __percpu_counter_sum(fbc);
    return ret < 0 ? 0 : ret;
}
/*
 * Compute the full value of a per-CPU counter: start from fbc->count and
 * add the per-CPU delta of every online CPU. The whole walk runs under
 * fbc->lock with the IRQ state saved and restored.
 */
s64 __percpu_counter_sum(struct percpu_counter *fbc)
{
    s64 ret;
    int cpu;
    unsigned long flags;

    raw_spin_lock_irqsave(&fbc->lock, flags);
    /* Base value first, then each online CPU's s32 contribution. */
    ret = fbc->count;
    for_each_online_cpu(cpu) {
        s32 *pcount = per_cpu_ptr(fbc->counters, cpu);
        ret += *pcount;
    }
    raw_spin_unlock_irqrestore(&fbc->lock, flags);
    return ret;  
}

内核版本 5.10

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值