基本概念
VPP的路由实现可以看作是思科IOS CEF的翻版:它将路由分为路由表和转发表。路由表的实现基于hash,转发表的实现基于8-8-8-8 mtrie树。
路由表存储了所有可能的路由表项(其中最优路由将安装到转发表中),转发表存储了实际使用的路由项(数据包转发时真正查找的是这个转发表)。
VPP转发表查找的最后结果将得到一个DPO(data path object),DPO将指示数据包的下一步动作(分类,ARP查找,丢弃等等,请查看DPO_TYPES枚举类型)。
以下皆以ipv4来分析,只分析了我看懂了的一部分。
路由表:
/*
 * State of one IPv4 FIB table: the per-prefix-length hash tables that hold
 * the route entries, and the mtrie that holds the forwarding entries.
 */
typedef struct ip4_fib_t
{
/* Hash table for each prefix length mapping. */
/* Route entries are hashed per mask length; 33 slots, presumably one per
 * IPv4 prefix length 0..32. */
uword *fib_entry_by_dst_address[33];
/* Mtrie for fast lookups. Hash is used to maintain overlapping prefixes. */
/* This is the 8-8-8-8 mtrie forwarding table. */
ip4_fib_mtrie_t mtrie;
/* Table ID (hash key) for this FIB. */
u32 table_id;
/* Index into FIB vector. */
u32 index;
/* flow hash configuration */
flow_hash_config_t flow_hash_config;
/* N-tuple classifier indices */
u32 fwd_classify_table_index;
u32 rev_classify_table_index;
} ip4_fib_t;
路由表项hash链入fib_entry_by_dst_address中。
转发表项安装在mtrie中,这是实际被数据平面使用的查找数据库。
路由表的任何变化,将导致转发表的更新。
路由表项:
/**
 * A route entry: one prefix in one FIB table, together with the sources that
 * contributed it and the load-balance DPO used to forward matching packets.
 */
typedef struct fib_entry_t_ {
/**
* Base class. The entry's node representation in the graph.
*/
/* Every key FIB data structure starts with a fib_node_t, which acts as a
 * common base class: it lets the different object types be linked together
 * and walked through one uniform interface. */
fib_node_t fe_node;
/**
* The prefix of the route. this is const just to be sure.
* It is the entry's key/identity and so should never change.
*/
/* In effect: the route's destination address plus its mask. */
const fib_prefix_t fe_prefix;
/**
* The index of the FIB table this entry is in
*/
u32 fe_fib_index;
/**
* The load-balance used for forwarding.
*
* We don't share the EOS and non-EOS even in case when they could be
* because:
* - complexity & reliability v. memory
* determining the conditions where sharing is possible is non-trivial.
* - separate LBs means we can get the EOS bit right in the MPLS label DPO
* and so save a few clock cycles in the DP imposition node since we can
* paint the header straight on without the need to check the packet
* type to derive the EOS bit value.
*/
/* Each route entry points at its own DPO of type DPO_LOAD_BALANCE. That DPO
 * is a container holding other DPOs; the next hop actually taken once the
 * route lookup completes is one of the DPOs stored in the container. */
dpo_id_t fe_lb; // [FIB_FORW_CHAIN_MPLS_NUM];
/**
* Vector of source infos.
* Most entries will only have 1 source. So we optimise for memory usage,
* which is preferable since we have many entries.
*/
/* The same destination may be added by several different sources (see the
 * fib_source_t_ enumeration). Per the original author's notes: a numerically
 * smaller source has higher priority, the highest-priority source's route is
 * the one installed in the forwarding table, and lower-priority sources are
 * kept quietly behind it. The vector is kept sorted on every add/remove, so
 * slot 0 is the best route. */
fib_entry_src_t *fe_srcs;
/**
* the path-list for which this entry is a child. This is also the path-list
* that is contributing forwarding for this entry.
*/
/* Several entries may share one path-list, so each entry records which
 * path-list it is using. */
fib_node_index_t fe_parent;
/**
* index of this entry in the parent's child list.
* This is set when this entry is added as a child, but can also
* be changed by the parent as it manages its list.
*/
/* This entry's index as a child of the path-list. */
u32 fe_sibling;
/**
* A vector of delegates.
*/
fib_entry_delegate_t *fe_delegates;
} fib_entry_t;
source:
表明该路由项的来源,每个source结构中包含了一个path_list,这个表包含了所有可能的下一跳。
/**
 * Per-source information attached to a FIB entry: which source contributed
 * the route, the path-list that source created, and source-specific state.
 */
typedef struct fib_entry_src_t_ {
/**
* A vector of path extensions
*/
struct fib_path_ext_t_ *fes_path_exts;
/**
* The path-list created by the source
*/
/* Index through which this source's path-list is found. */
fib_node_index_t fes_pl;
/**
* Which source this info block is for
*/
fib_source_t fes_src;
/**
* Flags on the source
*/
fib_entry_src_flag_t fes_flags;
/**
* 1 bytes ref count. This is not the number of users of the Entry
* (which is itself not large, due to path-list sharing), but the number
* of times a given source has been added. Which is even fewer
*/
u8 fes_ref_count;
/**
* Flags the source contributes to the entry
*/
fib_entry_flag_t fes_entry_flags;
/**
* Source specific info
*/
union {
struct {
/**
* the index of the FIB entry that is the covering entry
*/
fib_node_index_t fesr_cover;
/**
* This source's index in the cover's list
*/
u32 fesr_sibling;
} rr;
struct {
/**
* the index of the FIB entry that is the covering entry
*/
fib_node_index_t fesa_cover;
/**
* This source's index in the cover's list
*/
u32 fesa_sibling;
} adj;
struct {
/**
* the index of the FIB entry that is the covering entry
*/
fib_node_index_t fesi_cover;
/**
* This source's index in the cover's list
*/
u32 fesi_sibling;
} interface;
struct {
/**
* This MPLS local label associated with the prefix.
*/
mpls_label_t fesm_label;
/**
* the indices of the LFIB entries created
*/
fib_node_index_t fesm_lfes[2];
} mpls;
struct {
/**
* The source FIB index.
*/
fib_node_index_t fesl_fib_index;
} lisp;
};
} fib_entry_src_t;
path和path_list:
到达路由目的地址的一个下一跳称作path,所有可能的下一跳(path)组合在一起称为path_list。
/**
 * A path-list: the set of paths (next hops) that can be used to reach a
 * destination.
 */
typedef struct fib_path_list_t_ {
/**
* A path-list is a node in the FIB graph.
*/
fib_node_t fpl_node;
/**
* Flags on the path-list
*/
fib_path_list_flags_t fpl_flags;
/**
* The next-hop protocol for the paths in this path list.
* Note that fixing the proto here means we don't support a mix of
* v4 and v6 paths. ho hum.
*/
fib_protocol_t fpl_nh_proto;
/**
* Vector of paths indices for all configured paths.
* For shareable path-lists this list MUST not change.
*/
/* The path vector: it records every next hop of this path-list. */
fib_node_index_t *fpl_paths;
/**
* the RPF list calculated for this path list
*/
fib_node_index_t fpl_urpf;
} fib_path_list_t;
关键函数
路由的添加:
从vnet_ip_route_cmd函数入手,这是通过CLI添加路由的入口函数。从中找到路由添加关键函数fib_table_entry_path_add2。
/**
 * Add a vector of paths to the route for @p prefix in table @p fib_index.
 *
 * If no entry exists for exactly this prefix, one is created from the paths
 * and inserted into the table. Otherwise the paths are added to the existing
 * entry on behalf of @p source.
 *
 * @param fib_index index of the FIB table to modify
 * @param prefix    route prefix; looked up by exact match
 * @param source    the source contributing the paths
 * @param flags     entry flags supplied by the source
 * @param rpath     vector of route paths; each element is fixed up in place
 * @return index of the (new or pre-existing) FIB entry
 */
fib_node_index_t
fib_table_entry_path_add2 (u32 fib_index,
                           const fib_prefix_t *prefix,
                           fib_source_t source,
                           fib_entry_flag_t flags,
                           fib_route_path_t *rpath)
{
    fib_node_index_t fei;
    fib_table_t *table;
    u32 path_i;

    /* locate the table, then the entry - exact match, not longest match */
    table = fib_table_get(fib_index, prefix->fp_proto);
    fei = fib_table_lookup_exact_match_i(table, prefix);

    path_i = 0;
    while (path_i < vec_len(rpath))
    {
        fib_table_route_path_fixup(prefix, &rpath[path_i]);
        path_i++;
    }

    if (FIB_NODE_INDEX_INVALID != fei)
    {
        /*
         * The entry already exists. Adding the paths may introduce the
         * source on this entry for the first time or merely extend the
         * source's existing path-list; only the former changes the
         * per-source route count.
         */
        const int sourced_before = fib_entry_is_sourced(fei, source);

        fib_entry_path_add(fei, source, flags, rpath);

        if (sourced_before != fib_entry_is_sourced(fei, source))
        {
            table->ft_src_route_counts[source]++;
        }
        return (fei);
    }

    /* no entry yet: build it (source, path-list, ...) and hash it in */
    fei = fib_entry_create(fib_index, prefix, source, flags, rpath);
    fib_table_entry_insert(table, prefix, fei);
    table->ft_src_route_counts[source]++;

    return (fei);
}
新建立一个路由项,并插入mtrie树
/**
 * Create a new FIB entry for @p prefix sourced by @p source, give it the
 * supplied paths, then activate it and run the post-install actions.
 *
 * @param fib_index index of the FIB table the entry belongs to
 * @param prefix    the entry's prefix
 * @param source    the source creating the entry
 * @param flags     entry flags from the source
 * @param paths     non-empty vector of route paths (asserted)
 * @return index of the new entry
 */
fib_node_index_t
fib_entry_create (u32 fib_index,
                  const fib_prefix_t *prefix,
                  fib_source_t source,
                  fib_entry_flag_t flags,
                  const fib_route_path_t *paths)
{
    fib_node_index_t new_index;
    const dpo_id_t *drop;
    fib_entry_t *entry;

    ASSERT(0 < vec_len(paths));

    /* allocate an initialised entry; new_index is its stable identity */
    entry = fib_entry_alloc(fib_index, prefix, &new_index);

    /*
     * This is a brand new entry, so there is exactly one source and no
     * need to work out which source wins. The source is added with the
     * protocol's drop DPO.
     */
    drop = drop_dpo_get(fib_proto_to_dpo(fib_entry_get_proto(entry)));
    entry = fib_entry_src_action_add(entry, source, flags, drop);

    /* hand the real paths to the source (per-source path-swap handler) */
    fib_entry_src_action_path_swap(entry, source, flags, paths);

    /* the calls above may have realloc'd the entry; re-fetch by index */
    entry = fib_entry_get(new_index);

    /* activate the source on the entry, then run post-install hooks */
    fib_entry_src_action_activate(entry, source);
    fib_entry_post_install_actions(entry, source, FIB_ENTRY_FLAG_NONE);

    return (new_index);
}
/**
 * Record that @p source has been added to @p fib_entry.
 *
 * The source's reference count is always bumped. The remaining work (saving
 * the flags, invoking the source's fesv_add handler if it has one, marking
 * the source ADDED and taking the path-list and entry locks) is done only on
 * the 0 -> 1 transition of that count.
 *
 * @param fib_entry the entry being sourced
 * @param source    the source being added
 * @param flags     entry flags the source contributes
 * @param dpo       DPO passed through to the source's add handler
 * @return the entry pointer, re-fetched by index after the handler ran
 */
fib_entry_t *
fib_entry_src_action_add (fib_entry_t *fib_entry,
                          fib_source_t source,
                          fib_entry_flag_t flags,
                          const dpo_id_t *dpo)
{
    fib_entry_src_t *src_info;
    fib_node_index_t entry_index;

    src_info = fib_entry_src_find_or_create(fib_entry, source, NULL);
    src_info->fes_ref_count += 1;

    if (src_info->fes_ref_count != 1)
    {
        /* already added before: only the 0->1 transition does real work */
        return (fib_entry);
    }

    src_info->fes_entry_flags = flags;

    /*
     * The handler below may cause the entry to be realloc'd; only the
     * index is guaranteed to stay the same, so remember it now.
     */
    entry_index = fib_entry_get_index(fib_entry);

    if (fib_entry_src_vft[source].fesv_add != NULL)
    {
        fib_entry_src_vft[source].fesv_add(src_info,
                                           fib_entry,
                                           flags,
                                           fib_entry_get_proto(fib_entry),
                                           dpo);
    }

    /* recover the (possibly moved) entry from its index */
    fib_entry = fib_entry_get(entry_index);

    src_info->fes_flags |= FIB_ENTRY_SRC_FLAG_ADDED;

    /* "lock" here is a reference count, not a mutual-exclusion lock */
    fib_path_list_lock(src_info->fes_pl);

    /* the source owns a lock on the entry */
    fib_entry_lock(fib_entry_get_index(fib_entry));

    return (fib_entry);
}
/**
 * Add handler for the API source: when the entry carries any flags, create a
 * special path-list from them and store its index in the source. With no
 * flags the source's path-list is left untouched.
 *
 * @param src   the source info block to update
 * @param entry the entry being sourced (unused here)
 * @param flags entry flags; FIB_ENTRY_FLAG_NONE means nothing to do
 * @param proto next-hop protocol for the special path-list
 * @param dpo   DPO handed to the special path-list
 */
static void
fib_entry_src_api_add (fib_entry_src_t *src,
                       const fib_entry_t *entry,
                       fib_entry_flag_t flags,
                       fib_protocol_t proto,
                       const dpo_id_t *dpo)
{
    if (FIB_ENTRY_FLAG_NONE == flags)
    {
        return;
    }

    src->fes_pl =
        fib_path_list_create_special(proto,
                                     fib_entry_src_flags_2_path_list_flags(flags),
                                     dpo);
}
/**
 * Create an unshared path-list holding a single special path, and resolve it.
 *
 * @param nh_proto next-hop protocol of the path-list and its path
 * @param flags    path-list flags; also converted into the path's flags
 * @param dpo      DPO handed to the special path
 * @return index of the new path-list
 */
fib_node_index_t
fib_path_list_create_special (fib_protocol_t nh_proto,
                              fib_path_list_flags_t flags,
                              const dpo_id_t *dpo)
{
    fib_node_index_t pl_index;
    fib_node_index_t special_path;
    fib_path_list_t *pl;

    pl = fib_path_list_alloc(&pl_index);
    pl->fpl_flags = flags;
    pl->fpl_nh_proto = nh_proto;

    /* build the one path and append it to the list */
    special_path = fib_path_create_special(pl_index,
                                           pl->fpl_nh_proto,
                                           fib_path_list_flags_2_path_flags(flags),
                                           dpo);
    vec_add1(pl->fpl_paths, special_path);

    /*
     * we don't share path-lists. we can do PIC on them so why bother.
     * Resolving is the important step: it resolves every path in the list.
     * The returned pointer is not needed since only the index is handed back.
     */
    (void) fib_path_list_resolve(pl);

    return (pl_index);
}
/**
 * Allocate and initialise a "special" path: drop, local/receive, or exclusive.
 *
 * The path type is chosen from @p flags:
 *  - FIB_PATH_CFG_FLAG_DROP  -> FIB_PATH_TYPE_SPECIAL
 *  - FIB_PATH_CFG_FLAG_LOCAL -> FIB_PATH_TYPE_RECEIVE, no interface set
 *  - otherwise               -> FIB_PATH_TYPE_EXCLUSIVE, using a copy of @p dpo
 *
 * @param pl_index index of the path-list that owns the path
 * @param nh_proto next-hop protocol of the path
 * @param flags    path configuration flags
 * @param dpo      caller's DPO; must be non-NULL for the exclusive case
 * @return index of the new path
 */
fib_node_index_t
fib_path_create_special (fib_node_index_t pl_index,
                         fib_protocol_t nh_proto,
                         fib_path_cfg_flags_t flags,
                         const dpo_id_t *dpo)
{
    fib_path_t *path;

    pool_get(fib_path_pool, path);
    memset(path, 0, sizeof(*path));

    fib_node_init(&path->fp_node,
                  FIB_NODE_TYPE_PATH);
    dpo_reset(&path->fp_dpo);

    path->fp_pl_index = pl_index;
    path->fp_weight = 1;
    path->fp_nh_proto = nh_proto;
    path->fp_via_fib = FIB_NODE_INDEX_INVALID;
    path->fp_cfg_flags = flags;

    if (FIB_PATH_CFG_FLAG_DROP & flags)
    {
        path->fp_type = FIB_PATH_TYPE_SPECIAL;
    }
    else if (FIB_PATH_CFG_FLAG_LOCAL & flags)
    {
        path->fp_type = FIB_PATH_TYPE_RECEIVE;
        /*
         * A RECEIVE path is resolved from its 'receive' members, so
         * initialise that arm of the union rather than relying on
         * 'attached.fp_interface' happening to alias it.
         */
        path->receive.fp_interface = FIB_NODE_INDEX_INVALID;
    }
    else
    {
        /* exclusive: forward via the DPO supplied by the caller */
        path->fp_type = FIB_PATH_TYPE_EXCLUSIVE;
        ASSERT(NULL != dpo);
        dpo_copy(&path->exclusive.fp_ex_dpo, dpo);
    }

    return (fib_path_get_index(path));
}
/**
 * Resolve every path of a not-yet-resolved path-list, then build its uRPF
 * list.
 *
 * @param path_list the path-list to resolve; must not already be RESOLVED
 * @return the path-list, re-fetched by index after resolution
 */
static fib_path_list_t *
fib_path_list_resolve (fib_path_list_t *path_list)
{
    fib_node_index_t pl_index;
    fib_node_index_t *path_copy;
    u32 ii;

    ASSERT(!(path_list->fpl_flags & FIB_PATH_LIST_FLAG_RESOLVED));

    /*
     * resolving a path-list is a recursive action. this means more path
     * lists can be created during this call, and hence this path-list
     * can be realloc'd. so we work with copies: the list's index and a
     * duplicate of its path vector.
     * this function is called only once per-path list, so its no great
     * overhead.
     */
    pl_index = fib_path_list_get_index(path_list);
    path_copy = vec_dup(path_list->fpl_paths);

    for (ii = 0; ii < vec_len(path_copy); ii++)
    {
        /* recursive paths recurse here and may grow the path-list pool */
        fib_path_resolve(path_copy[ii]);
    }
    vec_free(path_copy);

    path_list = fib_path_list_get(pl_index);
    FIB_PATH_LIST_DBG(path_list, "resovled");
    fib_path_list_mk_urpf(path_list);

    return (path_list);
}
/*
 * Resolve one path: work out the DPO the path forwards through and store it
 * in path->fp_dpo. How that is done depends on the path's type (see the
 * switch below). The original author notes that the details are extensive
 * and left for a later write-up.
 *
 * @param path_index index of the path to resolve
 * @return the value of fib_path_is_resolved() for the path
 */
int
fib_path_resolve (fib_node_index_t path_index)
{
fib_path_t *path;
path = fib_path_get(path_index);
/*
* hope for the best.
*/
/* Optimistically mark the path resolved; branches below clear the flag
 * again when resolution is not possible. */
path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RESOLVED;
/*
* the forced drop path resolves via the drop adj
*/
if (fib_path_is_permanent_drop(path))
{
dpo_copy(&path->fp_dpo,
drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto)));
path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
return (fib_path_is_resolved(path_index));
}
switch (path->fp_type)
{
case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
fib_path_attached_next_hop_set(path);
break;
case FIB_PATH_TYPE_ATTACHED:
/*
* path->attached.fp_interface
*/
/* An admin-down interface leaves the path unresolved, but the
 * adjacency is still set up below. */
if (!vnet_sw_interface_is_admin_up(vnet_get_main(),
path->attached.fp_interface))
{
path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
}
if (vnet_sw_interface_is_p2p(vnet_get_main(),
path->attached.fp_interface))
{
/*
* point-2-point interfaces do not require a glean, since
* there is nothing to ARP. Install a rewrite/nbr adj instead
*/
dpo_set(&path->fp_dpo,
DPO_ADJACENCY,
fib_proto_to_dpo(path->fp_nh_proto),
adj_nbr_add_or_lock(
path->fp_nh_proto,
fib_proto_to_link(path->fp_nh_proto),
&zero_addr,
path->attached.fp_interface));
}
else
{
dpo_set(&path->fp_dpo,
DPO_ADJACENCY_GLEAN,
fib_proto_to_dpo(path->fp_nh_proto),
adj_glean_add_or_lock(path->fp_nh_proto,
path->attached.fp_interface,
NULL));
}
/*
* become a child of the adjacency so we receive updates
* when the interface state changes
*/
path->fp_sibling = adj_child_add(path->fp_dpo.dpoi_index,
FIB_NODE_TYPE_PATH,
fib_path_get_index(path));
break;
case FIB_PATH_TYPE_RECURSIVE:
{
/*
* Create a RR source entry in the table for the address
* that this path recurses through.
* This resolve action is recursive, hence we may create
* more paths in the process. more creates mean maybe realloc
* of this path.
*/
fib_node_index_t fei;
fib_prefix_t pfx;
ASSERT(FIB_NODE_INDEX_INVALID == path->fp_via_fib);
if (FIB_PROTOCOL_MPLS == path->fp_nh_proto)
{
fib_prefix_from_mpls_label(path->recursive.fp_nh.fp_local_label, &pfx);
}
else
{
fib_prefix_from_ip46_addr(&path->recursive.fp_nh.fp_ip, &pfx);
}
/* The next hop itself is inserted into the route table as an RR-sourced
 * entry; this is where the recursion starts. */
fei = fib_table_entry_special_add(path->recursive.fp_tbl_id,
&pfx,
FIB_SOURCE_RR,
FIB_ENTRY_FLAG_NONE,
ADJ_INDEX_INVALID);
/* Re-fetch the path by index: the add above may have realloc'd it. */
path = fib_path_get(path_index);
path->fp_via_fib = fei;
/*
* become a dependent child of the entry so the path is
* informed when the forwarding for the entry changes.
*/
path->fp_sibling = fib_entry_child_add(path->fp_via_fib,
FIB_NODE_TYPE_PATH,
fib_path_get_index(path));
/*
* create and configure the IP DPO
*/
fib_path_recursive_adj_update(
path,
fib_path_proto_to_chain_type(path->fp_nh_proto),
&path->fp_dpo);
break;
}
case FIB_PATH_TYPE_SPECIAL:
/*
* Resolve via the drop
*/
dpo_copy(&path->fp_dpo,
drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto)));
break;
case FIB_PATH_TYPE_DEAG:
/*
* Resolve via a lookup DPO.
* FIXME. control plane should add routes with a table ID
*/
lookup_dpo_add_or_lock_w_fib_index(path->deag.fp_tbl_id,
fib_proto_to_dpo(path->fp_nh_proto),
LOOKUP_INPUT_DST_ADDR,
LOOKUP_TABLE_FROM_CONFIG,
&path->fp_dpo);
break;
case FIB_PATH_TYPE_RECEIVE:
/*
* Resolve via a receive DPO.
*/
receive_dpo_add_or_lock(fib_proto_to_dpo(path->fp_nh_proto),
path->receive.fp_interface,
&path->receive.fp_addr,
&path->fp_dpo);
break;
case FIB_PATH_TYPE_EXCLUSIVE:
/*
* Resolve via the user provided DPO
*/
dpo_copy(&path->fp_dpo, &path->exclusive.fp_ex_dpo);
break;
}
return (fib_path_is_resolved(path_index));
}
/**
 * Path-swap handler for the API source: replace the source's path-list with
 * a shared path-list built from @p paths.
 *
 * @param src      source info block whose path-list index is overwritten
 * @param entry    the entry being updated (unused here)
 * @param pl_flags path-list flags; FIB_PATH_LIST_FLAG_SHARED is always added
 * @param paths    vector of route paths for the new path-list
 */
static void
fib_entry_src_api_path_swap (fib_entry_src_t *src,
                             const fib_entry_t *entry,
                             fib_path_list_flags_t pl_flags,
                             const fib_route_path_t *paths)
{
    /*
     * Requesting a SHARED list lets fib_path_list_create() hand back an
     * identical path-list that already exists instead of keeping a duplicate.
     */
    const fib_path_list_flags_t wanted = (FIB_PATH_LIST_FLAG_SHARED | pl_flags);

    src->fes_pl = fib_path_list_create(wanted, paths);
}
/**
 * Create a path-list from a vector of route paths.
 *
 * When the (fixed-up) flags request a shared list, the path-list DB is
 * consulted first: a matching list is reused and the freshly built one is
 * destroyed; otherwise the new list is inserted into the DB and resolved.
 * Unshared lists are always resolved and returned as-is.
 *
 * @param flags  path-list flags; passed through fib_path_list_flags_fixup()
 * @param rpaths vector of route paths; element 0 supplies the protocol
 * @return index of the path-list to use
 */
fib_node_index_t
fib_path_list_create (fib_path_list_flags_t flags,
                      const fib_route_path_t *rpaths)
{
    fib_node_index_t pl_index, existing_index;
    fib_path_list_t *pl;
    int pi;

    flags = fib_path_list_flags_fixup(flags);

    pl = fib_path_list_alloc(&pl_index);
    pl->fpl_flags = flags;

    /* we'll assume for now all paths are the same next-hop protocol */
    pl->fpl_nh_proto = rpaths[0].frp_proto;

    /* create one path per route path and collect their indices */
    for (pi = 0; pi < vec_len(rpaths); pi++)
    {
        vec_add1(pl->fpl_paths,
                 fib_path_create(pl_index,
                                 pl->fpl_nh_proto,
                                 fib_path_list_flags_2_path_flags(flags),
                                 &rpaths[pi]));
    }

    if (!(flags & FIB_PATH_LIST_FLAG_SHARED))
    {
        /* no sharing requested: resolve and use the one just created */
        fib_path_list_resolve(pl);
        return (pl_index);
    }

    /*
     * Shared list requested: look for a matching path-list in the DB.
     * Per the original author's reading of the key-compare function
     * fib_path_list_db_hash_key_equal, the DB key is not the path-list's
     * address but is accumulated from parts of each path, so two lists
     * match only when their paths match one for one.
     */
    existing_index = fib_path_list_db_find(pl);

    if (FIB_NODE_INDEX_INVALID != existing_index)
    {
        /* a match exists: discard the new list, hand back the old one */
        fib_path_list_destroy(pl);
        return (existing_index);
    }

    /* no match: the new list goes into the DB and needs resolving */
    fib_path_list_db_insert(pl_index);
    fib_path_list_resolve(pl);

    return (pl_index);
}
路由表、路由表项、source、path_list、path都准备好了。接下来要把路由安装到转发表中,安装之后才会真正对数据包转发生效。
/**
 * Make @p source the active source of @p fib_entry.
 *
 * Marks the source ACTIVE, asks the source's optional fesv_activate handler
 * whether the entry may be installed, links the entry as a child of the
 * source's path-list, runs loop detection, and finally installs or
 * uninstalls the entry accordingly.
 *
 * @param fib_entry the entry to activate
 * @param source    the source to activate; must be ADDED and not yet ACTIVE
 */
void
fib_entry_src_action_activate (fib_entry_t *fib_entry,
                               fib_source_t source)
{
    fib_entry_src_t *src_info;
    int go_for_install;

    src_info = fib_entry_src_find(fib_entry, source, NULL);

    ASSERT(!(src_info->fes_flags & FIB_ENTRY_SRC_FLAG_ACTIVE));
    ASSERT(src_info->fes_flags & FIB_ENTRY_SRC_FLAG_ADDED);

    src_info->fes_flags |= FIB_ENTRY_SRC_FLAG_ACTIVE;

    /*
     * a source that provides no activate function is assumed to have no
     * objection to installing the entry
     */
    go_for_install = 1;
    if (fib_entry_src_vft[source].fesv_activate != NULL)
    {
        go_for_install =
            fib_entry_src_vft[source].fesv_activate(src_info, fib_entry);
    }

    /*
     * link to the path-list provided by the source, and go check
     * if that forms any loops in the graph.
     */
    fib_entry->fe_parent = src_info->fes_pl;
    fib_entry->fe_sibling =
        fib_path_list_child_add(fib_entry->fe_parent,
                                FIB_NODE_TYPE_ENTRY,
                                fib_entry_get_index(fib_entry));

    fib_entry_recursive_loop_detect_i(fib_entry->fe_parent);

    FIB_ENTRY_DBG(fib_entry, "activate: %d",
                  fib_entry->fe_parent);

    if (0 == go_for_install)
    {
        fib_entry_src_action_uninstall(fib_entry);
    }
    else
    {
        fib_entry_src_action_install(fib_entry, source);
    }
}
/**
 * Install the forwarding chain for the given source into the forwarding
 * tables.
 *
 * Builds (or updates in place) the entry's load-balance DPO from the
 * source's path-list. The DPO is inserted into the data-plane table only
 * when it has just been created, and any delegate chains that already exist
 * are then refreshed.
 *
 * @param fib_entry the entry to install
 * @param source    the source whose forwarding is installed
 */
void
fib_entry_src_action_install (fib_entry_t *fib_entry,
                              fib_source_t source)
{
    fib_forward_chain_type_t fct;
    fib_entry_src_t *esrc;
    int insert;

    fct = fib_entry_get_default_chain_type(fib_entry);
    esrc = fib_entry_src_find(fib_entry, source, NULL);

    /*
     * Every entry has its own load-balance object. All changes to the entry's
     * forwarding result in an inplace modify of the load-balance. This means
     * the load-balance object only needs to be added to the forwarding
     * DB once, when it is created.
     */
    insert = !dpo_id_is_valid(&fib_entry->fe_lb);

    /* build the final DPO for the forwarding table from the paths' DPOs */
    fib_entry_src_mk_lb(fib_entry, esrc, fct, &fib_entry->fe_lb);

    ASSERT(dpo_id_is_valid(&fib_entry->fe_lb));

    /* log the load-balance index: fe_lb is a dpo_id_t struct, not an int */
    FIB_ENTRY_DBG(fib_entry, "install: %d", fib_entry->fe_lb.dpoi_index);

    /*
     * insert the adj into the data-plane forwarding trie
     */
    if (insert)
    {
        /* this is the install into the 8-8-8-8 mtrie forwarding table */
        fib_table_fwding_dpo_update(fib_entry->fe_fib_index,
                                    &fib_entry->fe_prefix,
                                    &fib_entry->fe_lb);
    }

    /*
     * if any of the other chain types are already created they will need
     * updating too
     */
    fib_entry_delegate_type_t fdt;
    fib_entry_delegate_t *fed;

    FOR_EACH_DELEGATE_CHAIN(fib_entry, fdt, fed,
    {
        fib_entry_src_mk_lb(fib_entry, esrc,
                            fib_entry_delegate_type_to_chain_type(fdt),
                            &fed->fd_dpo);
    });
}
/**
 * Build or update the load-balance DPO for an entry from a source's
 * path-list.
 *
 * The path-list is walked to collect the next hops. For an EXCLUSIVE entry
 * whose single next hop is already a load-balance, that DPO is used
 * directly. Otherwise a load-balance is created on first use, updated with
 * the collected next hops, and given its uRPF list.
 *
 * @param fib_entry the entry the load-balance belongs to
 * @param esrc      the source whose path-list supplies the forwarding
 * @param fct       the forwarding chain type to build
 * @param dpo_lb    in/out: the load-balance DPO; created here if not valid
 */
void
fib_entry_src_mk_lb (fib_entry_t *fib_entry,
                     const fib_entry_src_t *esrc,
                     fib_forward_chain_type_t fct,
                     dpo_id_t *dpo_lb)
{
    dpo_proto_t lb_proto;

    /*
     * If the entry has path extensions then we construct a load-balance
     * by stacking the extensions on the forwarding chains of the paths.
     * Otherwise we use the load-balance of the path-list
     */
    fib_entry_src_collect_forwarding_ctx_t ctx = {
        .esrc = esrc,
        .fib_entry = fib_entry,
        .next_hops = NULL,
        .is_recursive = 0,
        .fct = fct,
    };

    /*
     * As an optimisation we allocate the vector of next-hops to be sized
     * equal to the maximum number of paths we will need, which is also the
     * most likely number we will need, since in most cases the paths are 'up'.
     */
    vec_validate(ctx.next_hops, fib_path_list_get_n_paths(esrc->fes_pl));
    vec_reset_length(ctx.next_hops);

    lb_proto = fib_proto_to_dpo(fib_entry->fe_prefix.fp_proto);

    fib_path_list_walk(esrc->fes_pl,
                       fib_entry_src_collect_forwarding,
                       &ctx);

    if (esrc->fes_entry_flags & FIB_ENTRY_FLAG_EXCLUSIVE)
    {
        /*
         * the client provided the DPO that the entry should link to.
         * all entries must link to a LB, so if it is an LB already
         * then we can use it.
         */
        if ((1 == vec_len(ctx.next_hops)) &&
            (DPO_LOAD_BALANCE == ctx.next_hops[0].path_dpo.dpoi_type))
        {
            dpo_copy(dpo_lb, &ctx.next_hops[0].path_dpo);
            dpo_reset(&ctx.next_hops[0].path_dpo);
            /*
             * release the next-hop vector on this early exit too;
             * otherwise the allocation made above is leaked.
             */
            vec_free(ctx.next_hops);
            return;
        }
    }

    if (!dpo_id_is_valid(dpo_lb))
    {
        /*
         * first time create
         */
        flow_hash_config_t fhc;

        fhc = fib_table_get_flow_hash_config(fib_entry->fe_fib_index,
                                             dpo_proto_to_fib(lb_proto));
        dpo_set(dpo_lb,
                DPO_LOAD_BALANCE,
                lb_proto,
                load_balance_create(0, lb_proto, fhc));
    }

    load_balance_multipath_update(dpo_lb,
                                  ctx.next_hops,
                                  fib_entry_calc_lb_flags(&ctx));
    vec_free(ctx.next_hops);

    /*
     * if this entry is sourced by the uRPF-exempt source then we
     * append the always present local0 interface (index 0) to the
     * uRPF list so it is not empty. that way packets pass the loose check.
     */
    index_t ui = fib_path_list_get_urpf(esrc->fes_pl);

    if (fib_entry_is_sourced(fib_entry_get_index(fib_entry),
                             FIB_SOURCE_URPF_EXEMPT) &&
        (0 == fib_urpf_check_size(ui)))
    {
        /*
         * The uRPF list we get from the path-list is shared by all
         * other users of the list, but the uRPF exemption applies
         * only to this prefix. So we need our own list.
         */
        ui = fib_urpf_list_alloc_and_lock();
        fib_urpf_list_append(ui, 0);
        fib_urpf_list_bake(ui);
        load_balance_set_urpf(dpo_lb->dpoi_index, ui);
        fib_urpf_list_unlock(ui);
    }
    else
    {
        load_balance_set_urpf(dpo_lb->dpoi_index, ui);
    }
}